human_expt <- sm(create_expt(metadata="sample_sheets/all_samples-combined.xlsx", gene_info=hs_annotations, file_column="humanfile"))
biopsy_expt <- expt_subset(human_expt, subset="experimentname=='biopsy'")
biopsy_expt <- set_expt_colors(expt=biopsy_expt, colors=c("darkblue", "darkred", "lightblue", "red"))
##biopsy_expt$colors <- c("lightblue","lightblue","darkblue","darkblue","red","darkred")
biopsy_expt$batches
## HPGL0644 HPGL0645 HPGL0646 HPGL0647 HPGL0648 HPGL0649
## "d1016" "d1016" "d1021" "d1021" "d2040" "d2040"
biopsy_metrics <- graph_metrics(biopsy_expt)
## Graphing number of non-zero genes with respect to CPM by library.
## Graphing library sizes.
## Graphing a boxplot.
## This data will benefit from being displayed on the log scale.
## If this is not desired, set scale='raw'
## Some entries are 0. We are on log scale, setting them to 0.5.
## Changed 170832 zero count features.
## Graphing a correlation heatmap.
## Graphing a standard median correlation.
## Performing correlation.
## Graphing a distance heatmap.
## Graphing a standard median distance.
## Performing distance.
## Graphing a PCA plot.
## Plotting a density plot.
## This data will benefit from being displayed on the log scale.
## If this is not desired, set scale='raw'
## Some entries are 0. We are on log scale, setting them to 0.5.
## Changed 170832 zero count features.
## Printing a color to condition legend.
biopsy_metrics$libsize
biopsy_metrics$pcaplot
biopsy_metrics$corheat
biopsy_metrics$disheat
biopsy_norm <- normalize_expt(biopsy_expt, transform="log2", filter=TRUE, batch="fsva")
## This function will replace the expt$expressionset slot with:
## log2(fsva(cbcb(data)))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 36873 low-count genes (14168 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: transforming the data with log2.
## transform_counts: Found 146 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with fsva.
## Note to self: If you get an error like 'x contains missing values'; I think this
## means that the data has too many 0's and needs to have a better low-count filter applied.
## batch_counts: Before batch correction, 146 entries are >= 0.
## batch_counts: Using sva::fsva for batch correction.
## The number of elements which are < 0 after batch correction is: 45
## The variable low_to_zero sets whether to change <0 values to 0 and is: FALSE
biopsynorm_metrics <- graph_metrics(biopsy_norm)
## Graphing number of non-zero genes with respect to CPM by library.
## Graphing library sizes.
## Graphing a boxplot.
## Graphing a correlation heatmap.
## Graphing a standard median correlation.
## Performing correlation.
## Graphing a distance heatmap.
## Graphing a standard median distance.
## Performing distance.
## Graphing a PCA plot.
## Plotting a density plot.
## Printing a color to condition legend.
biopsynorm_metrics$pcaplot
info <- pca_information(biopsy_norm, expt_factors=c("condition","batch","time","rnaqcpassed","rnangul"))
## More shallow curves in these plots suggest more genes in this principle component.
## condition
## batch
## time
## rnaqcpassed
## rnangul
## The u and v components of SVD have only 4 columns, but the list of factors is 5 long.
## Therefore, only searching for 4 PCs.
biopsynorm_metrics$corheat
biopsynorm_metrics$disheat
library('pander')
pander(sessionInfo())
R version 3.3.3 (2017-03-06)
**Platform:** x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C
attached base packages: stats4, parallel, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: pander(v.0.6.0), hpgltools(v.2017.01), ruv(v.0.9.6), Homo.sapiens(v.1.3.1), TxDb.Hsapiens.UCSC.hg19.knownGene(v.3.2.2), org.Hs.eg.db(v.3.4.0), Leishmania.braziliensis.MHOMBR75M2904(v.28.0), TxDb.lbraziliensis.MHOMBR75M2904.TriTrypDB28(v.28.0), org.Lbraziliensis.MHOMBR75M2904.eg.db(v.28.0), Leishmania.panamensis.MHOMCOL81L13(v.27.0), GO.db(v.3.4.0), OrganismDbi(v.1.16.0), TxDb.lpanamensis.MHOMCOL81L13.TriTrypDB27(v.27.0), GenomicFeatures(v.1.26.4), GenomicRanges(v.1.26.4), GenomeInfoDb(v.1.10.3), org.Lpanamensis.MHOMCOL81L13.eg.db(v.27.0), AnnotationDbi(v.1.36.2), IRanges(v.2.8.2), S4Vectors(v.0.12.2), Biobase(v.2.34.0), AnnotationHub(v.2.6.5) and BiocGenerics(v.0.20.0)
loaded via a namespace (and not attached): colorspace(v.1.3-2), rprojroot(v.1.2), corpcor(v.1.6.9), XVector(v.0.14.1), base64enc(v.0.1-3), roxygen2(v.6.0.1), ggrepel(v.0.6.5), interactiveDisplayBase(v.1.12.0), prodlim(v.1.6.1), xml2(v.1.1.1), codetools(v.0.2-15), splines(v.3.3.3), knitr(v.1.16), SuppDists(v.1.1-9.4), Rsamtools(v.1.26.2), annotate(v.1.52.1), graph(v.1.52.0), shiny(v.1.0.3), compiler(v.3.3.3), httr(v.1.2.1), backports(v.1.0.5), Matrix(v.1.2-10), lazyeval(v.0.2.0), limma(v.3.30.13), htmltools(v.0.3.6), tools(v.3.3.3), ecodist(v.1.2.9), gtable(v.0.2.0), reshape2(v.1.4.2), Rcpp(v.0.12.11), survcomp(v.1.24.0), Biostrings(v.2.42.1), preprocessCore(v.1.36.0), nlme(v.3.1-131), rtracklayer(v.1.34.2), iterators(v.1.0.8), stringr(v.1.2.0), openxlsx(v.4.0.17), testthat(v.1.0.2), mime(v.0.5), gtools(v.3.5.0), devtools(v.1.13.2), XML(v.3.98-1.7), survJamda(v.1.1.4), edgeR(v.3.16.5), zlibbioc(v.1.20.0), scales(v.0.4.1), BiocInstaller(v.1.24.0), SummarizedExperiment(v.1.4.0), RBGL(v.1.50.0), RColorBrewer(v.1.1-2), yaml(v.2.1.14), curl(v.2.6), memoise(v.1.1.0), ggplot2(v.2.2.1), biomaRt(v.2.30.0), stringi(v.1.1.5), RSQLite(v.1.1-2), rmeta(v.2.16), genefilter(v.1.56.0), foreach(v.1.4.3), BiocParallel(v.1.8.2), lava(v.1.5), rlang(v.0.1.1), commonmark(v.1.2), matrixStats(v.0.52.2), bitops(v.1.0-6), evaluate(v.0.10), lattice(v.0.20-35), GenomicAlignments(v.1.10.1), labeling(v.0.3), survivalROC(v.1.0.3), plyr(v.1.8.4), magrittr(v.1.5), R6(v.2.2.1), bootstrap(v.2017.2), DBI(v.0.6-1), withr(v.1.0.2), mgcv(v.1.8-17), survival(v.2.41-3), RCurl(v.1.95-4.8), tibble(v.1.3.3), survJamda.data(v.1.0.2), crayon(v.1.3.2), KernSmooth(v.2.23-15), rmarkdown(v.1.5), locfit(v.1.5-9.1), grid(v.3.3.3), sva(v.3.22.0), data.table(v.1.10.4), digest(v.0.6.12), xtable(v.1.8-2), httpuv(v.1.3.3) and munsell(v.0.4.3)