At this point, I am reasonably sure that the data are not wildly aberrant or replete with difficult-to-assay batch effects. We have (I think) a reasonable factor in the experimental design to include as batch, so I will allow my differential expression tools to use it.
Grab the data structure from sample_estimation_mmusculus and see what happens.
## Drop low count genes: run normalize_expt() on isc_mm with only the filter
## option turned on, silencing its chatter with sm().
isc_filtered <- sm(
  normalize_expt(isc_mm, filter = TRUE)
)
## Baseline: every pairwise contrast on the filtered data, with no batch term
## in the model and without parallel execution.
isc_mm_de_default <- sm(
  all_pairwise(
    isc_filtered,
    model_batch = FALSE,
    parallel = FALSE
  )
)
## Merge the per-method results and write them to a versioned workbook.
isc_mm_written_default <- sm(
  combine_de_tables(
    isc_mm_de_default,
    excel = paste0("excel/mm_de_voom_default-v", ver, ".xlsx")
  )
)
## Do a slightly different assessment via limma: same no-batch model, but with
## which_voom="limma" and limma_robust=TRUE.
## Feed it the low-count-filtered data (isc_filtered) so that this comparison
## starts from the same input as the default, batch, and sva comparisons;
## the original passed the unfiltered isc_mm here, which made this run the only
## one not using the filtered data.
## NOTE(review): if the unfiltered input was deliberate, revert to isc_mm and
## say so in this comment.
isc_mm_de <- sm(
  all_pairwise(
    isc_filtered,
    model_batch = FALSE,
    which_voom = "limma",
    limma_robust = TRUE
  )
)
## Merge the per-method results and write them to a versioned workbook.
isc_mm_written <- sm(
  combine_de_tables(
    isc_mm_de,
    excel = paste0("excel/mm_de_voom-v", ver, ".xlsx")
  )
)
## Pull the significant genes out of the combined tables into their own workbook.
isc_mm_sig <- sm(
  extract_significant_genes(
    isc_mm_written,
    excel = paste0("excel/mm_sig-v", ver, ".xlsx")
  )
)
## Repeat the pairwise contrasts on the filtered data, this time with the
## batch factor included in the model.
isc_mm_de_batch <- sm(
  all_pairwise(isc_filtered, model_batch = TRUE)
)
## Merge the per-method results and write them to a versioned workbook.
isc_mm_written_batch <- sm(
  combine_de_tables(
    isc_mm_de_batch,
    excel = paste0("excel/mm_de_voom_batch-v", ver, ".xlsx")
  )
)
## Pull the significant genes out of the combined tables into their own workbook.
isc_mm_sig_batch <- sm(
  extract_significant_genes(
    isc_mm_written_batch,
    excel = paste0("excel/mm_sig_batch-v", ver, ".xlsx")
  )
)
## Repeat once more on the filtered data, asking for "svaseq" in place of the
## experimental batch factor, with voom_norm="quant", which_voom="limma",
## limma_robust=TRUE, and no parallel execution.
isc_mm_de_sva <- sm(
  all_pairwise(
    isc_filtered,
    model_batch = "svaseq",
    voom_norm = "quant",
    parallel = FALSE,
    which_voom = "limma",
    limma_robust = TRUE
  )
)
## Merge the per-method results and write them to a versioned workbook.
isc_mm_written_sva <- sm(
  combine_de_tables(
    isc_mm_de_sva,
    excel = paste0("excel/mm_de_voom_sva-v", ver, ".xlsx")
  )
)
## Pull the significant genes out of the combined tables into their own workbook.
isc_mm_sig_sva <- sm(
  extract_significant_genes(
    isc_mm_written_sva,
    excel = paste0("excel/mm_sig_sva-v", ver, ".xlsx")
  )
)
## Auto-print the "scatter" element stored under limma_plots for the wt_vs_mut
## contrast of isc_mm_written (the combined tables from the no-batch limma run).
## NOTE(review): the geom_vline/geom_hline warnings this emits suggest it is a
## ggplot object with reference lines -- confirm what the axes represent.
isc_mm_written$limma_plots$wt_vs_mut$scatter
## Warning: Removed 1 rows containing missing values (geom_vline).
## Warning: Removed 1 rows containing missing values (geom_hline).
## Draw the up_venn element stored under venns for the wt_vs_mut contrast of
## isc_mm_written.
## NOTE(review): presumably a Venn diagram of the up-regulated genes shared
## between the DE methods -- confirm against combine_de_tables().
plot(isc_mm_written$venns$wt_vs_mut$up_venn)
## Record the R session (R version, platform, locale, attached and loaded
## packages) in the report.
pander::pander(utils::sessionInfo())
R version 3.4.4 (2018-03-15)
**Platform:** x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C
attached base packages: stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: ruv(v.0.9.7), foreach(v.1.4.4), Vennerable(v.3.1.0.9000), edgeR(v.3.20.9) and hpgltools(v.2018.03)
loaded via a namespace (and not attached): minqa(v.1.2.4), colorspace(v.1.3-2), colorRamps(v.2.3), rprojroot(v.1.3-2), htmlTable(v.1.11.2), corpcor(v.1.6.9), XVector(v.0.18.0), GenomicRanges(v.1.30.3), base64enc(v.0.1-3), rstudioapi(v.0.7), roxygen2(v.6.0.1), ggrepel(v.0.7.0), bit64(v.0.9-7), AnnotationDbi(v.1.40.0), xml2(v.1.2.0), codetools(v.0.2-15), splines(v.3.4.4), doParallel(v.1.0.11), robustbase(v.0.92-8), geneplotter(v.1.56.0), knitr(v.1.20), Formula(v.1.2-2), nloptr(v.1.0.4), pbkrtest(v.0.4-7), annotate(v.1.56.1), cluster(v.2.0.6), graph(v.1.56.0), compiler(v.3.4.4), backports(v.1.1.2), Matrix(v.1.2-12), lazyeval(v.0.2.1), limma(v.3.34.9), acepack(v.1.4.1), htmltools(v.0.3.6), tools(v.3.4.4), gtable(v.0.2.0), GenomeInfoDbData(v.1.0.0), reshape2(v.1.4.3), Rcpp(v.0.12.16), Biobase(v.2.38.0), gdata(v.2.18.0), preprocessCore(v.1.40.0), nlme(v.3.1-131.1), iterators(v.1.0.9), stringr(v.1.3.0), openxlsx(v.4.0.17), lme4(v.1.1-15), gtools(v.3.5.0), devtools(v.1.13.5), statmod(v.1.4.30), XML(v.3.98-1.10), DEoptimR(v.1.0-8), directlabels(v.2017.03.31), zlibbioc(v.1.24.0), MASS(v.7.3-49), scales(v.0.5.0.9000), doSNOW(v.1.0.16), parallel(v.3.4.4), SummarizedExperiment(v.1.8.1), RBGL(v.1.54.0), RColorBrewer(v.1.1-2), yaml(v.2.1.18), memoise(v.1.1.0), gridExtra(v.2.3), pander(v.0.6.1), ggplot2(v.2.2.1), rpart(v.4.1-13), latticeExtra(v.0.6-28), stringi(v.1.1.7), RSQLite(v.2.0), genefilter(v.1.60.0), S4Vectors(v.0.16.0), checkmate(v.1.8.5), caTools(v.1.17.1), BiocGenerics(v.0.24.0), BiocParallel(v.1.12.0), GenomeInfoDb(v.1.14.0), rlang(v.0.2.0.9001), commonmark(v.1.4), matrixStats(v.0.53.1), bitops(v.1.0-6), evaluate(v.0.10.1), lattice(v.0.20-35), htmlwidgets(v.1.0), labeling(v.0.3), bit(v.1.1-12), plyr(v.1.8.4), magrittr(v.1.5), variancePartition(v.1.8.1), DESeq2(v.1.18.1), R6(v.2.2.2), IRanges(v.2.12.0), snow(v.0.4-2), gplots(v.3.0.1), Hmisc(v.4.1-1), DelayedArray(v.0.4.1), DBI(v.0.8), mgcv(v.1.8-23), pillar(v.1.2.1), foreign(v.0.8-69), withr(v.2.1.2), 
survival(v.2.41-3), RCurl(v.1.95-4.10), nnet(v.7.3-12), tibble(v.1.4.2), KernSmooth(v.2.23-15), rmarkdown(v.1.9), locfit(v.1.5-9.1), grid(v.3.4.4), sva(v.3.26.0), data.table(v.1.10.4-3), blob(v.1.1.0), digest(v.0.6.15), xtable(v.1.8-2), stats4(v.3.4.4), munsell(v.0.4.3) and quadprog(v.1.5-5)
## Report which hpgltools commit produced this document; message() joins its
## arguments without a separator, so no explicit paste0() is required.
message("This is hpgltools commit: ", get_git_commit())
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 7de4503f6bb5724c28cce24af5dbee22bb1c0cae
## R> packrat::restore()
## This is hpgltools commit: Thu Apr 12 22:08:53 2018 -0400: 7de4503f6bb5724c28cce24af5dbee22bb1c0cae
## Announce the file the session is about to be written to; message() joins
## its arguments without a separator.
message("Saving to ", savefile)
## Saving to differential_expression_mmusculus_v20170614.rda.xz
## Hand savefile to saveme(), wrapped in sm() to keep the report quiet.
## NOTE(review): presumably this serialises the current session's objects to
## savefile -- confirm against saveme().
tmp <- sm(
  saveme(filename = savefile)
)