This document should make clear the suitability of this Pseudomonas data for differential expression analyses. It should also give some ideas about the depth and distribution of the data.
# Use the sysName column as row names for pa14_annot; make.names() makes the
# values syntactically valid and unique=TRUE de-duplicates them.
rownames(pa14_annot) <- make.names(pa14_annot[["sysName"]], unique=TRUE)
# Build the experiment object from the sample sheet plus gene annotations.
# NOTE(review): gene_info is given all_annot, not the pa14_annot whose row
# names were just set above -- confirm all_annot is the intended table.
pa_expt <- create_expt(metadata="sample_sheets/rna_new_old_samples.xlsx",
gene_info=all_annot)
## Reading the sample metadata.
## The sample definitions comprises: 20 rows(samples) and 27 columns(metadata fields).
## Reading count tables.
## Reading count tables with read.table().
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/new_large/hpgl0975/outputs/bowtie2_paeruginosa_pa14/hpgl0975.count.xz contains 5984 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/new_large/hpgl0976/outputs/bowtie2_paeruginosa_pa14/hpgl0976.count.xz contains 5984 rows and merges to 5984 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/new_large/hpgl0977/outputs/bowtie2_paeruginosa_pa14/hpgl0977.count.xz contains 5984 rows and merges to 5984 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/new_large/hpgl0978/outputs/bowtie2_paeruginosa_pa14/hpgl0978.count.xz contains 5984 rows and merges to 5984 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/new_large/hpgl0979/outputs/bowtie2_paeruginosa_pa14/hpgl0979.count.xz contains 5984 rows and merges to 5984 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/new_large/hpgl0980/outputs/bowtie2_paeruginosa_pa14/hpgl0980.count.xz contains 5984 rows and merges to 5984 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/new_large/hpgl0981/outputs/bowtie2_paeruginosa_pa14/hpgl0981.count.xz contains 5984 rows and merges to 5984 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/new_large/hpgl0982/outputs/bowtie2_paeruginosa_pa14/hpgl0982.count.xz contains 5984 rows and merges to 5984 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/new_large/hpgl0983/outputs/bowtie2_paeruginosa_pa14/hpgl0983.count.xz contains 5984 rows and merges to 5984 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/new_large/hpgl0984/outputs/bowtie2_paeruginosa_pa14/hpgl0984.count.xz contains 5984 rows and merges to 5984 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/new_large/hpgl0985/outputs/bowtie2_paeruginosa_pa14/hpgl0985.count.xz contains 5984 rows and merges to 5984 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/new_large/hpgl0986/outputs/bowtie2_paeruginosa_pa14/hpgl0986.count.xz contains 5984 rows and merges to 5984 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/previous_large/vl05_pa14/outputs/bowtie2_paeruginosa_pa14/vl05_pa14.count.xz contains 5984 rows and merges to 5984 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/previous_large/vl06_pa14/outputs/bowtie2_paeruginosa_pa14/vl06_pa14.count.xz contains 5984 rows and merges to 5984 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/previous_large/vl07_pa14/outputs/bowtie2_paeruginosa_pa14/vl07_pa14.count.xz contains 5984 rows and merges to 5984 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/previous_large/vl08_pa14/outputs/bowtie2_paeruginosa_pa14/vl08_pa14.count.xz contains 5984 rows and merges to 5984 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/previous_large/vl09_orn/outputs/bowtie2_paeruginosa_pa14/vl09_orn.count.xz contains 5984 rows and merges to 5984 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/previous_large/vl10_orn/outputs/bowtie2_paeruginosa_pa14/vl10_orn.count.xz contains 5984 rows and merges to 5984 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/previous_large/vl11_orn/outputs/bowtie2_paeruginosa_pa14/vl11_orn.count.xz contains 5984 rows and merges to 5984 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/paeruginosa_201710/preprocessing/previous_large/vl12_orn/outputs/bowtie2_paeruginosa_pa14/vl12_orn.count.xz contains 5984 rows and merges to 5984 rows.
## Finished reading count tables.
## Matched 5972 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
# Split the combined experiment on the 'batch' metadata column.
# Samples whose batch is 'new':
new_expt <- subset_expt(pa_expt, subset="batch=='new'")
## There were 20, now there are 12 samples.
# Samples whose batch is 'previous':
old_expt <- subset_expt(pa_expt, subset="batch=='previous'")
## There were 20, now there are 8 samples.
# Export the full (new + previous) experiment to a versioned Excel workbook.
# Per the logged steps below, this writes a legend, raw and normalized reads
# (each with graphs), and median reads by factor.
written_expt <- write_expt(pa_expt, excel=paste0("excel/rna_reads-v", ver, ".xlsx"))
## Writing the legend.
## Writing the raw reads.
## Graphing the raw reads.
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Writing the normalized reads.
## Graphing the normalized reads.
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Writing the median reads by factor.
## The factor mt_st has 3 rows.
## The factor wt_st has 3 rows.
## The factor mt_ex has 3 rows.
## The factor wt_ex has 3 rows.
## The factor wt_undef has 4 rows.
## The factor mt_undef has 4 rows.
# Second export of the same experiment to a separate workbook, this time
# requesting log2 transform, cpm conversion and filtering.
# NOTE(review): norm="raw" and batch=FALSE presumably mean no between-sample
# normalization and no batch adjustment -- confirm against write_expt().
all_nobatch <- write_expt(pa_expt, transform="log2", convert="cpm", filter=TRUE,
norm="raw", batch=FALSE,
excel=paste0("excel/rna_reads_l2cpmfilt-v", ver, ".xlsx"))
## Writing the legend.
## Writing the raw reads.
## Graphing the raw reads.
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Writing the normalized reads.
## Graphing the normalized reads.
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Writing the median reads by factor.
## The factor mt_st has 3 rows.
## The factor wt_st has 3 rows.
## The factor mt_ex has 3 rows.
## The factor wt_ex has 3 rows.
## The factor wt_undef has 4 rows.
## The factor mt_undef has 4 rows.
The following blocks will plot and print a few common metrics of the new data.
# Metrics for the new-batch samples, before and after normalization.
# sm() presumably silences the wrapped call's messages -- confirm in hpgltools.
new_raw <- sm(graph_metrics(new_expt))
# NOTE(review): unlike the pa_expt normalizations later in this file, no
# convert="cpm" is requested here -- confirm that this is intentional.
new_norm <- sm(normalize_expt(new_expt, transform="log2", norm="quant", filter=TRUE))
new_plots <- sm(graph_metrics(new_norm))
# Write the new-batch plots to PNG files via pp(); each call logs the path it
# wrote. The legend, library sizes, correlation heatmap and boxplot come from
# the raw metrics (new_raw); the PCA comes from the normalized metrics
# (new_plots).
pp(file="images/legend.png", image=new_raw$legend)
## Writing the image to: images/legend.png and calling dev.off().
pp(file="images/new_libsize.png", image=new_raw$libsize)
## Writing the image to: images/new_libsize.png and calling dev.off().
pp(file="images/new_raw_corheat.png", image=new_raw$corheat)
## Writing the image to: images/new_raw_corheat.png and calling dev.off().
pp(file="images/new_boxplot.png", image=new_raw$boxplot)
## Writing the image to: images/new_boxplot.png and calling dev.off().
pp(file="images/norm_pca.png", image=new_plots$pcaplot)
## Writing the image to: images/norm_pca.png and calling dev.off().
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
Spoiler alert: I already looked at these plots, and it seems to me that the previous data has a much more subtle split between the wt/delta samples than the new data does. This will likely limit the sensitivity of any analyses performed using both sets of data. We can perform a number of other analyses (variance partitioning, etc.) to dig further and understand where the variance is coming from, but that can be difficult and time-consuming.
# Metrics for all samples (new + previous batches) in three states:
# raw counts,
pa_graph_raw <- sm(graph_metrics(pa_expt))
# log2/cpm/quantile-normalized and filtered,
pa_norm <- sm(normalize_expt(pa_expt, transform="log2", convert="cpm",
norm="quant", filter=TRUE))
pa_graph_norm <- sm(graph_metrics(pa_norm))
# and the same normalization with batch="limma" added.
# NOTE(review): presumably this applies a limma-based batch adjustment --
# confirm against normalize_expt().
pa_nb <- sm(normalize_expt(pa_expt, transform="log2", convert="cpm",
norm="quant", filter=TRUE, batch="limma"))
pa_nb_graph <- sm(graph_metrics(pa_nb))
# Display plots inline by evaluating the plot objects at top level.
# PCA of the normalized data for all samples:
pa_graph_norm$pcaplot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
# t-SNE plot of the same normalized data:
pa_graph_norm$tsneplot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
# PCA of the data normalized with batch="limma":
pa_nb_graph$pcaplot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
# Write the all-sample plots to PNG files via pp(). Library sizes, density and
# legend come from the raw metrics (pa_graph_raw); the correlation heatmap and
# PCA come from the normalized metrics (pa_graph_norm).
pp(file="images/all_libsize.png", image=pa_graph_raw$libsize)
## Writing the image to: images/all_libsize.png and calling dev.off().
pp(file="images/all_density.png", image=pa_graph_raw$density)
## Writing the image to: images/all_density.png and calling dev.off().
pp(file="images/norm_corheat.png", image=pa_graph_norm$corheat)
## Writing the image to: images/norm_corheat.png and calling dev.off().
pp(file="images/all_legend.png", image=pa_graph_raw$legend)
## Writing the image to: images/all_legend.png and calling dev.off().
pp(file="images/all_pca.png", image=pa_graph_norm$pcaplot)
## Writing the image to: images/all_pca.png and calling dev.off().
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
# Write the PCA of the batch="limma" normalized data (pa_nb_graph) to its own
# PNG file.
pp(file="images/all_batch_pca.png", image=pa_nb_graph$pcaplot)
## Writing the image to: images/all_batch_pca.png and calling dev.off().
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
# Record the R version, platform, locale and attached/loaded package versions
# for this run, formatted by pander.
pander::pander(sessionInfo())
R version 3.5.1 (2018-07-02)
Platform: x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C
attached base packages: stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: ruv(v.0.9.7), bindrcpp(v.0.2.2) and hpgltools(v.2018.03)
loaded via a namespace (and not attached): nlme(v.3.1-137), bitops(v.1.0-6), matrixStats(v.0.54.0), devtools(v.1.13.6), bit64(v.0.9-7), RColorBrewer(v.1.1-2), progress(v.1.2.0), httr(v.1.3.1), rprojroot(v.1.3-2), GenomeInfoDb(v.1.16.0), tools(v.3.5.1), backports(v.1.1.2), R6(v.2.2.2), KernSmooth(v.2.23-15), mgcv(v.1.8-24), DBI(v.1.0.0), lazyeval(v.0.2.1), BiocGenerics(v.0.26.0), colorspace(v.1.3-2), withr(v.2.1.2), gridExtra(v.2.3), tidyselect(v.0.2.4), prettyunits(v.1.0.2), bit(v.1.1-14), compiler(v.3.5.1), preprocessCore(v.1.42.0), Biobase(v.2.40.0), xml2(v.1.2.0), DelayedArray(v.0.6.4), rtracklayer(v.1.40.5), labeling(v.0.3), scales(v.1.0.0), genefilter(v.1.62.0), quadprog(v.1.5-5), commonmark(v.1.5), stringr(v.1.3.1), digest(v.0.6.15), Rsamtools(v.1.32.2), rmarkdown(v.1.10), XVector(v.0.20.0), base64enc(v.0.1-3), pkgconfig(v.2.0.1), htmltools(v.0.3.6), limma(v.3.36.2), rlang(v.0.2.1), RSQLite(v.2.1.1), bindr(v.0.1.1), BiocParallel(v.1.14.2), gtools(v.3.8.1), dplyr(v.0.7.6), zip(v.1.0.0), RCurl(v.1.95-4.11), magrittr(v.1.5), GenomeInfoDbData(v.1.1.0), Matrix(v.1.2-14), Rcpp(v.0.12.18), munsell(v.0.5.0), S4Vectors(v.0.18.3), stringi(v.1.2.4), yaml(v.2.2.0), edgeR(v.3.22.3), MASS(v.7.3-50), SummarizedExperiment(v.1.10.1), zlibbioc(v.1.26.0), Rtsne(v.0.13), plyr(v.1.8.4), grid(v.3.5.1), blob(v.1.1.1), parallel(v.3.5.1), ggrepel(v.0.8.0), crayon(v.1.3.4), lattice(v.0.20-35), splines(v.3.5.1), Biostrings(v.2.48.0), pander(v.0.6.2), annotate(v.1.58.0), GenomicFeatures(v.1.32.2), hms(v.0.4.2), locfit(v.1.5-9.1), knitr(v.1.20), pillar(v.1.3.0), GenomicRanges(v.1.32.6), corpcor(v.1.6.9), reshape2(v.1.4.3), codetools(v.0.2-15), biomaRt(v.2.36.1), stats4(v.3.5.1), XML(v.3.98-1.16), glue(v.1.3.0), evaluate(v.0.11), data.table(v.1.11.4), foreach(v.1.4.4), gtable(v.0.2.0), purrr(v.0.2.5), assertthat(v.0.2.0), ggplot2(v.3.0.0), openxlsx(v.4.1.0), xtable(v.1.8-2), roxygen2(v.6.1.0), survival(v.2.42-6), tibble(v.1.4.2), iterators(v.1.0.10), GenomicAlignments(v.1.16.0), 
AnnotationDbi(v.1.42.1), memoise(v.1.1.0), IRanges(v.2.14.10), sva(v.3.28.0) and directlabels(v.2018.05.22)
# Report which hpgltools commit produced this document; get_git_commit() also
# emits the reproduction instructions logged below.
message(paste0("This is hpgltools commit: ", get_git_commit()))
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 2266d4aa924d9d04edcd360e0ca8216aceb8c9ae
## R> packrat::restore()
## This is hpgltools commit: Thu Aug 23 17:56:20 2018 -0400: 2266d4aa924d9d04edcd360e0ca8216aceb8c9ae
# Derive the save-file name from the Rmd file name: drop the trailing ".Rmd"
# extension and append the version string. The pattern is anchored with `$` so
# only a trailing extension is removed (never a ".Rmd" occurring mid-name), and
# the argument is spelled `replacement=` in full rather than relying on partial
# argument matching of `replace=`.
this_save <- paste0(sub(pattern="\\.Rmd$", replacement="", x=rmd_file), "-v", ver, ".rda.xz")
# message() concatenates its arguments itself, so no paste0() is needed.
message("Saving to ", this_save)
## Saving to 02_sample_estimation_20180718-v20180718.rda.xz
# Save the session under that name.
# NOTE(review): sm() presumably silences saveme()'s output -- confirm.
tmp <- sm(saveme(filename=this_save))