This document is concerned with analyzing RNAseq data of iscapularis ixodes (tick).
rownames(wanted_annotations) <- make.names(wanted_annotations$ensembl_gene_id, unique=TRUE)
isc_mm <- sm(create_expt(metadata="sample_sheets/all_samples.xlsx",
file_column="mousefile",
gene_info=wanted_annotations))
isc_mm_raw_metrics <- graph_metrics(isc_mm)
## Graphing number of non-zero genes with respect to CPM by library.
## Graphing library sizes.
## Graphing a boxplot.
## This data will benefit from being displayed on the log scale.
## If this is not desired, set scale='raw'
## Some entries are 0. We are on log scale, adding 1 to the data.
## Changed 72211 zero count features.
## Graphing a correlation heatmap.
## Graphing a standard median correlation.
## Performing correlation.
## Graphing a distance heatmap.
## Graphing a standard median distance.
## Performing distance.
## Graphing a PCA plot.
## Graphing a T-SNE plot.
## Plotting a density plot.
## This data will benefit from being displayed on the log scale.
## If this is not desired, set scale='raw'
## Some entries are 0. We are on log scale, setting them to 0.5.
## Changed 72211 zero count features.
## Plotting the representation of the top-n genes.
## Printing a color to condition legend.
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
isc_mm_norm <- sm(normalize_expt(isc_mm, transform="log2", norm="quant", batch="limma", convert="cpm", filter=TRUE))
isc_mm_norm_metrics <- sm(graph_metrics(isc_mm_norm))
isc_filt <- sm(normalize_expt(isc_mm, filter=TRUE))
isc_written <- sm(write_expt(expt=isc_filt,
excel=paste0("tables/mm_written-v", ver, ".xlsx"), batch=FALSE))
## Error in names(x) <- value: 'names' attribute [8] must be the same length as the vector [2]
pp(file="images/mm_libsize.pdf", image=isc_mm_raw_metrics$libsize)
## Wrote the image to: images/mm_libsize.pdf
pp(file="images/mm_density.pdf", image=isc_mm_raw_metrics$density)
## Wrote the image to: images/mm_density.pdf
pp(file="images/mm_raw_corheat.pdf", image=isc_mm_raw_metrics$corheat)
## Wrote the image to: images/mm_raw_corheat.pdf
pp(file="images/mm_norm_disheat.pdf", image=isc_mm_norm_metrics$disheat)
## Wrote the image to: images/mm_norm_disheat.pdf
pp(file="images/mm_norm_corheat.pdf", image=isc_mm_norm_metrics$corheat)
## Wrote the image to: images/mm_norm_corheat.pdf
pp(file="images/mm_pca.pdf", image=isc_mm_norm_metrics$pcaplot)
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Wrote the image to: images/mm_pca.pdf
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
Use the next block to poke at that a little.
isc_mm_batch_limmaresid <- sm(normalize_expt(isc_mm, transform="log2", convert="cpm",
filter=TRUE, batch="limmaresid"))
plot_pca(isc_mm_batch_limmaresid)$plot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
isc_mm_batch_limma <- sm(normalize_expt(isc_mm, transform="log2", convert="cpm",
filter=TRUE, batch="limma"))
plot_pca(isc_mm_batch_limma)$plot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
isc_mm_batch_sva <- sm(normalize_expt(isc_mm, transform="log2", convert="cpm",
filter=TRUE, batch="sva"))
plot_pca(isc_mm_batch_sva)$plot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
isc_mm_batch_combat <- sm(normalize_expt(isc_mm, transform="log2", convert="cpm",
filter=TRUE, batch="combat_scale"))
plot_pca(isc_mm_batch_combat)$plot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Ouch, segmentation fault!
##isc_mm_batch_combat <- normalize_expt(isc_mm, transform="log2", convert="cpm", filter=TRUE, batch="ruv")
##plot_pca(isc_mm_batch_ruv)$plot
varpart_test <- varpart(expt=isc_mm, predictor=NULL, factors=c("condition","batch"))
## Attempting mixed linear model with: ~ (1|condition) + (1|batch)
## Fitting the expressionset to the model, this is slow.
## Projected run time: ~ 1 min
## Placing factor: condition at the beginning of the model.
varpart_test$partition_plot
varpart_test$percent_plot
isc_mm_filt <- sm(normalize_expt(isc_mm, filter=TRUE))
surrogate_test <- compare_surrogate_estimates(isc_mm_filt)
## The be method chose 2 surrogate variable(s).
## Attempting pca surrogate estimation with 2 surrogates.
## The be method chose 2 surrogate variable(s).
## Attempting sva supervised surrogate estimation with 2 surrogates.
## The be method chose 2 surrogate variable(s).
## Attempting sva unsupervised surrogate estimation with 2 surrogates.
## The be method chose 2 surrogate variable(s).
## Attempting ruvseq supervised surrogate estimation with 2 surrogates.
## The be method chose 2 surrogate variable(s).
## Attempting ruvseq residual surrogate estimation with 2 surrogates.
## The be method chose 2 surrogate variable(s).
## Attempting ruvseq empirical surrogate estimation with 2 surrogates.
## 1/8: Performing lmFit(data) etc. with null in the model.
## 2/8: Performing lmFit(data) etc. with + batch_adjustments$batch in the model.
## Coefficients not estimable: batch_adjustments$batchab batch_adjustments$batchac batch_adjustments$batchb batch_adjustments$batchc batch_adjustments$batchj1
## Warning: Partial NA coefficients for 15624 probe(s)
## Coefficients not estimable: batch_adjustments$batchab batch_adjustments$batchac batch_adjustments$batchb batch_adjustments$batchc batch_adjustments$batchj1
## Warning: Partial NA coefficients for 15624 probe(s)
## Warning in .ebayes(fit = fit, proportion = proportion, stdev.coef.lim = stdev.coef.lim, :
## Estimation of var.prior failed - set to default value
## 3/8: Performing lmFit(data) etc. with + batch_adjustments$pca in the model.
## 4/8: Performing lmFit(data) etc. with + batch_adjustments$sva_sup in the model.
## 5/8: Performing lmFit(data) etc. with + batch_adjustments$sva_unsup in the model.
## 6/8: Performing lmFit(data) etc. with + batch_adjustments$ruv_sup in the model.
## 7/8: Performing lmFit(data) etc. with + batch_adjustments$ruv_resid in the model.
## 8/8: Performing lmFit(data) etc. with + batch_adjustments$ruv_emp in the model.
surrogate_test$sva_unsupervised_adjust$svs_sample
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
surrogate_test$sva_unsupervised_adjust$factor_svs
For the moment I will continue to assume a limited/no batch effect – but that might be wrong.
pander::pander(sessionInfo())
R version 3.4.4 (2018-03-15)
**Platform:** x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C
attached base packages: stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: hpgltools(v.2018.03) and ruv(v.0.9.7)
loaded via a namespace (and not attached): backports(v.1.1.2), corrplot(v.0.84), aroma.light(v.3.8.0), plyr(v.1.8.4), lazyeval(v.0.2.1), splines(v.3.4.4), BiocParallel(v.1.12.0), GenomeInfoDb(v.1.14.0), ggplot2(v.2.2.1), sva(v.3.26.0), digest(v.0.6.15), htmltools(v.0.3.6), foreach(v.1.4.4), BiocInstaller(v.1.28.0), gdata(v.2.18.0), magrittr(v.1.5), memoise(v.1.1.0), doParallel(v.1.0.11), sfsmisc(v.1.1-2), openxlsx(v.4.0.17), limma(v.3.34.9), readr(v.1.1.1), Biostrings(v.2.46.0), annotate(v.1.56.1), matrixStats(v.0.53.1), ffpe(v.1.22.0), R.utils(v.2.6.0), xts(v.0.10-2), siggenes(v.1.52.0), prettyunits(v.1.0.2), colorspace(v.1.3-2), blob(v.1.1.0), ggrepel(v.0.7.0), dplyr(v.0.7.4), RCurl(v.1.95-4.10), roxygen2(v.6.0.1), genefilter(v.1.60.0), lme4(v.1.1-15), bindr(v.0.1.1), GEOquery(v.2.46.15), glue(v.1.2.0), survival(v.2.41-3), zoo(v.1.8-1), iterators(v.1.0.9), registry(v.0.5), gtable(v.0.2.0), lumi(v.2.30.0), zlibbioc(v.1.24.0), XVector(v.0.18.0), DelayedArray(v.0.4.1), BiocGenerics(v.0.24.0), scales(v.0.5.0), DESeq(v.1.30.0), rngtools(v.1.2.4), DBI(v.0.8), edgeR(v.3.20.9), Rcpp(v.0.12.16), xtable(v.1.8-2), progress(v.1.1.2), bumphunter(v.1.20.0), bit(v.1.1-12), mclust(v.5.4), preprocessCore(v.1.40.0), stats4(v.3.4.4), httr(v.1.3.1), gplots(v.3.0.1), RColorBrewer(v.1.1-2), pkgconfig(v.2.0.1), reshape(v.0.8.7), XML(v.3.98-1.10), R.methodsS3(v.1.7.1), locfit(v.1.5-9.1), labeling(v.0.3), rlang(v.0.2.0), reshape2(v.1.4.3), AnnotationDbi(v.1.40.0), munsell(v.0.4.3), tools(v.3.4.4), RSQLite(v.2.0), devtools(v.1.13.5), evaluate(v.0.10.1), stringr(v.1.3.0), yaml(v.2.1.18), knitr(v.1.20), bit64(v.0.9-7), pander(v.0.6.1), beanplot(v.1.2), caTools(v.1.17.1), methylumi(v.2.24.1), purrr(v.0.2.4), bindrcpp(v.0.2), EDASeq(v.2.12.0), nlme(v.3.1-131.1), doRNG(v.1.6.6), nor1mix(v.1.2-3), R.oo(v.1.21.0), xml2(v.1.2.0), biomaRt(v.2.34.2), compiler(v.3.4.4), pbkrtest(v.0.4-7), curl(v.3.1), variancePartition(v.1.8.1), testthat(v.2.0.0), affyio(v.1.48.0), tibble(v.1.4.2), statmod(v.1.4.30), geneplotter(v.1.56.0), stringi(v.1.1.7), GenomicFeatures(v.1.30.3), minfi(v.1.24.0), lattice(v.0.20-35), Matrix(v.1.2-12), commonmark(v.1.4), nloptr(v.1.0.4), multtest(v.2.34.0), pillar(v.1.2.1), data.table(v.1.10.4-3), bitops(v.1.0-6), corpcor(v.1.6.9), rtracklayer(v.1.38.3), GenomicRanges(v.1.30.3), colorRamps(v.2.3), R6(v.2.2.2), latticeExtra(v.0.6-28), directlabels(v.2017.03.31), affy(v.1.56.0), hwriter(v.1.3.2), RMySQL(v.0.10.14), ShortRead(v.1.36.1), KernSmooth(v.2.23-15), gridExtra(v.2.3), nleqslv(v.3.3.1), IRanges(v.2.12.0), codetools(v.0.2-15), MASS(v.7.3-49), gtools(v.3.5.0), assertthat(v.0.2.0), SummarizedExperiment(v.1.8.1), rprojroot(v.1.3-2), openssl(v.1.0.1), pkgmaker(v.0.22), withr(v.2.1.2), RUVSeq(v.1.12.0), GenomicAlignments(v.1.14.1), Rsamtools(v.1.30.0), S4Vectors(v.0.16.0), GenomeInfoDbData(v.1.0.0), hms(v.0.4.2), mgcv(v.1.8-23), parallel(v.3.4.4), quadprog(v.1.5-5), grid(v.3.4.4), tidyr(v.0.8.0), base64(v.2.0), minqa(v.1.2.4), rmarkdown(v.1.9), illuminaio(v.0.20.0), Rtsne(v.0.13), TTR(v.0.23-3), base64enc(v.0.1-3) and Biobase(v.2.38.0)
message(paste0("This is hpgltools commit: ", get_git_commit()))
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 5d8c266e48bb9f73cdac8300e5c7c9f5baf003dc
## R> packrat::restore()
## This is hpgltools commit: Wed Mar 21 15:55:32 2018 -0400: 5d8c266e48bb9f73cdac8300e5c7c9f5baf003dc
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
## Saving to 02_sample_estimation_mmusculus-v20170703.rda.xz
tmp <- sm(saveme(filename=this_save))