This document is concerned with analyzing RNAseq data of iscapularis ixodes (tick).
rownames(wanted_annotations) <- make.names(wanted_annotations$ensembl_gene_id, unique=TRUE)
isc_mm <- sm(create_expt(metadata="sample_sheets/all_samples.xlsx",
file_column="mousefile",
gene_info=wanted_annotations))
isc_mm_raw_metrics <- sm(graph_metrics(isc_mm))
isc_mm_norm <- sm(normalize_expt(
isc_mm, transform="log2", norm="quant", batch="limma", convert="cpm", filter=TRUE))
isc_mm_norm_metrics <- sm(graph_metrics(isc_mm_norm))
isc_filt <- sm(normalize_expt(isc_mm, filter=TRUE))
isc_written <- write_expt(expt=isc_filt,
excel=paste0("excel/mm_written-v", ver, ".xlsx"), batch=FALSE)
## Writing the legend.
## Writing the raw reads.
## Graphing the raw reads.
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Writing the normalized reads.
## Graphing the normalized reads.
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Writing the median reads by factor.
## Warning in median_by_factor(exprs(norm_data), fact =
## norm_data[["conditions"]]): The level uninfected_gut of the factor has no
## columns.
## Warning in median_by_factor(exprs(norm_data), fact =
## norm_data[["conditions"]]): The level infected_gut of the factor has no
## columns.
## Warning in median_by_factor(exprs(norm_data), fact =
## norm_data[["conditions"]]): The level infected_salivary of the factor has
## no columns.
## Warning in median_by_factor(exprs(norm_data), fact =
## norm_data[["conditions"]]): The level uninfected_salivary of the factor has
## no columns.
## Warning in median_by_factor(exprs(norm_data), fact =
## norm_data[["conditions"]]): The level infected_mouse_skin of the factor has
## no columns.
## Warning in median_by_factor(exprs(norm_data), fact =
## norm_data[["conditions"]]): The level uninfected_guinea_skin of the factor
## has no columns.
## The factor wt has 3 rows.
## The factor mut has 3 rows.
isc_mm_raw_metrics$libsize
isc_mm_raw_metrics$density
isc_mm_raw_metrics$corheat
isc_mm_norm_metrics$disheat
isc_mm_norm_metrics$corheat
isc_mm_norm_metrics$pcaplot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
Use the next block to poke at that a little.
isc_mm_batch_limmaresid <- sm(normalize_expt(isc_mm, transform="log2", convert="cpm",
filter=TRUE, batch="limmaresid"))
plot_pca(isc_mm_batch_limmaresid)$plot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
isc_mm_batch_limma <- sm(normalize_expt(isc_mm, transform="log2", convert="cpm",
filter=TRUE, batch="limma"))
plot_pca(isc_mm_batch_limma)$plot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
isc_mm_batch_sva <- sm(normalize_expt(isc_mm, transform="log2", convert="cpm",
filter=TRUE, batch="sva"))
plot_pca(isc_mm_batch_sva)$plot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
isc_mm_batch_combat <- sm(normalize_expt(isc_mm, transform="log2", convert="cpm",
filter=TRUE, batch="combat_scale"))
plot_pca(isc_mm_batch_combat)$plot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Ouch, segmentation fault!
##isc_mm_batch_combat <- normalize_expt(isc_mm, transform="log2", convert="cpm", filter=TRUE, batch="ruv")
##plot_pca(isc_mm_batch_ruv)$plot
varpart_test <- varpart(expt=isc_mm, predictor=NULL, factors=c("condition","batch"))
## Attempting mixed linear model with: ~ (1|condition) + (1|batch)
## Fitting the expressionset to the model, this is slow.
## Projected run time: ~ 30 min
## Placing factor: condition at the beginning of the model.
varpart_test$partition_plot
varpart_test$percent_plot
isc_mm_filt <- sm(normalize_expt(isc_mm, filter=TRUE))
surrogate_test <- compare_surrogate_estimates(isc_mm_filt)
## The be method chose 2 surrogate variable(s).
## Attempting pca surrogate estimation with 2 surrogates.
## The be method chose 2 surrogate variable(s).
## Attempting sva supervised surrogate estimation with 2 surrogates.
## The be method chose 2 surrogate variable(s).
## Attempting sva unsupervised surrogate estimation with 2 surrogates.
## The be method chose 2 surrogate variable(s).
## Attempting ruvseq supervised surrogate estimation with 2 surrogates.
## The be method chose 2 surrogate variable(s).
## Attempting ruvseq residual surrogate estimation with 2 surrogates.
## The be method chose 2 surrogate variable(s).
## Attempting ruvseq empirical surrogate estimation with 2 surrogates.
## 1/8: Performing lmFit(data) etc. with null in the model.
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
## 2/8: Performing lmFit(data) etc. with + batch_adjustments$batch in the model.
## Coefficients not estimable: batch_adjustments$batchab batch_adjustments$batchac batch_adjustments$batchb batch_adjustments$batchc batch_adjustments$batchj1
## Warning: Partial NA coefficients for 15624 probe(s)
## Coefficients not estimable: batch_adjustments$batchab batch_adjustments$batchac batch_adjustments$batchb batch_adjustments$batchc batch_adjustments$batchj1
## Warning: Partial NA coefficients for 15624 probe(s)
## Warning in .ebayes(fit = fit, proportion = proportion, stdev.coef.lim =
## stdev.coef.lim, : Estimation of var.prior failed - set to default value
## 3/8: Performing lmFit(data) etc. with + batch_adjustments$pca in the model.
## 4/8: Performing lmFit(data) etc. with + batch_adjustments$sva_sup in the model.
## 5/8: Performing lmFit(data) etc. with + batch_adjustments$sva_unsup in the model.
## 6/8: Performing lmFit(data) etc. with + batch_adjustments$ruv_sup in the model.
## 7/8: Performing lmFit(data) etc. with + batch_adjustments$ruv_resid in the model.
## 8/8: Performing lmFit(data) etc. with + batch_adjustments$ruv_emp in the model.
surrogate_test$sva_unsupervised_adjust$svs_sample
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
surrogate_test$sva_unsupervised_adjust$factor_svs
For the moment I will continue to assume a limited/no batch effect – but that might be wrong.
pander::pander(sessionInfo())
R version 3.4.4 (2018-03-15)
**Platform:** x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C
attached base packages: stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: ruv(v.0.9.7) and hpgltools(v.2018.03)
loaded via a namespace (and not attached): backports(v.1.1.2), corrplot(v.0.84), aroma.light(v.3.8.0), plyr(v.1.8.4), lazyeval(v.0.2.1), splines(v.3.4.4), BiocParallel(v.1.12.0), GenomeInfoDb(v.1.14.0), ggplot2(v.2.2.1), sva(v.3.26.0), digest(v.0.6.15), foreach(v.1.4.4), BiocInstaller(v.1.28.0), htmltools(v.0.3.6), gdata(v.2.18.0), magrittr(v.1.5), memoise(v.1.1.0), doParallel(v.1.0.11), sfsmisc(v.1.1-2), openxlsx(v.4.0.17), limma(v.3.34.9), readr(v.1.1.1), Biostrings(v.2.46.0), annotate(v.1.56.1), matrixStats(v.0.53.1), ffpe(v.1.22.0), R.utils(v.2.6.0), xts(v.0.10-2), siggenes(v.1.52.0), prettyunits(v.1.0.2), colorspace(v.1.3-2), blob(v.1.1.0), ggrepel(v.0.7.0), dplyr(v.0.7.4), RCurl(v.1.95-4.10), roxygen2(v.6.0.1), genefilter(v.1.60.0), lme4(v.1.1-15), bindr(v.0.1.1), GEOquery(v.2.46.15), glue(v.1.2.0), survival(v.2.41-3), zoo(v.1.8-1), iterators(v.1.0.9), registry(v.0.5), gtable(v.0.2.0), lumi(v.2.30.0), zlibbioc(v.1.24.0), XVector(v.0.18.0), DelayedArray(v.0.4.1), BiocGenerics(v.0.24.0), scales(v.0.5.0.9000), DESeq(v.1.30.0), rngtools(v.1.2.4), DBI(v.0.8), edgeR(v.3.20.9), Rcpp(v.0.12.16), xtable(v.1.8-2), progress(v.1.1.2), bumphunter(v.1.20.0), bit(v.1.1-12), mclust(v.5.4), preprocessCore(v.1.40.0), stats4(v.3.4.4), httr(v.1.3.1), gplots(v.3.0.1), RColorBrewer(v.1.1-2), pkgconfig(v.2.0.1), reshape(v.0.8.7), XML(v.3.98-1.10), R.methodsS3(v.1.7.1), locfit(v.1.5-9.1), labeling(v.0.3), rlang(v.0.2.0.9001), reshape2(v.1.4.3), AnnotationDbi(v.1.40.0), munsell(v.0.4.3), tools(v.3.4.4), RSQLite(v.2.0), devtools(v.1.13.5), evaluate(v.0.10.1), stringr(v.1.3.0), yaml(v.2.1.18), knitr(v.1.20), bit64(v.0.9-7), pander(v.0.6.1), beanplot(v.1.2), caTools(v.1.17.1), methylumi(v.2.24.1), purrr(v.0.2.4), bindrcpp(v.0.2), doRNG(v.1.6.6), EDASeq(v.2.12.0), nlme(v.3.1-131.1), nor1mix(v.1.2-3), R.oo(v.1.21.0), xml2(v.1.2.0), biomaRt(v.2.34.2), compiler(v.3.4.4), pbkrtest(v.0.4-7), curl(v.3.1), variancePartition(v.1.8.1), testthat(v.2.0.0), affyio(v.1.48.0), tibble(v.1.4.2), statmod(v.1.4.30), geneplotter(v.1.56.0), stringi(v.1.1.7), GenomicFeatures(v.1.30.3), minfi(v.1.24.0), lattice(v.0.20-35), Matrix(v.1.2-12), commonmark(v.1.4), nloptr(v.1.0.4), multtest(v.2.34.0), pillar(v.1.2.1), data.table(v.1.10.4-3), bitops(v.1.0-6), corpcor(v.1.6.9), rtracklayer(v.1.38.3), GenomicRanges(v.1.30.3), colorRamps(v.2.3), R6(v.2.2.2), latticeExtra(v.0.6-28), directlabels(v.2017.03.31), affy(v.1.56.0), hwriter(v.1.3.2), RMySQL(v.0.10.14), ShortRead(v.1.36.1), KernSmooth(v.2.23-15), gridExtra(v.2.3), nleqslv(v.3.3.1), IRanges(v.2.12.0), codetools(v.0.2-15), MASS(v.7.3-49), gtools(v.3.5.0), assertthat(v.0.2.0), SummarizedExperiment(v.1.8.1), pkgmaker(v.0.22), openssl(v.1.0.1), rprojroot(v.1.3-2), withr(v.2.1.2), RUVSeq(v.1.12.0), GenomicAlignments(v.1.14.1), Rsamtools(v.1.30.0), S4Vectors(v.0.16.0), GenomeInfoDbData(v.1.0.0), hms(v.0.4.2), mgcv(v.1.8-23), parallel(v.3.4.4), quadprog(v.1.5-5), grid(v.3.4.4), tidyr(v.0.8.0), base64(v.2.0), minqa(v.1.2.4), rmarkdown(v.1.9), illuminaio(v.0.20.0), Rtsne(v.0.13), TTR(v.0.23-3), Biobase(v.2.38.0) and base64enc(v.0.1-3)
message(paste0("This is hpgltools commit: ", get_git_commit()))
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 7de4503f6bb5724c28cce24af5dbee22bb1c0cae
## R> packrat::restore()
## This is hpgltools commit: Thu Apr 12 22:08:53 2018 -0400: 7de4503f6bb5724c28cce24af5dbee22bb1c0cae
message(paste0("Saving to ", savefile))
## Saving to sample_estimation_mmusculus_v20170614.rda.xz
tmp <- sm(saveme(filename=savefile))