In sample_estimation, I created sc_filt which is precisely what I want.
keepers <- list(
"mcwu_vs_wcwu" = c("mtc_wtu", "wtc_wtu"),
"wcmu_vs_wcwu" = c("wtc_mtu", "wtc_wtu"),
"mcmu_vs_wcwu" = c("mtc_mtu", "wtc_wtu"))
batch <- all_pairwise(input=sc_filt)
## Using limma's removeBatchEffect to test before/after batch correction.
## Finished running DE analyses, collecting outputs.
## Comparing analyses 1/6: mtc_wtu_vs_mtc_mtu
## Comparing analyses 2/6: wtc_mtu_vs_mtc_mtu
## Comparing analyses 3/6: wtc_wtu_vs_mtc_mtu
## Comparing analyses 4/6: wtc_mtu_vs_mtc_wtu
## Comparing analyses 5/6: wtc_wtu_vs_mtc_wtu
## Comparing analyses 6/6: wtc_wtu_vs_wtc_mtu
fsva <- all_pairwise(input=sc_filt, model_batch="sva")
## The be method chose 2 surrogate variable(s).
## Estimate type 'sva' is shorthand for 'sva_unsupervised'.
## Other sva options include: sva_supervised and svaseq.
## Attempting sva unsupervised surrogate estimation.
## Using sva to test before/after batch correction.
## Finished running DE analyses, collecting outputs.
## Comparing analyses 1/6: mtc_wtu_vs_mtc_mtu
## Comparing analyses 2/6: wtc_mtu_vs_mtc_mtu
## Comparing analyses 3/6: wtc_wtu_vs_mtc_mtu
## Comparing analyses 4/6: wtc_mtu_vs_mtc_wtu
## Comparing analyses 5/6: wtc_wtu_vs_mtc_wtu
## Comparing analyses 6/6: wtc_wtu_vs_wtc_mtu
batch_write <- combine_de_tables(all_pairwise_result=batch,
keepers=keepers,
excel=paste0("excel/batch_in_model_differential-v", ver, ".xlsx"))
## Deleting the file excel/batch_in_model_differential-v20170515.xlsx before writing the tables.
## Writing a legend of columns.
## Working on 1/3: mcwu_vs_wcwu
## Found inverse table with wtc_wtu_vs_mtc_wtu
## Working on 2/3: wcmu_vs_wcwu
## Found inverse table with wtc_wtu_vs_wtc_mtu
## Working on 3/3: mcmu_vs_wcwu
## Found inverse table with wtc_wtu_vs_mtc_mtu
## Adding venn plots for mcwu_vs_wcwu.
## Limma expression coefficients for mcwu_vs_wcwu; R^2: 0.902; equation: y = 0.908x + 0.659
## Edger expression coefficients for mcwu_vs_wcwu; R^2: 0.901; equation: y = 0.907x + 1.01
## DESeq2 expression coefficients for mcwu_vs_wcwu; R^2: 0.927; equation: y = 1.01x - 0.0323
## Adding venn plots for wcmu_vs_wcwu.
## Limma expression coefficients for wcmu_vs_wcwu; R^2: 0.961; equation: y = 0.936x + 0.464
## Edger expression coefficients for wcmu_vs_wcwu; R^2: 0.963; equation: y = 0.941x + 0.484
## DESeq2 expression coefficients for wcmu_vs_wcwu; R^2: 0.967; equation: y = 0.938x + 0.653
## Adding venn plots for mcmu_vs_wcwu.
## Limma expression coefficients for mcmu_vs_wcwu; R^2: 0.884; equation: y = 0.828x + 1.39
## Edger expression coefficients for mcmu_vs_wcwu; R^2: 0.89; equation: y = 0.825x + 1.61
## DESeq2 expression coefficients for mcmu_vs_wcwu; R^2: 0.907; equation: y = 0.909x + 0.996
## Writing summary information.
## The sheet: pairwise_summary is in legend, mcwu_vs_wcwu, wcmu_vs_wcwu, mcmu_vs_wcwu, pairwise_summary.
## Attempting to add the comparison plot to pairwise_summary at row: 21 and column: 1
## Performing save of the workbook.
batch_sig <- sm(extract_significant_genes(combined=batch_write,
excel=paste0("excel/batch_in_model_significant-v", ver, ".xlsx")))
onehalf_batch_sig <- sm(extract_significant_genes(combined=batch_write, fc=0.6,
excel=paste0("excel/differential_default-onehalf_significant-v", ver, ".xlsx")))
fsva_write <- combine_de_tables(all_pairwise_result=fsva,
keepers=keepers,
excel=paste0("excel/sva_in_model_differential-v", ver, ".xlsx"))
## Deleting the file excel/sva_in_model_differential-v20170515.xlsx before writing the tables.
## Writing a legend of columns.
## Working on 1/3: mcwu_vs_wcwu
## Found inverse table with wtc_wtu_vs_mtc_wtu
## Working on 2/3: wcmu_vs_wcwu
## Found inverse table with wtc_wtu_vs_wtc_mtu
## Working on 3/3: mcmu_vs_wcwu
## Found inverse table with wtc_wtu_vs_mtc_mtu
## Adding venn plots for mcwu_vs_wcwu.
## Limma expression coefficients for mcwu_vs_wcwu; R^2: 0.932; equation: y = 0.989x + 0.264
## Edger expression coefficients for mcwu_vs_wcwu; R^2: 0.933; equation: y = 0.992x + 0.154
## DESeq2 expression coefficients for mcwu_vs_wcwu; R^2: 0.964; equation: y = 0.989x + 0.0581
## Adding venn plots for wcmu_vs_wcwu.
## Limma expression coefficients for wcmu_vs_wcwu; R^2: 0.959; equation: y = 0.927x + 0.602
## Edger expression coefficients for wcmu_vs_wcwu; R^2: 0.959; equation: y = 0.928x + 0.579
## DESeq2 expression coefficients for wcmu_vs_wcwu; R^2: 0.975; equation: y = 0.95x + 0.52
## Adding venn plots for mcmu_vs_wcwu.
## Limma expression coefficients for mcmu_vs_wcwu; R^2: 0.863; equation: y = 0.872x + 1.13
## Edger expression coefficients for mcmu_vs_wcwu; R^2: 0.865; equation: y = 0.872x + 1.01
## DESeq2 expression coefficients for mcmu_vs_wcwu; R^2: 0.948; equation: y = 0.919x + 0.799
## Writing summary information.
## The sheet: pairwise_summary is in legend, mcwu_vs_wcwu, wcmu_vs_wcwu, mcmu_vs_wcwu, pairwise_summary.
## Attempting to add the comparison plot to pairwise_summary at row: 21 and column: 1
## Performing save of the workbook.
fsva_sig <- sm(extract_significant_genes(combined=fsva_write,
excel=paste0("excel/sva_in_model_significant-v", ver, ".xlsx")))
batch_fsva <- batch_write$data$mcmu_vs_wcwu[, c("limma_logfc", "limma_adjp")]
batch_fsva <- merge(batch_fsva, fsva_write$data$mcmu_vs_wcwu[, c("limma_logfc", "limma_adjp")], by="row.names")
batch_fsva[, 1] <- as.numeric(batch_fsva[, 2])
batch_fsva[, 3] <- as.numeric(batch_fsva[, 4])
test_batch_fsva <- plot_linear_scatter(df=batch_fsva[, c(2, 4)], loess=TRUE)
## Used Bon Ferroni corrected t test(s) between columns.
test_batch_fsva$scatter
Carol has a sheet of data from the Jacobson lab against which she wishes to cross reference this data.
jac <- read.csv("external_data/jacobson.csv")
batch_table <- batch_write$data$wcmu_vs_wcwu
merged <- merge(jac, batch_table, by.x="Gene", by.y="geneid")
dim(merged)
## [1] 1533 46
merged <- merged[, c("B.upf1", "limma_logfc", "deseq_logfc", "edger_logfc")]
merged[[1]] <- as.numeric(merged[[1]])
merged[[1]] <- log2(x=merged[[1]] + 0.5)
merged[[2]] <- as.numeric(merged[[2]])
merged[[3]] <- as.numeric(merged[[3]])
merged[[4]] <- as.numeric(merged[[4]])
limma_plot <- plot_linear_scatter(df=merged[, c(1,2)], loess=TRUE)
## Used Bon Ferroni corrected t test(s) between columns.
pdf(file="images/linear_scatter_limma_jacobson.pdf")
limma_plot$scatter
dev.off()
## png
## 2
limma_plot$correlation
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 27, df = 1500, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5307 0.5989
## sample estimates:
## cor
## 0.5658
deseq_plot <- plot_linear_scatter(df=merged[, c(1,3)], loess=TRUE)
## Used Bon Ferroni corrected t test(s) between columns.
deseq_plot$scatter
deseq_plot$correlation
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 25, df = 1500, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5023 0.5734
## sample estimates:
## cor
## 0.5389
edger_plot <- plot_linear_scatter(df=merged[, c(1,4)], loess=TRUE)
## Used Bon Ferroni corrected t test(s) between columns.
edger_plot$scatter
edger_plot$correlation
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 24, df = 1500, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4916 0.5638
## sample estimates:
## cor
## 0.5287
jac <- read.csv("external_data/jacobson.csv")
sva_table <- fsva_write$data$wcmu_vs_wcwu
merged <- merge(jac, sva_table, by.x="Gene", by.y="geneid")
dim(merged)
## [1] 1533 46
merged <- merged[, c("B.upf1", "limma_logfc", "deseq_logfc", "edger_logfc")]
merged[[1]] <- as.numeric(merged[[1]])
merged[[1]] <- log2(x=merged[[1]] + 0.5)
merged[[2]] <- as.numeric(merged[[2]])
merged[[3]] <- as.numeric(merged[[3]])
merged[[4]] <- as.numeric(merged[[4]])
limma_plot <- plot_linear_scatter(df=merged[, c(1,2)], loess=TRUE)
## Used Bon Ferroni corrected t test(s) between columns.
limma_plot$scatter
limma_plot$correlation
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 22, df = 1500, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4519 0.5280
## sample estimates:
## cor
## 0.4909
deseq_plot <- plot_linear_scatter(df=merged[, c(1,3)], loess=TRUE)
## Used Bon Ferroni corrected t test(s) between columns.
deseq_plot$scatter
deseq_plot$correlation
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 23, df = 1500, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4707 0.5450
## sample estimates:
## cor
## 0.5088
edger_plot <- plot_linear_scatter(df=merged[, c(1,4)], loess=TRUE)
## Used Bon Ferroni corrected t test(s) between columns.
edger_plot$scatter
edger_plot$correlation
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 25, df = 1500, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5008 0.5721
## sample estimates:
## cor
## 0.5374
To do this we need to load the differential expression data file into a separate environment.
v1_env <- new.env()
load(paste0("../scerevisiae_cbf5v1/savefiles/differential_expression-v", ver, ".rda.xz"),
envir=v1_env)
v1_de <- v1_env$batch_write$data$mtc_vs_wtc
##v1_de <- nobatch_write$data$wtc_wtu_vs_mtc_wtu
##rm(list=c("v1_env"))
v2_de <- batch_write$data$mcwu_vs_wcwu
v1v2_merged <- merge(v1_de, v2_de, by="row.names")
v1v2_merged <- v1v2_merged[, c("limma_logfc.x", "limma_logfc.y")]
v1v2_scatter <- plot_linear_scatter(df=v1v2_merged, loess=TRUE)
## Used Bon Ferroni corrected t test(s) between columns.
v1v2_scatter$scatter
v1v2_scatter$correlation
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 2.4, df = 5700, p-value = 0.02
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.00563 0.05761
## sample estimates:
## cor
## 0.03164
cor.test(v1v2_merged[, 1], v1v2_merged[, 2], method="spearman")
## Warning in cor.test.default(v1v2_merged[, 1], v1v2_merged[, 2], method = "spearman"):
## Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: v1v2_merged[, 1] and v1v2_merged[, 2]
## S = 3.1e+10, p-value = 0.4
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.01145
## Well at least the direction is correct...
library("pander")
pander(sessionInfo())
R version 3.3.3 (2017-03-06)
**Platform:** x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C
attached base packages: stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: pander(v.0.6.0), Vennerable(v.3.1.0.9000), ruv(v.0.9.6) and hpgltools(v.2017.01)
loaded via a namespace (and not attached): nlme(v.3.1-131), bitops(v.1.0-6), devtools(v.1.13.2), doParallel(v.1.0.10), RColorBrewer(v.1.1-2), rprojroot(v.1.2), GenomeInfoDb(v.1.10.3), tools(v.3.3.3), backports(v.1.0.5), R6(v.2.2.1), rpart(v.4.1-11), Hmisc(v.4.0-3), DBI(v.0.6-1), lazyeval(v.0.2.0), BiocGenerics(v.0.20.0), mgcv(v.1.8-17), colorspace(v.1.3-2), nnet(v.7.3-12), withr(v.1.0.2), gridExtra(v.2.2.1), DESeq2(v.1.14.1), compiler(v.3.3.3), preprocessCore(v.1.36.0), graph(v.1.52.0), Biobase(v.2.34.0), htmlTable(v.1.9), xml2(v.1.1.1), rtracklayer(v.1.34.2), labeling(v.0.3), scales(v.0.4.1), checkmate(v.1.8.2), DEoptimR(v.1.0-8), robustbase(v.0.92-7), genefilter(v.1.56.0), RBGL(v.1.50.0), commonmark(v.1.2), stringr(v.1.2.0), digest(v.0.6.12), Rsamtools(v.1.26.2), foreign(v.0.8-68), rmarkdown(v.1.5), XVector(v.0.14.1), base64enc(v.0.1-3), htmltools(v.0.3.6), limma(v.3.30.13), htmlwidgets(v.0.8), rlang(v.0.1.1), RSQLite(v.1.1-2), BiocParallel(v.1.8.2), gtools(v.3.5.0), acepack(v.1.4.1), RCurl(v.1.95-4.8), magrittr(v.1.5), Formula(v.1.2-1), Matrix(v.1.2-10), Rcpp(v.0.12.11), munsell(v.0.4.3), S4Vectors(v.0.12.2), stringi(v.1.1.5), yaml(v.2.1.14), edgeR(v.3.16.5), SummarizedExperiment(v.1.4.0), zlibbioc(v.1.20.0), plyr(v.1.8.4), grid(v.3.3.3), parallel(v.3.3.3), ggrepel(v.0.6.5), crayon(v.1.3.2), lattice(v.0.20-35), Biostrings(v.2.42.1), splines(v.3.3.3), GenomicFeatures(v.1.26.4), annotate(v.1.52.1), locfit(v.1.5-9.1), knitr(v.1.16), GenomicRanges(v.1.26.4), corpcor(v.1.6.9), reshape2(v.1.4.2), geneplotter(v.1.52.0), codetools(v.0.2-15), biomaRt(v.2.30.0), stats4(v.3.3.3), XML(v.3.98-1.7), evaluate(v.0.10), latticeExtra(v.0.6-28), data.table(v.1.10.4), foreach(v.1.4.3), testthat(v.1.0.2), gtable(v.0.2.0), ggplot2(v.2.2.1), openxlsx(v.4.0.17), xtable(v.1.8-2), roxygen2(v.6.0.1), survival(v.2.41-3), tibble(v.1.3.3), iterators(v.1.0.8), GenomicAlignments(v.1.10.1), AnnotationDbi(v.1.36.2), memoise(v.1.1.0), IRanges(v.2.8.2), cluster(v.2.0.6) and sva(v.3.22.0)