It appears that it is possible though somewhat difficult to apply batch estimations generated by sva to the model given to DESeq/EdgeR/limma. In the case of limma it is fairly simple, but in the other two it is a bit more difficult. There is a nice discussion of this at: https://www.biostars.org/p/156186/ I am attempting to apply that logic to this data with limited success.
hs_contrasts <- list(
"macro_chr-sh" = c("chr","sh"),
"macro_chr-nil" = c("chr","uninf"),
"macro_sh-nil" = c("sh", "uninf"))
## Set up the data used in the comparative contrast sets.
Print a reminder of what we can expect when doing this with no batch information.
hs_macr_lowfilt <- sm(normalize_expt(hs_cds_macr, filter=TRUE))
hs_lowfilt_pca <- sm(plot_pca(hs_cds_macr, transform="log2"))
hs_lowfilt_pca$plot
hs_macr_nobatch <- sm(all_pairwise(input=hs_cds_macr, model_batch=FALSE, parallel=FALSE,
limma_method="robust"))
## wow, all tools including basic agree almost completely
medians_by_condition <- hs_macr_nobatch$basic$medians
excel_file <- glue::glue("excel/{rundate}_hs_macr_nobatch_contr-v{ver}.xlsx")
hs_macr_nobatch_tables <- sm(combine_de_tables(hs_macr_nobatch,
excel=excel_file,
keepers=hs_contrasts,
extra_annot=medians_by_condition))
hs_lowfilt_batch_pca <- sm(plot_pca(hs_cds_macr, transform="log2", batch="limma"))
hs_lowfilt_batch_pca$plot
In this attempt, we add a batch factor in the experimental model and see how it does.
## Here just let all_pairwise run on filtered data and do its normal ~ 0 + condition + batch analyses
hs_macr_batch <- sm(all_pairwise(input=hs_cds_macr, limma_method="robust", parallel=FALSE))
medians_by_condition <- hs_macr_batch$basic$medians
excel_file <- glue::glue("excel/{rundate}_hs_macr_batchmodel_contr-v{ver}.xlsx")
hs_macr_batch_tables <- sm(combine_de_tables(
hs_macr_batch,
keepers=hs_contrasts,
extra_annot=medians_by_condition,
include_limma=FALSE, include_edger=FALSE, include_basic=FALSE, include_ebseq=FALSE,
excel=excel_file))
excel_file <- glue::glue("excel/{rundate}_hs_macr_batchmodel_sig-v{ver}.xlsx")
hs_macr_batch_sig <- sm(extract_significant_genes(
hs_macr_batch_tables, excel=excel_file,
according_to="deseq"))
excel_file <- glue::glue("excel/{rundate}_hs_macr_batchmodel_abund-v{ver}.xlsx")
hs_macr_batch_abun <- sm(extract_abundant_genes(
hs_macr_batch_tables, excel=excel_file,
according_to="deseq"))
s2_contrasts <- list(
"macro_chr-sh" = c("chr","sh"))
excel_file <- glue::glue("excel/{rundate}_table-s2_hs_macr_batchmodel_contr-v{ver}.xlsx")
table_s2 <- sm(combine_de_tables(
hs_macr_batch,
excel=excel_file,
keepers=s2_contrasts,
include_basic=FALSE, include_limma=FALSE,
include_ebseq=FALSE, include_edger=FALSE))
excel_file <- glue::glue("excel/{rundate}_table-s3_hs_macr_batchmodel_sig-v{ver}.xlsx")
table_s3 <- sm(extract_significant_genes(
table_s2,
excel=excel_file,
according_to="deseq"))
chosen_table <- table_s2[["data"]][[1]]
head(chosen_table)
## hgncsymbol ensembltranscriptid ensemblgeneid
## ENSG00000000003 TSPAN6 ENST00000373020 ENSG00000000003
## ENSG00000000005 TNMD ENST00000373031 ENSG00000000005
## ENSG00000000419 DPM1 ENST00000371582 ENSG00000000419
## ENSG00000000457 SCYL3 ENST00000367770 ENSG00000000457
## ENSG00000000460 C1orf112 ENST00000286031 ENSG00000000460
## ENSG00000000938 FGR ENST00000374003 ENSG00000000938
## description
## ENSG00000000003 tetraspanin 6 [Source:HGNC Symbol;Acc:HGNC:11858]
## ENSG00000000005 tenomodulin [Source:HGNC Symbol;Acc:HGNC:17757]
## ENSG00000000419 dolichyl-phosphate mannosyltransferase subunit 1, catalytic [Source:HGNC Symbol;Acc:HGNC:3005]
## ENSG00000000457 SCY1 like pseudokinase 3 [Source:HGNC Symbol;Acc:HGNC:19285]
## ENSG00000000460 chromosome 1 open reading frame 112 [Source:HGNC Symbol;Acc:HGNC:25565]
## ENSG00000000938 FGR proto-oncogene, Src family tyrosine kinase [Source:HGNC Symbol;Acc:HGNC:3697]
## genebiotype found adc adipocytes astrocytes bcells
## ENSG00000000003 protein_coding FALSE FALSE FALSE FALSE FALSE
## ENSG00000000005 protein_coding FALSE FALSE FALSE FALSE FALSE
## ENSG00000000419 protein_coding 0 FALSE FALSE FALSE FALSE
## ENSG00000000457 protein_coding FALSE FALSE FALSE FALSE FALSE
## ENSG00000000460 protein_coding 0 FALSE FALSE FALSE FALSE
## ENSG00000000938 protein_coding 0 FALSE FALSE FALSE FALSE
## basophils cd4.memory.tcells cd4.naive.tcells cd4.tcells
## ENSG00000000003 FALSE FALSE FALSE FALSE
## ENSG00000000005 FALSE FALSE FALSE FALSE
## ENSG00000000419 FALSE TRUE FALSE FALSE
## ENSG00000000457 FALSE FALSE FALSE FALSE
## ENSG00000000460 FALSE FALSE FALSE FALSE
## ENSG00000000938 TRUE FALSE FALSE FALSE
## cd4.tcm cd4.tem cd8.naive.tcells cd8.tcells cd8.tcm
## ENSG00000000003 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000005 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000419 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000457 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000460 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000938 FALSE FALSE FALSE FALSE FALSE
## cd8.tem cdc chondrocytes classswitched.memory.bcells
## ENSG00000000003 FALSE FALSE FALSE FALSE
## ENSG00000000005 FALSE FALSE FALSE FALSE
## ENSG00000000419 FALSE FALSE FALSE FALSE
## ENSG00000000457 FALSE FALSE FALSE FALSE
## ENSG00000000460 FALSE FALSE FALSE FALSE
## ENSG00000000938 FALSE FALSE FALSE FALSE
## clp cmp dc endothelial.cells eosinophils
## ENSG00000000003 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000005 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000419 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000457 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000460 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000938 FALSE FALSE FALSE FALSE FALSE
## epithelial.cells erythrocytes fibroblasts gmp
## ENSG00000000003 FALSE FALSE FALSE FALSE
## ENSG00000000005 FALSE FALSE FALSE FALSE
## ENSG00000000419 FALSE FALSE FALSE FALSE
## ENSG00000000457 FALSE FALSE FALSE FALSE
## ENSG00000000460 FALSE TRUE FALSE FALSE
## ENSG00000000938 FALSE FALSE FALSE FALSE
## hepatocytes hsc idc keratinocytes ly.endothelial.cells
## ENSG00000000003 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000005 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000419 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000457 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000460 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000938 FALSE FALSE FALSE FALSE FALSE
## macrophages macrophages.m1 macrophages.m2 mast.cells
## ENSG00000000003 FALSE FALSE FALSE FALSE
## ENSG00000000005 FALSE FALSE FALSE FALSE
## ENSG00000000419 FALSE FALSE FALSE FALSE
## ENSG00000000457 FALSE FALSE FALSE FALSE
## ENSG00000000460 FALSE FALSE FALSE FALSE
## ENSG00000000938 TRUE FALSE TRUE FALSE
## megakaryocytes melanocytes memory.bcells mep
## ENSG00000000003 FALSE FALSE FALSE FALSE
## ENSG00000000005 FALSE FALSE FALSE FALSE
## ENSG00000000419 FALSE FALSE FALSE FALSE
## ENSG00000000457 FALSE FALSE FALSE FALSE
## ENSG00000000460 FALSE FALSE FALSE FALSE
## ENSG00000000938 FALSE FALSE FALSE FALSE
## mesangial.cells monocytes mpp msc mv.endothelial.cells
## ENSG00000000003 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000005 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000419 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000457 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000460 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000938 FALSE TRUE FALSE FALSE FALSE
## myocytes naive.bcells neurons neutrophils nk.cells nkt
## ENSG00000000003 FALSE FALSE FALSE FALSE FALSE FALSE
## ENSG00000000005 FALSE FALSE FALSE FALSE FALSE FALSE
## ENSG00000000419 FALSE FALSE FALSE FALSE FALSE FALSE
## ENSG00000000457 FALSE FALSE FALSE FALSE FALSE FALSE
## ENSG00000000460 FALSE FALSE FALSE FALSE FALSE FALSE
## ENSG00000000938 FALSE FALSE FALSE FALSE FALSE FALSE
## osteoblast pdc pericytes plasma.cells platelets
## ENSG00000000003 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000005 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000419 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000457 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000460 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000938 FALSE FALSE FALSE FALSE FALSE
## preadipocytes pro.bcells sebocytes skeletal.muscle
## ENSG00000000003 FALSE FALSE FALSE FALSE
## ENSG00000000005 FALSE FALSE FALSE FALSE
## ENSG00000000419 FALSE FALSE FALSE FALSE
## ENSG00000000457 FALSE FALSE FALSE FALSE
## ENSG00000000460 FALSE FALSE FALSE FALSE
## ENSG00000000938 FALSE FALSE FALSE FALSE
## smooth.muscle tgd.cells th1.cells th2.cells tregs
## ENSG00000000003 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000005 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000419 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000457 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000460 FALSE FALSE FALSE FALSE FALSE
## ENSG00000000938 FALSE FALSE FALSE FALSE FALSE
## xcelltypes deseq_logfc deseq_adjp deseq_basemean
## ENSG00000000003 FALSE -1.4690 1.00000 1.347
## ENSG00000000005 FALSE 0.0000 1.00000 0.000
## ENSG00000000419 FALSE 0.0913 0.73270 368.500
## ENSG00000000457 FALSE -0.1052 0.79820 188.800
## ENSG00000000460 FALSE -0.3300 0.29490 156.600
## ENSG00000000938 FALSE 0.8099 0.01497 4846.000
## deseq_lfcse deseq_stat deseq_p deseq_adjp_fdr
## ENSG00000000003 1.8780 -0.7826 0.4339 1.000e+00
## ENSG00000000005 0.0000 0.0000 1.0000 1.000e+00
## ENSG00000000419 0.1413 0.6460 0.5183 1.000e+00
## ENSG00000000457 0.2085 -0.5045 0.6139 1.000e+00
## ENSG00000000460 0.2085 -1.5830 0.1135 1.000e+00
## ENSG00000000938 0.2535 3.1950 0.0014 5.743e-02
vol <- plot_volcano_de(table=chosen_table,
color_by="state",
fc_col="deseq_logfc",
p_col="deseq_adjp",
shapes_by_state=FALSE,
line_position="top")
## The color list must have 4, setting it to the default.
## Going to write the image to: images/Figure_1c.pdf when dev.off() is called.
## png
## 2
hs_lowfilt_svaseq_pca <- sm(plot_pca(hs_cds_macr, transform="log2", batch="svaseq", filter=TRUE))
hs_lowfilt_svaseq_pca$plot
hs_cds_macr_lowfilt <- sm(normalize_expt(hs_cds_macr, filter=TRUE))
## Here just let all_pairwise run on filtered data and do its normal ~ 0 + condition + batch analyses
hs_macr_sva <- sm(all_pairwise(
input=hs_cds_macr_lowfilt,
model_batch="svaseq",
limma_method="robust"))
medians_by_condition <- hs_macr_sva$basic$medians
excel_file <- glue::glue("excel/{rundate}_hs_macr_sva_contr-v{ver}.xlsx")
hs_macr_sva_tables <- sm(combine_de_tables(
hs_macr_sva,
excel=excel_file,
keepers=hs_contrasts,
extra_annot=medians_by_condition))
excel_file <- glue::glue("excel/{rundate}_hs_macr_sva_sig-v{ver}.xlsx")
hs_macr_sva_sig <- sm(extract_significant_genes(
hs_macr_sva_tables,
excel=excel_file))
hs_macr_sva_ma_limma <- extract_de_plots(
pairwise=hs_macr_sva,
type="limma",
table="sh_vs_chr")
## hmm I got the RUVr error again, but when I ran it manually did not.
## Even more strangely, if I just run the same thing again, no error...
testme <- try(all_adjusters(input=hs_macr_lowfilt, estimate_type="ruv_residuals"), silent=TRUE)
## batch_counts: Before batch/surrogate estimation, 92 entries are x<=0.
## The be method chose 1 surrogate variable(s).
## Attempting ruvseq residual surrogate estimation with 1 surrogates.
hs_lowfilt_ruvresid_pca <- sm(plot_pca(hs_macr_lowfilt, transform="log2", batch="ruv_residuals"))
hs_lowfilt_ruvresid_pca$plot
## Here just let all_pairwise run on filtered data and do its normal ~ 0 + condition + batch analyses
## Bizarrely, sometimes if one runs this, it gives an error "Error in (function (classes, fdef, mtable) : unable to find an inherited method for function 'RUVr' for signature '"matrix", "logical", "numeric", "NULL"'" -- however, if one then simply runs it again it works fine.
## I am going to assume that it is because I do not explicitly invoke the library.
## library(ruv) ## hopefully a small code change made this not needed.
testme <- all_adjusters(input=hs_macr_lowfilt, estimate_type="ruv_residuals")
## batch_counts: Before batch/surrogate estimation, 92 entries are x<=0.
## The be method chose 1 surrogate variable(s).
## Attempting ruvseq residual surrogate estimation with 1 surrogates.
hs_macr_ruvres <- sm(all_pairwise(
input=hs_macr_lowfilt,
model_batch="ruv_residuals",
limma_method="robust"))
hs_lowfilt_pca_pca <- sm(plot_pca(hs_macr_lowfilt, transform="log2", batch="pca"))
hs_lowfilt_pca_pca$plot
## Here just let all_pairwise run on filtered data and do its normal ~ 0 + condition + batch analyses
hs_macr_pca <- sm(all_pairwise(
input=hs_macr_lowfilt,
model_batch="pca",
limma_method="robust"))
hs_lowfilt_ruvemp_pca <- sm(plot_pca(hs_macr_lowfilt, transform="log2", batch="ruv_empirical"))
hs_lowfilt_ruvemp_pca$plot
hs_macr_ruvemp <- sm(all_pairwise(
input=hs_macr_lowfilt,
model_batch="ruv_empirical",
limma_method="robust"))
Then repeat with the batch-corrected data and see the differences.
hs_lowfilt_combat_pca <- sm(plot_pca(hs_macr_lowfilt, transform="log2", batch="combat_noprior"))
hs_lowfilt_combat_pca$plot
hs_macr_combat_norm <- sm(normalize_expt(hs_macr_lowfilt, batch="combat_noscale"))
hs_macr_combat <- sm(all_pairwise(
input=hs_macr_combat_norm,
force=TRUE,
limma_method="robust"))
medians_by_condition <- hs_macr_combat$basic$medians
excel_file <- glue::glue("excel/{rundate}_hs_macr_combat_contr-v{ver}.xlsx")
hs_macr_combat_tables <- sm(combine_de_tables(
hs_macr_combat,
excel=excel_file,
keepers=hs_contrasts,
extra_annot=medians_by_condition))
excel_file <- glue::glue("excel/{rundate}_hs_macr_combat_contr-v{ver}.xlsx")
hs_macr_combat_sig <- sm(extract_significant_genes(
hs_macr_combat_tables,
excel=excel_file))
hs_macr_combat_ma_limma <- extract_de_plots(
pairwise=hs_macr_combat,
type="limma",
table="sh_vs_chr")
hs_macr_combat_ma_limma$ma$plot
hs_macr_combat_ma_edger <- extract_de_plots(
pairwise=hs_macr_combat,
type="edger",
table="sh_vs_chr")
hs_macr_combat_ma_edger$ma$plot
hs_macr_combat_ma_deseq <- extract_de_plots(
pairwise=hs_macr_combat,
type="deseq",
table="sh_vs_chr")
hs_macr_combat_ma_deseq$ma$plot
I have 4 methods of performing this differential expression analysis. Each one comes with a set of metrics defining ‘significant’. Perhaps I can make a table of the # of genes defined as significant by contrast for each. In addition it may be worth while to do a scatter plots of the logFCs between these comparisons and see how well they agree?
## change_counts_up change_counts_down
## macro_chr-sh 2036 560
## macro_chr-nil 949 1176
## macro_sh-nil 458 1791
##hs_macr_batch_tables$significant$limma$counts
##hs_macr_sva_tables$significant$limma$counts
##hs_macr_ruvres_tables$significant$limma$counts
##hs_macr_pca_tables$significant$limma$counts
##hs_macr_ruvemp_tables$significant$limma$counts
##hs_macr_combat_tables$significant$limma$counts
hs_macr_nobatch_basic <- merge(
hs_macr_nobatch$deseq$all_tables$sh_vs_chr,
hs_macr_batch$basic$all_tables$sh_vs_chr,
by="row.names")
rownames(hs_macr_nobatch_basic) <- hs_macr_nobatch_basic[["Row.names"]]
hs_macr_nobatch_logfc <- hs_macr_nobatch_basic[, c("logFC.x", "logFC.y")]
colnames(hs_macr_nobatch_logfc) <- c("nobatch", "basic")
lfc_nb_b <- sm(plot_linear_scatter(hs_macr_nobatch_logfc, pretty_colors=FALSE))
lfc_nb_b$scatter
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 370, df = 13000, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9545 0.9574
## sample estimates:
## cor
## 0.956
hs_macr_nobatch_p <- hs_macr_nobatch_basic[, c("P.Value","p")]
hs_macr_nobatch_p[[2]] <- as.numeric(hs_macr_nobatch_p[[2]])
colnames(hs_macr_nobatch_p) <- c("nobatch","basic")
hs_macr_nobatch_p <- -1 * log(hs_macr_nobatch_p)
hs_macr_p_nb_b <- sm(plot_linear_scatter(hs_macr_nobatch_p, pretty_colors=FALSE))
hs_macr_p_nb_b$scatter
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 81, df = 13000, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5683 0.5911
## sample estimates:
## cor
## 0.5798
hs_macr_sva_batch <- merge(
hs_macr_sva$deseq$all_tables$sh_vs_chr,
hs_macr_batch$deseq$all_tables$sh_vs_chr,
by="row.names")
rownames(hs_macr_sva_batch) <- hs_macr_sva_batch[["Row.names"]]
hs_macr_sva_logfc <- hs_macr_sva_batch[, c("logFC.x","logFC.y")]
colnames(hs_macr_sva_logfc) <- c("sva","batch")
hs_macr_lfc_b_s <- sm(plot_linear_scatter(hs_macr_sva_logfc, pretty_colors=FALSE))
hs_macr_lfc_b_s$scatter
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 71, df = 13000, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5176 0.5423
## sample estimates:
## cor
## 0.5301
Try putting some information of the p-values with the comparative log2fc
lfc_b_s <- hs_macr_sva_batch[, c("logFC.x", "logFC.y", "P.Value.x", "P.Value.y")]
colnames(lfc_b_s) <- c("l2fcsva", "l2fcbatch", "psva", "pbatch")
hs_macr_lfc_b_s$scatter
cutoff <- 0.1
lfc_b_s$state <- ifelse(lfc_b_s$psva > cutoff & lfc_b_s$pbatch > cutoff, "bothinsig",
ifelse(lfc_b_s$psva <= cutoff & lfc_b_s$pbatch <= cutoff, "bothsig",
ifelse(lfc_b_s$psva <= cutoff, "svasig", "batchsig")))
##lfcp_b_s$lfcstate <- ifelse(lfcp_b_s$l2fcsva >= 0.75 & lfcp_b_s$l2fcbatch, "", "")
num_bothinsig <- sum(lfc_b_s$state == "bothinsig")
num_bothsig <- sum(lfc_b_s$state == "bothsig")
num_svasig <- sum(lfc_b_s$state == "svasig")
num_batchsig <- sum(lfc_b_s$state == "batchsig")
library(ggplot2)
aes_color = "(l2fcsva >= 0.75 | l2fcsva <= -0.75 | l2fcbatch >= 0.75 | l2fcbatch <= -0.75)"
plt <- ggplot2::ggplot(lfc_b_s, aes_string(x="l2fcsva", y="l2fcbatch")) +
## ggplot2::geom_point(stat="identity", size=2, alpha=0.2, aes_string(shape="as.factor(aes_color)", colour="as.factor(state)", fill="as.factor(state)")) +
ggplot2::geom_abline(colour="blue", slope=1, intercept=0, size=0.5) +
ggplot2::geom_hline(yintercept=c(-0.75, 0.75), color="red", size=0.5) +
ggplot2::geom_vline(xintercept=c(-0.75, 0.75), color="red", size=0.5) +
ggplot2::geom_point(stat="identity", size=2, alpha=0.2, aes_string(colour="as.factor(state)", fill="as.factor(state)")) +
ggplot2::scale_color_manual(name="state", values=c("bothinsig"="grey", "bothsig"="forestgreen", "svasig"="darkred", "batchsig"="darkblue")) +
ggplot2::scale_fill_manual(name="state", values=c("bothinsig"="grey", "bothsig"="forestgreen", "svasig"="darkred", "batchsig"="darkblue"),
labels=c(
paste0("Both InSig.: ", num_bothinsig),
paste0("Both Sig.: ", num_bothsig),
paste0("Sva Sig.: ", num_svasig),
paste0("Batch Sig.: ", num_batchsig)),
guide=ggplot2::guide_legend(override.aes=aes(size=3, fill="grey"))) +
ggplot2::guides(fill=ggplot2::guide_legend(override.aes=list(size=3))) +
ggplot2::theme_bw()
plt
hs_macr_batch_ruvresid_deseq <- merge(
hs_macr_ruvres$deseq$all_tables$sh_vs_chr,
hs_macr_batch$basic$all_tables$sh_vs_chr,
by="row.names")
rownames(hs_macr_batch_ruvresid_deseq) <- hs_macr_batch_ruvresid_deseq[["Row.names"]]
hs_macr_batch_ruvresid_logfc <- hs_macr_batch_ruvresid_deseq[, c("logFC.x","logFC.y")]
colnames(hs_macr_batch_ruvresid_logfc) <- c("nobatch","basic")
lfc_ruv_bat <- plot_linear_scatter(hs_macr_batch_ruvresid_logfc, pretty_colors=FALSE)
## Warning in plot_multihistogram(df): NAs introduced by coercion
## Used Bon Ferroni corrected t test(s) between columns.
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 400, df = 13000, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9601 0.9627
## sample estimates:
## cor
## 0.9614
hs_macr_nobatch_batch <- merge(
hs_macr_nobatch$limma$all_tables$sh_vs_chr,
hs_macr_batch$limma$all_tables$sh_vs_chr,
by="row.names")
rownames(hs_macr_nobatch_batch) <- hs_macr_nobatch_batch[["Row.names"]]
hs_macr_nobatch_batch <- hs_macr_nobatch_batch[, c("logFC.x","logFC.y")]
colnames(hs_macr_nobatch_batch) <- c("nobatch","batch")
nb_b <- plot_linear_scatter(hs_macr_nobatch_batch, pretty_colors=FALSE)
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): find_scale() did not converge
## in 'maxit.scale' (= 200) iterations with tol=1e-10, last rel.diff=0
## Warning in lmrob.S(x, y, control = control): S refinements did not converge
## (to refine.tol=1e-07) in 200 (= k.max) steps
## Warning in lmrob.S(x, y, control = control): S refinements did not converge
## (to refine.tol=1e-07) in 200 (= k.max) steps
## Warning in plot_multihistogram(df): NAs introduced by coercion
## Used Bon Ferroni corrected t test(s) between columns.
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 110, df = 51000, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4196 0.4338
## sample estimates:
## cor
## 0.4267
hs_macr_batch_sva <- merge(
hs_macr_batch$limma$all_tables$sh_vs_chr,
hs_macr_sva$limma$all_tables$sh_vs_chr,
by="row.names")
rownames(hs_macr_batch_sva) <- hs_macr_batch_sva[["Row.names"]]
hs_macr_batch_sva <- hs_macr_batch_sva[, c("logFC.x","logFC.y")]
colnames(hs_macr_batch_sva) <- c("batch","sva")
b_s <- plot_linear_scatter(hs_macr_batch_sva, pretty_colors=FALSE)
## Warning in plot_multihistogram(df): NAs introduced by coercion
## Used Bon Ferroni corrected t test(s) between columns.
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 70, df = 13000, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5092 0.5342
## sample estimates:
## cor
## 0.5218
hs_macr_nobatch_batch <- merge(
hs_macr_nobatch$edger$all_tables$sh_vs_chr,
hs_macr_batch$edger$all_tables$sh_vs_chr,
by="row.names")
rownames(hs_macr_nobatch_batch) <- hs_macr_nobatch_batch[["Row.names"]]
hs_macr_nobatch_batch <- hs_macr_nobatch_batch[, c("logFC.x","logFC.y")]
colnames(hs_macr_nobatch_batch) <- c("nobatch","batch")
nb_b <- sm(plot_linear_scatter(hs_macr_nobatch_batch, pretty_colors=FALSE))
nb_b$scatter
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 260, df = 51000, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7567 0.7640
## sample estimates:
## cor
## 0.7604
hs_macr_batch_sva <- merge(
hs_macr_batch$edger$all_tables$sh_vs_chr,
hs_macr_sva$edger$all_tables$sh_vs_chr,
by="row.names")
rownames(hs_macr_batch_sva) <- hs_macr_batch_sva[["Row.names"]]
hs_macr_batch_sva <- hs_macr_batch_sva[, c("logFC.x","logFC.y")]
colnames(hs_macr_batch_sva) <- c("batch","sva")
b_s <- plot_linear_scatter(hs_macr_batch_sva, pretty_colors=FALSE)
## Warning in plot_multihistogram(df): NAs introduced by coercion
## Used Bon Ferroni corrected t test(s) between columns.
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 71, df = 13000, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5183 0.5429
## sample estimates:
## cor
## 0.5307
hs_macr_nobatch_batch <- merge(
hs_macr_nobatch$deseq$all_tables$sh_vs_chr,
hs_macr_batch$deseq$all_tables$sh_vs_chr,
by="row.names")
rownames(hs_macr_nobatch_batch) <- hs_macr_nobatch_batch[["Row.names"]]
hs_macr_nobatch_batch <- hs_macr_nobatch_batch[, c("logFC.x","logFC.y")]
colnames(hs_macr_nobatch_batch) <- c("nobatch","batch")
nb_b <- sm(plot_linear_scatter(hs_macr_nobatch_batch, pretty_colors=FALSE))
nb_b$scatter
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 150, df = 51000, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5443 0.5564
## sample estimates:
## cor
## 0.5503
hs_macr_batch_sva <- merge(
hs_macr_batch$deseq$all_tables$sh_vs_chr,
hs_macr_sva$deseq$all_tables$sh_vs_chr,
by="row.names")
rownames(hs_macr_batch_sva) <- hs_macr_batch_sva[["Row.names"]]
hs_macr_batch_sva <- hs_macr_batch_sva[, c("logFC.x", "logFC.y")]
colnames(hs_macr_batch_sva) <- c("batch", "sva")
b_s <- sm(plot_linear_scatter(hs_macr_batch_sva, pretty_colors=FALSE))
b_s$scatter
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 71, df = 13000, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5176 0.5423
## sample estimates:
## cor
## 0.5301
In ‘macrophage_estimation’, we did a series of analyses to try to pick out some of the surrogate variables in the data. Now we will perform a set of differential expression analyses using the results from that. Since the ‘batch’ element of the data is reasonably well explained, we will not abuse the data with sva/combat, but instead include batch in the experimental model.
It appears that it is possible though somewhat difficult to apply batch estimations generated by sva to the model given to DESeq/EdgeR/limma. In the case of limma it is fairly simple, but in the other two it is a bit more difficult. There is a nice discussion of this at: https://www.biostars.org/p/156186/ I am attempting to apply that logic to this data with limited success.
lp_contrasts <- list(
"macro_chr-sh" = c("chr", "sh"))
lp_macr_norm <- sm(normalize_expt(lp_macr, filter=TRUE, convert="cpm", norm="quant"))
lp_macr_combat_norm <- sm(normalize_expt(lp_macr, filter=TRUE, norm="quant",
low_to_zero=TRUE, batch="combat"))
lp_macr_lowfilt <- sm(normalize_expt(lp_macr, filter=TRUE))
## Set up the data used in the 3 comparative contrast sets.
lp_macr_nobatch <- sm(all_pairwise(lp_macr_lowfilt, limma_method="robust", model_batch=FALSE))
## wow, all tools including basic agree almost completely
medians_by_condition <- lp_macr_nobatch$basic$medians
lp_macr_nobatch_tables <- sm(combine_de_tables(
lp_macr_nobatch,
excel=paste0("excel/lp_macr_nobatch-v", ver, ".xlsx"),
keepers=lp_contrasts,
extra_annot=medians_by_condition))
In this attempt, we add a batch factor in the experimental model and see how it does.
## Here just let all_pairwise run on filtered data and do its normal ~ 0 + condition + batch analyses
lp_macr_batch <- sm(all_pairwise(lp_macr_lowfilt, limma_method="robust"))
medians_by_condition <- lp_macr_batch$basic$medians
lp_macr_batch_tables <- sm(combine_de_tables(
lp_macr_batch,
excel=paste0("excel/lp_macr_batchmodel-v", ver, ".xlsx"),
keepers=lp_contrasts,
extra_annot=medians_by_condition))
## Here just let all_pairwise run on filtered data and do its normal ~ 0 + condition + batch analyses
lp_macr_sva <- sm(all_pairwise(lp_macr_lowfilt, limma_method="robust", model_batch="sva"))
medians_by_condition <- lp_macr_sva$basic$medians
lp_macr_sva_tables <- sm(combine_de_tables(
lp_macr_sva,
excel=paste0("excel/lp_macr_sva-v", ver, ".xlsx"),
keepers=lp_contrasts,
extra_annot=medians_by_condition))
I have 4 methods of performing this differential expression analysis. Each one comes with a set of metrics defining ‘significant’. Perhaps I can make a table of the # of genes defined as significant by contrast for each. In addition it may be worth while to do a scatter plots of the logFCs between these comparisons and see how well they agree?
lp_macr_nobatch_basic <- merge(
lp_macr_nobatch$deseq$all_tables$sh_vs_chr,
lp_macr_batch$basic$all_tables$sh_vs_chr,
by="row.names")
rownames(lp_macr_nobatch_basic) <- lp_macr_nobatch_basic[["Row.names"]]
lp_macr_nobatch_logfc <- lp_macr_nobatch_basic[, c("logFC.x", "logFC.y")]
colnames(lp_macr_nobatch_logfc) <- c("nobatch","basic")
lfc_nb_b <- sm(plot_linear_scatter(lp_macr_nobatch_logfc, pretty_colors=FALSE))
lfc_nb_b$scatter
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 130, df = 8700, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7992 0.8139
## sample estimates:
## cor
## 0.8067
lp_macr_nobatch_p <- lp_macr_nobatch_basic[, c("P.Value","p")]
lp_macr_nobatch_p[[2]] <- as.numeric(lp_macr_nobatch_p[[2]])
colnames(lp_macr_nobatch_p) <- c("nobatch","basic")
lp_macr_nobatch_p <- -1 * log(lp_macr_nobatch_p)
p_nb_b <- sm(plot_linear_scatter(lp_macr_nobatch_p, pretty_colors=FALSE))
p_nb_b$scatter
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 100, df = 8700, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7386 0.7572
## sample estimates:
## cor
## 0.748
lp_macr_sva_batch <- merge(
lp_macr_sva$deseq$all_tables$sh_vs_chr,
lp_macr_batch$deseq$all_tables$sh_vs_chr,
by="row.names")
rownames(lp_macr_sva_batch) <- lp_macr_sva_batch[["Row.names"]]
lp_macr_sva_logfc <- lp_macr_sva_batch[, c("logFC.x","logFC.y")]
colnames(lp_macr_sva_logfc) <- c("sva","batch")
lfc_b_s <- sm(plot_linear_scatter(lp_macr_sva_logfc, pretty_colors=FALSE))
lfc_b_s$scatter
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 80, df = 8700, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6378 0.6622
## sample estimates:
## cor
## 0.6502
lp_macr_sva_p <- lp_macr_sva_batch[, c("P.Value.x","P.Value.y")]
lp_macr_sva_p[[2]] <- as.numeric(lp_macr_sva_p[[2]])
colnames(lp_macr_sva_p) <- c("sva","batch")
lp_macr_sva_p <- -1 * log(lp_macr_sva_p)
p_b_s <- sm(plot_linear_scatter(lp_macr_sva_p, pretty_colors=FALSE))
p_b_s$scatter
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 43, df = 8700, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.4051 0.4397
## sample estimates:
## cor
## 0.4225
Try putting some information of the p-values with the comparative log2fc
lfcp_b_s <- lp_macr_sva_batch[, c("logFC.x", "logFC.y", "P.Value.x", "P.Value.y")]
colnames(lfcp_b_s) <- c("l2fcsva", "l2fcbatch", "psva", "pbatch")
lfc_b_s$scatter
cutoff <- 0.1
lfcp_b_s$state <- ifelse(lfcp_b_s$psva > cutoff & lfcp_b_s$pbatch > cutoff, "bothinsig",
ifelse(lfcp_b_s$psva <= cutoff & lfcp_b_s$pbatch <= cutoff, "bothsig",
ifelse(lfcp_b_s$psva <= cutoff, "svasig", "batchsig")))
##lfcp_b_s$lfcstate <- ifelse(lfcp_b_s$l2fcsva >= 0.75 & lfcp_b_s$l2fcbatch, "", "")
num_bothinsig <- sum(lfcp_b_s$state == "bothinsig")
num_bothsig <- sum(lfcp_b_s$state == "bothsig")
num_svasig <- sum(lfcp_b_s$state == "svasig")
num_batchsig <- sum(lfcp_b_s$state == "batchsig")
library(ggplot2)
aes_color = "(l2fcsva >= 0.75 | l2fcsva <= -0.75 | l2fcbatch >= 0.75 | l2fcbatch <= -0.75)"
plt <- ggplot2::ggplot(lfcp_b_s, aes_string(x="l2fcsva", y="l2fcbatch")) +
## ggplot2::geom_point(stat="identity", size=2, alpha=0.2, aes_string(shape="as.factor(aes_color)", colour="as.factor(state)", fill="as.factor(state)")) +
ggplot2::geom_abline(colour="blue", slope=1, intercept=0, size=0.5) +
ggplot2::geom_hline(yintercept=c(-0.75, 0.75), color="red", size=0.5) +
ggplot2::geom_vline(xintercept=c(-0.75, 0.75), color="red", size=0.5) +
ggplot2::geom_point(stat="identity", size=2, alpha=0.2, aes_string(colour="as.factor(state)", fill="as.factor(state)")) +
ggplot2::scale_color_manual(name="state", values=c("bothinsig"="grey", "bothsig"="forestgreen", "svasig"="darkred", "batchsig"="darkblue")) +
ggplot2::scale_fill_manual(name="state", values=c("bothinsig"="grey", "bothsig"="forestgreen", "svasig"="darkred", "batchsig"="darkblue"),
labels=c(
paste0("Both InSig.: ", num_bothinsig),
paste0("Both Sig.: ", num_bothsig),
paste0("Sva Sig.: ", num_svasig),
paste0("Batch Sig.: ", num_batchsig)),
guide=ggplot2::guide_legend(override.aes=aes(size=3, fill="grey"))) +
ggplot2::guides(fill=ggplot2::guide_legend(override.aes=list(size=3))) +
ggplot2::theme_bw()
plt
lp_macr_nobatch_batch <- merge(
lp_macr_nobatch$limma$all_tables$sh_vs_chr,
lp_macr_batch$limma$all_tables$sh_vs_chr,
by="row.names")
rownames(lp_macr_nobatch_batch) <- lp_macr_nobatch_batch[["Row.names"]]
lp_macr_nobatch_batch <- lp_macr_nobatch_batch[, c("logFC.x","logFC.y")]
colnames(lp_macr_nobatch_batch) <- c("nobatch","batch")
nb_b <- plot_linear_scatter(lp_macr_nobatch_batch, pretty_colors=FALSE)
## Warning in plot_multihistogram(df): NAs introduced by coercion
## Used Bon Ferroni corrected t test(s) between columns.
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 77, df = 8700, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6235 0.6486
## sample estimates:
## cor
## 0.6362
lp_macr_batch_sva <- merge(
lp_macr_batch$limma$all_tables$sh_vs_chr,
lp_macr_sva$limma$all_tables$sh_vs_chr,
by="row.names")
rownames(lp_macr_batch_sva) <- lp_macr_batch_sva[["Row.names"]]
lp_macr_batch_sva <- lp_macr_batch_sva[, c("logFC.x","logFC.y")]
colnames(lp_macr_batch_sva) <- c("batch","sva")
b_s <- plot_linear_scatter(lp_macr_batch_sva, pretty_colors=FALSE)
## Warning in plot_multihistogram(df): NAs introduced by coercion
## Used Bon Ferroni corrected t test(s) between columns.
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 85, df = 8700, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6608 0.6839
## sample estimates:
## cor
## 0.6725
lp_macr_nobatch_batch <- merge(
lp_macr_nobatch$edger$all_tables$sh_vs_chr,
lp_macr_batch$edger$all_tables$sh_vs_chr,
by="row.names")
rownames(lp_macr_nobatch_batch) <- lp_macr_nobatch_batch[["Row.names"]]
lp_macr_nobatch_batch <- lp_macr_nobatch_batch[, c("logFC.x","logFC.y")]
colnames(lp_macr_nobatch_batch) <- c("nobatch","batch")
nb_b <- sm(plot_linear_scatter(lp_macr_nobatch_batch, pretty_colors=FALSE))
nb_b$scatter
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 65, df = 8700, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5599 0.5881
## sample estimates:
## cor
## 0.5741
lp_macr_batch_sva <- merge(
lp_macr_batch$edger$all_tables$sh_vs_chr,
lp_macr_sva$edger$all_tables$sh_vs_chr,
by="row.names")
rownames(lp_macr_batch_sva) <- lp_macr_batch_sva[["Row.names"]]
lp_macr_batch_sva <- lp_macr_batch_sva[, c("logFC.x","logFC.y")]
colnames(lp_macr_batch_sva) <- c("batch","sva")
b_s <- plot_linear_scatter(lp_macr_batch_sva, pretty_colors=FALSE)
## Warning in plot_multihistogram(df): NAs introduced by coercion
## Used Bon Ferroni corrected t test(s) between columns.
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 80, df = 8700, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6391 0.6634
## sample estimates:
## cor
## 0.6514
lp_macr_nobatch_batch <- merge(
lp_macr_nobatch$deseq$all_tables$sh_vs_chr,
lp_macr_batch$deseq$all_tables$sh_vs_chr,
by="row.names")
rownames(lp_macr_nobatch_batch) <- lp_macr_nobatch_batch[["Row.names"]]
lp_macr_nobatch_batch <- lp_macr_nobatch_batch[, c("logFC.x","logFC.y")]
colnames(lp_macr_nobatch_batch) <- c("nobatch","batch")
nb_b <- sm(plot_linear_scatter(lp_macr_nobatch_batch, pretty_colors=FALSE))
nb_b$scatter
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 66, df = 8700, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5634 0.5915
## sample estimates:
## cor
## 0.5776
lp_macr_batch_sva <- merge(
lp_macr_batch$deseq$all_tables$sh_vs_chr,
lp_macr_sva$deseq$all_tables$sh_vs_chr,
by="row.names")
rownames(lp_macr_batch_sva) <- lp_macr_batch_sva[["Row.names"]]
lp_macr_batch_sva <- lp_macr_batch_sva[, c("logFC.x","logFC.y")]
colnames(lp_macr_batch_sva) <- c("batch","sva")
b_s <- sm(plot_linear_scatter(lp_macr_batch_sva, pretty_colors=FALSE))
b_s$scatter
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 80, df = 8700, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6378 0.6622
## sample estimates:
## cor
## 0.6502
R version 3.5.2 (2018-12-20)
Platform: x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C
attached base packages: parallel, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: ggplot2(v.3.1.0), ruv(v.0.9.7), foreach(v.1.4.4), edgeR(v.3.24.3), bindrcpp(v.0.2.2), hpgltools(v.2018.11), Biobase(v.2.42.0) and BiocGenerics(v.0.28.0)
loaded via a namespace (and not attached): R.utils(v.2.7.0), tidyselect(v.0.2.5), lme4(v.1.1-19), htmlwidgets(v.1.3), RSQLite(v.2.1.1), AnnotationDbi(v.1.44.0), grid(v.3.5.2), BiocParallel(v.1.16.5), DESeq(v.1.34.1), devtools(v.2.0.1), munsell(v.0.5.0), codetools(v.0.2-16), preprocessCore(v.1.45.0), units(v.0.6-2), statmod(v.1.4.30), withr(v.2.1.2), colorspace(v.1.4-0), GOSemSim(v.2.8.0), OrganismDbi(v.1.24.0), knitr(v.1.21), rstudioapi(v.0.9.0), stats4(v.3.5.2), Vennerable(v.3.1.0.9000), robustbase(v.0.93-3), DOSE(v.3.8.2), labeling(v.0.3), urltools(v.1.7.1), GenomeInfoDbData(v.1.2.0), hwriter(v.1.3.2), bit64(v.0.9-7), farver(v.1.1.0), rprojroot(v.1.3-2), xfun(v.0.4), EDASeq(v.2.16.3), R6(v.2.3.0), doParallel(v.1.0.14), GenomeInfoDb(v.1.18.1), locfit(v.1.5-9.1), bitops(v.1.0-6), fgsea(v.1.8.0), gridGraphics(v.0.3-0), DelayedArray(v.0.8.0), assertthat(v.0.2.0), scales(v.1.0.0), ggraph(v.1.0.2), nnet(v.7.3-12), enrichplot(v.1.2.0), gtable(v.0.2.0), RUVSeq(v.1.16.1), sva(v.3.30.1), processx(v.3.2.1), rlang(v.0.3.1), genefilter(v.1.64.0), splines(v.3.5.2), rtracklayer(v.1.42.1), lazyeval(v.0.2.1), acepack(v.1.4.1), checkmate(v.1.9.1), europepmc(v.0.3), BiocManager(v.1.30.4), yaml(v.2.2.0), reshape2(v.1.4.3), GenomicFeatures(v.1.34.1), backports(v.1.1.3), qvalue(v.2.14.1), Hmisc(v.4.1-1), clusterProfiler(v.3.10.1), RBGL(v.1.58.1), tools(v.3.5.2), usethis(v.1.4.0), ggplotify(v.0.0.3), gplots(v.3.0.1), RColorBrewer(v.1.1-2), blockmodeling(v.0.3.4), sessioninfo(v.1.1.1), ggridges(v.0.5.1), Rcpp(v.1.0.0), plyr(v.1.8.4), base64enc(v.0.1-3), progress(v.1.2.0), zlibbioc(v.1.28.0), purrr(v.0.2.5), RCurl(v.1.95-4.11), ps(v.1.3.0), prettyunits(v.1.0.2), rpart(v.4.1-13), viridis(v.0.5.1), cowplot(v.0.9.4), S4Vectors(v.0.20.1), SummarizedExperiment(v.1.12.0), ggrepel(v.0.8.0), cluster(v.2.0.7-1), colorRamps(v.2.3), fs(v.1.2.6), variancePartition(v.1.12.1), magrittr(v.1.5), data.table(v.1.12.0), openxlsx(v.4.1.0), DO.db(v.2.9), triebeard(v.0.3.0), packrat(v.0.5.0), matrixStats(v.0.54.0), aroma.light(v.3.12.0), pkgload(v.1.0.2), hms(v.0.4.2), evaluate(v.0.12), xtable(v.1.8-3), pbkrtest(v.0.4-7), XML(v.3.98-1.16), IRanges(v.2.16.0), gridExtra(v.2.3), testthat(v.2.0.1), compiler(v.3.5.2), biomaRt(v.2.38.0), tibble(v.2.0.1), KernSmooth(v.2.23-15), crayon(v.1.3.4), R.oo(v.1.22.0), minqa(v.1.2.4), htmltools(v.0.3.6), mgcv(v.1.8-26), corpcor(v.1.6.9), snow(v.0.4-3), Formula(v.1.2-3), geneplotter(v.1.60.0), tidyr(v.0.8.2), DBI(v.1.0.0), tweenr(v.1.0.1), MASS(v.7.3-51.1), ShortRead(v.1.40.0), Matrix(v.1.2-15), cli(v.1.0.1), R.methodsS3(v.1.7.1), quadprog(v.1.5-5), gdata(v.2.18.0), bindr(v.0.1.1), igraph(v.1.2.2), GenomicRanges(v.1.34.0), pkgconfig(v.2.0.2), registry(v.0.5), rvcheck(v.0.1.3), GenomicAlignments(v.1.18.1), foreign(v.0.8-71), xml2(v.1.2.0), annotate(v.1.60.0), rngtools(v.1.3.1), pkgmaker(v.0.27), XVector(v.0.22.0), bibtex(v.0.4.2), doRNG(v.1.7.1), EBSeq(v.1.22.1), stringr(v.1.3.1), callr(v.3.1.1), digest(v.0.6.18), graph(v.1.60.0), Biostrings(v.2.50.2), rmarkdown(v.1.11), fastmatch(v.1.1-0), htmlTable(v.1.13.1), directlabels(v.2018.05.22), Rsamtools(v.1.34.0), gtools(v.3.8.1), nloptr(v.1.2.1), nlme(v.3.1-137), jsonlite(v.1.6), desc(v.1.2.0), viridisLite(v.0.3.0), limma(v.3.38.3), pillar(v.1.3.1), lattice(v.0.20-38), DEoptimR(v.1.0-8), httr(v.1.4.0), pkgbuild(v.1.0.2), survival(v.2.43-3), GO.db(v.3.7.0), glue(v.1.3.0), remotes(v.2.0.2), zip(v.1.0.0), UpSetR(v.1.3.3), iterators(v.1.0.10), pander(v.0.6.3), bit(v.1.1-14), ggforce(v.0.1.3), stringi(v.1.2.4), blob(v.1.1.1), DESeq2(v.1.22.2), doSNOW(v.1.0.16), latticeExtra(v.0.6-28), caTools(v.1.17.1.1), memoise(v.1.1.0) and dplyr(v.0.7.8)
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 7ec7a093f7ee8293e7a5326cb8f77479341502a7
## This is hpgltools commit: Thu Feb 7 16:10:02 2019 -0500: 7ec7a093f7ee8293e7a5326cb8f77479341502a7
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
## Saving to 03_expression_macrophage_20190205-v20190205.rda.xz