This document turns to the infection of PBMC cells with L.panamensis. This data is particularly strangely affected by the different strains used to infect the cells, and as a result is both useful and troubling.
Given the observations above, we have some ideas of ways to pass the data for differential expression analyses which may or may not be ‘better’. Lets try some and see what happens.
Given the above ways to massage the data, lets use a few of them for limma/deseq/edger. The main caveat in this is that those tools really do expect specific distributions of data which we horribly violate if we use log2() data, which is why in the previous blocks I named them l2blahblah, thus we can do the same sets of normalization but without that and forcibly push the resulting data into limma/edger/deseq.
Everything I did in 02_estimation_infection.html suggests that there are no significant differences visible if one looks just at chronic/self-healing in this data. Further testing has seemingly proven this statement, as a result most of the following analyses will look at chronic/uninfected and self-healing/uninfected followed by attempts to reconcile those results.
To save some time and annoyance with sva, lets filter the data now. In addition, write down a small function used to extract the sets of significant genes across different contrasts (notably self/uninfected vs. chronic/uninfected).
In the following block, I will take the comparisons performed without any batch in the model/adjustment and use them to search for shared/unique genes among the self-healing vs. uninfected and the chronic vs. uninfected.
hs_pairwise_nobatch_pca <- sm(plot_pca(hs_inf_filt, convert="cpm", transform="log2"))
hs_pairwise_nobatch_pca$plot
excel_file <- glue::glue("excel/{rundate}_hs_infect_nobatch_contr-v{ver}.xlsx")
hs_combined_nobatch <- sm(combine_de_tables(
hs_pairwise_nobatch,
excel=excel_file,
keepers=keepers))
excel_file <- glue::glue("excel/{rundate}_hs_infect_nobatch_sig-v{ver}.xlsx")
hs_sig_nobatch <- sm(extract_significant_genes(
hs_combined_nobatch,
excel=excel_file))
hs_sig_nobatch$deseq$counts
## change_counts_up change_counts_down
## sh_nil 926 173
## ch_nil 927 167
## ch_sh 0 0
limma_deseq_order <- rank_order_scatter(
hs_pairwise_nobatch,
first_type="limma", second_type="deseq",
first_table=1, second_table=1)
limma_deseq_order$plot
up_lst <- list(
"sh_up" = rownames(hs_sig_nobatch[["deseq"]][["ups"]][["sh_nil"]]),
"ch_up" = rownames(hs_sig_nobatch[["deseq"]][["ups"]][["ch_nil"]]))
nobatch_up_venn <- Vennerable::Venn(Sets=up_lst)
Vennerable::plot(nobatch_up_venn, doWeights=FALSE)
## Maria-Adelaida and Najib asked about comparing the following pair of data against
## The intersection of a bunch of stuff later... So don't forget this!
## This gives me the genes only up in self vs. nil
nobatch_up_sh_solo <- nobatch_up_venn@IntersectionSets[["10"]]
## and ch vs nil
nobatch_up_ch_solo <- nobatch_up_venn@IntersectionSets[["01"]]
down_lst <- list(
"sh_down" = rownames(hs_sig_nobatch[["deseq"]][["downs"]][["sh_nil"]]),
"ch_down" = rownames(hs_sig_nobatch[["deseq"]][["downs"]][["ch_nil"]]))
nobatch_down_venn <- Vennerable::Venn(Sets=down_lst)
Vennerable::plot(nobatch_down_venn, doWeights=FALSE)
Repeat the previous set of analyses with d107/108/110 in the model.
hs_pairwise_batch_pca <- sm(plot_pca(hs_inf_filt, batch="limma", convert="cpm", transform="log2"))
hs_pairwise_batch_pca$plot
excel_file <- glue::glue("excel/{rundate}_hs_infect_patbatch_contr-v{ver}.xlsx")
hs_combined_batch <- sm(combine_de_tables(
hs_pairwise_batch,
excel=excel_file,
keepers=keepers))
excel_file <- glue::glue("excel/{rundate}_hs_infect_patbatch_sig-v{ver}.xlsx")
hs_sig_batch <- sm(extract_significant_genes(
hs_combined_batch,
excel=excel_file))
hs_sig_batch[["deseq"]][["counts"]]
## change_counts_up change_counts_down
## sh_nil 906 298
## ch_nil 932 279
## ch_sh 0 0
up_lst <- list(
"sh_up" = rownames(hs_sig_batch[["deseq"]][["ups"]][["sh_nil"]]),
"ch_up" = rownames(hs_sig_batch[["deseq"]][["ups"]][["ch_nil"]]))
batch_up_venn <- Vennerable::Venn(Sets=up_lst)
summary(batch_up_venn@IntersectionSets)
## Length Class Mode
## 00 0 -none- NULL
## 10 56 -none- character
## 01 82 -none- character
## 11 850 -none- character
down_lst <- list(
"sh_down" = rownames(hs_sig_batch[["deseq"]][["downs"]][["sh_nil"]]),
"ch_down" = rownames(hs_sig_batch[["deseq"]][["downs"]][["ch_nil"]]))
batch_down_venn <- Vennerable::Venn(Sets=down_lst)
Vennerable::plot(batch_down_venn, doWeights=FALSE)
## Length Class Mode
## 00 0 -none- NULL
## 10 65 -none- character
## 01 46 -none- character
## 11 233 -none- character
## Testing method: limma.
## Adding method: limma to the set.
## Testing method: deseq.
## Adding method: deseq to the set.
## Testing method: edger.
## Adding method: edger to the set.
## Testing method: ebseq.
## The first datum is missing method: ebseq.
## Testing method: basic.
## Adding method: basic to the set.
## Warning in cor(x = fs[["x"]], y = fs[["y"]], method = cor_method): the
## standard deviation is zero
## Warning in cor(x = fs[["x"]], y = fs[["y"]], method = cor_method): the
## standard deviation is zero
## $sh_nil
## $sh_nil$logfc
## [1] 0.9916
##
## $sh_nil$p
## [1] 0.9353
##
## $sh_nil$adjp
## [1] 0.9353
##
##
## $ch_nil
## $ch_nil$logfc
## [1] 0.9925
##
## $ch_nil$p
## [1] 0.9374
##
## $ch_nil$adjp
## [1] 0.9374
##
##
## $ch_sh
## $ch_sh$logfc
## [1] 0.9873
##
## $ch_sh$p
## [1] 0.9304
##
## $ch_sh$adjp
## [1] 0.06891
The following block writes out the unique/shared genes observed among the contrasts which included donor in the model.
kept_columns <- c("ensembltranscriptid", "ensemblgeneid", "description",
"deseq_logfc", "deseq_adjp")
start_data <- hs_sig_batch[["deseq"]]
sh_up_solo_genes <- batch_up_venn@IntersectionSets[["10"]]
ch_up_solo_genes <- batch_up_venn@IntersectionSets[["01"]]
shch_up_shared_genes <- batch_up_venn@IntersectionSets[["11"]]
sh_down_solo_genes <- batch_down_venn@IntersectionSets[["10"]]
ch_down_solo_genes <- batch_down_venn@IntersectionSets[["01"]]
shch_down_shared_genes <- batch_down_venn@IntersectionSets[["11"]]
xls_result <- write_xls(
data=start_data[["ups"]][["sh_nil"]][sh_up_solo_genes, kept_columns],
sheet="sh_up_solo")
xls_result <- write_xls(
data=start_data[["ups"]][["ch_nil"]][ch_up_solo_genes, kept_columns],
sheet="ch_up_solo", wb=xls_result[["workbook"]])
xls_result <- write_xls(
data=start_data[["ups"]][["ch_nil"]][shch_up_shared_genes, kept_columns],
sheet="shch_up_shared", wb=xls_result[["workbook"]])
xls_result <- write_xls(
data=start_data[["downs"]][["sh_nil"]][sh_down_solo_genes, kept_columns],
sheet="sh_down_solo")
xls_result <- write_xls(
data=start_data[["downs"]][["ch_nil"]][ch_down_solo_genes, kept_columns],
sheet="ch_down_solo", wb=xls_result[["workbook"]])
xls_result <- write_xls(
data=start_data[["downs"]][["ch_nil"]][shch_down_shared_genes, kept_columns],
sheet="shch_down_shared", wb=xls_result[["workbook"]],
excel=paste0("excel/figure_5a_stuff-v", ver, ".xlsx"))
## Saving to: excel/figure_5a_stuff-v20180822.xlsx
Repeat, this time attmepting to tamp down the variance by person.
hs_pairwise_ssva_pca <- sm(plot_pca(hs_inf_filt, batch="ssva", norm="quant", transform="log2"))
hs_pairwise_ssva_pca$plot
hs_pairwise_ssva <- sm(all_pairwise(hs_uninf_filt, model_batch="ssva",
parallel=FALSE, do_ebseq=FALSE))
excel_file <- glue::glue("excel/{rundate}_hs_infect_fsva_contr-v{ver}.xlsx")
hs_combined_ssva <- sm(combine_de_tables(
hs_pairwise_ssva,
excel=excel_file,
keepers=keepers))
excel_file <- glue::glue("excel/{rundate}_hs_infect_fsva_sig-v{ver}.xlsx")
hs_sig_ssva <- sm(extract_significant_genes(
hs_combined_ssva,
excel=excel_file))
hs_sig_ssva$deseq$counts
## change_counts_up change_counts_down
## sh_nil 805 507
## ch_nil 991 553
## ch_sh 0 0
up_lst <- list(
"sh_up" = rownames(hs_sig_ssva[["deseq"]][["ups"]][["sh_nil"]]),
"ch_up" = rownames(hs_sig_ssva[["deseq"]][["ups"]][["ch_nil"]]))
ssva_up_venn <- Vennerable::Venn(Sets=up_lst)
summary(ssva_up_venn@IntersectionSets)
## Length Class Mode
## 00 0 -none- NULL
## 10 38 -none- character
## 01 224 -none- character
## 11 767 -none- character
down_lst <- list(
"sh_down" = rownames(hs_sig_ssva[["deseq"]][["downs"]][["sh_nil"]]),
"ch_down" = rownames(hs_sig_ssva[["deseq"]][["downs"]][["ch_nil"]]))
ssva_down_venn <- Vennerable::Venn(Sets=down_lst)
Vennerable::plot(ssva_down_venn, doWeights=FALSE)
## Length Class Mode
## 00 0 -none- NULL
## 10 33 -none- character
## 01 79 -none- character
## 11 474 -none- character
similar <- sm(compare_de_results(hs_combined_nobatch, hs_combined_ssva,
cor_method="spearman"))
similar$result$deseq
## $sh_nil
## $sh_nil$logfc
## [1] 0.2856
##
## $sh_nil$p
## [1] 0.0086
##
## $sh_nil$adjp
## [1] 0.008603
##
##
## $ch_nil
## $ch_nil$logfc
## [1] 0.356
##
## $ch_nil$p
## [1] 0.03392
##
## $ch_nil$adjp
## [1] 0.03391
##
##
## $ch_sh
## $ch_sh$logfc
## [1] 0.9571
##
## $ch_sh$p
## [1] 0.8855
##
## $ch_sh$adjp
## [1] 0.06088
similar <- sm(compare_de_results(hs_combined_batch, hs_combined_ssva,
cor_method="spearman"))
similar$result$deseq
## $sh_nil
## $sh_nil$logfc
## [1] 0.2982
##
## $sh_nil$p
## [1] 0.1252
##
## $sh_nil$adjp
## [1] 0.1252
##
##
## $ch_nil
## $ch_nil$logfc
## [1] 0.3653
##
## $ch_nil$p
## [1] 0.1417
##
## $ch_nil$adjp
## [1] 0.1417
##
##
## $ch_sh
## $ch_sh$logfc
## [1] 0.9735
##
## $ch_sh$p
## [1] 0.9305
##
## $ch_sh$adjp
## [1] 0.9305
Repeat again using fsva.
hs_pairwise_fsva_pca <- sm(plot_pca(hs_inf_filt, batch="fsva", norm="quant", transform="log2"))
hs_pairwise_fsva_pca$plot
hs_pairwise_fsva <- sm(all_pairwise(hs_uninf_filt, model_batch="fsva",
parallel=FALSE, do_ebseq=FALSE))
excel_file <- glue::glue("excel/{rundate}_hs_infect_fsva_contr-v{ver}.xlsx")
hs_combined_fsva <- sm(combine_de_tables(
hs_pairwise_fsva,
excel=excel_file,
keepers=keepers))
excel_file <- glue::glue("excel/{rundate}_hs_infect_fsva_sig-v{ver}.xlsx")
hs_sig_fsva <- sm(extract_significant_genes(
hs_combined_fsva,
excel=excel_file))
up_lst <- list(
"sh_up" = rownames(hs_sig_fsva[["deseq"]][["ups"]][["sh_nil"]]),
"ch_up" = rownames(hs_sig_fsva[["deseq"]][["ups"]][["ch_nil"]]))
fsva_up_venn <- Vennerable::Venn(Sets=up_lst)
summary(fsva_up_venn@IntersectionSets)
## Length Class Mode
## 00 0 -none- NULL
## 10 64 -none- character
## 01 96 -none- character
## 11 864 -none- character
down_lst <- list(
"sh_down" = rownames(hs_sig_fsva[["deseq"]][["downs"]][["sh_nil"]]),
"ch_down" = rownames(hs_sig_fsva[["deseq"]][["downs"]][["ch_nil"]]))
fsva_down_venn <- Vennerable::Venn(Sets=down_lst)
Vennerable::plot(fsva_down_venn, doWeights=FALSE)
## Length Class Mode
## 00 0 -none- NULL
## 10 70 -none- character
## 01 45 -none- character
## 11 225 -none- character
##hs_sig_fsva$deseq$counts$common_solos_fsva <- sm(subset_significants(hs_sig_fsva))
similar <- sm(compare_de_results(hs_combined_nobatch, hs_combined_fsva,
cor_method="spearman"))
similar$result$deseq
## $sh_nil
## $sh_nil$logfc
## [1] 0.9937
##
## $sh_nil$p
## [1] 0.922
##
## $sh_nil$adjp
## [1] 0.922
##
##
## $ch_nil
## $ch_nil$logfc
## [1] 0.9937
##
## $ch_nil$p
## [1] 0.9206
##
## $ch_nil$adjp
## [1] 0.9206
##
##
## $ch_sh
## $ch_sh$logfc
## [1] 0.9743
##
## $ch_sh$p
## [1] 0.9012
##
## $ch_sh$adjp
## [1] 0.06275
similar <- sm(compare_de_results(hs_combined_ssva, hs_combined_fsva,
cor_method="spearman"))
similar$result$deseq
## $sh_nil
## $sh_nil$logfc
## [1] 0.2808
##
## $sh_nil$p
## [1] 0.1256
##
## $sh_nil$adjp
## [1] 0.1256
##
##
## $ch_nil
## $ch_nil$logfc
## [1] 0.3348
##
## $ch_nil$p
## [1] 0.1355
##
## $ch_nil$adjp
## [1] 0.1355
##
##
## $ch_sh
## $ch_sh$logfc
## [1] 0.9896
##
## $ch_sh$p
## [1] 0.9686
##
## $ch_sh$adjp
## [1] 0.9686
Repeat once again, this time using combat to try to limit the contribution of the strain to the data. I do not think we will ever use this set of contrasts, so I will deactivate it but leave it here if it is required later.
old_condition <- hs_uninf$design$condition
names(old_condition) <- hs_uninf$design$sampleid
new_condition <- paste0(hs_uninf$design$state, '_', hs_uninf$design$donor)
hs_uninf_recond <- set_expt_factors(hs_uninf_filt,
batch="pathogenstrain",
condition=new_condition)
combat_input <- normalize_expt(hs_uninf_recond, norm="quant", batch="combat_scale")
## This function will replace the expt$expressionset slot with:
## combat_scale(quant(data))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Filter is false, this should likely be set to something, good
## choices include cbcb, kofa, pofa (anything but FALSE). If you want this to
## stay FALSE, keep in mind that if other normalizations are performed, then the
## resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Step 1: not doing count filtering.
## Step 2: normalizing the data with quant.
## Step 3: not converting the data.
## Step 4: not transforming the data.
## Step 5: doing batch correction with combat_scale.
## In norm_batch, after testing logic of surrogate method/number, the
## number of surrogates is: and the method is: be.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a better low-count filter applied.
## batch_counts: Before batch correction, 407 entries 0<x<1.
## batch_counts: Before batch correction, 2 entries are >= 0.
## After checking/setting the number of surrogates, it is: 1.
## batch_counts: Using combat with a prior and with scaling.
## The number of elements which are < 0 after batch correction is: 6181
## The variable low_to_zero sets whether to change <0 values to 0 and is: FALSE
combat_input <- set_expt_conditions(combat_input, fact=old_condition)
combat_input <- set_expt_colors(combat_input, colors=c("green","blue","red"))
## The new colors are a character, changing according to condition.
excel_file <- glue::glue("excel/{rundate}_hs_infect_combatpath_contr-v{ver}.xlsx")
hs_combined_combatpath <- sm(combine_de_tables(
hs_pairwise_combatpath,
excel=excel_file,
keepers=keepers))
excel_file <- glue::glue("excel/{rundate}_hs_infect_combatpath_sig-v{ver}.xlsx")
hs_sig_combatpath <- sm(extract_significant_genes(
hs_combined_combatpath,
excel=excel_file))
hs_sig_combatpath$deseq$counts
## change_counts_up change_counts_down
## sh_nil 1518 22
## ch_nil 2289 2056
## ch_sh 470 1091
up_lst <- list(
"sh_up" = rownames(hs_sig_combatpath[["deseq"]][["ups"]][["sh_nil"]]),
"ch_up" = rownames(hs_sig_combatpath[["deseq"]][["ups"]][["ch_nil"]]))
combatpath_up_venn <- Vennerable::Venn(Sets=up_lst)
summary(combatpath_up_venn@IntersectionSets)
## Length Class Mode
## 00 0 -none- NULL
## 10 0 -none- NULL
## 01 771 -none- character
## 11 1518 -none- character
down_lst <- list(
"sh_down" = rownames(hs_sig_combatpath[["deseq"]][["downs"]][["sh_nil"]]),
"ch_down" = rownames(hs_sig_combatpath[["deseq"]][["downs"]][["ch_nil"]]))
combatpath_down_venn <- Vennerable::Venn(Sets=down_lst)
Vennerable::plot(combatpath_down_venn, doWeights=FALSE)
## Length Class Mode
## 00 0 -none- NULL
## 10 0 -none- NULL
## 01 2034 -none- character
## 11 22 -none- character
similar <- sm(compare_de_results(hs_combined_nobatch, hs_combined_combatpath,
cor_method="spearman"))
similar$result$deseq
## $sh_nil
## $sh_nil$logfc
## [1] -0.07967
##
## $sh_nil$p
## [1] -0.04815
##
## $sh_nil$adjp
## [1] -0.04815
##
##
## $ch_nil
## $ch_nil$logfc
## [1] -0.1114
##
## $ch_nil$p
## [1] -0.02765
##
## $ch_nil$adjp
## [1] -0.02765
##
##
## $ch_sh
## $ch_sh$logfc
## [1] 0.01295
##
## $ch_sh$p
## [1] -0.1028
##
## $ch_sh$adjp
## [1] 0.002389
similar <- sm(compare_de_results(hs_combined_fsva, hs_combined_combatpath,
cor_method="spearman"))
similar$result$deseq
## $sh_nil
## $sh_nil$logfc
## [1] -0.08732
##
## $sh_nil$p
## [1] -0.004519
##
## $sh_nil$adjp
## [1] -0.004516
##
##
## $ch_nil
## $ch_nil$logfc
## [1] -0.1309
##
## $ch_nil$p
## [1] 0.04731
##
## $ch_nil$adjp
## [1] 0.04731
##
##
## $ch_sh
## $ch_sh$logfc
## [1] -0.04856
##
## $ch_sh$p
## [1] -0.02353
##
## $ch_sh$adjp
## [1] -0.0235
For each of the following, perform a simple DE and see what happens:
The data used in the following is the quantile(cpm(filter())) where the condition was set to a concatenation of patient and healing state, combat was also performed, so we no longer want batch in the experimental model and also we need to pass ‘force=TRUE’ because deseq/edger will need to be coerced into accepting these modified data.
## chr_5430_d108 chr_5397_d108 chr_2504_d108 sh_2272_d108 sh_1022_d108
## chr chr chr sh sh
## sh_2189_d108 chr_5430_d110 chr_5397_d110 chr_2504_d110 sh_2272_d110
## sh chr chr chr sh
## sh_1022_d110 sh_2189_d110 chr_5430_d107 chr_5397_d107 chr_2504_d107
## sh sh chr chr chr
## sh_2272_d107 sh_1022_d107 sh_2189_d107
## sh sh sh
## Levels: sh chr
## Start by leaving the data relatively alone, especially noting that we do not
## have a usable batch by default.
hs_uninf_filtv2 <- hs_uninf_filt
donor_state <- paste0(hs_uninf_filtv2$design$state, "_", hs_uninf_filtv2$design$donor)
hs_uninf_filtv2 <- set_expt_factors(hs_uninf_filtv2, condition=donor_state)
uninf_patient_keepers <- list(
"d107_chun" = c("chronic_d107", "uninfected_d107"),
"d107_shun" = c("self_heal_d107", "uninfected_d107"),
"d107_chsh" = c("chronic_d107", "self_heal_d107"),
"d108_chun" = c("chronic_d108", "uninfected_d108"),
"d108_shun" = c("self_heal_d108", "uninfected_d108"),
"d108_chsh" = c("chronic_d108", "self_heal_d108"),
"d110_chun" = c("chronic_d110", "uninfected_d110"),
"d110_shun" = c("self_heal_d110", "uninfected_d110"),
"d110_chsh" = c("chronic_d110", "self_heal_d110"))
hs_uninf_filtv2_pairwise <- sm(all_pairwise(hs_uninf_filtv2, parallel=FALSE,
model_batch=FALSE, do_ebseq=FALSE))
hs_uninf_filtv2_combined <- sm(combine_de_tables(
hs_uninf_filtv2_pairwise,
keepers=uninf_patient_keepers,
excel=paste0("excel/hs_infect_patient_nobatch-v", ver, ".xlsx")))
hs_uninf_filtv2_sig <- sm(extract_significant_genes(
hs_uninf_filtv2_combined, according_to="deseq",
excel=paste0("excel/hs_infect_patient_nobatch_sig-v", ver, ".xlsx")))
##hs_uninf_filtv3_pairwise <- all_pairwise(hs_uninf_filtv2, model_batch="svaseq", surrogates=1)
##hs_uninf_filtv3_combined <- sm(combine_de_tables(hs_uninf_filtv3_pairwise,
## keepers=uninf_patient_keepers,
## excel=paste0("excel/hs_infect_patient_fsva-v", ver, ".xlsx")))
##hs_uninf_filtv3_sig <- sm(extract_significant_genes(hs_uninf_filtv3_combined,
## excel=paste0("excel/hs_infect_patient_fsva_sig-v", ver, ".xlsx")))
Now we want to look at intersections from the perspective of contrasts performed comparing the self-healing/chronic vs. uninfected for the three donors separately.
This time for each of the three donors: self-healing up vs. uninfected.
up_sig <- hs_uninf_filtv2_sig$deseq$ups
## Do this in a way which is not stupid...
comp_lst <- list(
"donor_107" = rownames(up_sig[["d107_shun"]]),
"donor_108" = rownames(up_sig[["d108_shun"]]),
"donor_110" = rownames(up_sig[["d110_shun"]]))
comp_shun_up <- Vennerable::Venn(Sets=comp_lst)
up_res <- Vennerable::plot(comp_shun_up, doWeights=FALSE)
shared_shun_up <- comp_shun_up@IntersectionSets[["111"]]
de_table_shared_up_sh_first <- hs_uninf_filtv2_combined[["data"]][["d107_shun"]][shared_shun_up, ]
de_table_shared_up_sh_second <- hs_uninf_filtv2_combined[["data"]][["d108_shun"]][shared_shun_up, ]
de_table_shared_up_sh_third <- hs_uninf_filtv2_combined[["data"]][["d110_shun"]][shared_shun_up, ]
de_table_shared_up_sh_all <- merge(
de_table_shared_up_sh_first[, c("description", "deseq_logfc", "deseq_adjp")],
de_table_shared_up_sh_second[, c("deseq_logfc", "deseq_adjp")],
by="row.names")
de_table_shared_up_sh_all <- merge(
de_table_shared_up_sh_all,
de_table_shared_up_sh_third[, c("deseq_logfc", "deseq_adjp")],
by.x="Row.names", by.y="row.names")
rownames(de_table_shared_up_sh_all) <- de_table_shared_up_sh_all[["Row.names"]]
de_table_shared_up_sh_all <- de_table_shared_up_sh_all[, -1]
colnames(de_table_shared_up_sh_all) <- c("description", "logfc_107", "adjp_107",
"logfc_108", "adjp_108", "logfc_110", "adjp_110")
write.csv(de_table_shared_up_sh_all, file="images/de_table_shared_up_sh_all.csv")
This time for each of the three donors: chronic up vs. uninfected.
up_sig <- hs_uninf_filtv2_sig$deseq$ups
comp_lst <- list(
"donor_107" = rownames(up_sig[["d107_chun"]]),
"donor_108" = rownames(up_sig[["d108_chun"]]),
"donor_110" = rownames(up_sig[["d110_chun"]]))
comp_chun_up <- Vennerable::Venn(Sets=comp_lst)
up_res <- Vennerable::plot(comp_chun_up, doWeights=FALSE)
shared_chun_up <- comp_chun_up@IntersectionSets[["111"]]
de_table_shared_up_ch_first <- hs_uninf_filtv2_combined[["data"]][["d107_chun"]][shared_chun_up, ]
de_table_shared_up_ch_second <- hs_uninf_filtv2_combined[["data"]][["d108_chun"]][shared_chun_up, ]
de_table_shared_up_ch_third <- hs_uninf_filtv2_combined[["data"]][["d110_chun"]][shared_chun_up, ]
de_table_shared_up_ch_all <- merge(
de_table_shared_up_ch_first[, c("description", "deseq_logfc", "deseq_adjp")],
de_table_shared_up_ch_second[, c("deseq_logfc", "deseq_adjp")],
by="row.names")
de_table_shared_up_ch_all <- merge(
de_table_shared_up_ch_all,
de_table_shared_up_ch_third[, c("deseq_logfc", "deseq_adjp")],
by.x="Row.names", by.y="row.names")
rownames(de_table_shared_up_ch_all) <- de_table_shared_up_ch_all[["Row.names"]]
de_table_shared_up_ch_all <- de_table_shared_up_ch_all[, -1]
colnames(de_table_shared_up_ch_all) <- c("description", "logfc_107", "adjp_107",
"logfc_108", "adjp_108", "logfc_110", "adjp_110")
write.csv(de_table_shared_up_ch_all, file="images/de_table_shared_up_ch_all.csv")
This time for each of the three donors: self-healing down vs. uninfected.
down_sig <- hs_uninf_filtv2_sig$deseq$downs
comp_lst <- list(
"donor_107" = rownames(down_sig[["d107_shun"]]),
"donor_108" = rownames(down_sig[["d108_shun"]]),
"donor_110" = rownames(down_sig[["d110_shun"]]))
comp_shun_down <- Vennerable::Venn(Sets=comp_lst)
down_res <- Vennerable::plot(comp_shun_down, doWeights=FALSE)
shared_shun_down <- comp_shun_down@IntersectionSets[["111"]]
de_table_shared_down_sh_first <- hs_uninf_filtv2_combined[["data"]][["d107_shun"]][shared_shun_down, ]
de_table_shared_down_sh_second <- hs_uninf_filtv2_combined[["data"]][["d108_shun"]][shared_shun_down, ]
de_table_shared_down_sh_third <- hs_uninf_filtv2_combined[["data"]][["d110_shun"]][shared_shun_down, ]
de_table_shared_down_sh_all <- merge(
de_table_shared_down_sh_first[, c("description", "deseq_logfc", "deseq_adjp")],
de_table_shared_down_sh_second[, c("deseq_logfc", "deseq_adjp")],
by="row.names")
de_table_shared_down_sh_all <- merge(
de_table_shared_down_sh_all,
de_table_shared_down_sh_third[, c("deseq_logfc", "deseq_adjp")],
by.x="Row.names", by.y="row.names")
rownames(de_table_shared_down_sh_all) <- de_table_shared_down_sh_all[["Row.names"]]
de_table_shared_down_sh_all <- de_table_shared_down_sh_all[, -1]
colnames(de_table_shared_down_sh_all) <- c("description", "logfc_107",
"adjp_107", "logfc_108", "adjp_108",
"logfc_110", "adjp_110")
write.csv(de_table_shared_down_sh_all, file="images/de_table_shared_down_sh_all.csv")
This time for each of the three donors: chronic down vs. uninfected.
down_sig <- hs_uninf_filtv2_sig$deseq$downs
comp_lst <- list(
"donor_107" = rownames(down_sig[["d107_chun"]]),
"donor_108" = rownames(down_sig[["d108_chun"]]),
"donor_110" = rownames(down_sig[["d110_chun"]]))
comp_chun_down <- Vennerable::Venn(Sets=comp_lst)
down_res <- Vennerable::plot(comp_chun_down, doWeights=FALSE)
shared_chun_down <- comp_chun_down@IntersectionSets[["111"]]
de_table_shared_down_ch_first <- hs_uninf_filtv2_combined[["data"]][["d107_chun"]][shared_chun_down, ]
de_table_shared_down_ch_second <- hs_uninf_filtv2_combined[["data"]][["d108_chun"]][shared_chun_down, ]
de_table_shared_down_ch_third <- hs_uninf_filtv2_combined[["data"]][["d110_chun"]][shared_chun_down, ]
de_table_shared_down_ch_all <- merge(
de_table_shared_down_ch_first[, c("description", "deseq_logfc", "deseq_adjp")],
de_table_shared_down_ch_second[, c("deseq_logfc", "deseq_adjp")],
by="row.names")
de_table_shared_down_ch_all <- merge(
de_table_shared_down_ch_all,
de_table_shared_down_ch_third[, c("deseq_logfc", "deseq_adjp")],
by.x="Row.names", by.y="row.names")
rownames(de_table_shared_down_ch_all) <- de_table_shared_down_ch_all[["Row.names"]]
de_table_shared_down_ch_all <- de_table_shared_down_ch_all[, -1]
colnames(de_table_shared_down_ch_all) <- c("description", "logfc_107",
"adjp_107", "logfc_108", "adjp_108",
"logfc_110", "adjp_110")
write.csv(de_table_shared_down_ch_all, file="images/de_table_shared_down_ch_all.csv")
At this point, we should have a set of genes which are up/down in the self/uninfected and chronic/uninfected, kept in variables with names like: ‘shared_shun_down’ and ‘shared_chun_down’
Now we want a sense of what genes are shared/unique among the self-healing vs. uninfected and the chronic vs. uninfected comparisons performed above. One would assume that the most interesting genes in these sets will prove to be the the ones which are not shared.
shch_up_lst <- list(
"up_sh" = shared_shun_up,
"up_ch" = shared_chun_up)
shared_up <- Vennerable::Venn(Sets=shch_up_lst)
Vennerable::plot(shared_up, doWeights=FALSE)
shch_down_lst <- list(
"up_sh" = shared_shun_down,
"up_ch" = shared_chun_down)
shared_down <- Vennerable::Venn(Sets=shch_down_lst)
Vennerable::plot(shared_down, doWeights=FALSE)
upup_genes <- shared_up@IntersectionSets[["11"]]
upsh_notch <- shared_up@IntersectionSets[["10"]]
upch_notsh <- shared_up@IntersectionSets[["01"]]
downdown_genes <- shared_down@IntersectionSets[["11"]]
downsh_notch <- shared_down@IntersectionSets[["10"]]
downch_notsh <- shared_down@IntersectionSets[["01"]]
kept_columns <- c("ensembltranscriptid", "ensemblgeneid", "description",
"deseq_logfc", "deseq_adjp")
## Get the shared things in up_sh and up_ch, ergo can use either sheet
xls_result <- write_xls(data=up_sig[["d107_shun"]][upup_genes, kept_columns],
sheet="upsh_upch")
## Get the only-up_sh
xls_result <- write_xls(data=up_sig[["d107_shun"]][upsh_notch, kept_columns],
sheet="upsh_noch",
wb=xls_result[["workbook"]])
## Get the only-up_ch
xls_result <- write_xls(data=up_sig[["d107_chun"]][upch_notsh, kept_columns],
sheet="upch_nosh",
wb=xls_result[["workbook"]])
## Now repeat for the down: down-shared
## Get shared down down_sh down_ch
xls_result <- write_xls(data=down_sig[["d107_shun"]][downdown_genes, kept_columns],
sheet="downsh_downch",
wb=xls_result[["workbook"]])
## The down_sh only
xls_result <- write_xls(data=down_sig[["d107_shun"]][downsh_notch, kept_columns],
sheet="downsh_noch",
wb=xls_result[["workbook"]])
## The down_ch
xls_result <- write_xls(data=down_sig[["d107_chun"]][downch_notsh, kept_columns],
sheet="downch_nosh",
wb=xls_result[["workbook"]],
excel=paste0("excel/figure_5c_stuff-v", ver, ".xlsx"))
## Saving to: excel/figure_5c_stuff-v20180822.xlsx
Do I think the exact same thing as in the previous comparison, but now simplify the ‘condition’ factor to just self-healing vs. chronic and see what happens.
uninf_strainbatch_qcf <- set_expt_batches(expt=hs_cds_uninf, fact="pathogenstrain")
uninf_strainbatch_qcf <- set_expt_conditions(expt=uninf_strainbatch_qcf, fact="state")
withuninf_strainbatch_pairs_chsh <- all_pairwise(
parallel=FALSE, uninf_strainbatch_qcf,
model_batch=FALSE, force=TRUE, do_ebseq=FALSE)
chsh_keepers <- list(
"ch_sh" = c("chronic", "self_heal"),
"ch_nil" = c("chronic", "uninfected"),
"sh_nil" = c("self_heal", "uninfected"))
withuninf_strainbatch_tables_chsh <- sm(combine_de_tables(
withuninf_strainbatch_pairs_chsh,
keepers=chsh_keepers,
excel=paste0("excel/withuninf_strainbatch_chsh_pairwise-v", ver, ".xlsx")))
withuninf_strainbatch_sig_chsh <- extract_significant_genes(
withuninf_strainbatch_tables_chsh,
excel=paste0("excel/withuninf_strainbatch_chsh_sig-v", ver, ".xlsx"))
##withuninf_strainbatch_tables_chsh$limma_plots
##withuninf_strainbatch_tables_chsh$edger_plots
##withuninf_strainbatch_tables_chsh$deseq_plots
Generate DE lists of each donor for all contrasts for PBMCs.
I renamed these plots and am now hopelessly confused as to which is which. I think I will not run this for now but instead generate the worksheet without them and then return to this in the hopes that I can do a better job.
pp(file="images/fig_05a-sh_uninf_vs_chr_uninf_up.pdf")
Vennerable::plot(common_solos_batch$up_venn, doWeights=FALSE)
dev.off()
pp(file="images/fig_05b-sh_uninf_vs_chr_uninf_down.pdf")
Vennerable::plot(common_solos_batch$down_venn, doWeights=FALSE)
dev.off()
pp(file="images/fig_05c-sh_uninf_donors_up.pdf")
Vennerable::plot(sh_up_venn, doWeights=FALSE)
dev.off()
pp(file="images/fig_05d-sh_uninf_donors_down.pdf")
Vennerable::plot(sh_down_venn, doWeights=FALSE)
dev.off()
pp(file="images/fig_05e-chr_uninf_donors_up.pdf")
Vennerable::plot(chr_up_venn, doWeights=FALSE)
dev.off()
pp(file="images/fig_05f-chr_uninf_donors_down.pdf")
Vennerable::plot(chr_down_venn, doWeights=FALSE)
dev.off()
pp(file="images/fig_05g-up_common.pdf")
Vennerable::plot(shared_up, doWeights=FALSE)
dev.off()
pp(file="images/fig_05h-down_common.pdf")
Vennerable::plot(shared_down, doWeights=FALSE)
dev.off()
lp_pairwise_nobatch <- sm(all_pairwise(lp_inf_filt, model_batch=FALSE,
parallel=FALSE, do_ebseq=FALSE))
excel_file <- glue::glue("excel/{rundate}_lp_infect_nobatch_contr-v{ver}.xlsx")
lp_combined_nobatch <- sm(combine_de_tables(lp_pairwise_nobatch, excel=excel_file))
excel_file <- glue::glue("excel/{rundate}_lp_infect_nobatch_sig-v{ver}.xlsx")
lp_sig_nobatch <- sm(extract_significant_genes(lp_combined_nobatch, excel=excel_file))
lp_pairwise_batch <- sm(all_pairwise(lp_inf_filt, model_batch=TRUE))
excel_file <- glue::glue("excel/{rundate}_lp_infect_batch_contr-v{ver}.xlsx")
lp_combined_batch <- sm(combine_de_tables(lp_pairwise_batch, excel=excel_file))
excel_file <- glue::glue("excel/{rundate}_lp_infect_batch_sig-v{ver}.xlsx")
lp_sig_batch <- sm(extract_significant_genes(lp_combined_batch, excel=excel_file))
lp_pairwise_ssva <- sm(all_pairwise(lp_inf_filt, model_batch="ssva"))
excel_file <- glue::glue("excel/{rundate}_lp_infect_ssva_contr-v{ver}.xlsx")
lp_combined_ssva <- sm(combine_de_tables(lp_pairwise_ssva, excel=excel_file))
excel_file <- glue::glue("excel/{rundate}_lp_infect_ssva_sig-v{ver}.xlsx")
lp_sig_ssva <- sm(extract_significant_genes(lp_combined_ssva, excel=excel_file))
lp_pairwise_fsva <- sm(all_pairwise(lp_inf_filt, model_batch="fsva"))
excel_file <- glue::glue("excel/{rundate}_lp_infect_fsva_contr-v{ver}.xlsx")
lp_combined_fsva <- sm(combine_de_tables(lp_pairwise_fsva, excel=excel_file))
excel_file <- glue::glue("excel/{rundate}_lp_infect_fsva_sig-v{ver}.xlsx")
lp_sig_fsva <- sm(extract_significant_genes(lp_combined_fsva, excel=excel_file))
R version 3.5.1 (2018-07-02)
Platform: x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C
attached base packages: parallel, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: edgeR(v.3.22.5), ruv(v.0.9.7), bindrcpp(v.0.2.2), variancePartition(v.1.10.4), scales(v.1.0.0), foreach(v.1.4.4), limma(v.3.36.5), ggplot2(v.3.1.0), hpgltools(v.2018.11), Biobase(v.2.40.0) and BiocGenerics(v.0.26.0)
loaded via a namespace (and not attached): tidyselect(v.0.2.5), lme4(v.1.1-19), RSQLite(v.2.1.1), AnnotationDbi(v.1.42.1), htmlwidgets(v.1.3), grid(v.3.5.1), BiocParallel(v.1.14.2), devtools(v.2.0.1), munsell(v.0.5.0), codetools(v.0.2-15), preprocessCore(v.1.42.0), units(v.0.6-1), withr(v.2.1.2), colorspace(v.1.3-2), GOSemSim(v.2.6.2), BiocInstaller(v.1.30.0), OrganismDbi(v.1.22.0), knitr(v.1.20), rstudioapi(v.0.8), stats4(v.3.5.1), Vennerable(v.3.1.0.9000), robustbase(v.0.93-3), DOSE(v.3.6.1), labeling(v.0.3), GenomeInfoDbData(v.1.1.0), bit64(v.0.9-7), farver(v.1.0), rprojroot(v.1.3-2), R6(v.2.3.0), doParallel(v.1.0.14), GenomeInfoDb(v.1.16.0), locfit(v.1.5-9.1), bitops(v.1.0-6), fgsea(v.1.6.0), DelayedArray(v.0.6.6), assertthat(v.0.2.0), ggraph(v.1.0.2), nnet(v.7.3-12), enrichplot(v.1.0.2), gtable(v.0.2.0), sva(v.3.28.0), processx(v.3.2.0), rlang(v.0.3.0.1), genefilter(v.1.62.0), splines(v.3.5.1), rtracklayer(v.1.40.6), lazyeval(v.0.2.1), acepack(v.1.4.1), checkmate(v.1.8.5), yaml(v.2.2.0), reshape2(v.1.4.3), GenomicFeatures(v.1.32.3), backports(v.1.1.2), qvalue(v.2.12.0), Hmisc(v.4.1-1), clusterProfiler(v.3.8.1), RBGL(v.1.56.0), tools(v.3.5.1), usethis(v.1.4.0), gplots(v.3.0.1), RColorBrewer(v.1.1-2), sessioninfo(v.1.1.1), ggridges(v.0.5.1), Rcpp(v.1.0.0), plyr(v.1.8.4), base64enc(v.0.1-3), progress(v.1.2.0), zlibbioc(v.1.26.0), purrr(v.0.2.5), RCurl(v.1.95-4.11), ps(v.1.2.1), prettyunits(v.1.0.2), rpart(v.4.1-13), viridis(v.0.5.1), cowplot(v.0.9.3), S4Vectors(v.0.18.3), SummarizedExperiment(v.1.10.1), ggrepel(v.0.8.0), cluster(v.2.0.7-1), colorRamps(v.2.3), fs(v.1.2.6), magrittr(v.1.5), data.table(v.1.11.8), DO.db(v.2.9), openxlsx(v.4.1.0), packrat(v.0.4.9-3), matrixStats(v.0.54.0), pkgload(v.1.0.2), hms(v.0.4.2), evaluate(v.0.12), xtable(v.1.8-3), pbkrtest(v.0.4-7), XML(v.3.98-1.16), IRanges(v.2.14.12), gridExtra(v.2.3), testthat(v.2.0.1), compiler(v.3.5.1), biomaRt(v.2.36.1), tibble(v.1.4.2), KernSmooth(v.2.23-15), crayon(v.1.3.4), minqa(v.1.2.4), htmltools(v.0.3.6), mgcv(v.1.8-25), corpcor(v.1.6.9), snow(v.0.4-3), Formula(v.1.2-3), tidyr(v.0.8.2), geneplotter(v.1.58.0), DBI(v.1.0.0), tweenr(v.1.0.0), MASS(v.7.3-51.1), Matrix(v.1.2-15), cli(v.1.0.1), quadprog(v.1.5-5), gdata(v.2.18.0), bindr(v.0.1.1), igraph(v.1.2.2), GenomicRanges(v.1.32.7), pkgconfig(v.2.0.2), rvcheck(v.0.1.1), GenomicAlignments(v.1.16.0), foreign(v.0.8-71), plotly(v.4.8.0), annotate(v.1.58.0), XVector(v.0.20.0), stringr(v.1.3.1), callr(v.3.0.0), digest(v.0.6.18), graph(v.1.58.2), Biostrings(v.2.48.0), rmarkdown(v.1.10), fastmatch(v.1.1-0), htmlTable(v.1.12), directlabels(v.2018.05.22), Rsamtools(v.1.32.3), gtools(v.3.8.1), nloptr(v.1.2.1), nlme(v.3.1-137), jsonlite(v.1.5), desc(v.1.2.0), viridisLite(v.0.3.0), pillar(v.1.3.0), lattice(v.0.20-38), DEoptimR(v.1.0-8), httr(v.1.3.1), pkgbuild(v.1.0.2), survival(v.2.43-1), GO.db(v.3.6.0), glue(v.1.3.0), remotes(v.2.0.2), zip(v.1.0.0), UpSetR(v.1.3.3), iterators(v.1.0.10), pander(v.0.6.3), bit(v.1.1-14), ggforce(v.0.1.3), stringi(v.1.2.4), blob(v.1.1.1), DESeq2(v.1.20.0), doSNOW(v.1.0.16), latticeExtra(v.0.6-28), caTools(v.1.17.1.1), memoise(v.1.1.0) and dplyr(v.0.7.8)
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 355f1065fb30b65f999b2e2a1aee18fdf8e6ebce
## This is hpgltools commit: Sun Nov 25 19:27:18 2018 -0500: 355f1065fb30b65f999b2e2a1aee18fdf8e6ebce
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
## Saving to 03_expression_infection_20180822-v20180822.rda.xz