This document turns to the infection of PBMC cells with L.panamensis. This data is particularly strangely affected by the different strains used to infect the cells, and as a result is both useful and troubling.
Given the observations above, we have some ideas of ways to pass the data for differential expression analyses which may or may not be ‘better’. Lets try some and see what happens.
Given the above ways to massage the data, lets use a few of them for limma/deseq/edger. The main caveat in this is that those tools really do expect specific distributions of data which we horribly violate if we use log2() data, which is why in the previous blocks I named them l2blahblah, thus we can do the same sets of normalization but without that and forcibly push the resulting data into limma/edger/deseq.
Everything I did in 02_estimation_infection.html suggests that there are no significant differences visible if one looks just at chronic/self-healing in this data. Further testing has seemingly proven this statement, as a result most of the following analyses will look at chronic/uninfected and self-healing/uninfected followed by attempts to reconcile those results.
To save some time and annoyance with sva, lets filter the data now. In addition, write down a small function used to extract the sets of significant genes across different contrasts (notably self/uninfected vs. chronic/uninfected).
## There were 18, now there are 6 samples.
d107_pairwise <- sm(all_pairwise(d107, model_batch=FALSE))
d107_table <- sm(combine_de_tables(d107_pairwise))
summary(d107_table$data)
## Length Class Mode
## sh_vs_chr 46 data.frame list
d107_sig <- sm(extract_significant_genes(d107_table, according_to="deseq", p=0.1, p_type="raw"))
dim(d107_sig[["deseq"]][["ups"]][[1]])
## [1] 5 46
## There were 18, now there are 6 samples.
d108_pairwise <- sm(all_pairwise(d108, model_batch=FALSE))
d108_table <- sm(combine_de_tables(d108_pairwise))
summary(d108_table$data)
## Length Class Mode
## sh_vs_chr 46 data.frame list
d108_sig <- sm(extract_significant_genes(d108_table, according_to="deseq", p=0.1, p_type="raw"))
head(d108_sig$deseq$ups[[1]])
## ensembltranscriptid ensemblgeneid hgncsymbol
## ENSG00000119457 ENST00000374228 ENSG00000119457 SLC46A2
## ENSG00000162493 ENST00000294489 ENSG00000162493 PDPN
## description
## ENSG00000119457 solute carrier family 46 member 2 [Source:HGNC Symbol;Acc:HGNC:16055]
## ENSG00000162493 podoplanin [Source:HGNC Symbol;Acc:HGNC:29602]
## genebiotype deseq_logfc deseq_adjp edger_logfc
## ENSG00000119457 protein_coding 1.124 0.9999 1.110
## ENSG00000162493 protein_coding 1.054 0.9999 1.047
## edger_adjp limma_logfc limma_adjp basic_nummed
## ENSG00000119457 1 1.0310 0.9984 0.8026
## ENSG00000162493 1 0.9181 0.9984 1.5200
## basic_denmed basic_numvar basic_denvar basic_logfc basic_t
## ENSG00000119457 -0.09514 2.568e-01 6.194e-02 0.8977 3.281
## ENSG00000162493 0.80260 9.344e-01 5.950e-01 0.7173 1.414
## basic_p basic_adjp deseq_basemean deseq_lfcse deseq_stat
## ENSG00000119457 4.843e-02 1e+00 14.09 0.5510 2.039
## ENSG00000162493 2.335e-01 1e+00 22.92 0.5347 1.971
## deseq_p ebseq_fc ebseq_logfc ebseq_postfc ebseq_mean
## ENSG00000119457 0.04144 2.246 1.167 2.187 14.19
## ENSG00000162493 0.04873 2.177 1.122 2.143 23.12
## ebseq_ppee ebseq_ppde ebseq_adjp edger_logcpm edger_lr
## ENSG00000119457 1 2.896e-05 1 0.5923 6.374
## ENSG00000162493 1 4.423e-06 1 1.2200 4.785
## edger_p limma_ave limma_t limma_b limma_p limma_adjp_fdr
## ENSG00000119457 0.01158 0.2681 2.918 -4.453 0.01789 9.984e-01
## ENSG00000162493 0.02871 0.8417 1.655 -4.532 0.13370 9.984e-01
## deseq_adjp_fdr edger_adjp_fdr basic_adjp_fdr lfc_meta
## ENSG00000119457 1.000e+00 1.000e+00 1e+00 1.117
## ENSG00000162493 1.000e+00 1.000e+00 1e+00 1.017
## lfc_var lfc_varbymed p_meta p_var
## ENSG00000119457 0.000e+00 0.000e+00 2.364e-02 2.477e-04
## ENSG00000162493 3.373e-03 3.317e-03 7.038e-02 3.107e-03
## There were 18, now there are 6 samples.
d110_pairwise <- sm(all_pairwise(d110, model_batch=FALSE))
d110_table <- sm(combine_de_tables(d110_pairwise))
summary(d110_table$data)
## Length Class Mode
## sh_vs_chr 46 data.frame list
d110_sig <- sm(extract_significant_genes(d110_table, according_to="deseq", p=0.1, p_type="raw"))
head(d110_sig$deseq$ups[[1]])
## ensembltranscriptid ensemblgeneid hgncsymbol
## ENSG00000143333 ENST00000367558 ENSG00000143333 RGS16
## ENSG00000120436 ENST00000366834 ENSG00000120436 GPR31
## ENSG00000099985 ENST00000215781 ENSG00000099985 OSM
## ENSG00000137834 ENST00000288840 ENSG00000137834 SMAD6
## ENSG00000177807 ENST00000368089 ENSG00000177807 KCNJ10
## ENSG00000114115 ENST00000232219 ENSG00000114115 RBP1
## description
## ENSG00000143333 regulator of G-protein signaling 16 [Source:HGNC Symbol;Acc:HGNC:9997]
## ENSG00000120436 G protein-coupled receptor 31 [Source:HGNC Symbol;Acc:HGNC:4486]
## ENSG00000099985 oncostatin M [Source:HGNC Symbol;Acc:HGNC:8506]
## ENSG00000137834 SMAD family member 6 [Source:HGNC Symbol;Acc:HGNC:6772]
## ENSG00000177807 potassium voltage-gated channel subfamily J member 10 [Source:HGNC Symbol;Acc:HGNC:6256]
## ENSG00000114115 retinol binding protein 1 [Source:HGNC Symbol;Acc:HGNC:9919]
## genebiotype deseq_logfc deseq_adjp edger_logfc
## ENSG00000143333 protein_coding 1.510 6.358e-05 1.4950
## ENSG00000120436 protein_coding 1.478 1.000e+00 1.4550
## ENSG00000099985 protein_coding 1.226 5.618e-06 1.2100
## ENSG00000137834 protein_coding 1.155 1.000e+00 1.1340
## ENSG00000177807 protein_coding 1.035 1.593e-01 1.0190
## ENSG00000114115 protein_coding 1.012 1.000e+00 0.9872
## edger_adjp limma_logfc limma_adjp basic_nummed
## ENSG00000143333 4.147e-06 1.5450 0.2282 2.7930
## ENSG00000120436 5.293e-02 1.4270 0.2438 1.1730
## ENSG00000099985 1.314e-06 1.2660 0.2282 5.6940
## ENSG00000137834 8.498e-02 1.1040 0.2282 0.7448
## ENSG00000177807 1.213e-01 0.9465 0.3169 3.0800
## ENSG00000114115 1.662e-01 1.1950 0.2322 1.0860
## basic_denmed basic_numvar basic_denvar basic_logfc basic_t
## ENSG00000143333 1.47500 1.042e-01 1.044e-01 1.319 5.790
## ENSG00000120436 -0.77380 8.287e-01 1.687e-01 1.946 2.391
## ENSG00000099985 4.19700 6.884e-02 1.701e-01 1.497 4.498
## ENSG00000137834 -0.53530 1.577e-01 9.161e-02 1.280 3.796
## ENSG00000177807 1.91800 9.209e-01 1.252e-01 1.162 1.501
## ENSG00000114115 -0.04987 1.772e-02 8.064e-01 1.136 2.286
## basic_p basic_adjp deseq_basemean deseq_lfcse deseq_stat
## ENSG00000143333 4.422e-03 4.809e-01 68.76 0.2758 5.477
## ENSG00000120436 1.034e-01 4.811e-01 16.89 0.5436 2.719
## ENSG00000099985 1.573e-02 4.809e-01 484.20 0.2032 6.031
## ENSG00000137834 2.169e-02 4.809e-01 15.79 0.4878 2.368
## ENSG00000177807 2.461e-01 5.601e-01 73.35 0.3745 2.765
## ENSG00000114115 1.442e-01 4.928e-01 21.96 0.4624 2.188
## deseq_p ebseq_fc ebseq_logfc ebseq_postfc ebseq_mean
## ENSG00000143333 4.315e-08 2.874 1.523 2.851 69.31
## ENSG00000120436 6.544e-03 2.807 1.489 2.719 16.93
## ENSG00000099985 1.634e-09 2.396 1.261 2.394 488.89
## ENSG00000137834 1.787e-02 2.256 1.174 2.201 15.95
## ENSG00000177807 5.700e-03 2.060 1.043 2.050 73.55
## ENSG00000114115 2.866e-02 2.073 1.051 2.040 22.14
## ebseq_ppee ebseq_ppde ebseq_adjp edger_logcpm edger_lr
## ENSG00000143333 4.467e-10 1.000e+00 4.467e-10 2.2710 36.250
## ENSG00000120436 2.182e-01 7.818e-01 2.182e-01 0.3850 11.210
## ENSG00000099985 1.874e-06 1.000e+00 1.874e-06 5.0590 39.490
## ENSG00000137834 1.000e+00 1.816e-12 1.000e+00 0.2622 9.693
## ENSG00000177807 7.014e-01 2.986e-01 7.014e-01 2.3790 8.339
## ENSG00000114115 2.122e-01 7.878e-01 2.122e-01 0.7058 6.987
## edger_p limma_ave limma_t limma_b limma_p
## ENSG00000143333 1.736e-09 1.99000 6.492 -0.2393 0.0002045
## ENSG00000120436 8.155e-04 -0.09402 2.825 -3.3940 0.0227200
## ENSG00000099985 3.300e-10 4.85000 5.981 0.5318 0.0003530
## ENSG00000137834 1.850e-03 -0.02053 3.616 -2.8330 0.0070230
## ENSG00000177807 3.880e-03 2.09500 2.052 -3.9700 0.0748700
## ENSG00000114115 8.210e-03 0.38370 3.042 -3.1270 0.0163500
## limma_adjp_fdr deseq_adjp_fdr edger_adjp_fdr
## ENSG00000143333 2.282e-01 7.363e-05 4.147e-06
## ENSG00000120436 2.438e-01 1.888e-01 5.294e-02
## ENSG00000099985 2.282e-01 6.505e-06 1.314e-06
## ENSG00000137834 2.282e-01 2.751e-01 8.499e-02
## ENSG00000177807 3.169e-01 1.816e-01 1.213e-01
## ENSG00000114115 2.322e-01 3.192e-01 1.662e-01
## basic_adjp_fdr lfc_meta lfc_var lfc_varbymed p_meta
## ENSG00000143333 4.809e-01 1.5280 1.951e-03 1.277e-03 6.818e-05
## ENSG00000120436 4.811e-01 1.4780 4.320e-04 2.922e-04 1.003e-02
## ENSG00000099985 4.809e-01 1.3010 2.058e-02 1.582e-02 1.177e-04
## ENSG00000137834 4.809e-01 1.1050 4.602e-03 4.164e-03 8.914e-03
## ENSG00000177807 5.600e-01 0.9865 4.913e-03 4.980e-03 2.815e-02
## ENSG00000114115 4.928e-01 1.0720 1.590e-02 1.483e-02 1.774e-02
## p_var
## ENSG00000143333 1.394e-08
## ENSG00000120436 1.290e-04
## ENSG00000099985 4.154e-08
## ENSG00000137834 6.684e-05
## ENSG00000177807 1.638e-03
## ENSG00000114115 1.060e-04
hs_tmp <- set_expt_batches(hs_inf_filt, fact="pathogenstrain")
hs_tmp2 <- sm(normalize_expt(hs_tmp, transform="log2", convert="cpm", norm="quant", filter=TRUE))
plot_pca(hs_tmp2)$plot
This block will first remove strain 2504, then 2272. The resulting subsets will be used for another round of these differential expression analyses.
## There were 18, now there are 15 samples.
remove_2504_inf_filt <- sm(normalize_expt(remove_2504_inf, filter=TRUE))
remove_2272_inf <- subset_expt(hs_inf_filt, subset="pathogenstrain!='s2272'")
## There were 18, now there are 15 samples.
remove_2272_inf_filt <- sm(normalize_expt(remove_2272_inf, filter=TRUE))
remove_both_inf <- subset_expt(remove_2504_inf, subset="pathogenstrain!='s2272'")
## There were 15, now there are 12 samples.
remove_both_inf_filt <- sm(normalize_expt(remove_both_inf, filter=TRUE))
remove_2504_uninf <- subset_expt(hs_uninf_filt, subset="pathogenstrain!='s2504'")
## There were 21, now there are 18 samples.
remove_2504_uninf_filt <- sm(normalize_expt(remove_2504_uninf, filter=TRUE))
remove_2272_uninf <- subset_expt(hs_uninf_filt, subset="pathogenstrain!='s2272'")
## There were 21, now there are 18 samples.
remove_2272_uninf_filt <- sm(normalize_expt(remove_2272_uninf, filter=TRUE))
remove_both_uninf <- subset_expt(remove_2504_uninf, subset="pathogenstrain!='s2272'")
## There were 18, now there are 15 samples.
remove_both_uninf_filt <- sm(normalize_expt(remove_both_uninf, filter=TRUE))
remove_2504_d107 <- subset_expt(remove_2504_inf, subset="donor=='d107'")
## There were 15, now there are 5 samples.
## There were 15, now there are 5 samples.
## There were 15, now there are 5 samples.
## There were 15, now there are 5 samples.
## There were 15, now there are 5 samples.
## There were 15, now there are 5 samples.
## There were 12, now there are 4 samples.
## There were 12, now there are 4 samples.
## There were 12, now there are 4 samples.
Once using batch in model, once with svaseq. In my first round of doing this, I did not print out many metrics, let us be better about this.
In these plots, we can see that the other strain is still a bit weird, particularly in donor 108.
remove_2504_inf_norm <- sm(normalize_expt(remove_2504_inf_filt, transform="log2",
convert="cpm", norm="quant"))
plot_pca(remove_2504_inf_norm)$plot
remove_2504_inf_norm <- sm(normalize_expt(remove_2504_inf_filt, transform="log2",
batch="svaseq",
norm="quant"))
plot_pca(remove_2504_inf_norm)$plot
remove_2272_inf_norm <- sm(normalize_expt(remove_2272_inf_filt, transform="log2",
convert="cpm", norm="quant"))
plot_pca(remove_2272_inf_norm)$plot
remove_2272_inf_norm <- sm(normalize_expt(remove_2272_inf_filt, transform="log2",
batch="svaseq",
norm="quant"))
plot_pca(remove_2272_inf_norm)$plot
remove_both_inf_norm <- sm(normalize_expt(remove_both_inf_filt, transform="log2",
convert="cpm", norm="quant"))
plot_pca(remove_both_inf_norm)$plot
remove_both_inf_norm <- sm(normalize_expt(remove_both_inf_filt, transform="log2",
batch="svaseq", norm="quant"))
plot_pca(remove_both_inf_norm)$plot
remove_2504_uninf_norm <- sm(normalize_expt(remove_2504_uninf_filt, transform="log2",
convert="cpm", norm="quant"))
plot_pca(remove_2504_uninf_norm)$plot
remove_2504_uninf_norm <- sm(normalize_expt(remove_2504_uninf_filt, transform="log2",
batch="svaseq",
norm="quant"))
plot_pca(remove_2504_uninf_norm)$plot
remove_2272_uninf_norm <- sm(normalize_expt(remove_2272_uninf_filt, transform="log2",
convert="cpm", norm="quant"))
plot_pca(remove_2272_uninf_norm)$plot
remove_2272_uninf_norm <- sm(normalize_expt(remove_2272_uninf_filt, transform="log2",
batch="svaseq",
norm="quant"))
plot_pca(remove_2272_uninf_norm)$plot
remove_both_uninf_norm <- sm(normalize_expt(remove_both_uninf_filt, transform="log2",
convert="cpm", norm="quant"))
plot_pca(remove_both_uninf_norm)$plot
remove_both_uninf_norm <- sm(normalize_expt(remove_both_uninf_filt, transform="log2",
batch="svaseq", norm="quant"))
plot_pca(remove_both_uninf_norm)$plot
Now let us perform the likely differential expression analyses and see how they look.
remove_2504_inf_de <- sm(all_pairwise(remove_2504_inf_filt, model_batch=TRUE))
excel_file <- glue::glue("excel/{rundate}_hs_infect_remove2504inf_batchmodel_contr-v{ver}.xlsx")
remove_2504_inf_table <- sm(combine_de_tables(remove_2504_inf_de, excel=excel_file,
keepers=keepers_inf))
excel_file <- glue::glue("excel/{rundate}_hs_infect_remove2504inf_batchmodel_sig-v{ver}.xlsx")
remove_2504_inf_sig <- sm(extract_significant_genes(remove_2504_inf_table, excel=excel_file,
according_to="deseq", p=0.1, lfc=0.6))
remove_2504_inf_de_sva <- sm(all_pairwise(remove_2504_inf_filt, model_batch="svaseq"))
excel_file <- glue::glue("excel/{rundate}_hs_infect_remove2504inf_svaseq_contr-v{ver}.xlsx")
remove_2504_inf_table_sva <- sm(combine_de_tables(remove_2504_inf_de_sva, excel=excel_file,
keepers=keepers_inf))
excel_file <- glue::glue("excel/{rundate}_hs_uninfect_remove2504uninf_batchmodel_contr-v{ver}.xlsx")
remove_2504_uninf_table <- sm(combine_de_tables(remove_2504_uninf_de, excel=excel_file,
keepers=keepers_uninf))
excel_file <- glue::glue("excel/{rundate}_hs_uninfect_remove2504uninf_batchmodel_sig-v{ver}.xlsx")
remove_2504_uninf_sig <- sm(extract_significant_genes(remove_2504_uninf_table, excel=excel_file,
according_to="deseq", p=0.1, lfc=0.6))
remove_2504_uninf_de_sva <- sm(all_pairwise(remove_2504_uninf_filt, model_batch="svaseq"))
Print a few plots describing what just happened.
pvalues <- sm(plot_de_pvals(remove_2504_uninf_table_sva$data[["ch_sh"]], type="deseq"))
pvalues$plot
remove_2272_inf_de <- sm(all_pairwise(remove_2272_inf_filt, model_batch=TRUE))
excel_file <- glue::glue("excel/{rundate}_hs_infect_remove2272inf_batchmodel_contr-v{ver}.xlsx")
remove_2272_inf_table <- sm(combine_de_tables(remove_2272_inf_de, excel=excel_file,
keepers=keepers_inf))
excel_file <- glue::glue("excel/{rundate}_hs_infect_remove2272inf_batchmodel_sig-v{ver}.xlsx")
remove_2272_inf_sig <- sm(extract_significant_genes(remove_2272_inf_table, excel=excel_file,
according_to="deseq", p=0.1, lfc=0.6))
remove_2272_inf_de_sva <- sm(all_pairwise(remove_2272_inf_filt, model_batch="svaseq"))
excel_file <- glue::glue("excel/{rundate}_hs_infect_remove2272inf_svaseq_contr-v{ver}.xlsx")
remove_2272_inf_table_sva <- sm(combine_de_tables(remove_2272_inf_de_sva, excel=excel_file,
keepers=keepers_inf))
excel_file <- glue::glue("excel/{rundate}_hs_uninfect_remove2272uninf_batchmodel_contr-v{ver}.xlsx")
remove_2272_uninf_table <- sm(combine_de_tables(remove_2272_uninf_de, excel=excel_file,
keepers=keepers_uninf))
excel_file <- glue::glue("excel/{rundate}_hs_uninfect_remove2272uninf_batchmodel_sig-v{ver}.xlsx")
remove_2272_uninf_sig <- sm(extract_significant_genes(remove_2272_uninf_table, excel=excel_file,
according_to="deseq", p=0.1, lfc=0.6))
remove_2272_uninf_de_sva <- sm(all_pairwise(remove_2272_uninf_filt, model_batch="svaseq"))
Print a few plots describing what just happened.
pvalues <- sm(plot_de_pvals(remove_2272_uninf_table_sva$data[["ch_sh"]], type="deseq"))
pvalues$plot
Repeat as above, removing both weirdo samples.
When we plot the pca/corheat though, these look a bit less clear.
remove_both_inf_norm <- sm(normalize_expt(remove_both_inf_filt, transform="log2",
convert="cpm", norm="quant"))
plot_pca(remove_both_inf_norm)$plot
remove_both_inf_norm <- sm(normalize_expt(remove_both_inf_filt, transform="log2",
batch="svaseq",
norm="quant"))
plot_pca(remove_both_inf_norm)$plot
remove_both_uninf_norm <- sm(normalize_expt(remove_both_uninf_filt, transform="log2",
convert="cpm", norm="quant"))
plot_pca(remove_both_uninf_norm)$plot
remove_both_uninf_norm <- sm(normalize_expt(remove_both_uninf_filt, transform="log2",
batch="svaseq",
norm="quant"))
plot_pca(remove_both_uninf_norm)$plot
remove_both_inf_de <- sm(all_pairwise(remove_both_inf_filt, model_batch=TRUE))
excel_file <- glue::glue("excel/{rundate}_hs_infect_removebothinf_batchmodel_contr-v{ver}.xlsx")
remove_both_inf_table <- sm(combine_de_tables(remove_both_inf_de, excel=excel_file,
keepers=keepers_inf))
excel_file <- glue::glue("excel/{rundate}_hs_infect_removebothinf_batchmodel_sig-v{ver}.xlsx")
remove_both_inf_sig <- sm(extract_significant_genes(remove_both_inf_table, excel=excel_file,
according_to="deseq", p=0.1, lfc=0.6))
remove_both_inf_de_sva <- sm(all_pairwise(remove_both_inf_filt, model_batch="svaseq"))
excel_file <- glue::glue("excel/{rundate}_hs_infect_removebothinf_svaseq_contr-v{ver}.xlsx")
remove_both_inf_table_sva <- sm(combine_de_tables(remove_both_inf_de_sva, excel=excel_file,
keepers=keepers_inf))
Repeat the diagnostic analysis of d107/108/110, but this time without the 2504/2272/both samples. The goal here is to see if there is a similar p-value distribution after removing the putatively troublesome samples.
## This function will replace the expt$expressionset slot with:
## hpgl(data)
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: hpgl
## Removing 502 low-count genes (11442 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
## The be method chose 1 surrogate variable(s).
## Attempting svaseq estimation with 1 surrogates.
## Using svaseq to visualize before/after batch inclusion.
## Performing a test normalization with: raw
## Finished running DE analyses, collecting outputs.
## Comparing analyses 1/1: sh_vs_chr
## Comparing analyses 1/1: sh_vs_chr
## Comparing analyses 1/1: sh_vs_chr
## Comparing analyses 1/1: sh_vs_chr
## Comparing analyses 1/1: sh_vs_chr
## Comparing analyses 1/1: sh_vs_chr
## Comparing analyses 1/1: sh_vs_chr
## Comparing analyses 1/1: sh_vs_chr
## Comparing analyses 1/1: sh_vs_chr
## Comparing analyses 1/1: sh_vs_chr
## Writing a legend of columns.
## Working on table 1/1: sh_vs_chr
## 20181210 a pthread error in normalize.quantiles leads me to robust.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## This function will replace the expt$expressionset slot with:
## hpgl(data)
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: hpgl
## Removing 553 low-count genes (11391 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
## The be method chose 1 surrogate variable(s).
## Attempting svaseq estimation with 1 surrogates.
## Using svaseq to visualize before/after batch inclusion.
## Performing a test normalization with: raw
## Finished running DE analyses, collecting outputs.
## Comparing analyses 1/1: sh_vs_chr
## Comparing analyses 1/1: sh_vs_chr
## Comparing analyses 1/1: sh_vs_chr
## Comparing analyses 1/1: sh_vs_chr
## Comparing analyses 1/1: sh_vs_chr
## Comparing analyses 1/1: sh_vs_chr
## Comparing analyses 1/1: sh_vs_chr
## Comparing analyses 1/1: sh_vs_chr
## Comparing analyses 1/1: sh_vs_chr
## Comparing analyses 1/1: sh_vs_chr
## Writing a legend of columns.
## Working on table 1/1: sh_vs_chr
## 20181210 a pthread error in normalize.quantiles leads me to robust.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
excel_file <- glue::glue("excel/{rundate}_hs_uninfect_removebothuninf_batchmodel_contr-v{ver}.xlsx")
remove_both_uninf_table <- sm(combine_de_tables(remove_both_uninf_de, excel=excel_file,
keepers=keepers_uninf))
excel_file <- glue::glue("excel/{rundate}_hs_uninfect_removebothuninf_batchmodel_sig-v{ver}.xlsx")
remove_both_uninf_sig <- sm(extract_significant_genes(remove_both_uninf_table, excel=excel_file,
according_to="deseq", p=0.1, lfc=0.6))
remove_both_uninf_de_sva <- sm(all_pairwise(remove_both_uninf_filt, model_batch="svaseq"))
## Testing method: limma.
## Adding method: limma to the set.
## Testing method: deseq.
## Adding method: deseq to the set.
## Testing method: edger.
## Adding method: edger to the set.
## Testing method: ebseq.
## Adding method: ebseq to the set.
## Testing method: basic.
## Adding method: basic to the set.
## $result
## $result$limma
## $result$limma$ch_sh
## $result$limma$ch_sh$logfc
## [1] 0.856
##
## $result$limma$ch_sh$p
## [1] 0.624
##
## $result$limma$ch_sh$adjp
## [1] 0.624
##
##
##
## $result$deseq
## $result$deseq$ch_sh
## $result$deseq$ch_sh$logfc
## [1] 0.8652
##
## $result$deseq$ch_sh$p
## [1] 0.6525
##
## $result$deseq$ch_sh$adjp
## [1] 0.5991
##
##
##
## $result$edger
## $result$edger$ch_sh
## $result$edger$ch_sh$logfc
## [1] 0.8637
##
## $result$edger$ch_sh$p
## [1] 0.6532
##
## $result$edger$ch_sh$adjp
## [1] 0.6532
##
##
##
## $result$ebseq
## $result$ebseq$ch_sh
## $result$ebseq$ch_sh$logfc
## [1] 0.8895
##
##
##
## $result$basic
## $result$basic$ch_sh
## $result$basic$ch_sh$logfc
## [1] 0.7831
##
## $result$basic$ch_sh$p
## [1] 0.6981
##
## $result$basic$ch_sh$adjp
## [1] 0.595
##
##
##
##
## $logfc
## ch_sh
## 0.7831
##
## $p
## ch_sh
## 0.6981
##
## $adjp
## ch_sh
## 0.595
## Testing method: limma.
## Adding method: limma to the set.
## Testing method: deseq.
## Adding method: deseq to the set.
## Testing method: edger.
## Adding method: edger to the set.
## Testing method: ebseq.
## Adding method: ebseq to the set.
## Testing method: basic.
## Adding method: basic to the set.
## $result
## $result$limma
## $result$limma$ch_sh
## $result$limma$ch_sh$logfc
## [1] 0.8979
##
## $result$limma$ch_sh$p
## [1] 0.7291
##
## $result$limma$ch_sh$adjp
## [1] 0.7291
##
##
##
## $result$deseq
## $result$deseq$ch_sh
## $result$deseq$ch_sh$logfc
## [1] 0.9039
##
## $result$deseq$ch_sh$p
## [1] 0.7496
##
## $result$deseq$ch_sh$adjp
## [1] 0.7614
##
##
##
## $result$edger
## $result$edger$ch_sh
## $result$edger$ch_sh$logfc
## [1] 0.9039
##
## $result$edger$ch_sh$p
## [1] 0.75
##
## $result$edger$ch_sh$adjp
## [1] 0.75
##
##
##
## $result$ebseq
## $result$ebseq$ch_sh
## $result$ebseq$ch_sh$logfc
## [1] 0.9347
##
##
##
## $result$basic
## $result$basic$ch_sh
## $result$basic$ch_sh$logfc
## [1] 0.8741
##
## $result$basic$ch_sh$p
## [1] 0.8109
##
## $result$basic$ch_sh$adjp
## [1] 0.7849
##
##
##
##
## $logfc
## ch_sh
## 0.8741
##
## $p
## ch_sh
## 0.8109
##
## $adjp
## ch_sh
## 0.7849
pvalues <- sm(plot_de_pvals(remove_both_uninf_table_sva$data[["ch_sh"]], type="deseq"))
pvalues$plot
Try building a donor*state interaction model.
I think I realized my confusion in this: the only interaction models I have used before were a experimental factor*experimental floating-point observation. If it is the case, that the following models are all x vs. factor_reference, then I guess it makes sense; but if not, then I have no clue what is going on in these.
First make sure that self-healing is the reference.
Then make some designs of potential interest.
data <- edgeR::DGEList(exprs(hs_inf_filt))
data <- edgeR::calcNormFactors(data)
design <- pData(hs_inf_filt)
design[["condition"]] <- relevel(design[["condition"]], ref="sh")
design[["donor"]] <- as.factor(design[["donor"]])
design[["donor"]] <- relevel(design[["donor"]], ref="d107")
chsh <- model.matrix(object=~0+condition, data=design)
head(chsh) ## Looks good to me.
## conditionsh conditionchr
## chr_5430_d108 0 1
## chr_5397_d108 0 1
## chr_2504_d108 0 1
## sh_2272_d108 1 0
## sh_1022_d108 1 0
## sh_2189_d108 1 0
## donord107 donord108 donord110
## chr_5430_d108 0 1 0
## chr_5397_d108 0 1 0
## chr_2504_d108 0 1 0
## sh_2272_d108 0 1 0
## sh_1022_d108 0 1 0
## sh_2189_d108 0 1 0
cond_donor_inter <- model.matrix(object=~0+condition+donor+condition:donor, data=design)
head(cond_donor_inter)
## conditionsh conditionchr donord108 donord110
## chr_5430_d108 0 1 1 0
## chr_5397_d108 0 1 1 0
## chr_2504_d108 0 1 1 0
## sh_2272_d108 1 0 1 0
## sh_1022_d108 1 0 1 0
## sh_2189_d108 1 0 1 0
## conditionchr:donord108 conditionchr:donord110
## chr_5430_d108 1 0
## chr_5397_d108 1 0
## chr_2504_d108 1 0
## sh_2272_d108 0 0
## sh_1022_d108 0 0
## sh_2189_d108 0 0
## Why are there no conditionchr:donor107 conditionsh:donor107 columns?
## I know that if I relevel the donor factor to set the reference to another donor, that changes
## but this still does not make sense to me.
## I think the above model should be the same as:
cond_donor_interv2 <- model.matrix(object=~0+condition*donor, data=design)
head(cond_donor_interv2)
## conditionsh conditionchr donord108 donord110
## chr_5430_d108 0 1 1 0
## chr_5397_d108 0 1 1 0
## chr_2504_d108 0 1 1 0
## sh_2272_d108 1 0 1 0
## sh_1022_d108 1 0 1 0
## sh_2189_d108 1 0 1 0
## conditionchr:donord108 conditionchr:donord110
## chr_5430_d108 1 0
## chr_5397_d108 1 0
## chr_2504_d108 1 0
## sh_2272_d108 0 0
## sh_1022_d108 0 0
## sh_2189_d108 0 0
testthat::expect_equal(cond_donor_inter, cond_donor_interv2)
## So as I understand it, if I do a test of coef='conditionchr', this is looking at
## chr vs. sh
## If I do coef='conditionchr:donor110' this is the donor effect for chr/sh for donor110?
## If this is true, then how do I get this for donor107?
## Perhaps my understanding is backwards?
donor_cond_inter <- model.matrix(object=~donor+condition+donor:condition, data=design)
donor_cond_interv2 <- model.matrix(object=~donor*condition, data=design)
testthat::expect_equal(donor_cond_inter, donor_cond_interv2)
head(donor_cond_inter)
## (Intercept) donord108 donord110 conditionchr
## chr_5430_d108 1 1 0 1
## chr_5397_d108 1 1 0 1
## chr_2504_d108 1 1 0 1
## sh_2272_d108 1 1 0 0
## sh_1022_d108 1 1 0 0
## sh_2189_d108 1 1 0 0
## donord108:conditionchr donord110:conditionchr
## chr_5430_d108 1 0
## chr_5397_d108 1 0
## chr_2504_d108 1 0
## sh_2272_d108 0 0
## sh_1022_d108 0 0
## sh_2189_d108 0 0
disp <- edgeR::estimateDisp(data, design=cond_donor_inter)
fit <- edgeR::glmQLFit(disp, cond_donor_inter)
## I think this is my global chr/sh
chr_sh_qlf <- edgeR::glmQLFTest(fit, coef="conditionchr")
chr_sh_result <- edgeR::topTags(chr_sh_qlf)
summary(chr_sh_result$table)
## logFC logCPM F PValue
## Min. :-14.9 Min. :5.04 Min. :21787 Min. :2.58e-28
## 1st Qu.:-14.3 1st Qu.:5.70 1st Qu.:22428 1st Qu.:7.45e-28
## Median :-13.6 Median :6.35 Median :23358 Median :1.01e-27
## Mean :-13.8 Mean :6.12 Mean :23693 Mean :1.05e-27
## 3rd Qu.:-13.4 3rd Qu.:6.51 3rd Qu.:24216 3rd Qu.:1.42e-27
## Max. :-13.1 Max. :6.87 Max. :27493 Max. :1.80e-27
## FDR
## Min. :8.93e-25
## 1st Qu.:8.93e-25
## Median :8.93e-25
## Mean :8.93e-25
## 3rd Qu.:8.93e-25
## Max. :8.93e-25
## [1] "conditionchr:donord108"
d110_ch_qlf <- edgeR::glmQLFTest(fit, coef="conditionchr:donord110")
d110_ch_result <- edgeR::topTags(d110_ch_qlf)
summary(d110_ch_result$table)
## logFC logCPM F PValue
## Min. :-1.305 Min. :0.762 Min. :10.6 Min. :0.000729
## 1st Qu.:-0.659 1st Qu.:2.330 1st Qu.:10.9 1st Qu.:0.001731
## Median :-0.484 Median :3.884 Median :12.9 Median :0.002407
## Mean :-0.393 Mean :3.428 Mean :13.0 Mean :0.002769
## 3rd Qu.:-0.395 3rd Qu.:4.252 3rd Qu.:13.9 3rd Qu.:0.004282
## Max. : 1.133 Max. :5.837 Max. :17.0 Max. :0.004813
## FDR
## Min. :1
## 1st Qu.:1
## Median :1
## Mean :1
## 3rd Qu.:1
## Max. :1
summarized <- DESeq2::DESeqDataSetFromMatrix(countData=exprs(hs_inf_filt),
colData=design,
design=~condition*donor)
## converting counts to integer mode
dataset <- DESeq2::DESeqDataSet(se=summarized, design=~condition*donor)
run <- DESeq2::DESeq(dataset)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## [1] "Intercept" "condition_chr_vs_sh"
## [3] "donor_d108_vs_d107" "donor_d110_vs_d107"
## [5] "conditionchr.donord108" "conditionchr.donord110"
## Length Class Mode
## 6 DESeqResults S4
## log2 fold change (MLE): condition chr vs sh
## Wald test p-value: condition chr vs sh
## DataFrame with 6 rows and 6 columns
## baseMean log2FoldChange lfcSE
## <numeric> <numeric> <numeric>
## ENSG00000000419 214.886619915588 -0.0934303786250254 0.12300393077645
## ENSG00000000457 318.115512380986 -0.068815027486115 0.120178565849457
## ENSG00000000460 102.031345099905 0.197578912145072 0.203833151567596
## ENSG00000000938 5676.81912456484 0.0269256146826752 0.197204584725487
## ENSG00000000971 53.5624271330181 0.344010769796456 0.293375111918845
## ENSG00000001036 460.076692835547 0.0156360632261453 0.148896600722685
## stat pvalue padj
## <numeric> <numeric> <numeric>
## ENSG00000000419 -0.759572300131027 0.447510281800118 0.997804083570582
## ENSG00000000457 -0.572606496006257 0.566911160357569 0.997804083570582
## ENSG00000000460 0.969316868358142 0.332387115533936 0.997804083570582
## ENSG00000000938 0.136536453856568 0.891397208363037 0.997804083570582
## ENSG00000000971 1.17259697847723 0.240957461669005 0.997804083570582
## ENSG00000001036 0.105012895863667 0.916365575881328 0.997804083570582
## Maybe my thinking is just a little wrong: is this the interaction of chr/sh with d110/d107?
## That would make more sense I think.
res_donor110_chrsh <- DESeq2::results(run, name="conditionchr.donord110")
summary(res_donor110_chrsh)
## Length Class Mode
## 6 DESeqResults S4
## log2 fold change (MLE): conditionchr.donord110
## Wald test p-value: conditionchr.donord110
## DataFrame with 6 rows and 6 columns
## baseMean log2FoldChange lfcSE
## <numeric> <numeric> <numeric>
## ENSG00000000419 214.886619915588 0.295162897632013 0.172516639204641
## ENSG00000000457 318.115512380986 0.108194027119009 0.172637547612825
## ENSG00000000460 102.031345099905 0.376710409795487 0.293516835681214
## ENSG00000000938 5676.81912456484 0.0530822519196353 0.278697424287239
## ENSG00000000971 53.5624271330181 -0.388785003193931 0.436191950074646
## ENSG00000001036 460.076692835547 0.10657602357023 0.206872518783626
## stat pvalue padj
## <numeric> <numeric> <numeric>
## ENSG00000000419 1.71092422732562 0.0870951015388544 0.999936246663048
## ENSG00000000457 0.626712025368067 0.530848019781775 0.999936246663048
## ENSG00000000460 1.2834371456792 0.199338967059447 0.999936246663048
## ENSG00000000938 0.19046552746367 0.848944353796083 0.999936246663048
## ENSG00000000971 -0.891316318715644 0.372759496477825 0.999936246663048
## ENSG00000001036 0.515177289844383 0.606429137146659 0.999936246663048
res_donor108_chrsh <- DESeq2::results(run, name="conditionchr.donord108")
summary(res_donor110_chrsh)
## Length Class Mode
## 6 DESeqResults S4
## log2 fold change (MLE): conditionchr.donord110
## Wald test p-value: conditionchr.donord110
## DataFrame with 6 rows and 6 columns
## baseMean log2FoldChange lfcSE
## <numeric> <numeric> <numeric>
## ENSG00000000419 214.886619915588 0.295162897632013 0.172516639204641
## ENSG00000000457 318.115512380986 0.108194027119009 0.172637547612825
## ENSG00000000460 102.031345099905 0.376710409795487 0.293516835681214
## ENSG00000000938 5676.81912456484 0.0530822519196353 0.278697424287239
## ENSG00000000971 53.5624271330181 -0.388785003193931 0.436191950074646
## ENSG00000001036 460.076692835547 0.10657602357023 0.206872518783626
## stat pvalue padj
## <numeric> <numeric> <numeric>
## ENSG00000000419 1.71092422732562 0.0870951015388544 0.999936246663048
## ENSG00000000457 0.626712025368067 0.530848019781775 0.999936246663048
## ENSG00000000460 1.2834371456792 0.199338967059447 0.999936246663048
## ENSG00000000938 0.19046552746367 0.848944353796083 0.999936246663048
## ENSG00000000971 -0.891316318715644 0.372759496477825 0.999936246663048
## ENSG00000001036 0.515177289844383 0.606429137146659 0.999936246663048
## Length Class Mode
## 6 DESeqResults S4
## log2 fold change (MLE): donor d110 vs d107
## Wald test p-value: donor d110 vs d107
## DataFrame with 6 rows and 6 columns
## baseMean log2FoldChange lfcSE
## <numeric> <numeric> <numeric>
## ENSG00000000419 214.886619915588 -0.113766010123853 0.120483955620923
## ENSG00000000457 318.115512380986 -0.456556848180769 0.121122399221616
## ENSG00000000460 102.031345099905 -0.5010401182924 0.210579406027488
## ENSG00000000938 5676.81912456484 0.505694298463597 0.197026331645792
## ENSG00000000971 53.5624271330181 -0.842424805273729 0.308759375189539
## ENSG00000001036 460.076692835547 0.568390991546202 0.145482651751354
## stat pvalue
## <numeric> <numeric>
## ENSG00000000419 -0.944241990873823 0.345046001807488
## ENSG00000000457 -3.76938411982257 0.000163650866259048
## ENSG00000000460 -2.3793405430491 0.0173436449623682
## ENSG00000000938 2.56663307000365 0.0102691214578726
## ENSG00000000971 -2.72841854520721 0.00636388048892608
## ENSG00000001036 3.90693312710334 9.3475010176665e-05
## padj
## <numeric>
## ENSG00000000419 0.476062375176819
## ENSG00000000457 0.000739090391073861
## ENSG00000000460 0.0422654009174772
## ENSG00000000938 0.0271711897470546
## ENSG00000000971 0.0181060512075926
## ENSG00000001036 0.000448806683671478
## Comp.1 Comp.2
## [1,] "31.7:ENSG00000129824" "22.16:ENSG00000100079"
## [2,] "30.31:ENSG00000012817" "16.86:ENSG00000128283"
## [3,] "30:ENSG00000185736" "15.92:ENSG00000170439"
## [4,] "29.65:ENSG00000067048" "13.44:ENSG00000156113"
## [5,] "27.68:ENSG00000114374" "13.39:ENSG00000007350"
## [6,] "25.19:ENSG00000183878" "13.25:ENSG00000123219"
## [7,] "21.54:ENSG00000198692" "13.11:ENSG00000205809"
## [8,] "20.86:ENSG00000067646" "13.01:ENSG00000165810"
## [9,] "18.84:ENSG00000074410" "12.6:ENSG00000150687"
## [10,] "16.59:ENSG00000179344" "12.6:ENSG00000167880"
## [11,] "15.01:ENSG00000134184" "12.36:ENSG00000150510"
## [12,] "14.9:ENSG00000196735" "12.1:ENSG00000167094"
## [13,] "14.46:ENSG00000122641" "12.09:ENSG00000146918"
## [14,] "14.37:ENSG00000121380" "11.77:ENSG00000173372"
## [15,] "14.37:ENSG00000161055" "11.67:ENSG00000214944"
## [16,] "13.88:ENSG00000154620" "11.34:ENSG00000174945"
## [17,] "13.76:ENSG00000166923" "11.01:ENSG00000240247"
## [18,] "13.5:ENSG00000123453" "10.99:ENSG00000205810"
## [19,] "13.06:ENSG00000123454" "10.95:ENSG00000149635"
## [20,] "13.02:ENSG00000108688" "10.72:ENSG00000102837"
## [21,] "12.49:ENSG00000131435" "10.69:ENSG00000109956"
## [22,] "12.47:ENSG00000136449" "10.68:ENSG00000061337"
## [23,] "12.22:ENSG00000110092" "10.6:ENSG00000206047"
## [24,] "12.05:ENSG00000120645" "10.51:ENSG00000205846"
## [25,] "11.83:ENSG00000205057" "10.5:ENSG00000183542"
## [26,] "11.54:ENSG00000123689" "10.04:ENSG00000113721"
## [27,] "11.34:ENSG00000170160" "9.932:ENSG00000101188"
## [28,] "11.32:ENSG00000113302" "9.836:ENSG00000113657"
## [29,] "11.2:ENSG00000102970" "9.783:ENSG00000196421"
## [30,] "11.08:ENSG00000198203" "9.572:ENSG00000145708"
## [31,] "11.08:ENSG00000184371" "9.241:ENSG00000133962"
## [32,] "11.07:ENSG00000149635" "9.213:ENSG00000148488"
## [33,] "10.94:ENSG00000204644" "8.953:ENSG00000125780"
## [34,] "10.83:ENSG00000050165" "8.802:ENSG00000243772"
## [35,] "10.66:ENSG00000150510" "8.796:ENSG00000170160"
## [36,] "10.47:ENSG00000182871" "8.76:ENSG00000162383"
## [37,] "10.42:ENSG00000134668" "8.719:ENSG00000108370"
## [38,] "10.23:ENSG00000164530" "8.705:ENSG00000054793"
## [39,] "10.22:ENSG00000178860" "8.564:ENSG00000168329"
## [40,] "10.19:ENSG00000109471" "8.503:ENSG00000184349"
## [41,] "10.17:ENSG00000197121" "8.377:ENSG00000100450"
## [42,] "10.15:ENSG00000164400" "8.224:ENSG00000173239"
## [43,] "9.967:ENSG00000180616" "8.088:ENSG00000086548"
## [44,] "9.874:ENSG00000069482" "8.001:ENSG00000111199"
## [45,] "9.8:ENSG00000133687" "7.934:ENSG00000239839"
## [46,] "9.786:ENSG00000134716" "7.928:ENSG00000139567"
## [47,] "9.778:ENSG00000115009" "7.858:ENSG00000188820"
## [48,] "9.737:ENSG00000197822" "7.744:ENSG00000101425"
## [49,] "9.705:ENSG00000172724" "7.624:ENSG00000176083"
## [50,] "9.686:ENSG00000181634" "7.616:ENSG00000169908"
annot <- fData(hs_inf_filt)
comp1_genes <- unlist(strsplit(x=as.character(scores$highest[, 1]), split=":"))
idx <- grep(pattern="^ENSG", x=comp1_genes)
comp1_genes <- comp1_genes[idx]
comp1_high_scores <- exclude_genes_expt(hs_inf_filt, ids=comp1_genes, method="keep")
## Before removal, there were 11944 entries.
## Now there are 50 entries.
## Percent kept: 0.140, 0.119, 0.075, 0.131, 0.083, 0.119, 0.017, 0.016, 0.009, 0.019, 0.010, 0.012, 0.210, 0.213, 0.183, 0.252, 0.162, 0.221
## Percent removed: 99.860, 99.881, 99.925, 99.869, 99.917, 99.881, 99.983, 99.984, 99.991, 99.981, 99.990, 99.988, 99.790, 99.787, 99.817, 99.748, 99.838, 99.779
ensembl_gene_id | description | |
---|---|---|
ENSG00000129824 | ENSG00000129824 | ribosomal protein S4, Y-linked 1 [Source:HGNC Symbol;Acc:HGNC:10425] |
ENSG00000012817 | ENSG00000012817 | lysine demethylase 5D [Source:HGNC Symbol;Acc:HGNC:11115] |
ENSG00000185736 | ENSG00000185736 | adenosine deaminase, RNA specific B2 (inactive) [Source:HGNC Symbol;Acc:HGNC:227] |
ENSG00000067048 | ENSG00000067048 | DEAD-box helicase 3, Y-linked [Source:HGNC Symbol;Acc:HGNC:2699] |
ENSG00000114374 | ENSG00000114374 | ubiquitin specific peptidase 9, Y-linked [Source:HGNC Symbol;Acc:HGNC:12633] |
ENSG00000183878 | ENSG00000183878 | ubiquitously transcribed tetratricopeptide repeat containing, Y-linked [Source:HGNC Symbol;Acc:HGNC:12638] |
ENSG00000198692 | ENSG00000198692 | eukaryotic translation initiation factor 1A, Y-linked [Source:HGNC Symbol;Acc:HGNC:3252] |
ENSG00000067646 | ENSG00000067646 | zinc finger protein, Y-linked [Source:HGNC Symbol;Acc:HGNC:12870] |
ENSG00000074410 | ENSG00000074410 | carbonic anhydrase 12 [Source:HGNC Symbol;Acc:HGNC:1371] |
ENSG00000179344 | ENSG00000179344 | major histocompatibility complex, class II, DQ beta 1 [Source:HGNC Symbol;Acc:HGNC:4944] |
ENSG00000134184 | ENSG00000134184 | glutathione S-transferase mu 1 [Source:HGNC Symbol;Acc:HGNC:4632] |
ENSG00000196735 | ENSG00000196735 | major histocompatibility complex, class II, DQ alpha 1 [Source:HGNC Symbol;Acc:HGNC:4942] |
ENSG00000122641 | ENSG00000122641 | inhibin beta A subunit [Source:HGNC Symbol;Acc:HGNC:6066] |
ENSG00000121380 | ENSG00000121380 | BCL2 like 14 [Source:HGNC Symbol;Acc:HGNC:16657] |
ENSG00000161055 | ENSG00000161055 | secretoglobin family 3A member 1 [Source:HGNC Symbol;Acc:HGNC:18384] |
ENSG00000154620 | ENSG00000154620 | thymosin beta 4, Y-linked [Source:HGNC Symbol;Acc:HGNC:11882] |
ENSG00000166923 | ENSG00000166923 | gremlin 1, DAN family BMP antagonist [Source:HGNC Symbol;Acc:HGNC:2001] |
ENSG00000123453 | ENSG00000123453 | sarcosine dehydrogenase [Source:HGNC Symbol;Acc:HGNC:10536] |
ENSG00000123454 | ENSG00000123454 | dopamine beta-hydroxylase [Source:HGNC Symbol;Acc:HGNC:2689] |
ENSG00000108688 | ENSG00000108688 | C-C motif chemokine ligand 7 [Source:HGNC Symbol;Acc:HGNC:10634] |
ENSG00000131435 | ENSG00000131435 | PDZ and LIM domain 4 [Source:HGNC Symbol;Acc:HGNC:16501] |
ENSG00000136449 | ENSG00000136449 | MYCBP associated protein [Source:HGNC Symbol;Acc:HGNC:19677] |
ENSG00000110092 | ENSG00000110092 | cyclin D1 [Source:HGNC Symbol;Acc:HGNC:1582] |
ENSG00000120645 | ENSG00000120645 | IQ motif and Sec7 domain 3 [Source:HGNC Symbol;Acc:HGNC:29193] |
ENSG00000205057 | ENSG00000205057 | chronic lymphocytic leukemia up-regulated 1 opposite strand [Source:HGNC Symbol;Acc:HGNC:24070] |
ENSG00000123689 | ENSG00000123689 | G0/G1 switch 2 [Source:HGNC Symbol;Acc:HGNC:30229] |
ENSG00000170160 | ENSG00000170160 | coiled-coil domain containing 144A [Source:HGNC Symbol;Acc:HGNC:29072] |
ENSG00000113302 | ENSG00000113302 | interleukin 12B [Source:HGNC Symbol;Acc:HGNC:5970] |
ENSG00000102970 | ENSG00000102970 | C-C motif chemokine ligand 17 [Source:HGNC Symbol;Acc:HGNC:10615] |
ENSG00000198203 | ENSG00000198203 | sulfotransferase family 1C member 2 [Source:HGNC Symbol;Acc:HGNC:11456] |
ENSG00000184371 | ENSG00000184371 | colony stimulating factor 1 [Source:HGNC Symbol;Acc:HGNC:2432] |
ENSG00000149635 | ENSG00000149635 | osteoclast stimulatory transmembrane protein [Source:HGNC Symbol;Acc:HGNC:16116] |
ENSG00000204644 | ENSG00000204644 | ZFP57 zinc finger protein [Source:HGNC Symbol;Acc:HGNC:18791] |
ENSG00000050165 | ENSG00000050165 | dickkopf WNT signaling pathway inhibitor 3 [Source:HGNC Symbol;Acc:HGNC:2893] |
ENSG00000150510 | ENSG00000150510 | family with sequence similarity 124 member A [Source:HGNC Symbol;Acc:HGNC:26413] |
ENSG00000182871 | ENSG00000182871 | collagen type XVIII alpha 1 chain [Source:HGNC Symbol;Acc:HGNC:2195] |
ENSG00000134668 | ENSG00000134668 | SPOC domain containing 1 [Source:HGNC Symbol;Acc:HGNC:26338] |
ENSG00000164530 | ENSG00000164530 | peptidase inhibitor 16 [Source:HGNC Symbol;Acc:HGNC:21245] |
ENSG00000178860 | ENSG00000178860 | musculin [Source:HGNC Symbol;Acc:HGNC:7321] |
ENSG00000109471 | ENSG00000109471 | interleukin 2 [Source:HGNC Symbol;Acc:HGNC:6001] |
ENSG00000197121 | ENSG00000197121 | post-GPI attachment to proteins 1 [Source:HGNC Symbol;Acc:HGNC:25712] |
ENSG00000164400 | ENSG00000164400 | colony stimulating factor 2 [Source:HGNC Symbol;Acc:HGNC:2434] |
ENSG00000180616 | ENSG00000180616 | somatostatin receptor 2 [Source:HGNC Symbol;Acc:HGNC:11331] |
ENSG00000069482 | ENSG00000069482 | galanin and GMAP prepropeptide [Source:HGNC Symbol;Acc:HGNC:4114] |
ENSG00000133687 | ENSG00000133687 | transmembrane and tetratricopeptide repeat containing 1 [Source:HGNC Symbol;Acc:HGNC:24099] |
ENSG00000134716 | ENSG00000134716 | cytochrome P450 family 2 subfamily J member 2 [Source:HGNC Symbol;Acc:HGNC:2634] |
ENSG00000115009 | ENSG00000115009 | C-C motif chemokine ligand 20 [Source:HGNC Symbol;Acc:HGNC:10619] |
ENSG00000197822 | ENSG00000197822 | occludin [Source:HGNC Symbol;Acc:HGNC:8104] |
ENSG00000172724 | ENSG00000172724 | C-C motif chemokine ligand 19 [Source:HGNC Symbol;Acc:HGNC:10617] |
ENSG00000181634 | ENSG00000181634 | tumor necrosis factor superfamily member 15 [Source:HGNC Symbol;Acc:HGNC:11931] |
comp2_genes <- unlist(strsplit(x=as.character(scores$highest[, 2]), split=":"))
idx <- grep(pattern="^ENSG", x=comp2_genes)
comp2_genes <- comp2_genes[idx]
comp2_high_scores <- exclude_genes_expt(hs_inf_filt, ids=comp2_genes, method="keep")
## Before removal, there were 11944 entries.
## Now there are 50 entries.
## Percent kept: 0.097, 0.087, 0.077, 0.094, 0.077, 0.088, 0.053, 0.050, 0.059, 0.055, 0.063, 0.057, 0.024, 0.017, 0.015, 0.021, 0.017, 0.017
## Percent removed: 99.903, 99.913, 99.923, 99.906, 99.923, 99.912, 99.947, 99.950, 99.941, 99.945, 99.937, 99.943, 99.976, 99.983, 99.985, 99.979, 99.983, 99.983
ensembl_gene_id | description | |
---|---|---|
ENSG00000100079 | ENSG00000100079 | galectin 2 [Source:HGNC Symbol;Acc:HGNC:6562] |
ENSG00000128283 | ENSG00000128283 | CDC42 effector protein 1 [Source:HGNC Symbol;Acc:HGNC:17014] |
ENSG00000170439 | ENSG00000170439 | methyltransferase like 7B [Source:HGNC Symbol;Acc:HGNC:28276] |
ENSG00000156113 | ENSG00000156113 | potassium calcium-activated channel subfamily M alpha 1 [Source:HGNC Symbol;Acc:HGNC:6284] |
ENSG00000007350 | ENSG00000007350 | transketolase like 1 [Source:HGNC Symbol;Acc:HGNC:11835] |
ENSG00000123219 | ENSG00000123219 | centromere protein K [Source:HGNC Symbol;Acc:HGNC:29479] |
ENSG00000205809 | ENSG00000205809 | killer cell lectin like receptor C2 [Source:HGNC Symbol;Acc:HGNC:6375] |
ENSG00000165810 | ENSG00000165810 | butyrophilin like 9 [Source:HGNC Symbol;Acc:HGNC:24176] |
ENSG00000150687 | ENSG00000150687 | protease, serine 23 [Source:HGNC Symbol;Acc:HGNC:14370] |
ENSG00000167880 | ENSG00000167880 | envoplakin [Source:HGNC Symbol;Acc:HGNC:3503] |
ENSG00000150510 | ENSG00000150510 | family with sequence similarity 124 member A [Source:HGNC Symbol;Acc:HGNC:26413] |
ENSG00000167094 | ENSG00000167094 | tetratricopeptide repeat domain 16 [Source:HGNC Symbol;Acc:HGNC:26536] |
ENSG00000146918 | ENSG00000146918 | non-SMC condensin II complex subunit G2 [Source:HGNC Symbol;Acc:HGNC:21904] |
ENSG00000173372 | ENSG00000173372 | complement C1q A chain [Source:HGNC Symbol;Acc:HGNC:1241] |
ENSG00000214944 | ENSG00000214944 | Rho guanine nucleotide exchange factor 28 [Source:HGNC Symbol;Acc:HGNC:30322] |
ENSG00000174945 | ENSG00000174945 | archaelysin family metallopeptidase 1 [Source:HGNC Symbol;Acc:HGNC:22231] |
ENSG00000240247 | ENSG00000240247 | defensin alpha 1B [Source:HGNC Symbol;Acc:HGNC:33596] |
ENSG00000205810 | ENSG00000205810 | killer cell lectin like receptor C3 [Source:HGNC Symbol;Acc:HGNC:6376] |
ENSG00000149635 | ENSG00000149635 | osteoclast stimulatory transmembrane protein [Source:HGNC Symbol;Acc:HGNC:16116] |
ENSG00000102837 | ENSG00000102837 | olfactomedin 4 [Source:HGNC Symbol;Acc:HGNC:17190] |
ENSG00000109956 | ENSG00000109956 | beta-1,3-glucuronyltransferase 1 [Source:HGNC Symbol;Acc:HGNC:921] |
ENSG00000061337 | ENSG00000061337 | leucine zipper tumor suppressor 1 [Source:HGNC Symbol;Acc:HGNC:13861] |
ENSG00000206047 | ENSG00000206047 | defensin alpha 1 [Source:HGNC Symbol;Acc:HGNC:2761] |
ENSG00000205846 | ENSG00000205846 | C-type lectin domain family 6 member A [Source:HGNC Symbol;Acc:HGNC:14556] |
ENSG00000183542 | ENSG00000183542 | killer cell lectin like receptor C4 [Source:HGNC Symbol;Acc:HGNC:6377] |
ENSG00000113721 | ENSG00000113721 | platelet derived growth factor receptor beta [Source:HGNC Symbol;Acc:HGNC:8804] |
ENSG00000101188 | ENSG00000101188 | neurotensin receptor 1 [Source:HGNC Symbol;Acc:HGNC:8039] |
ENSG00000113657 | ENSG00000113657 | dihydropyrimidinase like 3 [Source:HGNC Symbol;Acc:HGNC:3015] |
ENSG00000196421 | ENSG00000196421 | long intergenic non-protein coding RNA 176 [Source:HGNC Symbol;Acc:HGNC:27655] |
ENSG00000145708 | ENSG00000145708 | corticotropin releasing hormone binding protein [Source:HGNC Symbol;Acc:HGNC:2356] |
ENSG00000133962 | ENSG00000133962 | cation channel sperm associated auxiliary subunit beta [Source:HGNC Symbol;Acc:HGNC:20500] |
ENSG00000148488 | ENSG00000148488 | ST8 alpha-N-acetyl-neuraminide alpha-2,8-sialyltransferase 6 [Source:HGNC Symbol;Acc:HGNC:23317] |
ENSG00000125780 | ENSG00000125780 | transglutaminase 3 [Source:HGNC Symbol;Acc:HGNC:11779] |
ENSG00000243772 | ENSG00000243772 | killer cell immunoglobulin like receptor, two Ig domains and long cytoplasmic tail 3 [Source:HGNC Symbol;Acc:HGNC:6331] |
ENSG00000170160 | ENSG00000170160 | coiled-coil domain containing 144A [Source:HGNC Symbol;Acc:HGNC:29072] |
ENSG00000162383 | ENSG00000162383 | solute carrier family 1 member 7 [Source:HGNC Symbol;Acc:HGNC:10945] |
ENSG00000108370 | ENSG00000108370 | regulator of G-protein signaling 9 [Source:HGNC Symbol;Acc:HGNC:10004] |
ENSG00000054793 | ENSG00000054793 | ATPase phospholipid transporting 9A (putative) [Source:HGNC Symbol;Acc:HGNC:13540] |
ENSG00000168329 | ENSG00000168329 | C-X3-C motif chemokine receptor 1 [Source:HGNC Symbol;Acc:HGNC:2558] |
ENSG00000184349 | ENSG00000184349 | ephrin A5 [Source:HGNC Symbol;Acc:HGNC:3225] |
ENSG00000100450 | ENSG00000100450 | granzyme H [Source:HGNC Symbol;Acc:HGNC:4710] |
ENSG00000173239 | ENSG00000173239 | lipase family member M [Source:HGNC Symbol;Acc:HGNC:23455] |
ENSG00000086548 | ENSG00000086548 | carcinoembryonic antigen related cell adhesion molecule 6 [Source:HGNC Symbol;Acc:HGNC:1818] |
ENSG00000111199 | ENSG00000111199 | transient receptor potential cation channel subfamily V member 4 [Source:HGNC Symbol;Acc:HGNC:18083] |
ENSG00000239839 | ENSG00000239839 | defensin alpha 3 [Source:HGNC Symbol;Acc:HGNC:2762] |
ENSG00000139567 | ENSG00000139567 | activin A receptor like type 1 [Source:HGNC Symbol;Acc:HGNC:175] |
ENSG00000188820 | ENSG00000188820 | family with sequence similarity 26 member F [Source:HGNC Symbol;Acc:HGNC:33391] |
ENSG00000101425 | ENSG00000101425 | bactericidal/permeability-increasing protein [Source:HGNC Symbol;Acc:HGNC:1095] |
ENSG00000176083 | ENSG00000176083 | zinc finger protein 683 [Source:HGNC Symbol;Acc:HGNC:28495] |
ENSG00000169908 | ENSG00000169908 | transmembrane 4 L six family member 1 [Source:HGNC Symbol;Acc:HGNC:11853] |
comp3_genes <- unlist(strsplit(x=as.character(scores$highest[, 3]), split=":"))
idx <- grep(pattern="^ENSG", x=comp3_genes)
comp3_genes <- comp3_genes[idx]
comp3_high_scores <- exclude_genes_expt(hs_inf_filt, ids=comp3_genes, method="keep")
## Before removal, there were 11944 entries.
## Now there are 50 entries.
## Percent kept: 0.401, 0.316, 0.140, 0.384, 0.155, 0.370, 0.275, 0.242, 0.094, 0.218, 0.125, 0.158, 0.486, 0.395, 0.215, 0.493, 0.271, 0.316
## Percent removed: 99.599, 99.684, 99.860, 99.616, 99.845, 99.630, 99.725, 99.758, 99.906, 99.782, 99.875, 99.842, 99.514, 99.605, 99.785, 99.507, 99.729, 99.684
ensembl_gene_id | description | |
---|---|---|
ENSG00000137673 | ENSG00000137673 | matrix metallopeptidase 7 [Source:HGNC Symbol;Acc:HGNC:7174] |
ENSG00000113302 | ENSG00000113302 | interleukin 12B [Source:HGNC Symbol;Acc:HGNC:5970] |
ENSG00000169908 | ENSG00000169908 | transmembrane 4 L six family member 1 [Source:HGNC Symbol;Acc:HGNC:11853] |
ENSG00000074410 | ENSG00000074410 | carbonic anhydrase 12 [Source:HGNC Symbol;Acc:HGNC:1371] |
ENSG00000122641 | ENSG00000122641 | inhibin beta A subunit [Source:HGNC Symbol;Acc:HGNC:6066] |
ENSG00000166923 | ENSG00000166923 | gremlin 1, DAN family BMP antagonist [Source:HGNC Symbol;Acc:HGNC:2001] |
ENSG00000106178 | ENSG00000106178 | C-C motif chemokine ligand 24 [Source:HGNC Symbol;Acc:HGNC:10623] |
ENSG00000125144 | ENSG00000125144 | metallothionein 1G [Source:HGNC Symbol;Acc:HGNC:7399] |
ENSG00000197506 | ENSG00000197506 | solute carrier family 28 member 3 [Source:HGNC Symbol;Acc:HGNC:16484] |
ENSG00000136695 | ENSG00000136695 | interleukin 36 receptor antagonist [Source:HGNC Symbol;Acc:HGNC:15561] |
ENSG00000205362 | ENSG00000205362 | metallothionein 1A [Source:HGNC Symbol;Acc:HGNC:7393] |
ENSG00000187848 | ENSG00000187848 | purinergic receptor P2X 2 [Source:HGNC Symbol;Acc:HGNC:15459] |
ENSG00000111052 | ENSG00000111052 | lin-7 homolog A, crumbs cell polarity complex component [Source:HGNC Symbol;Acc:HGNC:17787] |
ENSG00000069482 | ENSG00000069482 | galanin and GMAP prepropeptide [Source:HGNC Symbol;Acc:HGNC:4114] |
ENSG00000113657 | ENSG00000113657 | dihydropyrimidinase like 3 [Source:HGNC Symbol;Acc:HGNC:3015] |
ENSG00000186081 | ENSG00000186081 | keratin 5 [Source:HGNC Symbol;Acc:HGNC:6442] |
ENSG00000148677 | ENSG00000148677 | ankyrin repeat domain 1 [Source:HGNC Symbol;Acc:HGNC:15819] |
ENSG00000187134 | ENSG00000187134 | aldo-keto reductase family 1 member C1 [Source:HGNC Symbol;Acc:HGNC:384] |
ENSG00000105976 | ENSG00000105976 | MET proto-oncogene, receptor tyrosine kinase [Source:HGNC Symbol;Acc:HGNC:7029] |
ENSG00000105855 | ENSG00000105855 | integrin subunit beta 8 [Source:HGNC Symbol;Acc:HGNC:6163] |
ENSG00000172724 | ENSG00000172724 | C-C motif chemokine ligand 19 [Source:HGNC Symbol;Acc:HGNC:10617] |
ENSG00000133048 | ENSG00000133048 | chitinase 3 like 1 [Source:HGNC Symbol;Acc:HGNC:1932] |
ENSG00000100234 | ENSG00000100234 | TIMP metallopeptidase inhibitor 3 [Source:HGNC Symbol;Acc:HGNC:11822] |
ENSG00000138772 | ENSG00000138772 | annexin A3 [Source:HGNC Symbol;Acc:HGNC:541] |
ENSG00000108688 | ENSG00000108688 | C-C motif chemokine ligand 7 [Source:HGNC Symbol;Acc:HGNC:10634] |
ENSG00000151012 | ENSG00000151012 | solute carrier family 7 member 11 [Source:HGNC Symbol;Acc:HGNC:11059] |
ENSG00000117594 | ENSG00000117594 | hydroxysteroid 11-beta dehydrogenase 1 [Source:HGNC Symbol;Acc:HGNC:5208] |
ENSG00000039560 | ENSG00000039560 | retinoic acid induced 14 [Source:HGNC Symbol;Acc:HGNC:14873] |
ENSG00000118785 | ENSG00000118785 | secreted phosphoprotein 1 [Source:HGNC Symbol;Acc:HGNC:11255] |
ENSG00000154856 | ENSG00000154856 | APC down-regulated 1 [Source:HGNC Symbol;Acc:HGNC:15718] |
ENSG00000110436 | ENSG00000110436 | solute carrier family 1 member 2 [Source:HGNC Symbol;Acc:HGNC:10940] |
ENSG00000050730 | ENSG00000050730 | TNFAIP3 interacting protein 3 [Source:HGNC Symbol;Acc:HGNC:19315] |
ENSG00000176076 | ENSG00000176076 | potassium voltage-gated channel subfamily E regulatory subunit 5 [Source:HGNC Symbol;Acc:HGNC:6241] |
ENSG00000205358 | ENSG00000205358 | metallothionein 1H [Source:HGNC Symbol;Acc:HGNC:7400] |
ENSG00000053747 | ENSG00000053747 | laminin subunit alpha 3 [Source:HGNC Symbol;Acc:HGNC:6483] |
ENSG00000136689 | ENSG00000136689 | interleukin 1 receptor antagonist [Source:HGNC Symbol;Acc:HGNC:6000] |
ENSG00000166165 | ENSG00000166165 | creatine kinase B [Source:HGNC Symbol;Acc:HGNC:1991] |
ENSG00000102837 | ENSG00000102837 | olfactomedin 4 [Source:HGNC Symbol;Acc:HGNC:17190] |
ENSG00000112139 | ENSG00000112139 | MAM domain containing glycosylphosphatidylinositol anchor 1 [Source:HGNC Symbol;Acc:HGNC:19267] |
ENSG00000163687 | ENSG00000163687 | deoxyribonuclease 1 like 3 [Source:HGNC Symbol;Acc:HGNC:2959] |
ENSG00000205364 | ENSG00000205364 | metallothionein 1M [Source:HGNC Symbol;Acc:HGNC:14296] |
ENSG00000169429 | ENSG00000169429 | C-X-C motif chemokine ligand 8 [Source:HGNC Symbol;Acc:HGNC:6025] |
ENSG00000255398 | ENSG00000255398 | hydroxycarboxylic acid receptor 3 [Source:HGNC Symbol;Acc:HGNC:16824] |
ENSG00000178726 | ENSG00000178726 | thrombomodulin [Source:HGNC Symbol;Acc:HGNC:11784] |
ENSG00000066056 | ENSG00000066056 | tyrosine kinase with immunoglobulin like and EGF like domains 1 [Source:HGNC Symbol;Acc:HGNC:11809] |
ENSG00000136052 | ENSG00000136052 | solute carrier family 41 member 2 [Source:HGNC Symbol;Acc:HGNC:31045] |
ENSG00000125780 | ENSG00000125780 | transglutaminase 3 [Source:HGNC Symbol;Acc:HGNC:11779] |
ENSG00000178860 | ENSG00000178860 | musculin [Source:HGNC Symbol;Acc:HGNC:7321] |
ENSG00000197632 | ENSG00000197632 | serpin family B member 2 [Source:HGNC Symbol;Acc:HGNC:8584] |
ENSG00000162433 | ENSG00000162433 | adenylate kinase 4 [Source:HGNC Symbol;Acc:HGNC:363] |
plot_sample_heatmap(
data=sm(normalize_expt(comp1_high_scores, transform="log2", convert="cpm", norm="quant")),
row_label=NULL)
plot_sample_heatmap(
data=sm(normalize_expt(comp2_high_scores, transform="log2", convert="cpm", norm="quant")),
row_label=NULL)
plot_sample_heatmap(
data=sm(normalize_expt(comp3_high_scores, transform="log2", convert="cpm", norm="quant")),
row_label=NULL)
test <- sm(create_expt(metadata="sample_sheets/pbmc_samples-manual_switch.xlsx",
file_column="humanfile"))
test <- subset_expt(test, subset="condition!='uninf'")
## There were 21, now there are 18 samples.
test_norm <- sm(normalize_expt(test, transform="log2", convert="cpm", filter=TRUE, norm="quant"))
plot_pca(test_norm)$plot
test_batch <- sm(normalize_expt(test, transform="log2", convert="cpm", filter=TRUE,
norm="quant", batch="ruvg"))
plot_pca(test_batch)$plot
## Comp.1 Comp.2
## [1,] "20.51:ENSG00000137673" "11.92:ENSG00000125538"
## [2,] "17.37:ENSG00000113657" "11.74:ENSG00000163735"
## [3,] "16.95:ENSG00000122641" "11.2:ENSG00000113302"
## [4,] "15.07:ENSG00000108688" "11.14:ENSG00000122641"
## [5,] "14.65:ENSG00000166923" "10.6:ENSG00000143333"
## [6,] "13.38:ENSG00000148677" "9.251:ENSG00000105976"
## [7,] "12.64:ENSG00000133048" "8.576:ENSG00000113070"
## [8,] "12.63:ENSG00000106178" "8.552:ENSG00000123689"
## [9,] "12.44:ENSG00000100234" "8.265:ENSG00000074410"
## [10,] "12.1:ENSG00000197506" "8.032:ENSG00000205846"
## [11,] "11.54:ENSG00000076716" "7.734:ENSG00000182782"
## [12,] "11.45:ENSG00000117594" "7.706:ENSG00000106178"
## [13,] "11.44:ENSG00000136689" "7.559:ENSG00000169429"
## [14,] "10.88:ENSG00000250033" "7.534:ENSG00000117594"
## [15,] "10.85:ENSG00000178852" "7.455:ENSG00000123610"
## [16,] "10.61:ENSG00000151012" "7.07:ENSG00000131203"
## [17,] "10.15:ENSG00000136052" "7.011:ENSG00000166165"
## [18,] "9.835:ENSG00000125144" "6.981:ENSG00000162493"
## [19,] "9.762:ENSG00000162433" "6.847:ENSG00000255398"
## [20,] "9.583:ENSG00000178860" "6.579:ENSG00000173391"
## [21,] "9.51:ENSG00000175445" "6.257:ENSG00000178860"
## [22,] "9.379:ENSG00000103855" "6.2:ENSG00000125144"
## [23,] "9.357:ENSG00000184371" "6.177:ENSG00000106366"
## [24,] "9.292:ENSG00000118785" "6.16:ENSG00000172724"
## [25,] "9.218:ENSG00000243742" "6.15:ENSG00000184557"
## [26,] "8.921:ENSG00000251230" "6.126:ENSG00000099985"
## [27,] "8.869:ENSG00000105855" "6.113:ENSG00000105855"
## [28,] "8.847:ENSG00000138080" "6.054:ENSG00000183019"
## [29,] "8.654:ENSG00000110092" "5.984:ENSG00000137507"
## [30,] "8.548:ENSG00000138061" "5.959:ENSG00000196878"
## [31,] "8.543:ENSG00000176177" "5.907:ENSG00000105246"
## [32,] "8.54:ENSG00000151704" "5.772:ENSG00000169184"
## [33,] "8.369:ENSG00000136810" "5.769:ENSG00000178726"
## [34,] "8.359:ENSG00000172070" "5.729:ENSG00000123700"
## [35,] "8.181:ENSG00000120217" "5.664:ENSG00000171049"
## [36,] "8.169:ENSG00000253123" "5.64:ENSG00000242048"
## [37,] "8.165:ENSG00000174939" "5.527:ENSG00000186431"
## [38,] "8.09:ENSG00000111012" "5.507:ENSG00000157227"
## [39,] "7.801:ENSG00000154277" "5.44:ENSG00000110436"
## [40,] "7.73:ENSG00000069482" "5.436:ENSG00000146555"
## [41,] "7.729:ENSG00000164935" "5.41:ENSG00000130222"
## [42,] "7.53:ENSG00000180509" "5.401:ENSG00000185614"
## [43,] "7.426:ENSG00000117525" "5.394:ENSG00000166527"
## [44,] "7.407:ENSG00000123689" "5.322:ENSG00000163739"
## [45,] "7.288:ENSG00000169418" "5.259:ENSG00000006118"
## [46,] "7.249:ENSG00000115919" "5.254:ENSG00000102794"
## [47,] "7.093:ENSG00000151726" "5.253:ENSG00000125657"
## [48,] "7.069:ENSG00000140450" "5.212:ENSG00000011422"
## [49,] "7.057:ENSG00000039560" "5.194:ENSG00000219682"
## [50,] "7.05:ENSG00000113302" "5.115:ENSG00000075618"
annot <- fData(hs_inf_filt)
comp1_genes <- unlist(strsplit(x=as.character(scores$highest[, 1]), split=":"))
idx <- grep(pattern="^ENSG", x=comp1_genes)
comp1_genes <- comp1_genes[idx]
comp1_high_scores <- exclude_genes_expt(test_batch, ids=comp1_genes, method="keep")
## Before removal, there were 13557 entries.
## Now there are 50 entries.
## Percent kept: 0.385, 0.365, 0.289, 0.350, 0.301, 0.370, 0.422, 0.397, 0.262, 0.369, 0.308, 0.329, 0.384, 0.352, 0.293, 0.380, 0.333, 0.326
## Percent removed: 99.615, 99.635, 99.711, 99.650, 99.699, 99.630, 99.578, 99.603, 99.738, 99.631, 99.692, 99.671, 99.616, 99.648, 99.707, 99.620, 99.667, 99.674
plot_sample_heatmap(
data=sm(normalize_expt(comp1_high_scores, transform="log2", convert="cpm", norm="quant")),
row_label=NULL)
ensembl_gene_id | description | |
---|---|---|
ENSG00000137673 | ENSG00000137673 | matrix metallopeptidase 7 [Source:HGNC Symbol;Acc:HGNC:7174] |
ENSG00000113657 | ENSG00000113657 | dihydropyrimidinase like 3 [Source:HGNC Symbol;Acc:HGNC:3015] |
ENSG00000122641 | ENSG00000122641 | inhibin beta A subunit [Source:HGNC Symbol;Acc:HGNC:6066] |
ENSG00000108688 | ENSG00000108688 | C-C motif chemokine ligand 7 [Source:HGNC Symbol;Acc:HGNC:10634] |
ENSG00000166923 | ENSG00000166923 | gremlin 1, DAN family BMP antagonist [Source:HGNC Symbol;Acc:HGNC:2001] |
ENSG00000148677 | ENSG00000148677 | ankyrin repeat domain 1 [Source:HGNC Symbol;Acc:HGNC:15819] |
ENSG00000133048 | ENSG00000133048 | chitinase 3 like 1 [Source:HGNC Symbol;Acc:HGNC:1932] |
ENSG00000106178 | ENSG00000106178 | C-C motif chemokine ligand 24 [Source:HGNC Symbol;Acc:HGNC:10623] |
ENSG00000100234 | ENSG00000100234 | TIMP metallopeptidase inhibitor 3 [Source:HGNC Symbol;Acc:HGNC:11822] |
ENSG00000197506 | ENSG00000197506 | solute carrier family 28 member 3 [Source:HGNC Symbol;Acc:HGNC:16484] |
ENSG00000076716 | ENSG00000076716 | glypican 4 [Source:HGNC Symbol;Acc:HGNC:4452] |
ENSG00000117594 | ENSG00000117594 | hydroxysteroid 11-beta dehydrogenase 1 [Source:HGNC Symbol;Acc:HGNC:5208] |
ENSG00000136689 | ENSG00000136689 | interleukin 1 receptor antagonist [Source:HGNC Symbol;Acc:HGNC:6000] |
NA | NA | NA |
ENSG00000178852 | ENSG00000178852 | EF-hand calcium binding domain 13 [Source:HGNC Symbol;Acc:HGNC:26864] |
ENSG00000151012 | ENSG00000151012 | solute carrier family 7 member 11 [Source:HGNC Symbol;Acc:HGNC:11059] |
ENSG00000136052 | ENSG00000136052 | solute carrier family 41 member 2 [Source:HGNC Symbol;Acc:HGNC:31045] |
ENSG00000125144 | ENSG00000125144 | metallothionein 1G [Source:HGNC Symbol;Acc:HGNC:7399] |
ENSG00000162433 | ENSG00000162433 | adenylate kinase 4 [Source:HGNC Symbol;Acc:HGNC:363] |
ENSG00000178860 | ENSG00000178860 | musculin [Source:HGNC Symbol;Acc:HGNC:7321] |
ENSG00000175445 | ENSG00000175445 | lipoprotein lipase [Source:HGNC Symbol;Acc:HGNC:6677] |
ENSG00000103855 | ENSG00000103855 | CD276 molecule [Source:HGNC Symbol;Acc:HGNC:19137] |
ENSG00000184371 | ENSG00000184371 | colony stimulating factor 1 [Source:HGNC Symbol;Acc:HGNC:2432] |
ENSG00000118785 | ENSG00000118785 | secreted phosphoprotein 1 [Source:HGNC Symbol;Acc:HGNC:11255] |
NA.1 | NA | NA |
NA.2 | NA | NA |
ENSG00000105855 | ENSG00000105855 | integrin subunit beta 8 [Source:HGNC Symbol;Acc:HGNC:6163] |
ENSG00000138080 | ENSG00000138080 | elastin microfibril interfacer 1 [Source:HGNC Symbol;Acc:HGNC:19880] |
ENSG00000110092 | ENSG00000110092 | cyclin D1 [Source:HGNC Symbol;Acc:HGNC:1582] |
ENSG00000138061 | ENSG00000138061 | cytochrome P450 family 1 subfamily B member 1 [Source:HGNC Symbol;Acc:HGNC:2597] |
ENSG00000176177 | ENSG00000176177 | ENTH domain containing 1 [Source:HGNC Symbol;Acc:HGNC:26352] |
ENSG00000151704 | ENSG00000151704 | potassium voltage-gated channel subfamily J member 1 [Source:HGNC Symbol;Acc:HGNC:6255] |
ENSG00000136810 | ENSG00000136810 | thioredoxin [Source:HGNC Symbol;Acc:HGNC:12435] |
NA.3 | NA | NA |
ENSG00000120217 | ENSG00000120217 | CD274 molecule [Source:HGNC Symbol;Acc:HGNC:17635] |
NA.4 | NA | NA |
ENSG00000174939 | ENSG00000174939 | aspartate beta-hydroxylase domain containing 1 [Source:HGNC Symbol;Acc:HGNC:27380] |
ENSG00000111012 | ENSG00000111012 | cytochrome P450 family 27 subfamily B member 1 [Source:HGNC Symbol;Acc:HGNC:2606] |
ENSG00000154277 | ENSG00000154277 | ubiquitin C-terminal hydrolase L1 [Source:HGNC Symbol;Acc:HGNC:12513] |
ENSG00000069482 | ENSG00000069482 | galanin and GMAP prepropeptide [Source:HGNC Symbol;Acc:HGNC:4114] |
ENSG00000164935 | ENSG00000164935 | dendrocyte expressed seven transmembrane protein [Source:HGNC Symbol;Acc:HGNC:18549] |
ENSG00000180509 | ENSG00000180509 | potassium voltage-gated channel subfamily E regulatory subunit 1 [Source:HGNC Symbol;Acc:HGNC:6240] |
ENSG00000117525 | ENSG00000117525 | coagulation factor III, tissue factor [Source:HGNC Symbol;Acc:HGNC:3541] |
ENSG00000123689 | ENSG00000123689 | G0/G1 switch 2 [Source:HGNC Symbol;Acc:HGNC:30229] |
ENSG00000169418 | ENSG00000169418 | natriuretic peptide receptor 1 [Source:HGNC Symbol;Acc:HGNC:7943] |
ENSG00000115919 | ENSG00000115919 | kynureninase [Source:HGNC Symbol;Acc:HGNC:6469] |
ENSG00000151726 | ENSG00000151726 | acyl-CoA synthetase long-chain family member 1 [Source:HGNC Symbol;Acc:HGNC:3569] |
ENSG00000140450 | ENSG00000140450 | arrestin domain containing 4 [Source:HGNC Symbol;Acc:HGNC:28087] |
ENSG00000039560 | ENSG00000039560 | retinoic acid induced 14 [Source:HGNC Symbol;Acc:HGNC:14873] |
ENSG00000113302 | ENSG00000113302 | interleukin 12B [Source:HGNC Symbol;Acc:HGNC:5970] |
test_filt <- sm(normalize_expt(test, filter=TRUE))
excel_file <- glue::glue("excel/{rundate}_hs_infect_switchone_contr-v{ver}.xlsx")
test_de <- sm(all_pairwise(test_filt, model_batch=TRUE))
test_plots <- extract_de_plots(test_de)
test_table <- sm(combine_de_tables(test_de, excel=excel_file))
excel_file <- glue::glue("excel/{rundate}_hs_infect_switchone_sig-v{ver}.xlsx")
test_sig <- sm(extract_significant_genes(test_table, according_to="deseq", p=0.05, excel=excel_file))
head(test_sig$deseq$ups[[1]])
## deseq_logfc deseq_adjp edger_logfc edger_adjp limma_logfc
## ENSG00000115414 2.153 0.0024550 2.144 3.068e-04 1.799
## ENSG00000177675 2.015 0.0468000 1.943 1.084e-02 1.346
## ENSG00000155659 1.626 0.0006777 1.606 1.692e-05 1.392
## ENSG00000050767 1.607 0.0016020 1.587 3.668e-05 1.516
## ENSG00000182853 1.448 0.0033350 1.379 7.238e-06 1.232
## ENSG00000171631 1.372 0.0055770 1.359 3.068e-04 1.231
## limma_adjp basic_nummed basic_denmed basic_numvar
## ENSG00000115414 0.10420 8.283 5.3510 5.412e+00
## ENSG00000177675 0.35000 1.616 -0.9318 6.552e+00
## ENSG00000155659 0.13110 4.262 2.2720 1.758e+00
## ENSG00000050767 0.07678 2.703 1.1920 2.675e+00
## ENSG00000182853 0.04845 1.912 0.7903 6.430e+00
## ENSG00000171631 0.07678 3.666 2.2430 1.505e+00
## basic_denvar basic_logfc basic_t basic_p basic_adjp
## ENSG00000115414 5.728e+00 2.931 1.5310 1.573e-01 9.287e-01
## ENSG00000177675 7.569e+00 2.548 1.2170 2.532e-01 9.803e-01
## ENSG00000155659 2.192e+00 1.989 2.1840 5.632e-02 7.679e-01
## ENSG00000050767 2.278e+00 1.511 1.8860 8.626e-02 8.359e-01
## ENSG00000182853 7.233e+00 1.121 0.9952 3.441e-01 9.999e-01
## ENSG00000171631 1.499e+00 1.423 2.1210 5.964e-02 7.736e-01
## deseq_basemean deseq_lfcse deseq_stat deseq_p ebseq_fc
## ENSG00000115414 9426.0 0.4814 4.473 7.709e-06 3.195
## ENSG00000177675 106.4 0.6284 3.207 1.340e-03 2.907
## ENSG00000155659 183.7 0.3333 4.879 1.065e-06 2.833
## ENSG00000050767 116.1 0.3450 4.656 3.217e-06 3.038
## ENSG00000182853 261.3 0.3311 4.373 1.223e-05 2.447
## ENSG00000171631 194.8 0.3281 4.181 2.902e-05 2.472
## ebseq_logfc ebseq_postfc ebseq_mean ebseq_ppee ebseq_ppde
## ENSG00000115414 1.676 3.435 9412.4 0.9999 1.034e-04
## ENSG00000177675 1.539 3.163 106.3 0.9992 7.759e-04
## ENSG00000155659 1.502 2.894 184.1 0.9988 1.241e-03
## ENSG00000050767 1.603 3.141 116.1 0.9992 7.803e-04
## ENSG00000182853 1.291 2.640 260.8 0.9997 2.598e-04
## ENSG00000171631 1.306 2.531 194.5 0.9994 6.066e-04
## ebseq_adjp edger_logcpm edger_lr edger_p limma_ave
## ENSG00000115414 0.9999 9.468 24.91 6.020e-07 7.720
## ENSG00000177675 0.9992 3.065 14.84 1.172e-04 1.022
## ENSG00000155659 0.9988 3.808 32.41 1.248e-08 3.156
## ENSG00000050767 0.9992 3.167 30.12 4.059e-08 2.277
## ENSG00000182853 0.9997 4.328 35.06 3.203e-09 2.367
## ENSG00000171631 0.9994 3.909 24.82 6.308e-07 3.328
## limma_t limma_b limma_p limma_adjp_fdr deseq_adjp_fdr
## ENSG00000115414 3.457 -1.6420 0.0025910 1.042e-01 2.683e-03
## ENSG00000177675 1.904 -4.0050 0.0719400 3.499e-01 5.090e-02
## ENSG00000155659 3.233 -1.8970 0.0043150 1.312e-01 7.599e-04
## ENSG00000050767 3.893 -0.8535 0.0009530 7.678e-02 1.677e-03
## ENSG00000182853 4.650 0.7091 0.0001680 4.846e-02 3.628e-03
## ENSG00000171631 3.888 -0.6062 0.0009655 7.678e-02 6.058e-03
## edger_adjp_fdr basic_adjp_fdr lfc_meta lfc_var
## ENSG00000115414 3.069e-04 9.284e-01 2.092 9.577e-03
## ENSG00000177675 1.085e-02 9.801e-01 1.780 1.190e-01
## ENSG00000155659 1.692e-05 7.678e-01 1.544 1.541e-02
## ENSG00000050767 3.669e-05 8.359e-01 1.603 1.203e-04
## ENSG00000182853 7.237e-06 9.998e-01 1.346 2.010e-02
## ENSG00000171631 3.069e-04 7.736e-01 1.295 1.491e-02
## lfc_varbymed p_meta p_var
## ENSG00000115414 4.578e-03 8.664e-04 2.231e-06
## ENSG00000177675 6.686e-02 2.447e-02 1.691e-03
## ENSG00000155659 9.977e-03 1.439e-03 6.205e-06
## ENSG00000050767 7.505e-05 3.188e-04 3.017e-07
## ENSG00000182853 1.493e-02 6.008e-05 8.773e-09
## ENSG00000171631 1.151e-02 3.317e-04 3.015e-07
In the following block, I will take the comparisons performed without any batch in the model/adjustment and use them to search for shared/unique genes among the self-healing vs. uninfected and the chronic vs. uninfected.
hs_pairwise_nobatch_pca <- sm(plot_pca(hs_inf_filt, convert="cpm", transform="log2"))
hs_pairwise_nobatch_pca$plot
excel_file <- glue::glue("excel/{rundate}_hs_infect_nobatch_contr-v{ver}.xlsx")
hs_combined_nobatch <- sm(combine_de_tables(
hs_pairwise_nobatch,
excel=excel_file,
keepers=keepers_inf))
excel_file <- glue::glue("excel/{rundate}_hs_infect_nobatch_sig-v{ver}.xlsx")
hs_sig_nobatch <- sm(extract_significant_genes(
hs_combined_nobatch,
excel=excel_file))
hs_sig_nobatch$deseq$counts
## change_counts_up change_counts_down
## ch_sh 0 0
limma_deseq_order <- rank_order_scatter(
hs_pairwise_nobatch,
first_type="limma", second_type="deseq",
first_table=1, second_table=1)
limma_deseq_order$plot
up_lst <- list(
"sh_up" = rownames(hs_sig_nobatch[["deseq"]][["ups"]][["sh_nil"]]),
"ch_up" = rownames(hs_sig_nobatch[["deseq"]][["ups"]][["ch_nil"]]))
nobatch_up_venn <- Vennerable::Venn(Sets=up_lst)
Vennerable::plot(nobatch_up_venn, doWeights=FALSE)
## Maria-Adelaida and Najib asked about comparing the following pair of data against
## The intersection of a bunch of stuff later... So don't forget this!
## This gives me the genes only up in self vs. nil
nobatch_up_sh_solo <- nobatch_up_venn@IntersectionSets[["10"]]
## and ch vs nil
nobatch_up_ch_solo <- nobatch_up_venn@IntersectionSets[["01"]]
down_lst <- list(
"sh_down" = rownames(hs_sig_nobatch[["deseq"]][["downs"]][["sh_nil"]]),
"ch_down" = rownames(hs_sig_nobatch[["deseq"]][["downs"]][["ch_nil"]]))
nobatch_down_venn <- Vennerable::Venn(Sets=down_lst)
Vennerable::plot(nobatch_down_venn, doWeights=FALSE)
In the following blocks, a series of queries of the self-healing and chronic samples vs. the uninfected will be performed. The assumption is that the effect of infection, which is so much stronger than the difference of self-healing and chronic, will highlight some differences when performed using the chronic and self-healing samples.
In addition, we will be applying all of these tasks with and without strain 2504. I am guessing that the above results will cause one to want to see this with the removal of both troublesome strains (2504 and 2272). But for the moment I am think I will limit my self to only the 2504 removal in an attempt to avoid confusion.
Repeat the previous set of analyses with d107/108/110 in the model.
We have seen variants of these plots above, but hopefully these will serve as a reminder of the shades of difference we are hoping to observe.
Now perform the differential expression analyses, first with and then without 2504
excel_file <- glue::glue("excel/{rundate}_hs_infect_patbatch_contr-v{ver}.xlsx")
hs_combined_batch <- sm(combine_de_tables(
hs_pairwise_batch,
excel=excel_file,
keepers=keepers_uninf))
excel_file <- glue::glue("excel/{rundate}_hs_infect_patbatch_sig-v{ver}.xlsx")
hs_sig_batch <- sm(extract_significant_genes(
hs_combined_batch,
excel=excel_file))
hs_sig_batch[["deseq"]][["counts"]]
## change_counts_up change_counts_down
## sh_nil 906 298
## ch_nil 932 279
## ch_sh 0 0
remove_2504_pairwise_batch <- sm(all_pairwise(remove_2504_uninf_filt,
model_batch=TRUE, do_ebseq=FALSE))
excel_file <- glue::glue("excel/{rundate}_remove_2504_infect_patbatch_contr-v{ver}.xlsx")
remove_2504_combined_batch <- sm(combine_de_tables(
remove_2504_pairwise_batch,
excel=excel_file,
keepers=keepers_uninf))
excel_file <- glue::glue("excel/{rundate}_remove_2504_infect_patbatch_sig-v{ver}.xlsx")
remove_2504_sig_batch <- sm(extract_significant_genes(
remove_2504_combined_batch,
excel=excel_file))
remove_2504_sig_batch[["deseq"]][["counts"]]
## change_counts_up change_counts_down
## sh_nil 899 299
## ch_nil 965 369
## ch_sh 3 3
In the following blocks we will perform the intersections of (self-healing vs. uninfected) against (chronic vs. uninfected).
up_lst <- list(
"sh_up" = rownames(hs_sig_batch[["deseq"]][["ups"]][["sh_nil"]]),
"ch_up" = rownames(hs_sig_batch[["deseq"]][["ups"]][["ch_nil"]]))
batch_up_venn <- Vennerable::Venn(Sets=up_lst)
summary(batch_up_venn@IntersectionSets)
## Length Class Mode
## 00 0 -none- NULL
## 10 56 -none- character
## 01 82 -none- character
## 11 850 -none- character
down_lst <- list(
"sh_down" = rownames(hs_sig_batch[["deseq"]][["downs"]][["sh_nil"]]),
"ch_down" = rownames(hs_sig_batch[["deseq"]][["downs"]][["ch_nil"]]))
batch_down_venn <- Vennerable::Venn(Sets=down_lst)
Vennerable::plot(batch_down_venn, doWeights=FALSE)
## Length Class Mode
## 00 0 -none- NULL
## 10 65 -none- character
## 01 46 -none- character
## 11 233 -none- character
## Testing method: limma.
## Adding method: limma to the set.
## Testing method: deseq.
## Adding method: deseq to the set.
## Testing method: edger.
## Adding method: edger to the set.
## Testing method: ebseq.
## The first datum is missing method: ebseq.
## Testing method: basic.
## Adding method: basic to the set.
## Warning in cor(x = fs[["x"]], y = fs[["y"]], method = cor_method): the
## standard deviation is zero
## Warning in cor(x = fs[["x"]], y = fs[["y"]], method = cor_method): the
## standard deviation is zero
## $ch_sh
## $ch_sh$logfc
## [1] 0.9873
##
## $ch_sh$p
## [1] 0.9304
##
## $ch_sh$adjp
## [1] 0.06891
up_lst <- list(
"sh_up" = rownames(remove_2504_sig_batch[["deseq"]][["ups"]][["sh_nil"]]),
"ch_up" = rownames(remove_2504_sig_batch[["deseq"]][["ups"]][["ch_nil"]]))
batch_up_venn <- Vennerable::Venn(Sets=up_lst)
summary(batch_up_venn@IntersectionSets)
## Length Class Mode
## 00 0 -none- NULL
## 10 46 -none- character
## 01 112 -none- character
## 11 853 -none- character
down_lst <- list(
"sh_down" = rownames(remove_2504_sig_batch[["deseq"]][["downs"]][["sh_nil"]]),
"ch_down" = rownames(remove_2504_sig_batch[["deseq"]][["downs"]][["ch_nil"]]))
batch_down_venn <- Vennerable::Venn(Sets=down_lst)
Vennerable::plot(batch_down_venn, doWeights=FALSE)
## Length Class Mode
## 00 0 -none- NULL
## 10 47 -none- character
## 01 117 -none- character
## 11 252 -none- character
The following block writes out the unique/shared genes observed among the contrasts which included donor in the model.
kept_columns <- c("ensembltranscriptid", "ensemblgeneid", "description",
"deseq_logfc", "deseq_adjp")
start_data <- hs_sig_batch[["deseq"]]
sh_up_solo_genes <- batch_up_venn@IntersectionSets[["10"]]
ch_up_solo_genes <- batch_up_venn@IntersectionSets[["01"]]
shch_up_shared_genes <- batch_up_venn@IntersectionSets[["11"]]
sh_down_solo_genes <- batch_down_venn@IntersectionSets[["10"]]
ch_down_solo_genes <- batch_down_venn@IntersectionSets[["01"]]
shch_down_shared_genes <- batch_down_venn@IntersectionSets[["11"]]
xls_result <- write_xls(
data=start_data[["ups"]][["sh_nil"]][sh_up_solo_genes, kept_columns],
sheet="sh_up_solo")
xls_result <- write_xls(
data=start_data[["ups"]][["ch_nil"]][ch_up_solo_genes, kept_columns],
sheet="ch_up_solo", wb=xls_result[["workbook"]])
xls_result <- write_xls(
data=start_data[["ups"]][["ch_nil"]][shch_up_shared_genes, kept_columns],
sheet="shch_up_shared", wb=xls_result[["workbook"]])
xls_result <- write_xls(
data=start_data[["downs"]][["sh_nil"]][sh_down_solo_genes, kept_columns],
sheet="sh_down_solo")
xls_result <- write_xls(
data=start_data[["downs"]][["ch_nil"]][ch_down_solo_genes, kept_columns],
sheet="ch_down_solo", wb=xls_result[["workbook"]])
xls_result <- write_xls(
data=start_data[["downs"]][["ch_nil"]][shch_down_shared_genes, kept_columns],
sheet="shch_down_shared", wb=xls_result[["workbook"]],
excel=paste0("excel/figure_5a_stuff-v", ver, ".xlsx"))
## Saving to: excel/figure_5a_stuff-v20180822.xlsx
Repeat, this time attmepting to tamp down the variance by person.
excel_file <- glue::glue("excel/{rundate}_hs_infect_sva_contr-v{ver}.xlsx")
hs_combined_sva <- sm(combine_de_tables(
hs_pairwise_sva,
excel=excel_file,
keepers=keepers_uninf))
excel_file <- glue::glue("excel/{rundate}_hs_infect_sva_sig-v{ver}.xlsx")
hs_sig_sva <- sm(extract_significant_genes(
hs_combined_sva,
excel=excel_file))
hs_sig_sva$deseq$counts
## change_counts_up change_counts_down
## sh_nil 877 330
## ch_nil 899 327
## ch_sh 0 0
## Testing method: limma.
## Adding method: limma to the set.
## Testing method: deseq.
## Adding method: deseq to the set.
## Testing method: edger.
## Adding method: edger to the set.
## Testing method: ebseq.
## The second datum is missing method: ebseq.
## Testing method: basic.
## Adding method: basic to the set.
## $result
## $result$limma
## $result$limma$ch_sh
## $result$limma$ch_sh$logfc
## [1] 0.854
##
## $result$limma$ch_sh$p
## [1] 0.631
##
## $result$limma$ch_sh$adjp
## [1] 0.6309
##
##
##
## $result$deseq
## $result$deseq$ch_sh
## $result$deseq$ch_sh$logfc
## [1] 0.8638
##
## $result$deseq$ch_sh$p
## [1] 0.6503
##
## $result$deseq$ch_sh$adjp
## [1] 0.621
##
##
##
## $result$edger
## $result$edger$ch_sh
## $result$edger$ch_sh$logfc
## [1] 0.8628
##
## $result$edger$ch_sh$p
## [1] 0.6553
##
## $result$edger$ch_sh$adjp
## [1] 0.6553
##
##
##
## $result$basic
## $result$basic$ch_sh
## $result$basic$ch_sh$logfc
## [1] 0.8243
##
## $result$basic$ch_sh$p
## [1] 0.7317
##
## $result$basic$ch_sh$adjp
## [1] 0.02787
##
##
##
##
## $logfc
## ch_sh
## 0.8243
##
## $p
## ch_sh
## 0.7317
##
## $adjp
## ch_sh
## 0.02787
## Testing method: limma.
## Adding method: limma to the set.
## Testing method: deseq.
## Adding method: deseq to the set.
## Testing method: edger.
## Adding method: edger to the set.
## Testing method: ebseq.
## The second datum is missing method: ebseq.
## Testing method: basic.
## Adding method: basic to the set.
## $result
## $result$limma
## $result$limma$ch_sh
## $result$limma$ch_sh$logfc
## [1] 0.9373
##
## $result$limma$ch_sh$p
## [1] 0.8353
##
## $result$limma$ch_sh$adjp
## [1] 0.8353
##
##
##
## $result$deseq
## $result$deseq$ch_sh
## $result$deseq$ch_sh$logfc
## [1] 0.9412
##
## $result$deseq$ch_sh$p
## [1] 0.8464
##
## $result$deseq$ch_sh$adjp
## [1] 0.6794
##
##
##
## $result$edger
## $result$edger$ch_sh
## $result$edger$ch_sh$logfc
## [1] 0.9412
##
## $result$edger$ch_sh$p
## [1] 0.8489
##
## $result$edger$ch_sh$adjp
## [1] 0.8489
##
##
##
## $result$basic
## $result$basic$ch_sh
## $result$basic$ch_sh$logfc
## [1] 0.86
##
## $result$basic$ch_sh$p
## [1] 0.826
##
## $result$basic$ch_sh$adjp
## [1] 0.02085
##
##
##
##
## $logfc
## ch_sh
## 0.86
##
## $p
## ch_sh
## 0.826
##
## $adjp
## ch_sh
## 0.02085
rank <- rank_order_scatter(remove_2504_inf_de_sva, hs_pairwise_sva,
first_type="deseq", second_type="deseq",
first_table="sh_vs_chr",
second_table="sh_vs_chr")
rank$plot
remove_2504_pairwise_sva <- sm(all_pairwise(remove_2504_uninf_filt, model_batch="svaseq",
do_ebseq=FALSE))
excel_file <- glue::glue("excel/{rundate}_remove_2504_infect_sva_contr-v{ver}.xlsx")
remove_2504_combined_sva <- sm(combine_de_tables(
remove_2504_pairwise_sva,
excel=excel_file,
keepers=keepers_uninf))
excel_file <- glue::glue("excel/{rundate}_remove_2504_infect_sva_sig-v{ver}.xlsx")
remove_2504_sig_sva <- sm(extract_significant_genes(
remove_2504_combined_sva,
excel=excel_file))
remove_2504_sig_sva$deseq$counts
## change_counts_up change_counts_down
## sh_nil 883 322
## ch_nil 973 411
## ch_sh 1 2
up_lst <- list(
"sh_up" = rownames(hs_sig_sva[["deseq"]][["ups"]][["sh_nil"]]),
"ch_up" = rownames(hs_sig_sva[["deseq"]][["ups"]][["ch_nil"]]))
sva_up_venn <- Vennerable::Venn(Sets=up_lst)
summary(sva_up_venn@IntersectionSets)
## Length Class Mode
## 00 0 -none- NULL
## 10 47 -none- character
## 01 69 -none- character
## 11 830 -none- character
down_lst <- list(
"sh_down" = rownames(hs_sig_sva[["deseq"]][["downs"]][["sh_nil"]]),
"ch_down" = rownames(hs_sig_sva[["deseq"]][["downs"]][["ch_nil"]]))
sva_down_venn <- Vennerable::Venn(Sets=down_lst)
Vennerable::plot(sva_down_venn, doWeights=FALSE)
## Length Class Mode
## 00 0 -none- NULL
## 10 52 -none- character
## 01 49 -none- character
## 11 278 -none- character
similar <- sm(compare_de_results(hs_combined_nobatch, hs_combined_sva,
cor_method="spearman"))
similar$result$deseq
## $ch_sh
## $ch_sh$logfc
## [1] 0.9708
##
## $ch_sh$p
## [1] 0.8871
##
## $ch_sh$adjp
## [1] 0.05923
similar <- sm(compare_de_results(hs_combined_batch, hs_combined_sva,
cor_method="spearman"))
similar$result$deseq
## $sh_nil
## $sh_nil$logfc
## [1] 0.9912
##
## $sh_nil$p
## [1] 0.9683
##
## $sh_nil$adjp
## [1] 0.9683
##
##
## $ch_nil
## $ch_nil$logfc
## [1] 0.9894
##
## $ch_nil$p
## [1] 0.963
##
## $ch_nil$adjp
## [1] 0.963
##
##
## $ch_sh
## $ch_sh$logfc
## [1] 0.9829
##
## $ch_sh$p
## [1] 0.9558
##
## $ch_sh$adjp
## [1] 0.9558
up_lst <- list(
"sh_up" = rownames(remove_2504_sig_sva[["deseq"]][["ups"]][["sh_nil"]]),
"ch_up" = rownames(remove_2504_sig_sva[["deseq"]][["ups"]][["ch_nil"]]))
sva_up_venn <- Vennerable::Venn(Sets=up_lst)
summary(sva_up_venn@IntersectionSets)
## Length Class Mode
## 00 0 -none- NULL
## 10 35 -none- character
## 01 125 -none- character
## 11 848 -none- character
down_lst <- list(
"sh_down" = rownames(remove_2504_sig_sva[["deseq"]][["downs"]][["sh_nil"]]),
"ch_down" = rownames(remove_2504_sig_sva[["deseq"]][["downs"]][["ch_nil"]]))
sva_down_venn <- Vennerable::Venn(Sets=down_lst)
Vennerable::plot(sva_down_venn, doWeights=FALSE)
## Length Class Mode
## 00 0 -none- NULL
## 10 30 -none- character
## 01 119 -none- character
## 11 292 -none- character
Repeat again using fsva.
hs_pairwise_fsva_pca <- sm(plot_pca(hs_inf_filt, batch="fsva", norm="quant", transform="log2"))
hs_pairwise_fsva_pca$plot
excel_file <- glue::glue("excel/{rundate}_hs_infect_fsva_contr-v{ver}.xlsx")
hs_combined_fsva <- sm(combine_de_tables(
hs_pairwise_fsva,
excel=excel_file,
keepers=keepers_uninf))
excel_file <- glue::glue("excel/{rundate}_hs_infect_fsva_sig-v{ver}.xlsx")
hs_sig_fsva <- sm(extract_significant_genes(
hs_combined_fsva,
excel=excel_file))
up_lst <- list(
"sh_up" = rownames(hs_sig_fsva[["deseq"]][["ups"]][["sh_nil"]]),
"ch_up" = rownames(hs_sig_fsva[["deseq"]][["ups"]][["ch_nil"]]))
fsva_up_venn <- Vennerable::Venn(Sets=up_lst)
summary(fsva_up_venn@IntersectionSets)
## Length Class Mode
## 00 0 -none- NULL
## 10 69 -none- character
## 01 81 -none- character
## 11 860 -none- character
down_lst <- list(
"sh_down" = rownames(hs_sig_fsva[["deseq"]][["downs"]][["sh_nil"]]),
"ch_down" = rownames(hs_sig_fsva[["deseq"]][["downs"]][["ch_nil"]]))
fsva_down_venn <- Vennerable::Venn(Sets=down_lst)
Vennerable::plot(fsva_down_venn, doWeights=FALSE)
## Length Class Mode
## 00 0 -none- NULL
## 10 61 -none- character
## 01 69 -none- character
## 11 242 -none- character
##hs_sig_fsva$deseq$counts$common_solos_fsva <- sm(subset_significants(hs_sig_fsva))
similar <- sm(compare_de_results(hs_combined_nobatch, hs_combined_fsva,
cor_method="spearman"))
similar$result$deseq
## $ch_sh
## $ch_sh$logfc
## [1] 0.9465
##
## $ch_sh$p
## [1] 0.8312
##
## $ch_sh$adjp
## [1] 0.04638
Repeat once again, this time using combat to try to limit the contribution of the strain to the data. I do not think we will ever use this set of contrasts, so I will deactivate it but leave it here if it is required later.
old_condition <- hs_uninf$design$condition
names(old_condition) <- hs_uninf$design$sampleid
new_condition <- paste0(hs_uninf$design$state, '_', hs_uninf$design$donor)
hs_uninf_recond <- set_expt_factors(hs_uninf_filt,
batch="pathogenstrain",
condition=new_condition)
combat_input <- sm(normalize_expt(hs_uninf_recond, batch="combat_scale", filter=TRUE))
combat_input <- set_expt_conditions(combat_input, fact=old_condition)
combat_input <- set_expt_colors(combat_input, colors=c("green","blue","red"))
## The new colors are a character, changing according to condition.
excel_file <- glue::glue("excel/{rundate}_hs_infect_combatpath_contr-v{ver}.xlsx")
hs_combined_combatpath <- sm(combine_de_tables(
hs_pairwise_combatpath,
excel=excel_file,
keepers=keepers_uninf))
excel_file <- glue::glue("excel/{rundate}_hs_infect_combatpath_sig-v{ver}.xlsx")
hs_sig_combatpath <- sm(extract_significant_genes(
hs_combined_combatpath,
excel=excel_file))
hs_sig_combatpath$deseq$counts
## change_counts_up change_counts_down
## sh_nil 1634 2
## ch_nil 2464 2042
## ch_sh 495 955
up_lst <- list(
"sh_up" = rownames(hs_sig_combatpath[["deseq"]][["ups"]][["sh_nil"]]),
"ch_up" = rownames(hs_sig_combatpath[["deseq"]][["ups"]][["ch_nil"]]))
combatpath_up_venn <- Vennerable::Venn(Sets=up_lst)
summary(combatpath_up_venn@IntersectionSets)
## Length Class Mode
## 00 0 -none- NULL
## 10 0 -none- NULL
## 01 830 -none- character
## 11 1634 -none- character
down_lst <- list(
"sh_down" = rownames(hs_sig_combatpath[["deseq"]][["downs"]][["sh_nil"]]),
"ch_down" = rownames(hs_sig_combatpath[["deseq"]][["downs"]][["ch_nil"]]))
combatpath_down_venn <- Vennerable::Venn(Sets=down_lst)
Vennerable::plot(combatpath_down_venn, doWeights=FALSE)
## Length Class Mode
## 00 0 -none- NULL
## 10 0 -none- NULL
## 01 2040 -none- character
## 11 2 -none- character
similar <- sm(compare_de_results(hs_combined_nobatch, hs_combined_combatpath,
cor_method="spearman"))
similar$result$deseq
## $ch_sh
## $ch_sh$logfc
## [1] -0.02286
##
## $ch_sh$p
## [1] -0.132
##
## $ch_sh$adjp
## [1] -0.001533
similar <- sm(compare_de_results(hs_combined_fsva, hs_combined_combatpath,
cor_method="spearman"))
similar$result$deseq
## $sh_nil
## $sh_nil$logfc
## [1] -0.08734
##
## $sh_nil$p
## [1] 0.01233
##
## $sh_nil$adjp
## [1] 0.01233
##
##
## $ch_nil
## $ch_nil$logfc
## [1] -0.1004
##
## $ch_nil$p
## [1] 0.05137
##
## $ch_nil$adjp
## [1] 0.05137
##
##
## $ch_sh
## $ch_sh$logfc
## [1] 0.1955
##
## $ch_sh$p
## [1] -0.03469
##
## $ch_sh$adjp
## [1] -0.0347
For each of the following, perform a simple DE and see what happens:
The data used in the following is the quantile(cpm(filter())) where the condition was set to a concatenation of patient and healing state, combat was also performed, so we no longer want batch in the experimental model and also we need to pass ‘force=TRUE’ because deseq/edger will need to be coerced into accepting these modified data.
## chr_5430_d108 chr_5397_d108 chr_2504_d108 sh_2272_d108 sh_1022_d108
## chr chr chr sh sh
## sh_2189_d108 chr_5430_d110 chr_5397_d110 chr_2504_d110 sh_2272_d110
## sh chr chr chr sh
## sh_1022_d110 sh_2189_d110 chr_5430_d107 chr_5397_d107 chr_2504_d107
## sh sh chr chr chr
## sh_2272_d107 sh_1022_d107 sh_2189_d107
## sh sh sh
## Levels: sh chr
## Start by leaving the data relatively alone, especially noting that we do not
## have a usable batch by default.
hs_uninf_filtv2 <- hs_uninf_filt
donor_state <- paste0(hs_uninf_filtv2$design$state, "_", hs_uninf_filtv2$design$donor)
hs_uninf_filtv2 <- set_expt_factors(hs_uninf_filtv2, condition=donor_state)
uninf_patient_keepers <- list(
"d107_chun" = c("chronic_d107", "uninfected_d107"),
"d107_shun" = c("self_heal_d107", "uninfected_d107"),
"d107_chsh" = c("chronic_d107", "self_heal_d107"),
"d108_chun" = c("chronic_d108", "uninfected_d108"),
"d108_shun" = c("self_heal_d108", "uninfected_d108"),
"d108_chsh" = c("chronic_d108", "self_heal_d108"),
"d110_chun" = c("chronic_d110", "uninfected_d110"),
"d110_shun" = c("self_heal_d110", "uninfected_d110"),
"d110_chsh" = c("chronic_d110", "self_heal_d110"))
hs_uninf_filtv2_pairwise <- sm(all_pairwise(hs_uninf_filtv2,
model_batch=FALSE, do_ebseq=FALSE))
## chr_5430_d108 chr_5397_d108 sh_2272_d108 sh_1022_d108 sh_2189_d108
## chr chr sh sh sh
## chr_5430_d110 chr_5397_d110 sh_2272_d110 sh_1022_d110 sh_2189_d110
## chr chr sh sh sh
## chr_5430_d107 chr_5397_d107 sh_2272_d107 sh_1022_d107 sh_2189_d107
## chr chr sh sh sh
## Levels: sh chr
## Start by leaving the data relatively alone, especially noting that we do not
## have a usable batch by default.
remove_2504_uninf_filtv2 <- remove_2504_uninf_filt
donor_state <- paste0(remove_2504_uninf_filtv2$design$state, "_", remove_2504_uninf_filtv2$design$donor)
remove_2504_uninf_filtv2 <- set_expt_factors(remove_2504_uninf_filtv2, condition=donor_state)
remove_2504_uninf_filtv2_pairwise <- sm(all_pairwise(remove_2504_uninf_filtv2,
model_batch=FALSE, do_ebseq=FALSE))
Now we want to look at intersections from the perspective of contrasts performed comparing the self-healing/chronic vs. uninfected for the three donors separately.
This time for each of the three donors: self-healing up vs. uninfected.
up_sig <- hs_uninf_filtv2_sig$deseq$ups
## Do this in a way which is not stupid...
comp_lst <- list(
"donor_107" = rownames(up_sig[["d107_shun"]]),
"donor_108" = rownames(up_sig[["d108_shun"]]),
"donor_110" = rownames(up_sig[["d110_shun"]]))
comp_shun_up <- Vennerable::Venn(Sets=comp_lst)
up_res <- Vennerable::plot(comp_shun_up, doWeights=FALSE)
shared_shun_up <- comp_shun_up@IntersectionSets[["111"]]
de_table_shared_up_sh_first <- hs_uninf_filtv2_combined[["data"]][["d107_shun"]][shared_shun_up, ]
de_table_shared_up_sh_second <- hs_uninf_filtv2_combined[["data"]][["d108_shun"]][shared_shun_up, ]
de_table_shared_up_sh_third <- hs_uninf_filtv2_combined[["data"]][["d110_shun"]][shared_shun_up, ]
de_table_shared_up_sh_all <- merge(
de_table_shared_up_sh_first[, c("description", "deseq_logfc", "deseq_adjp")],
de_table_shared_up_sh_second[, c("deseq_logfc", "deseq_adjp")],
by="row.names")
de_table_shared_up_sh_all <- merge(
de_table_shared_up_sh_all,
de_table_shared_up_sh_third[, c("deseq_logfc", "deseq_adjp")],
by.x="Row.names", by.y="row.names")
rownames(de_table_shared_up_sh_all) <- de_table_shared_up_sh_all[["Row.names"]]
de_table_shared_up_sh_all <- de_table_shared_up_sh_all[, -1]
colnames(de_table_shared_up_sh_all) <- c("description", "logfc_107", "adjp_107",
"logfc_108", "adjp_108", "logfc_110", "adjp_110")
write.csv(de_table_shared_up_sh_all,
file=paste0("images/", rundate, "_de_table_shared_up_sh_all.csv"))
This time for each of the three donors: chronic up vs. uninfected.
up_sig <- hs_uninf_filtv2_sig$deseq$ups
comp_lst <- list(
"donor_107" = rownames(up_sig[["d107_chun"]]),
"donor_108" = rownames(up_sig[["d108_chun"]]),
"donor_110" = rownames(up_sig[["d110_chun"]]))
comp_chun_up <- Vennerable::Venn(Sets=comp_lst)
up_res <- Vennerable::plot(comp_chun_up, doWeights=FALSE)
shared_chun_up <- comp_chun_up@IntersectionSets[["111"]]
de_table_shared_up_ch_first <- hs_uninf_filtv2_combined[["data"]][["d107_chun"]][shared_chun_up, ]
de_table_shared_up_ch_second <- hs_uninf_filtv2_combined[["data"]][["d108_chun"]][shared_chun_up, ]
de_table_shared_up_ch_third <- hs_uninf_filtv2_combined[["data"]][["d110_chun"]][shared_chun_up, ]
de_table_shared_up_ch_all <- merge(
de_table_shared_up_ch_first[, c("description", "deseq_logfc", "deseq_adjp")],
de_table_shared_up_ch_second[, c("deseq_logfc", "deseq_adjp")],
by="row.names")
de_table_shared_up_ch_all <- merge(
de_table_shared_up_ch_all,
de_table_shared_up_ch_third[, c("deseq_logfc", "deseq_adjp")],
by.x="Row.names", by.y="row.names")
rownames(de_table_shared_up_ch_all) <- de_table_shared_up_ch_all[["Row.names"]]
de_table_shared_up_ch_all <- de_table_shared_up_ch_all[, -1]
colnames(de_table_shared_up_ch_all) <- c("description", "logfc_107", "adjp_107",
"logfc_108", "adjp_108", "logfc_110", "adjp_110")
write.csv(de_table_shared_up_ch_all,
file=paste0("images/", rundate, "_de_table_shared_up_ch_all.csv"))
This time for each of the three donors: self-healing down vs. uninfected.
down_sig <- hs_uninf_filtv2_sig$deseq$downs
comp_lst <- list(
"donor_107" = rownames(down_sig[["d107_shun"]]),
"donor_108" = rownames(down_sig[["d108_shun"]]),
"donor_110" = rownames(down_sig[["d110_shun"]]))
comp_shun_down <- Vennerable::Venn(Sets=comp_lst)
down_res <- Vennerable::plot(comp_shun_down, doWeights=FALSE)
shared_shun_down <- comp_shun_down@IntersectionSets[["111"]]
de_table_shared_down_sh_first <- hs_uninf_filtv2_combined[["data"]][["d107_shun"]][shared_shun_down, ]
de_table_shared_down_sh_second <- hs_uninf_filtv2_combined[["data"]][["d108_shun"]][shared_shun_down, ]
de_table_shared_down_sh_third <- hs_uninf_filtv2_combined[["data"]][["d110_shun"]][shared_shun_down, ]
de_table_shared_down_sh_all <- merge(
de_table_shared_down_sh_first[, c("description", "deseq_logfc", "deseq_adjp")],
de_table_shared_down_sh_second[, c("deseq_logfc", "deseq_adjp")],
by="row.names")
de_table_shared_down_sh_all <- merge(
de_table_shared_down_sh_all,
de_table_shared_down_sh_third[, c("deseq_logfc", "deseq_adjp")],
by.x="Row.names", by.y="row.names")
rownames(de_table_shared_down_sh_all) <- de_table_shared_down_sh_all[["Row.names"]]
de_table_shared_down_sh_all <- de_table_shared_down_sh_all[, -1]
colnames(de_table_shared_down_sh_all) <- c("description", "logfc_107",
"adjp_107", "logfc_108", "adjp_108",
"logfc_110", "adjp_110")
write.csv(de_table_shared_down_sh_all,
file=paste0("images/", rundate, "_de_table_shared_down_sh_all.csv"))
This time for each of the three donors: chronic down vs. uninfected.
down_sig <- hs_uninf_filtv2_sig$deseq$downs
comp_lst <- list(
"donor_107" = rownames(down_sig[["d107_chun"]]),
"donor_108" = rownames(down_sig[["d108_chun"]]),
"donor_110" = rownames(down_sig[["d110_chun"]]))
comp_chun_down <- Vennerable::Venn(Sets=comp_lst)
down_res <- Vennerable::plot(comp_chun_down, doWeights=FALSE)
shared_chun_down <- comp_chun_down@IntersectionSets[["111"]]
de_table_shared_down_ch_first <- hs_uninf_filtv2_combined[["data"]][["d107_chun"]][shared_chun_down, ]
de_table_shared_down_ch_second <- hs_uninf_filtv2_combined[["data"]][["d108_chun"]][shared_chun_down, ]
de_table_shared_down_ch_third <- hs_uninf_filtv2_combined[["data"]][["d110_chun"]][shared_chun_down, ]
de_table_shared_down_ch_all <- merge(
de_table_shared_down_ch_first[, c("description", "deseq_logfc", "deseq_adjp")],
de_table_shared_down_ch_second[, c("deseq_logfc", "deseq_adjp")],
by="row.names")
de_table_shared_down_ch_all <- merge(
de_table_shared_down_ch_all,
de_table_shared_down_ch_third[, c("deseq_logfc", "deseq_adjp")],
by.x="Row.names", by.y="row.names")
rownames(de_table_shared_down_ch_all) <- de_table_shared_down_ch_all[["Row.names"]]
de_table_shared_down_ch_all <- de_table_shared_down_ch_all[, -1]
colnames(de_table_shared_down_ch_all) <- c("description", "logfc_107",
"adjp_107", "logfc_108", "adjp_108",
"logfc_110", "adjp_110")
write.csv(de_table_shared_down_ch_all,
file=paste0("images/", rundate, "_de_table_shared_down_ch_all.csv"))
At this point, we should have a set of genes which are up/down in the self/uninfected and chronic/uninfected, kept in variables with names like: ‘shared_shun_down’ and ‘shared_chun_down’
Now we want a sense of what genes are shared/unique among the self-healing vs. uninfected and the chronic vs. uninfected comparisons performed above. One would assume that the most interesting genes in these sets will prove to be the the ones which are not shared.
shch_up_lst <- list(
"up_sh" = shared_shun_up,
"up_ch" = shared_chun_up)
shared_up <- Vennerable::Venn(Sets=shch_up_lst)
Vennerable::plot(shared_up, doWeights=FALSE)
shch_down_lst <- list(
"up_sh" = shared_shun_down,
"up_ch" = shared_chun_down)
shared_down <- Vennerable::Venn(Sets=shch_down_lst)
Vennerable::plot(shared_down, doWeights=FALSE)
upup_genes <- shared_up@IntersectionSets[["11"]]
upsh_notch <- shared_up@IntersectionSets[["10"]]
upch_notsh <- shared_up@IntersectionSets[["01"]]
downdown_genes <- shared_down@IntersectionSets[["11"]]
downsh_notch <- shared_down@IntersectionSets[["10"]]
downch_notsh <- shared_down@IntersectionSets[["01"]]
kept_columns <- c("ensembltranscriptid", "ensemblgeneid", "description",
"deseq_logfc", "deseq_adjp")
## Get the shared things in up_sh and up_ch, ergo can use either sheet
xls_result <- write_xls(data=up_sig[["d107_shun"]][upup_genes, kept_columns],
sheet="upsh_upch")
## Get the only-up_sh
xls_result <- write_xls(data=up_sig[["d107_shun"]][upsh_notch, kept_columns],
sheet="upsh_noch",
wb=xls_result[["workbook"]])
## Get the only-up_ch
xls_result <- write_xls(data=up_sig[["d107_chun"]][upch_notsh, kept_columns],
sheet="upch_nosh",
wb=xls_result[["workbook"]])
## Now repeat for the down: down-shared
## Get shared down down_sh down_ch
xls_result <- write_xls(data=down_sig[["d107_shun"]][downdown_genes, kept_columns],
sheet="downsh_downch",
wb=xls_result[["workbook"]])
## The down_sh only
xls_result <- write_xls(data=down_sig[["d107_shun"]][downsh_notch, kept_columns],
sheet="downsh_noch",
wb=xls_result[["workbook"]])
## The down_ch
xls_result <- write_xls(data=down_sig[["d107_chun"]][downch_notsh, kept_columns],
sheet="downch_nosh",
wb=xls_result[["workbook"]],
excel=paste0("excel/", rundate, "_figure_5c_stuff-v", ver, ".xlsx"))
## Saving to: excel/20190104_figure_5c_stuff-v20180822.xlsx
Do I think the exact same thing as in the previous comparison, but now simplify the ‘condition’ factor to just self-healing vs. chronic and see what happens.
uninf_strainbatch_qcf <- set_expt_batches(expt=hs_cds_uninf, fact="pathogenstrain")
uninf_strainbatch_qcf <- set_expt_conditions(expt=uninf_strainbatch_qcf, fact="state")
withuninf_strainbatch_pairs_chsh <- all_pairwise(
uninf_strainbatch_qcf,
model_batch=FALSE, force=TRUE, do_ebseq=FALSE)
chsh_keepers <- list(
"ch_sh" = c("chronic", "self_heal"),
"ch_nil" = c("chronic", "uninfected"),
"sh_nil" = c("self_heal", "uninfected"))
withuninf_strainbatch_tables_chsh <- sm(combine_de_tables(
withuninf_strainbatch_pairs_chsh,
keepers=chsh_keepers,
excel=paste0("excel/", rundate, "_withuninf_strainbatch_chsh_pairwise-v", ver, ".xlsx")))
withuninf_strainbatch_sig_chsh <- extract_significant_genes(
withuninf_strainbatch_tables_chsh,
excel=paste0("excel/", rundate, "_withuninf_strainbatch_chsh_sig-v", ver, ".xlsx"))
##withuninf_strainbatch_tables_chsh$limma_plots
##withuninf_strainbatch_tables_chsh$edger_plots
##withuninf_strainbatch_tables_chsh$deseq_plots
Generate DE lists of each donor for all contrasts for PBMCs.
I renamed these plots and am now hopelessly confused as to which is which. I think I will not run this for now but instead generate the worksheet without them and then return to this in the hopes that I can do a better job.
pp(file=paste0("images/", rundate, "_fig_05a-sh_uninf_vs_chr_uninf_up.pdf"))
Vennerable::plot(common_solos_batch$up_venn, doWeights=FALSE)
dev.off()
pp(file=paste0("images/", rundate, "_fig_05b-sh_uninf_vs_chr_uninf_down.pdf"))
Vennerable::plot(common_solos_batch$down_venn, doWeights=FALSE)
dev.off()
pp(file=paste0("images/", rundate, "_fig_05c-sh_uninf_donors_up.pdf"))
Vennerable::plot(sh_up_venn, doWeights=FALSE)
dev.off()
pp(file=paste0("images/", rundate, "_fig_05d-sh_uninf_donors_down.pdf"))
Vennerable::plot(sh_down_venn, doWeights=FALSE)
dev.off()
pp(file=paste0("images/", rundate, "_fig_05e-chr_uninf_donors_up.pdf"))
Vennerable::plot(chr_up_venn, doWeights=FALSE)
dev.off()
pp(file=paste0("images/", rundate, "_fig_05f-chr_uninf_donors_down.pdf"))
Vennerable::plot(chr_down_venn, doWeights=FALSE)
dev.off()
pp(file=paste0("images/", rundate, "_fig_05g-up_common.pdf"))
Vennerable::plot(shared_up, doWeights=FALSE)
dev.off()
pp(file=paste0("images/", rundate, "_fig_05h-down_common.pdf"))
Vennerable::plot(shared_down, doWeights=FALSE)
dev.off()
lp_pairwise_nobatch <- sm(all_pairwise(lp_inf_filt, model_batch=FALSE,
do_ebseq=FALSE))
excel_file <- glue::glue("excel/{rundate}_lp_infect_nobatch_contr-v{ver}.xlsx")
lp_combined_nobatch <- sm(combine_de_tables(lp_pairwise_nobatch, excel=excel_file))
excel_file <- glue::glue("excel/{rundate}_lp_infect_nobatch_sig-v{ver}.xlsx")
lp_sig_nobatch <- sm(extract_significant_genes(lp_combined_nobatch, excel=excel_file))
lp_pairwise_batch <- sm(all_pairwise(lp_inf_filt, model_batch=TRUE))
excel_file <- glue::glue("excel/{rundate}_lp_infect_batch_contr-v{ver}.xlsx")
lp_combined_batch <- sm(combine_de_tables(lp_pairwise_batch, excel=excel_file))
excel_file <- glue::glue("excel/{rundate}_lp_infect_batch_sig-v{ver}.xlsx")
lp_sig_batch <- sm(extract_significant_genes(lp_combined_batch, excel=excel_file))
lp_pairwise_ssva <- sm(all_pairwise(lp_inf_filt, model_batch="ssva"))
excel_file <- glue::glue("excel/{rundate}_lp_infect_ssva_contr-v{ver}.xlsx")
lp_combined_ssva <- sm(combine_de_tables(lp_pairwise_ssva, excel=excel_file))
excel_file <- glue::glue("excel/{rundate}_lp_infect_ssva_sig-v{ver}.xlsx")
lp_sig_ssva <- sm(extract_significant_genes(lp_combined_ssva, excel=excel_file))
lp_pairwise_fsva <- sm(all_pairwise(lp_inf_filt, model_batch="fsva"))
excel_file <- glue::glue("excel/{rundate}_lp_infect_fsva_contr-v{ver}.xlsx")
lp_combined_fsva <- sm(combine_de_tables(lp_pairwise_fsva, excel=excel_file))