This document turns to the infection of PBMC cells with L.panamensis. This data is particularly strangely affected by the different strains used to infect the cells, and as a result is both useful and troubling.
“Changes during infection hpgl0630-0636 and hpgl0650-hpgl0663”
Start out by creating the expt and poking at it to see how well/badly behaved the data is.
## Reread the sample sheet because I am fiddling with other possible surrogates (like strain)
## In fact, copy it to a separate sheet because these samples are a mess
infect_para <- expt_subset(parasite_expt, subset="experimentname=='infection'")
chosen_colors <- c("#990000", "#000099")
names(chosen_colors) <- c("pbmc_ch","pbmc_sh")
infect_para <- set_expt_colors(infect_para, colors=chosen_colors)
The following creates all the metric plots of the raw data.
infect_para_metrics <- sm(graph_metrics(infect_para))
Now visualize some relevant metrics.
## Repeat for the parasite
infect_para_metrics$libsize
## Wow, the range of coverage is shockingly large
infect_para_metrics$density
## But this looks much better I think
infect_para_metrics$boxplot
infect_para_written <- sm(write_expt(infect_para,
excel=paste0("excel/infection_parasite_data-v", ver, ".xlsx"),
violin=TRUE))
Now perform the ‘default’ normalization we use in the lab and look again.
infectpara_norm <- sm(normalize_expt(infect_para, convert="cpm", filter=TRUE, norm="quant"))
infectpara_norm_metrics <- sm(graph_metrics(infectpara_norm))
In this section, try out some normalizations/batch corrections and see the effect in PCA plots.
Start out by taking the parasite data and doing the default normalization and see what there is to see.
l2qcpm_para <- sm(normalize_expt(infect_para, transform="log2", convert="cpm",
norm="quant", filter=TRUE))
l2qcpm_para_pca <- sm(plot_pca(l2qcpm_para))
l2qcpm_para_pca$plot
## Though the colors don't show it well, the samples are actually split beautifully by strain, but
## clearly not by chronic/healing
knitr::kable(l2qcpm_para_pca$table)
sampleid | condition | batch | batch_int | PC1 | PC2 | colors | labels | |
---|---|---|---|---|---|---|---|---|
hpgl0631 | HPGL0631 | pbmc_ch | d108 | 2 | -0.4988 | -0.1901 | #990000 | HPGL0631 |
hpgl0632 | HPGL0632 | pbmc_ch | d108 | 2 | 0.1617 | -0.1427 | #990000 | HPGL0632 |
hpgl0633 | HPGL0633 | pbmc_ch | d108 | 2 | -0.0137 | 0.2963 | #990000 | HPGL0633 |
hpgl0634 | HPGL0634 | pbmc_sh | d108 | 2 | 0.1339 | -0.2135 | #000099 | HPGL0634 |
hpgl0635 | HPGL0635 | pbmc_sh | d108 | 2 | -0.0474 | 0.3478 | #000099 | HPGL0635 |
hpgl0636 | HPGL0636 | pbmc_sh | d108 | 2 | 0.1801 | -0.1622 | #000099 | HPGL0636 |
hpgl0651 | HPGL0651 | pbmc_ch | d110 | 3 | -0.4560 | -0.1643 | #990000 | HPGL0651 |
hpgl0652 | HPGL0652 | pbmc_ch | d110 | 3 | 0.2050 | -0.0989 | #990000 | HPGL0652 |
hpgl0653 | HPGL0653 | pbmc_ch | d110 | 3 | 0.0627 | 0.3543 | #990000 | HPGL0653 |
hpgl0654 | HPGL0654 | pbmc_sh | d110 | 3 | 0.1987 | -0.1988 | #000099 | HPGL0654 |
hpgl0655 | HPGL0655 | pbmc_sh | d110 | 3 | -0.0084 | 0.3517 | #000099 | HPGL0655 |
hpgl0656 | HPGL0656 | pbmc_sh | d110 | 3 | 0.2129 | -0.1374 | #000099 | HPGL0656 |
hpgl0658 | HPGL0658 | pbmc_ch | d107 | 1 | -0.5060 | -0.1996 | #990000 | HPGL0658 |
hpgl0659 | HPGL0659 | pbmc_ch | d107 | 1 | 0.1391 | -0.1387 | #990000 | HPGL0659 |
hpgl0660 | HPGL0660 | pbmc_ch | d107 | 1 | -0.0029 | 0.3053 | #990000 | HPGL0660 |
hpgl0661 | HPGL0661 | pbmc_sh | d107 | 1 | 0.1403 | -0.1908 | #000099 | HPGL0661 |
hpgl0662 | HPGL0662 | pbmc_sh | d107 | 1 | -0.0800 | 0.3288 | #000099 | HPGL0662 |
hpgl0663 | HPGL0663 | pbmc_sh | d107 | 1 | 0.1788 | -0.1472 | #000099 | HPGL0663 |
Now repeat the same thing, but let sva minimize surrogate variables.
l2qcpm_normbatch <- sm(normalize_expt(infect_para, transform="log2", convert="cpm", norm="quant", filter=TRUE, batch="sva"))
l2qcpm_normbatch_pca <- plot_pca(l2qcpm_normbatch)
Now plot the result and see if things make more sense.
l2qcpm_normbatch_pca$plot
Adding SVA to the normalization does not help much.
## That does nothing significant to clarify things.
knitr::kable(l2qcpm_normbatch_pca$table)
sampleid | condition | batch | batch_int | PC1 | PC2 | colors | labels | |
---|---|---|---|---|---|---|---|---|
hpgl0631 | HPGL0631 | pbmc_ch | d108 | 2 | -0.3179 | -0.0846 | #990000 | HPGL0631 |
hpgl0632 | HPGL0632 | pbmc_ch | d108 | 2 | 0.5265 | 0.0599 | #990000 | HPGL0632 |
hpgl0633 | HPGL0633 | pbmc_ch | d108 | 2 | -0.1376 | 0.0246 | #990000 | HPGL0633 |
hpgl0634 | HPGL0634 | pbmc_sh | d108 | 2 | -0.1523 | 0.3362 | #000099 | HPGL0634 |
hpgl0635 | HPGL0635 | pbmc_sh | d108 | 2 | -0.1448 | -0.0992 | #000099 | HPGL0635 |
hpgl0636 | HPGL0636 | pbmc_sh | d108 | 2 | -0.1758 | -0.1116 | #000099 | HPGL0636 |
hpgl0651 | HPGL0651 | pbmc_ch | d110 | 3 | 0.0806 | 0.0665 | #990000 | HPGL0651 |
hpgl0652 | HPGL0652 | pbmc_ch | d110 | 3 | -0.2465 | 0.0111 | #990000 | HPGL0652 |
hpgl0653 | HPGL0653 | pbmc_ch | d110 | 3 | -0.1124 | 0.0524 | #990000 | HPGL0653 |
hpgl0654 | HPGL0654 | pbmc_sh | d110 | 3 | -0.2810 | 0.0921 | #000099 | HPGL0654 |
hpgl0655 | HPGL0655 | pbmc_sh | d110 | 3 | 0.2511 | 0.0681 | #000099 | HPGL0655 |
hpgl0656 | HPGL0656 | pbmc_sh | d110 | 3 | 0.0701 | -0.4988 | #000099 | HPGL0656 |
hpgl0658 | HPGL0658 | pbmc_ch | d107 | 1 | 0.0630 | 0.0351 | #990000 | HPGL0658 |
hpgl0659 | HPGL0659 | pbmc_ch | d107 | 1 | -0.1255 | 0.2471 | #990000 | HPGL0659 |
hpgl0660 | HPGL0660 | pbmc_ch | d107 | 1 | -0.1392 | 0.0171 | #990000 | HPGL0660 |
hpgl0661 | HPGL0661 | pbmc_sh | d107 | 1 | 0.4089 | 0.3793 | #000099 | HPGL0661 |
hpgl0662 | HPGL0662 | pbmc_sh | d107 | 1 | 0.2387 | 0.0189 | #000099 | HPGL0662 |
hpgl0663 | HPGL0663 | pbmc_sh | d107 | 1 | 0.1941 | -0.6142 | #000099 | HPGL0663 |
No, not really, so lets change things by putting the ‘snp status’ as the “batch” factor and minimize it with sva/combat.
infect_parav2 <- set_expt_condition(infect_para, fact="state")
infect_parav2 <- set_expt_batch(infect_parav2, fact="snpclade")
l2qcpm_snpbatch_straincond_sva <- sm(normalize_expt(infect_parav2, transform="log2", convert="cpm",
norm="quant", filter=TRUE,
batch="sva"))
l2qcpm_snpbatch_straincond_pca <- plot_pca(l2qcpm_snpbatch_straincond_sva)
l2qcpm_snpbatch_straincond_pca$plot
SNP status does not clarify things.
## Pulling strain 5430 away from the others makes a semi-split
knitr::kable(l2qcpm_snpbatch_straincond_pca$table)
sampleid | condition | batch | batch_int | PC1 | PC2 | colors | labels | |
---|---|---|---|---|---|---|---|---|
hpgl0631 | HPGL0631 | chronic | red | 4 | -0.3179 | -0.0846 | #1B9E77 | HPGL0631 |
hpgl0632 | HPGL0632 | chronic | yellow | 6 | 0.5265 | 0.0599 | #1B9E77 | HPGL0632 |
hpgl0633 | HPGL0633 | chronic | blue_chronic | 1 | -0.1376 | 0.0246 | #1B9E77 | HPGL0633 |
hpgl0634 | HPGL0634 | self_heal | white | 5 | -0.1523 | 0.3362 | #7570B3 | HPGL0634 |
hpgl0635 | HPGL0635 | self_heal | blue_self | 2 | -0.1448 | -0.0992 | #7570B3 | HPGL0635 |
hpgl0636 | HPGL0636 | self_heal | pink | 3 | -0.1758 | -0.1116 | #7570B3 | HPGL0636 |
hpgl0651 | HPGL0651 | chronic | red | 4 | 0.0806 | 0.0665 | #1B9E77 | HPGL0651 |
hpgl0652 | HPGL0652 | chronic | yellow | 6 | -0.2465 | 0.0111 | #1B9E77 | HPGL0652 |
hpgl0653 | HPGL0653 | chronic | blue_chronic | 1 | -0.1124 | 0.0524 | #1B9E77 | HPGL0653 |
hpgl0654 | HPGL0654 | self_heal | white | 5 | -0.2810 | 0.0921 | #7570B3 | HPGL0654 |
hpgl0655 | HPGL0655 | self_heal | blue_self | 2 | 0.2511 | 0.0681 | #7570B3 | HPGL0655 |
hpgl0656 | HPGL0656 | self_heal | pink | 3 | 0.0701 | -0.4988 | #7570B3 | HPGL0656 |
hpgl0658 | HPGL0658 | chronic | red | 4 | 0.0630 | 0.0351 | #1B9E77 | HPGL0658 |
hpgl0659 | HPGL0659 | chronic | yellow | 6 | -0.1255 | 0.2471 | #1B9E77 | HPGL0659 |
hpgl0660 | HPGL0660 | chronic | blue_chronic | 1 | -0.1392 | 0.0171 | #1B9E77 | HPGL0660 |
hpgl0661 | HPGL0661 | self_heal | white | 5 | 0.4089 | 0.3793 | #7570B3 | HPGL0661 |
hpgl0662 | HPGL0662 | self_heal | blue_self | 2 | 0.2387 | 0.0189 | #7570B3 | HPGL0662 |
hpgl0663 | HPGL0663 | self_heal | pink | 3 | 0.1941 | -0.6142 | #7570B3 | HPGL0663 |
l2qcpm_snpbatch_straincond_pca <- plot_pca(l2qcpm_snpbatch_straincond_combat)
l2qcpm_snpbatch_straincond_pca$plot
SNP status does not clarify things.
## Doing the same thing with combat has little or no effect.
knitr::kable(l2qcpm_snpbatch_straincond_pca$table)
sampleid | condition | batch | batch_int | PC1 | PC2 | colors | labels | |
---|---|---|---|---|---|---|---|---|
hpgl0631 | HPGL0631 | chronic | red | 4 | -0.4988 | -0.1901 | #1B9E77 | HPGL0631 |
hpgl0632 | HPGL0632 | chronic | yellow | 6 | 0.1617 | -0.1427 | #1B9E77 | HPGL0632 |
hpgl0633 | HPGL0633 | chronic | blue_chronic | 1 | -0.0137 | 0.2963 | #1B9E77 | HPGL0633 |
hpgl0634 | HPGL0634 | self_heal | white | 5 | 0.1339 | -0.2135 | #7570B3 | HPGL0634 |
hpgl0635 | HPGL0635 | self_heal | blue_self | 2 | -0.0474 | 0.3478 | #7570B3 | HPGL0635 |
hpgl0636 | HPGL0636 | self_heal | pink | 3 | 0.1801 | -0.1622 | #7570B3 | HPGL0636 |
hpgl0651 | HPGL0651 | chronic | red | 4 | -0.4560 | -0.1643 | #1B9E77 | HPGL0651 |
hpgl0652 | HPGL0652 | chronic | yellow | 6 | 0.2050 | -0.0989 | #1B9E77 | HPGL0652 |
hpgl0653 | HPGL0653 | chronic | blue_chronic | 1 | 0.0627 | 0.3543 | #1B9E77 | HPGL0653 |
hpgl0654 | HPGL0654 | self_heal | white | 5 | 0.1987 | -0.1988 | #7570B3 | HPGL0654 |
hpgl0655 | HPGL0655 | self_heal | blue_self | 2 | -0.0084 | 0.3517 | #7570B3 | HPGL0655 |
hpgl0656 | HPGL0656 | self_heal | pink | 3 | 0.2129 | -0.1374 | #7570B3 | HPGL0656 |
hpgl0658 | HPGL0658 | chronic | red | 4 | -0.5060 | -0.1996 | #1B9E77 | HPGL0658 |
hpgl0659 | HPGL0659 | chronic | yellow | 6 | 0.1391 | -0.1387 | #1B9E77 | HPGL0659 |
hpgl0660 | HPGL0660 | chronic | blue_chronic | 1 | -0.0029 | 0.3053 | #1B9E77 | HPGL0660 |
hpgl0661 | HPGL0661 | self_heal | white | 5 | 0.1403 | -0.1908 | #7570B3 | HPGL0661 |
hpgl0662 | HPGL0662 | self_heal | blue_self | 2 | -0.0800 | 0.3288 | #7570B3 | HPGL0662 |
hpgl0663 | HPGL0663 | self_heal | pink | 3 | 0.1788 | -0.1472 | #7570B3 | HPGL0663 |
Ok, so let us remove the healing state with combat and see if that allows us to see a split on some other factor.
infect_parastrain <- set_expt_condition(infect_para, fact="pathogenstrain")
infect_parastrain <- set_expt_batch(infect_parastrain, fact="state")
l2qcpm_parastrain <- sm(normalize_expt(infect_parastrain, transform="log2", convert="cpm", norm="quant", filter=TRUE, batch="combat_scale"))
l2qcpm_parastrain_pca <- plot_pca(l2qcpm_parastrain)
l2qcpm_parastrain_pca$plot
hmm ok, I think I quit for today.
## wtf!?!? how did this happen?
knitr::kable(l2qcpm_parastrain_pca$table)
sampleid | condition | batch | batch_int | PC1 | PC2 | colors | labels | |
---|---|---|---|---|---|---|---|---|
hpgl0631 | HPGL0631 | s5430 | chronic | 1 | -0.4266 | 0.2780 | #1B9E77 | HPGL0631 |
hpgl0632 | HPGL0632 | s5397 | chronic | 1 | -0.1580 | -0.3384 | #D95F02 | HPGL0632 |
hpgl0633 | HPGL0633 | s2504 | chronic | 1 | -0.0378 | -0.0036 | #7570B3 | HPGL0633 |
hpgl0634 | HPGL0634 | s2272 | self_heal | 2 | 0.1090 | -0.0865 | #E7298A | HPGL0634 |
hpgl0635 | HPGL0635 | s1022 | self_heal | 2 | 0.2916 | 0.3432 | #66A61E | HPGL0635 |
hpgl0636 | HPGL0636 | s2189 | self_heal | 2 | 0.1751 | -0.1096 | #E6AB02 | HPGL0636 |
hpgl0651 | HPGL0651 | s5430 | chronic | 1 | -0.4043 | 0.2412 | #1B9E77 | HPGL0651 |
hpgl0652 | HPGL0652 | s5397 | chronic | 1 | -0.1295 | -0.3764 | #D95F02 | HPGL0652 |
hpgl0653 | HPGL0653 | s2504 | chronic | 1 | 0.0055 | -0.0715 | #7570B3 | HPGL0653 |
hpgl0654 | HPGL0654 | s2272 | self_heal | 2 | 0.1340 | -0.1603 | #E7298A | HPGL0654 |
hpgl0655 | HPGL0655 | s1022 | self_heal | 2 | 0.3060 | 0.2918 | #66A61E | HPGL0655 |
hpgl0656 | HPGL0656 | s2189 | self_heal | 2 | 0.1928 | -0.1450 | #E6AB02 | HPGL0656 |
hpgl0658 | HPGL0658 | s5430 | chronic | 1 | -0.4317 | 0.2803 | #1B9E77 | HPGL0658 |
hpgl0659 | HPGL0659 | s5397 | chronic | 1 | -0.1645 | -0.3141 | #D95F02 | HPGL0659 |
hpgl0660 | HPGL0660 | s2504 | chronic | 1 | -0.0336 | -0.0085 | #7570B3 | HPGL0660 |
hpgl0661 | HPGL0661 | s2272 | self_heal | 2 | 0.1228 | -0.0884 | #E7298A | HPGL0661 |
hpgl0662 | HPGL0662 | s1022 | self_heal | 2 | 0.2723 | 0.3717 | #66A61E | HPGL0662 |
hpgl0663 | HPGL0663 | s2189 | self_heal | 2 | 0.1770 | -0.1041 | #E6AB02 | HPGL0663 |