Start out by extracting the relevant data and querying it to see the general quality.
This first block sets the names of the samples and colors. It also makes separate data sets for:
## Reread the sample sheet because I am fiddling with other possible surrogates (like strain)
## In fact, copy it to a separate sheet because these samples are a mess
hs_infection <- subset_expt(hs_expt, subset = "experimentname=='infection'")
## There were 42, now there are 15 samples.
## One color per condition, keyed by the condition name it applies to.
chosen_colors <- c(uninf = "#009900", chr = "#990000", sh = "#000099")
hs_uninf <- set_expt_colors(hs_infection, colors = chosen_colors)
## The new colors are a character, changing according to condition.
## There were 15, now there are 12 samples.
## Rename every sample to <label>_<donor>, taken from the sample metadata.
## NOTE(review): hs_inf is not assigned in the visible code; confirm it is
## created before this point.
uninf_newnames <- paste0(
  pData(hs_uninf)$label, "_", pData(hs_uninf)$donor
)
hs_uninf <- set_expt_samplenames(hs_uninf, newnames = uninf_newnames)
inf_newnames <- paste0(
  pData(hs_inf)$label, "_", pData(hs_inf)$donor
)
hs_inf <- set_expt_samplenames(hs_inf, newnames = inf_newnames)
## Repeat the infection subset for the CDS-only experiment (hs_cds_expt).
hs_cds_infection <- subset_expt(hs_cds_expt, subset="experimentname=='infection'")
## There were 42, now there are 15 samples.
## The new colors are a character, changing according to condition.
## There were 15, now there are 12 samples.
## NOTE(review): hs_cds_uninf and hs_cds_inf are not assigned in the visible
## code; confirm they are created before the renaming below.
## Reuse the sample names derived from hs_uninf / hs_inf for the CDS data sets.
hs_cds_uninf <- set_expt_samplenames(hs_cds_uninf, newnames=uninf_newnames)
hs_cds_inf <- set_expt_samplenames(hs_cds_inf, newnames=inf_newnames)
##infection_model_test <- model_test(hs_cds_infection$design)
## Keep only samples whose pathogenstrain is not 's2272'; per the message below
## the sample count is unchanged.
hs_cds_removeboth <- subset_expt(hs_cds_uninf, subset="pathogenstrain!='s2272'")
## There were 15, now there are 15 samples.
## There were 15, now there are 15 samples.
The following creates metric plots of the raw data.
Now let us visualize some of these metrics for the data set of all features including the uninfected samples.
Start with the relative library sizes. Note that this includes all feature types.
The picture is slightly different if we only look at coding sequences.
Look at the density of counts / feature for all samples. Use density plots and boxplots to view this information.
Now let us look at how the samples relate to each other via pairwise correlation heatmaps. Once again, show this first for all features, then only cds features.
Having looked at these metrics, now let us write out the results in 4 excel workbooks, representing the same 4 data sets.
## Workbook 1: all features, uninfected samples included.
excel_file <- glue::glue("excel/{rundate}_hs_data-infection_with_uninf-v{ver}.xlsx")
hs_uninf_data <- sm(
  write_expt(
    hs_uninf,
    excel = excel_file,
    filter = TRUE, convert = "cpm", norm = "quant",
    transform = "log2", batch = "pca",
    violin = FALSE
  )
)
## Workbook 2: all features, infected samples only.
excel_file <- glue::glue("excel/{rundate}_hs_data-infection_no_uninf-v{ver}.xlsx")
hs_inf_data <- sm(
  write_expt(
    hs_inf,
    excel = excel_file,
    filter = TRUE, convert = "cpm", norm = "quant",
    transform = "log2", batch = "pca",
    violin = FALSE
  )
)
## Workbook 3: CDS features, uninfected samples included.
excel_file <- glue::glue("excel/{rundate}_hs_cds_data-infection_with_uninf-v{ver}.xlsx")
hs_cds_uninf_data <- sm(
  write_expt(
    hs_cds_uninf,
    excel = excel_file,
    filter = TRUE, convert = "cpm", norm = "quant",
    transform = "log2", batch = "pca",
    violin = FALSE
  )
)
## Workbook 4: CDS features, infected samples only.
excel_file <- glue::glue("excel/{rundate}_hs_cds_data-infection_no_uninf-v{ver}.xlsx")
hs_cds_inf_data <- sm(
  write_expt(
    hs_cds_inf,
    excel = excel_file,
    filter = TRUE, convert = "cpm", norm = "quant",
    transform = "log2", batch = "pca",
    violin = FALSE
  )
)
Now perform the ‘default’ normalization we use in the lab and look again.
## Apply the default normalization (filter, cpm conversion, quantile
## normalization) and recompute the metric plots on the normalized data.
hs_uninf_cqf <- sm(normalize_expt(hs_uninf, convert="cpm", filter=TRUE, norm="quant"))
## hs_uninf_met keeps its original definition (metrics of the un-normalized
## hs_uninf) so that anything reading it later is unaffected.
hs_uninf_met <- sm(graph_metrics(hs_uninf))
## Metrics of the normalized data, named in parallel with hs_inf_cqf_met.
hs_uninf_cqf_met <- sm(graph_metrics(hs_uninf_cqf))
hs_inf_cqf <- sm(normalize_expt(hs_inf, convert="cpm", filter=TRUE, norm="quant"))
## This previously graphed hs_inf (the un-normalized data) despite the _cqf
## name; it now graphs the normalized experiment created on the line above.
hs_inf_cqf_met <- sm(graph_metrics(hs_inf_cqf))
Construct figure 4, this should include the following panels:
## Going to write the image to: images/figure_4a.pdf when dev.off() is called.
## png
## 2
## Going to write the image to: images/figure_4b.pdf when dev.off() is called.
## png
## 2
## Save the PCA table stored in hs_uninf_data next to panel 4b.
write.csv(hs_uninf_data$raw_scaled_pca_table, file="images/figure_4b.csv")
## Start the output file for panel 4c (written when dev.off() is called).
pp(file="images/figure_4c.pdf")
## Going to write the image to: images/figure_4c.pdf when dev.off() is called.
## png
## 2
## Save the PCA table stored in hs_inf_data next to panel 4c.
write.csv(hs_inf_data$raw_scaled_pca_table, file="images/figure_4c.csv")
## Start the output file for panel 4d (written when dev.off() is called).
pp(file="images/figure_4d.pdf")
## Going to write the image to: images/figure_4d.pdf when dev.off() is called.
## png
## 2
## Going to write the image to: images/figure_4e.pdf when dev.off() is called.
## png
## 2
## Going to write the image to: images/figure_4a_cds.pdf when dev.off() is called.
## png
## 2
## Going to write the image to: images/figure_4b_cds.pdf when dev.off() is called.
## png
## 2
## Save the PCA table stored in hs_cds_uninf_data next to the CDS panel 4b.
write.csv(hs_cds_uninf_data$raw_scaled_pca_table, file="images/figure_4b_cds.csv")
## Start the output file for CDS panel 4c (written when dev.off() is called).
pp(file="images/figure_4c_cds.pdf")
## Going to write the image to: images/figure_4c_cds.pdf when dev.off() is called.
## png
## 2
## Save the PCA table stored in hs_cds_inf_data next to the CDS panel 4c.
write.csv(hs_cds_inf_data$raw_scaled_pca_table, file="images/figure_4c_cds.csv")
## Start the output file for CDS panel 4d (written when dev.off() is called).
pp(file="images/figure_4d_cds.pdf")
## Going to write the image to: images/figure_4d_cds.pdf when dev.off() is called.
## png
## 2
## Going to write the image to: images/figure_4e_cds.pdf when dev.off() is called.
## png
## 2
Now let us try a few different ways of dealing with the batch effects/surrogate variables. In each case, I will use a PCA plot to see how the method changes the sample clustering.
In this first iteration, we will log2(cpm(quant(filter()))) the data and leave the experimental parameters as the default: condition == the 6 strains (3 chronic, 3 self-healing); batch == the three patients (p107, p108, p110).
## Infected samples only (uninfected excluded), no batch correction.
hs_inf_lqcf <- sm(
  normalize_expt(hs_inf, norm = "quant", transform = "log2",
                 convert = "cpm", filter = TRUE)
)
hs_pca_inf_lqcf <- plot_pca(hs_inf_lqcf)
hs_pca_inf_lqcf$plot
## The three patients are super obvious I think
## The patients 107/108 are on the left while 110 is on the right.
knitr::kable(hs_pca_inf_lqcf$table)
sampleid | condition | batch | batch_int | colors | labels | PC1 | PC2 | pc_1 | pc_2 | pc_3 | pc_4 | pc_5 | pc_6 | pc_7 | pc_8 | pc_9 | pc_10 | pc_11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
chr_5430_d108 | HPGL0631 | chr | d108 | 1 | #990000 | HPGL0631 | -0.1946 | -0.4136 | -0.1946 | -0.4136 | 0.0541 | -0.3363 | 0.0612 | -0.0650 | 0.6781 | 0.0233 | -0.3162 | -0.0772 | -0.1322 |
chr_5397_d108 | HPGL0632 | chr | d108 | 1 | #990000 | HPGL0632 | -0.1647 | -0.3877 | -0.1647 | -0.3877 | 0.0073 | -0.1022 | 0.2906 | -0.3099 | -0.6296 | 0.3302 | -0.1880 | -0.0769 | -0.0406 |
sh_1022_d108 | HPGL0635 | sh | d108 | 1 | #000099 | HPGL0635 | -0.0715 | -0.3280 | -0.0715 | -0.3280 | 0.0733 | 0.0264 | 0.0369 | 0.7699 | -0.2015 | -0.3288 | 0.0797 | -0.1455 | 0.1660 |
sh_2189_d108 | HPGL0636 | sh | d108 | 1 | #000099 | HPGL0636 | -0.1185 | -0.3566 | -0.1185 | -0.3566 | 0.2201 | 0.4665 | -0.4034 | -0.2463 | 0.0875 | 0.0162 | 0.4298 | 0.3056 | 0.0033 |
chr_5430_d110 | HPGL0651 | chr | d110 | 2 | #990000 | HPGL0651 | 0.3558 | -0.0317 | 0.3558 | -0.0317 | -0.3547 | -0.0975 | -0.2049 | -0.2931 | 0.0042 | -0.2284 | -0.1096 | -0.1164 | 0.6694 |
chr_5397_d110 | HPGL0652 | chr | d110 | 2 | #990000 | HPGL0652 | 0.3688 | -0.0053 | 0.3688 | -0.0053 | -0.5102 | 0.2696 | 0.0622 | 0.1699 | 0.0059 | 0.0303 | -0.2632 | 0.4104 | -0.4198 |
sh_1022_d110 | HPGL0655 | sh | d110 | 2 | #000099 | HPGL0655 | 0.4480 | 0.1721 | 0.4480 | 0.1721 | 0.3911 | -0.2974 | 0.1033 | 0.2004 | 0.0777 | 0.5080 | 0.1950 | 0.2466 | 0.1765 |
sh_2189_d110 | HPGL0656 | sh | d110 | 2 | #000099 | HPGL0656 | 0.4163 | 0.1035 | 0.4163 | 0.1035 | 0.2617 | 0.0940 | 0.1395 | -0.2234 | -0.0010 | -0.3137 | 0.1691 | -0.5347 | -0.4159 |
chr_5430_d107 | HPGL0658 | chr | d107 | 3 | #990000 | HPGL0658 | -0.3063 | 0.2202 | -0.3063 | 0.2202 | -0.4010 | -0.4792 | 0.0247 | -0.0537 | -0.0449 | -0.1202 | 0.5672 | 0.0985 | -0.1803 |
chr_5397_d107 | HPGL0659 | chr | d107 | 3 | #990000 | HPGL0659 | -0.2846 | 0.3036 | -0.2846 | 0.3036 | -0.2250 | 0.4529 | 0.1458 | 0.1173 | 0.2127 | 0.4435 | 0.0478 | -0.4246 | 0.1679 |
sh_1022_d107 | HPGL0662 | sh | d107 | 3 | #000099 | HPGL0662 | -0.1810 | 0.3521 | -0.1810 | 0.3521 | 0.1810 | -0.1465 | -0.6834 | 0.0734 | -0.2050 | 0.0394 | -0.3916 | -0.0726 | -0.1761 |
sh_2189_d107 | HPGL0663 | sh | d107 | 3 | #000099 | HPGL0663 | -0.2677 | 0.3714 | -0.2677 | 0.3714 | 0.3023 | 0.1498 | 0.4275 | -0.1392 | 0.0157 | -0.3998 | -0.2199 | 0.3867 | 0.1819 |
## NULL
## Export the PCA coordinates.  The element is read as $table, the same one that
## is rendered with knitr::kable(); the former $pcatable does not match that
## name, so the CSV was written from NULL.
write.csv(hs_pca_inf_lqcf$table, file="csv/infection_nouninfected_no_batch.csv")
## This shows clean separation by patient
## Therefore we will now add patient as a surrogate variable and minimize it
## Same normalization and PCA for the CDS-only, infected-only data set.
hscds_inf_lqcf <- sm(normalize_expt(hs_cds_inf, filter=TRUE, convert="cpm",
                                    transform="log2", norm="quant"))
hscds_pca_inf_lqcf <- plot_pca(hscds_inf_lqcf)
hscds_pca_inf_lqcf$plot
For the second iteration, use the same normalization, but add a combat correction in an attempt to minimize the patient effect on the variance.
## Here the split is semi chronic/self-healing, but not quite
## NOTE(review): hs_inf_lqcf_cbdonor is not assigned in the visible code;
## presumably it is the combat-corrected experiment described in the prose -- confirm.
hs_pca_inf_lqcf_cbdonor <- plot_pca(hs_inf_lqcf_cbdonor)
hs_pca_inf_lqcf_cbdonor$plot
## There are 2 sh and 1 chr on the right vs. 2 chr and 1 sh on the left
## Print the per-sample PCA table.
knitr::kable(hs_pca_inf_lqcf_cbdonor$table)
sampleid | condition | batch | batch_int | colors | labels | PC1 | PC2 | pc_1 | pc_2 | pc_3 | pc_4 | pc_5 | pc_6 | pc_7 | pc_8 | pc_9 | pc_10 | pc_11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
chr_5430_d108 | HPGL0631 | chr | d108 | 1 | #990000 | HPGL0631 | -0.2462 | 0.0257 | -0.2462 | 0.0257 | 0.1865 | -0.1959 | 0.2256 | 0.6612 | 0.1487 | 0.2002 | 0.4031 | 0.0680 | -0.2548 |
chr_5397_d108 | HPGL0632 | chr | d108 | 1 | #990000 | HPGL0632 | -0.3142 | -0.0198 | -0.3142 | -0.0198 | 0.3953 | 0.3625 | 0.0358 | -0.3706 | -0.3146 | -0.1907 | 0.0704 | 0.0579 | -0.4976 |
sh_1022_d108 | HPGL0635 | sh | d108 | 1 | #000099 | HPGL0635 | 0.5957 | 0.1158 | 0.5957 | 0.1158 | -0.1126 | 0.1306 | -0.1458 | -0.1759 | 0.4008 | 0.3148 | 0.0402 | 0.0302 | -0.4520 |
sh_2189_d108 | HPGL0636 | sh | d108 | 1 | #000099 | HPGL0636 | -0.1502 | -0.1034 | -0.1502 | -0.1034 | -0.5308 | -0.4299 | -0.2312 | 0.0854 | -0.2249 | -0.2088 | -0.3310 | 0.1644 | -0.3541 |
chr_5430_d110 | HPGL0651 | chr | d110 | 2 | #990000 | HPGL0651 | -0.1561 | 0.4030 | -0.1561 | 0.4030 | 0.1621 | -0.3548 | -0.4693 | -0.3074 | 0.0570 | 0.0426 | 0.3254 | 0.2033 | 0.3328 |
chr_5397_d110 | HPGL0652 | chr | d110 | 2 | #990000 | HPGL0652 | -0.0823 | 0.3519 | -0.0823 | 0.3519 | -0.2437 | 0.0900 | 0.6687 | -0.1802 | 0.0272 | 0.1109 | -0.1935 | 0.3764 | 0.2164 |
sh_1022_d110 | HPGL0655 | sh | d110 | 2 | #000099 | HPGL0655 | 0.1949 | -0.4252 | 0.1949 | -0.4252 | 0.1421 | 0.0286 | 0.0532 | 0.0336 | 0.3895 | -0.6257 | 0.0529 | 0.2889 | 0.2085 |
sh_2189_d110 | HPGL0656 | sh | d110 | 2 | #000099 | HPGL0656 | 0.1502 | -0.4115 | 0.1502 | -0.4115 | -0.0459 | 0.3036 | -0.2129 | 0.1555 | -0.4462 | 0.4199 | 0.0421 | 0.3014 | 0.3050 |
chr_5430_d107 | HPGL0658 | chr | d107 | 3 | #990000 | HPGL0658 | 0.0856 | 0.2708 | 0.0856 | 0.2708 | 0.4538 | 0.0315 | -0.1486 | 0.3083 | 0.0120 | -0.0070 | -0.6816 | -0.1784 | 0.1238 |
chr_5397_d107 | HPGL0659 | chr | d107 | 3 | #990000 | HPGL0659 | 0.1471 | 0.3391 | 0.1471 | 0.3391 | -0.3582 | 0.3540 | -0.0513 | 0.2147 | -0.2142 | -0.3696 | 0.2998 | -0.4308 | 0.1401 |
sh_1022_d107 | HPGL0662 | sh | d107 | 3 | #000099 | HPGL0662 | 0.2863 | -0.2508 | 0.2863 | -0.2508 | 0.1643 | -0.4946 | 0.3557 | -0.2555 | -0.2735 | 0.0754 | 0.0827 | -0.4625 | 0.0847 |
sh_2189_d107 | HPGL0663 | sh | d107 | 3 | #000099 | HPGL0663 | -0.5106 | -0.2955 | -0.5106 | -0.2955 | -0.2130 | 0.1745 | -0.0798 | -0.1692 | 0.4382 | 0.2379 | -0.1104 | -0.4188 | 0.1472 |
## NULL
## Collect PCA information for hs_inf against the listed design factors.
hs_inf_pcainfo <- pca_information(
  hs_inf,
  expt_factors = c(
    "condition", "batch", "pathogenstrain",
    "state", "donor", "rnangul"
  ),
  plot_pcas = TRUE
)
## More shallow curves in these plots suggest more genes in this principle component.
Look for significant correlations between the PCs and some factors in the experimental design.
Here we will set the batch to the pathogen strains and the condition to a combination of the patient and the state; then perform the PCA again.
## Condition becomes <state>_<donor>; batch becomes the pathogen strain.
new_condition <- paste0(hs_inf$design$state, "_", hs_inf$design$donor)
hs_inf_strbatch <- set_expt_factors(
  hs_inf,
  condition = new_condition,
  batch = "pathogenstrain"
)
hs_inf_lqcf_cbstr <- sm(
  normalize_expt(hs_inf_strbatch, filter = TRUE, norm = "quant",
                 convert = "cpm", transform = "log2",
                 batch = "combat_scale")
)
## Doing that kind of sucked the variance out of the data, but it did cause the
## samples to split by strain quite strongly
## NOTE(review): hs_inf_lqcf_cbstr_pca is not assigned in the visible code;
## confirm it is created before this table is printed.
knitr::kable(hs_inf_lqcf_cbstr_pca$table)
sampleid | condition | batch | batch_int | colors | labels | PC1 | PC2 | pc_1 | pc_2 | pc_3 | pc_4 | pc_5 | pc_6 | pc_7 | pc_8 | pc_9 | pc_10 | pc_11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
chr_5430_d108 | HPGL0631 | chronic_d108 | s5430 | 4 | #1B9E77 | HPGL0631 | -0.2929 | -0.4076 | -0.2929 | -0.4076 | 0.2051 | -0.0961 | 0.4480 | -0.2779 | 0.3533 | -0.1624 | -0.0594 | 0.2949 | 0.3063 |
chr_5397_d108 | HPGL0632 | chronic_d108 | s5397 | 3 | #1B9E77 | HPGL0632 | -0.2858 | -0.3945 | -0.2858 | -0.3945 | 0.3930 | 0.4189 | -0.3229 | 0.2126 | -0.0661 | -0.0291 | -0.1571 | -0.1147 | -0.3960 |
sh_1022_d108 | HPGL0635 | self_heal_d108 | s1022 | 1 | #D95F02 | HPGL0635 | 0.2191 | -0.3901 | 0.2191 | -0.3901 | -0.1488 | -0.5389 | -0.1717 | 0.2854 | -0.1851 | -0.2584 | 0.3908 | 0.1482 | -0.1315 |
sh_2189_d108 | HPGL0636 | self_heal_d108 | s2189 | 2 | #D95F02 | HPGL0636 | 0.2207 | -0.4311 | 0.2207 | -0.4311 | -0.4510 | 0.1529 | 0.0484 | -0.1778 | -0.1796 | 0.4539 | -0.1637 | -0.3303 | 0.2172 |
chr_5430_d110 | HPGL0651 | chronic_d110 | s5430 | 4 | #7570B3 | HPGL0651 | -0.0516 | 0.2144 | -0.0516 | 0.2144 | -0.3083 | 0.2699 | 0.0529 | 0.5827 | -0.0493 | -0.0029 | -0.2433 | 0.4844 | 0.2480 |
chr_5397_d110 | HPGL0652 | chronic_d110 | s5397 | 3 | #7570B3 | HPGL0652 | -0.0545 | 0.1919 | -0.0545 | 0.1919 | -0.3222 | -0.2961 | -0.2213 | -0.1994 | 0.4899 | 0.0741 | -0.3700 | 0.0605 | -0.4589 |
sh_1022_d110 | HPGL0655 | self_heal_d110 | s1022 | 1 | #E7298A | HPGL0655 | 0.4316 | 0.1721 | 0.4316 | 0.1721 | 0.4199 | 0.0164 | 0.1859 | -0.1394 | -0.1267 | 0.4900 | 0.1234 | 0.3771 | -0.2381 |
sh_2189_d110 | HPGL0656 | self_heal_d110 | s2189 | 2 | #E7298A | HPGL0656 | 0.4457 | 0.1653 | 0.4457 | 0.1653 | 0.1626 | 0.0565 | -0.0388 | -0.2311 | -0.2265 | -0.5927 | -0.4053 | -0.1435 | 0.1366 |
chr_5430_d107 | HPGL0658 | chronic_d107 | s5430 | 4 | #66A61E | HPGL0658 | -0.4045 | 0.2501 | -0.4045 | 0.2501 | 0.0814 | -0.1468 | -0.5294 | -0.3207 | -0.2783 | 0.1437 | 0.1483 | 0.1007 | 0.3860 |
chr_5397_d107 | HPGL0659 | chronic_d107 | s5397 | 3 | #66A61E | HPGL0659 | -0.4070 | 0.2677 | -0.4070 | 0.2677 | -0.1140 | -0.1269 | 0.5427 | 0.0227 | -0.4199 | -0.0567 | 0.0534 | -0.2820 | -0.3055 |
sh_1022_d107 | HPGL0662 | self_heal_d107 | s1022 | 1 | #E6AB02 | HPGL0662 | 0.0958 | 0.1625 | 0.0958 | 0.1625 | -0.2358 | 0.5041 | -0.0136 | -0.1782 | 0.3004 | -0.2265 | 0.6231 | -0.0732 | -0.0654 |
sh_2189_d107 | HPGL0663 | self_heal_d107 | s2189 | 2 | #E6AB02 | HPGL0663 | 0.0833 | 0.1993 | 0.0833 | 0.1993 | 0.3181 | -0.2140 | 0.0198 | 0.4210 | 0.3878 | 0.1670 | 0.0598 | -0.5221 | 0.3014 |
## NULL
Now change only the condition to self/chronic and make super-explicit the split in the samples.
## Switch the condition to the state factor and recolor.  The colors are named
## by state (as chosen_colors is named by condition) so the assignment does not
## depend on the order of the factor levels.
hs_inf_lqcf_cbstrv2 <- set_expt_conditions(hs_inf_lqcf_cbstr, fact="state")
hs_inf_lqcf_cbstrv2 <- set_expt_colors(
  hs_inf_lqcf_cbstrv2,
  colors=c("chronic"="#880000", "self_heal"="#000088"))
## The new colors are a character, changing according to condition.
## Thus 3 runs of chronic on the right and self-healing on the left
## NOTE(review): hs_inf_lqcf_cbstrv2_pca is not assigned in the visible code;
## confirm it is created before this table is printed.
knitr::kable(hs_inf_lqcf_cbstrv2_pca$table)
sampleid | condition | batch | batch_int | colors | labels | PC1 | PC2 | pc_1 | pc_2 | pc_3 | pc_4 | pc_5 | pc_6 | pc_7 | pc_8 | pc_9 | pc_10 | pc_11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
chr_5430_d108 | HPGL0631 | chronic | s5430 | 4 | #880000 | HPGL0631 | -0.2929 | -0.4076 | -0.2929 | -0.4076 | 0.2051 | -0.0961 | 0.4480 | -0.2779 | 0.3533 | -0.1624 | -0.0594 | 0.2949 | 0.3063 |
chr_5397_d108 | HPGL0632 | chronic | s5397 | 3 | #880000 | HPGL0632 | -0.2858 | -0.3945 | -0.2858 | -0.3945 | 0.3930 | 0.4189 | -0.3229 | 0.2126 | -0.0661 | -0.0291 | -0.1571 | -0.1147 | -0.3960 |
sh_1022_d108 | HPGL0635 | self_heal | s1022 | 1 | #000088 | HPGL0635 | 0.2191 | -0.3901 | 0.2191 | -0.3901 | -0.1488 | -0.5389 | -0.1717 | 0.2854 | -0.1851 | -0.2584 | 0.3908 | 0.1482 | -0.1315 |
sh_2189_d108 | HPGL0636 | self_heal | s2189 | 2 | #000088 | HPGL0636 | 0.2207 | -0.4311 | 0.2207 | -0.4311 | -0.4510 | 0.1529 | 0.0484 | -0.1778 | -0.1796 | 0.4539 | -0.1637 | -0.3303 | 0.2172 |
chr_5430_d110 | HPGL0651 | chronic | s5430 | 4 | #880000 | HPGL0651 | -0.0516 | 0.2144 | -0.0516 | 0.2144 | -0.3083 | 0.2699 | 0.0529 | 0.5827 | -0.0493 | -0.0029 | -0.2433 | 0.4844 | 0.2480 |
chr_5397_d110 | HPGL0652 | chronic | s5397 | 3 | #880000 | HPGL0652 | -0.0545 | 0.1919 | -0.0545 | 0.1919 | -0.3222 | -0.2961 | -0.2213 | -0.1994 | 0.4899 | 0.0741 | -0.3700 | 0.0605 | -0.4589 |
sh_1022_d110 | HPGL0655 | self_heal | s1022 | 1 | #000088 | HPGL0655 | 0.4316 | 0.1721 | 0.4316 | 0.1721 | 0.4199 | 0.0164 | 0.1859 | -0.1394 | -0.1267 | 0.4900 | 0.1234 | 0.3771 | -0.2381 |
sh_2189_d110 | HPGL0656 | self_heal | s2189 | 2 | #000088 | HPGL0656 | 0.4457 | 0.1653 | 0.4457 | 0.1653 | 0.1626 | 0.0565 | -0.0388 | -0.2311 | -0.2265 | -0.5927 | -0.4053 | -0.1435 | 0.1366 |
chr_5430_d107 | HPGL0658 | chronic | s5430 | 4 | #880000 | HPGL0658 | -0.4045 | 0.2501 | -0.4045 | 0.2501 | 0.0814 | -0.1468 | -0.5294 | -0.3207 | -0.2783 | 0.1437 | 0.1483 | 0.1007 | 0.3860 |
chr_5397_d107 | HPGL0659 | chronic | s5397 | 3 | #880000 | HPGL0659 | -0.4070 | 0.2677 | -0.4070 | 0.2677 | -0.1140 | -0.1269 | 0.5427 | 0.0227 | -0.4199 | -0.0567 | 0.0534 | -0.2820 | -0.3055 |
sh_1022_d107 | HPGL0662 | self_heal | s1022 | 1 | #000088 | HPGL0662 | 0.0958 | 0.1625 | 0.0958 | 0.1625 | -0.2358 | 0.5041 | -0.0136 | -0.1782 | 0.3004 | -0.2265 | 0.6231 | -0.0732 | -0.0654 |
sh_2189_d107 | HPGL0663 | self_heal | s2189 | 2 | #000088 | HPGL0663 | 0.0833 | 0.1993 | 0.0833 | 0.1993 | 0.3181 | -0.2140 | 0.0198 | 0.4210 | 0.3878 | 0.1670 | 0.0598 | -0.5221 | 0.3014 |
For the next few blocks we will just repeat what we did but include the uninfected samples. Ideally doing so will have ~0 effect on the positions of the sample types.
In this first example, we see why the uninfected samples were initially removed from the analyses I think.
## All infection samples, uninfected included; no batch correction.
hs_uninf_lqcf <- sm(
  normalize_expt(hs_uninf, norm = "quant", transform = "log2",
                 convert = "cpm", filter = TRUE)
)
hs_uninf_lqcf_pca <- plot_pca(hs_uninf_lqcf)
hs_uninf_lqcf_pca$plot
## In this case, the uninfected samples cause the p107/p108 samples to smoosh together
knitr::kable(hs_uninf_lqcf_pca$table)
sampleid | condition | batch | batch_int | colors | labels | PC1 | PC2 | pc_1 | pc_2 | pc_3 | pc_4 | pc_5 | pc_6 | pc_7 | pc_8 | pc_9 | pc_10 | pc_11 | pc_12 | pc_13 | pc_14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uninf_d108 | HPGL0630 | uninf | d108 | 1 | #009900 | HPGL0630 | -0.4808 | -0.0581 | -0.4808 | -0.0581 | 0.2830 | -0.0822 | 0.0972 | 0.4322 | 0.0573 | -0.2898 | 0.3568 | -0.3745 | 0.0606 | -0.0351 | 0.2364 | -0.0068 |
chr_5430_d108 | HPGL0631 | chr | d108 | 1 | #990000 | HPGL0631 | 0.1268 | -0.1719 | 0.1268 | -0.1719 | 0.3658 | -0.1102 | -0.2859 | -0.3985 | -0.1112 | 0.2565 | -0.1933 | -0.4740 | 0.0317 | -0.3750 | 0.1198 | -0.0709 |
chr_5397_d108 | HPGL0632 | chr | d108 | 1 | #990000 | HPGL0632 | 0.1225 | -0.1421 | 0.1225 | -0.1421 | 0.3544 | -0.0270 | -0.0724 | -0.2746 | 0.2023 | -0.4254 | 0.2835 | 0.5003 | -0.2771 | -0.2076 | -0.1358 | -0.0188 |
sh_1022_d108 | HPGL0635 | sh | d108 | 1 | #000099 | HPGL0635 | 0.1005 | -0.0565 | 0.1005 | -0.0565 | 0.2971 | -0.1041 | 0.0258 | 0.2465 | -0.6886 | -0.0083 | -0.1080 | 0.2358 | 0.3126 | 0.1501 | -0.3074 | 0.0573 |
sh_2189_d108 | HPGL0636 | sh | d108 | 1 | #000099 | HPGL0636 | 0.1783 | -0.0981 | 0.1783 | -0.0981 | 0.3118 | -0.2882 | 0.3135 | 0.1240 | 0.3946 | 0.3075 | -0.2697 | 0.0682 | -0.1679 | 0.4715 | 0.1377 | 0.0294 |
uninf_d110 | HPGL0650 | uninf | d110 | 2 | #009900 | HPGL0650 | -0.5435 | 0.1390 | -0.5435 | 0.1390 | 0.1209 | 0.5233 | 0.1572 | -0.2518 | 0.0373 | 0.4247 | 0.0179 | 0.2065 | 0.0513 | 0.0206 | -0.1187 | -0.0142 |
chr_5430_d110 | HPGL0651 | chr | d110 | 2 | #990000 | HPGL0651 | 0.1728 | 0.3580 | 0.1728 | 0.3580 | -0.0067 | 0.1093 | -0.3180 | 0.1165 | 0.3331 | -0.0486 | -0.0285 | -0.0627 | 0.3014 | -0.0272 | -0.1382 | 0.6479 |
chr_5397_d110 | HPGL0652 | chr | d110 | 2 | #990000 | HPGL0652 | 0.1877 | 0.3739 | 0.1877 | 0.3739 | -0.0144 | 0.3186 | -0.0344 | 0.2669 | -0.1068 | -0.1563 | -0.3637 | 0.1722 | -0.1489 | -0.2022 | 0.5024 | -0.2666 |
sh_1022_d110 | HPGL0655 | sh | d110 | 2 | #000099 | HPGL0655 | 0.0426 | 0.4316 | 0.0426 | 0.4316 | -0.2017 | -0.2693 | 0.0019 | -0.2458 | -0.3528 | 0.1399 | 0.4062 | -0.0744 | -0.3944 | 0.2110 | 0.1706 | 0.1670 |
sh_2189_d110 | HPGL0656 | sh | d110 | 2 | #000099 | HPGL0656 | 0.0658 | 0.4049 | 0.0658 | 0.4049 | -0.1323 | -0.1789 | 0.2128 | -0.1242 | 0.1989 | -0.1803 | -0.0356 | -0.2178 | 0.2292 | -0.0048 | -0.4484 | -0.5296 |
uninf_d107 | HPGL0657 | uninf | d107 | 3 | #009900 | HPGL0657 | -0.5041 | -0.1422 | -0.5041 | -0.1422 | -0.3566 | -0.3726 | -0.2693 | -0.0360 | -0.0003 | -0.1617 | -0.4815 | 0.1429 | -0.1671 | -0.0184 | -0.0709 | 0.0862 |
chr_5430_d107 | HPGL0658 | chr | d107 | 3 | #990000 | HPGL0658 | 0.1272 | -0.3002 | 0.1272 | -0.3002 | -0.1712 | 0.3419 | -0.4993 | -0.0108 | 0.0299 | -0.0769 | 0.1489 | -0.1223 | 0.0134 | 0.5546 | 0.0025 | -0.2823 |
chr_5397_d107 | HPGL0659 | chr | d107 | 3 | #990000 | HPGL0659 | 0.1868 | -0.2754 | 0.1868 | -0.2754 | -0.2282 | 0.3199 | 0.3630 | 0.1632 | -0.0839 | -0.0618 | -0.0887 | -0.3077 | -0.4432 | -0.1728 | -0.3479 | 0.2215 |
sh_1022_d107 | HPGL0662 | sh | d107 | 3 | #000099 | HPGL0662 | 0.1232 | -0.1913 | 0.1232 | -0.1913 | -0.3135 | -0.1911 | -0.1024 | 0.3503 | 0.1279 | 0.4868 | 0.3290 | 0.2485 | 0.1181 | -0.3814 | 0.0029 | -0.1753 |
sh_2189_d107 | HPGL0663 | sh | d107 | 3 | #000099 | HPGL0663 | 0.0944 | -0.2716 | 0.0944 | -0.2716 | -0.3084 | 0.0105 | 0.4103 | -0.3578 | -0.0378 | -0.2063 | 0.0267 | 0.0591 | 0.4800 | 0.0168 | 0.3951 | 0.1552 |
## NULL
For the second iteration, use the same normalization, but add a combat correction in an attempt to minimize the patient effect on the variance.
## Here the split is semi chronic/self-healing, but not quite
## NOTE(review): hs_uninf_lqcf_cbdonor is not assigned in the visible code;
## presumably it is the combat-corrected experiment described in the prose -- confirm.
hs_uninf_lqcf_cbdonor_pca <- plot_pca(hs_uninf_lqcf_cbdonor)
hs_uninf_lqcf_cbdonor_pca$plot
## Now we have weak separation between strains, I thought for a moment it might
## be few snps vs. many but that is not true.
## There are 2 sh and 1 chr on the right vs. 2 chr and 1 sh on the left
## Print the per-sample PCA table.
knitr::kable(hs_uninf_lqcf_cbdonor_pca$table)
sampleid | condition | batch | batch_int | colors | labels | PC1 | PC2 | pc_1 | pc_2 | pc_3 | pc_4 | pc_5 | pc_6 | pc_7 | pc_8 | pc_9 | pc_10 | pc_11 | pc_12 | pc_13 | pc_14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uninf_d108 | HPGL0630 | uninf | d108 | 1 | #009900 | HPGL0630 | -0.4055 | 0.3147 | -0.4055 | 0.3147 | -0.2762 | 0.3615 | 0.4048 | 0.2448 | 0.1367 | 0.0012 | -0.0270 | 0.1592 | -0.2696 | 0.0276 | -0.0815 | 0.3381 |
chr_5430_d108 | HPGL0631 | chr | d108 | 1 | #990000 | HPGL0631 | 0.0373 | -0.2855 | 0.0373 | -0.2855 | 0.1307 | -0.3376 | -0.1720 | 0.0654 | -0.1626 | 0.0795 | 0.0987 | 0.3392 | -0.6808 | 0.0207 | 0.1988 | 0.1566 |
chr_5397_d108 | HPGL0632 | chr | d108 | 1 | #990000 | HPGL0632 | 0.0357 | -0.2956 | 0.0357 | -0.2956 | 0.1509 | -0.1810 | 0.3956 | -0.2690 | -0.1826 | 0.1314 | -0.2963 | -0.0154 | 0.2031 | 0.4518 | -0.3284 | 0.2616 |
sh_1022_d108 | HPGL0635 | sh | d108 | 1 | #000099 | HPGL0635 | 0.2320 | 0.2811 | 0.2320 | 0.2811 | -0.2131 | -0.0786 | -0.3089 | -0.1918 | -0.0024 | -0.1067 | 0.5664 | 0.2220 | 0.2487 | 0.1796 | -0.2064 | 0.3138 |
sh_2189_d108 | HPGL0636 | sh | d108 | 1 | #000099 | HPGL0636 | 0.0460 | -0.2250 | 0.0460 | -0.2250 | 0.1745 | 0.2832 | -0.4177 | 0.4813 | -0.1783 | -0.2195 | -0.1772 | -0.1673 | 0.0534 | -0.1394 | -0.4245 | 0.1480 |
uninf_d110 | HPGL0650 | uninf | d110 | 2 | #009900 | HPGL0650 | -0.5967 | -0.1876 | -0.5967 | -0.1876 | -0.3528 | -0.1122 | -0.2258 | -0.0663 | -0.1406 | 0.1834 | 0.0931 | 0.0257 | 0.1080 | 0.0864 | -0.1530 | -0.4944 |
chr_5430_d110 | HPGL0651 | chr | d110 | 2 | #990000 | HPGL0651 | 0.1846 | -0.1379 | 0.1846 | -0.1379 | 0.0403 | -0.1131 | 0.3682 | 0.4529 | 0.1178 | -0.4192 | 0.2550 | 0.0662 | 0.1445 | 0.2599 | 0.1765 | -0.3828 |
chr_5397_d110 | HPGL0652 | chr | d110 | 2 | #990000 | HPGL0652 | 0.2104 | -0.1496 | 0.2104 | -0.1496 | 0.0771 | 0.0995 | -0.0432 | -0.1570 | 0.7703 | 0.2134 | -0.0883 | 0.1705 | -0.0861 | -0.1493 | -0.2763 | -0.2062 |
sh_1022_d110 | HPGL0655 | sh | d110 | 2 | #000099 | HPGL0655 | 0.1474 | 0.3690 | 0.1474 | 0.3690 | 0.2176 | 0.0457 | 0.0792 | -0.0813 | -0.1376 | 0.2117 | 0.2250 | -0.5998 | -0.3671 | 0.1195 | -0.1566 | -0.2559 |
sh_2189_d110 | HPGL0656 | sh | d110 | 2 | #000099 | HPGL0656 | 0.1403 | 0.3207 | 0.1403 | 0.3207 | 0.1936 | 0.2314 | 0.0632 | -0.2624 | -0.3857 | -0.1588 | -0.2646 | 0.4844 | 0.0429 | -0.2207 | -0.0015 | -0.3431 |
uninf_d107 | HPGL0657 | uninf | d107 | 3 | #009900 | HPGL0657 | -0.4669 | 0.2231 | -0.4669 | 0.2231 | 0.4933 | -0.2653 | -0.1629 | -0.1522 | 0.2713 | -0.3209 | -0.0893 | -0.1390 | 0.1478 | -0.0328 | 0.2283 | 0.1523 |
chr_5430_d107 | HPGL0658 | chr | d107 | 3 | #990000 | HPGL0658 | 0.1330 | -0.0232 | 0.1330 | -0.0232 | -0.3752 | -0.4353 | 0.2401 | -0.0628 | -0.0646 | -0.1636 | -0.0829 | -0.2441 | 0.0353 | -0.6396 | -0.0771 | 0.0977 |
chr_5397_d107 | HPGL0659 | chr | d107 | 3 | #990000 | HPGL0659 | 0.1876 | -0.1059 | 0.1876 | -0.1059 | -0.4099 | 0.2759 | -0.2346 | -0.2685 | 0.0701 | -0.2620 | -0.3269 | -0.2547 | -0.1144 | 0.2846 | 0.4192 | 0.0248 |
sh_1022_d107 | HPGL0662 | sh | d107 | 3 | #000099 | HPGL0662 | 0.1663 | 0.2829 | 0.1663 | 0.2829 | -0.0433 | -0.1901 | -0.1382 | 0.4078 | -0.0008 | 0.5716 | -0.2740 | 0.0422 | 0.3027 | 0.0502 | 0.3197 | 0.0481 |
sh_2189_d107 | HPGL0663 | sh | d107 | 3 | #000099 | HPGL0663 | -0.0516 | -0.3812 | -0.0516 | -0.3812 | 0.1925 | 0.4162 | 0.1522 | -0.1410 | -0.1110 | 0.2586 | 0.3881 | -0.0892 | 0.2315 | -0.2984 | 0.3630 | 0.1413 |
## NULL
Including the uninfected samples and changing the condition should not matter much.
## Here we will set the batch to the pathogen strains and the condition to a
## combination of the patient and the state; then perform the pca.
new_condition <- paste0(hs_uninf$design$state, "_", hs_uninf$design$donor)
hs_uninfv2 <- set_expt_factors(
  hs_uninf,
  batch = "pathogenstrain",
  condition = new_condition
)
hs_uninfv2_lqcf_cbstr <- sm(
  normalize_expt(hs_uninfv2, filter = TRUE, norm = "quant",
                 convert = "cpm", transform = "log2",
                 batch = "combat_scale")
)
## This is a surprise to me, I would have expected the uninfected to still push
## the other samples off to a side or somesuch.
## NOTE(review): hs_uninfv2_lqcf_cbstr_pca is not assigned in the visible code;
## confirm it is created before this table is printed.
knitr::kable(hs_uninfv2_lqcf_cbstr_pca$table)
sampleid | condition | batch | batch_int | colors | labels | PC1 | PC2 | pc_1 | pc_2 | pc_3 | pc_4 | pc_5 | pc_6 | pc_7 | pc_8 | pc_9 | pc_10 | pc_11 | pc_12 | pc_13 | pc_14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uninf_d108 | HPGL0630 | uninfected_d108 | none | 1 | #1B9E77 | HPGL0630 | -0.3977 | -0.2732 | -0.3977 | -0.2732 | 0.0594 | 0.2258 | 0.1614 | -0.0158 | -0.5708 | 0.0970 | 0.3152 | 0.2646 | 0.2362 | -0.2071 | 0.1282 | 0.0006 |
chr_5430_d108 | HPGL0631 | chronic_d108 | s5430 | 5 | #C16610 | HPGL0631 | 0.2767 | -0.3763 | 0.2767 | -0.3763 | 0.1145 | -0.2778 | -0.4681 | -0.0498 | 0.1920 | 0.0731 | 0.3974 | 0.0295 | 0.1246 | -0.1213 | -0.1097 | 0.4000 |
chr_5397_d108 | HPGL0632 | chronic_d108 | s5397 | 4 | #C16610 | HPGL0632 | 0.2734 | -0.3742 | 0.2734 | -0.3742 | 0.1964 | -0.4194 | 0.4514 | 0.2620 | -0.0190 | -0.1366 | -0.0976 | -0.1095 | 0.0584 | -0.0684 | -0.0886 | -0.4186 |
sh_1022_d108 | HPGL0635 | self_heal_d108 | s1022 | 2 | #8D6B86 | HPGL0635 | -0.0432 | -0.3499 | -0.0432 | -0.3499 | -0.1155 | 0.1463 | -0.0384 | -0.5311 | -0.1196 | -0.1886 | -0.1741 | -0.3595 | -0.5260 | -0.0620 | 0.0283 | 0.0017 |
sh_2189_d108 | HPGL0636 | self_heal_d108 | s2189 | 3 | #8D6B86 | HPGL0636 | -0.0424 | -0.3784 | -0.0424 | -0.3784 | 0.1347 | 0.3986 | -0.1055 | 0.1645 | 0.2615 | 0.1865 | -0.4337 | 0.2548 | 0.0972 | 0.4529 | 0.0468 | 0.0143 |
uninf_d110 | HPGL0650 | uninfected_d110 | none | 1 | #BC4399 | HPGL0650 | -0.4743 | -0.0555 | -0.4743 | -0.0555 | -0.6450 | -0.1173 | -0.0069 | 0.2411 | 0.3250 | -0.0268 | -0.0421 | -0.1557 | 0.1342 | -0.1698 | -0.1949 | -0.0103 |
chr_5430_d110 | HPGL0651 | chronic_d110 | s5430 | 5 | #A66753 | HPGL0651 | 0.1216 | 0.2363 | 0.1216 | 0.2363 | 0.2013 | 0.2959 | 0.0426 | 0.2285 | -0.1661 | -0.4636 | -0.2366 | -0.1518 | 0.1540 | -0.1936 | -0.3715 | 0.3999 |
chr_5397_d110 | HPGL0652 | chronic_d110 | s5397 | 4 | #A66753 | HPGL0652 | 0.1230 | 0.2186 | 0.1230 | 0.2186 | 0.0309 | 0.2709 | 0.0826 | -0.4585 | 0.3342 | -0.0778 | 0.2602 | 0.2489 | 0.1789 | -0.0557 | -0.3485 | -0.4185 |
sh_1022_d110 | HPGL0655 | self_heal_d110 | s1022 | 2 | #96A713 | HPGL0655 | -0.1764 | 0.2426 | -0.1764 | 0.2426 | 0.1340 | -0.3530 | -0.1543 | 0.0403 | -0.1479 | 0.2059 | -0.2395 | 0.4772 | -0.4686 | -0.1053 | -0.3085 | -0.0099 |
sh_2189_d110 | HPGL0656 | self_heal_d110 | s2189 | 3 | #96A713 | HPGL0656 | -0.1843 | 0.2217 | -0.1843 | 0.2217 | 0.1698 | -0.0989 | 0.0436 | 0.0025 | -0.1623 | 0.2793 | 0.2598 | -0.4807 | 0.0207 | 0.5857 | -0.2513 | 0.0098 |
uninf_d107 | HPGL0657 | uninfected_d107 | none | 1 | #D59D08 | HPGL0657 | -0.3127 | 0.2392 | -0.3127 | 0.2392 | 0.4254 | -0.2098 | -0.0729 | -0.2135 | 0.2548 | -0.0023 | -0.2297 | -0.1782 | 0.2560 | -0.2288 | 0.4849 | -0.0014 |
chr_5430_d107 | HPGL0658 | chronic_d107 | s5430 | 5 | #9D7426 | HPGL0658 | 0.3613 | 0.1646 | 0.3613 | 0.1646 | -0.3089 | -0.0443 | 0.4910 | -0.2072 | -0.0062 | 0.4147 | -0.1336 | 0.0727 | 0.0781 | -0.0375 | 0.1597 | 0.4075 |
chr_5397_d107 | HPGL0659 | chronic_d107 | s5397 | 4 | #9D7426 | HPGL0659 | 0.3638 | 0.1841 | 0.3638 | 0.1841 | -0.2330 | 0.1358 | -0.4841 | 0.1654 | -0.3187 | 0.2230 | -0.1400 | -0.1832 | 0.1213 | -0.1518 | 0.1675 | -0.4031 |
sh_1022_d107 | HPGL0662 | self_heal_d107 | s1022 | 2 | #666666 | HPGL0662 | 0.0509 | 0.1493 | 0.0509 | 0.1493 | 0.0980 | 0.2892 | 0.1331 | 0.4228 | 0.2770 | -0.0073 | 0.4084 | -0.0190 | -0.5072 | -0.1039 | 0.3267 | -0.0001 |
sh_2189_d107 | HPGL0663 | self_heal_d107 | s2189 | 3 | #666666 | HPGL0663 | 0.0603 | 0.1511 | 0.0603 | 0.1511 | -0.2620 | -0.2420 | -0.0752 | -0.0512 | -0.1339 | -0.5765 | 0.0861 | 0.2899 | 0.0422 | 0.4665 | 0.3310 | 0.0279 |
## NULL
## Switch the condition to the state factor and recolor.  The colors are named
## by state (as chosen_colors is named by condition) so the assignment does not
## depend on the order of the factor levels.
hs_uninfv2_lqcf_cbstr <- set_expt_conditions(hs_uninfv2_lqcf_cbstr, fact="state")
hs_uninfv2_lqcf_cbstr <- set_expt_colors(
  hs_uninfv2_lqcf_cbstr,
  colors=c("chronic"="#880000", "self_heal"="#000088", "uninfected"="#008800"))
## The new colors are a character, changing according to condition.
sampleid | condition | batch | batch_int | colors | labels | PC1 | PC2 | pc_1 | pc_2 | pc_3 | pc_4 | pc_5 | pc_6 | pc_7 | pc_8 | pc_9 | pc_10 | pc_11 | pc_12 | pc_13 | pc_14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
uninf_d108 | HPGL0630 | uninfected | none | 1 | #008800 | HPGL0630 | -0.3977 | -0.2732 | -0.3977 | -0.2732 | 0.0594 | 0.2258 | 0.1614 | -0.0158 | -0.5708 | 0.0970 | 0.3152 | 0.2646 | 0.2362 | -0.2071 | 0.1282 | 0.0006 |
chr_5430_d108 | HPGL0631 | chronic | s5430 | 5 | #880000 | HPGL0631 | 0.2767 | -0.3763 | 0.2767 | -0.3763 | 0.1145 | -0.2778 | -0.4681 | -0.0498 | 0.1920 | 0.0731 | 0.3974 | 0.0295 | 0.1246 | -0.1213 | -0.1097 | 0.4000 |
chr_5397_d108 | HPGL0632 | chronic | s5397 | 4 | #880000 | HPGL0632 | 0.2734 | -0.3742 | 0.2734 | -0.3742 | 0.1964 | -0.4194 | 0.4514 | 0.2620 | -0.0190 | -0.1366 | -0.0976 | -0.1095 | 0.0584 | -0.0684 | -0.0886 | -0.4186 |
sh_1022_d108 | HPGL0635 | self_heal | s1022 | 2 | #000088 | HPGL0635 | -0.0432 | -0.3499 | -0.0432 | -0.3499 | -0.1155 | 0.1463 | -0.0384 | -0.5311 | -0.1196 | -0.1886 | -0.1741 | -0.3595 | -0.5260 | -0.0620 | 0.0283 | 0.0017 |
sh_2189_d108 | HPGL0636 | self_heal | s2189 | 3 | #000088 | HPGL0636 | -0.0424 | -0.3784 | -0.0424 | -0.3784 | 0.1347 | 0.3986 | -0.1055 | 0.1645 | 0.2615 | 0.1865 | -0.4337 | 0.2548 | 0.0972 | 0.4529 | 0.0468 | 0.0143 |
uninf_d110 | HPGL0650 | uninfected | none | 1 | #008800 | HPGL0650 | -0.4743 | -0.0555 | -0.4743 | -0.0555 | -0.6450 | -0.1173 | -0.0069 | 0.2411 | 0.3250 | -0.0268 | -0.0421 | -0.1557 | 0.1342 | -0.1698 | -0.1949 | -0.0103 |
chr_5430_d110 | HPGL0651 | chronic | s5430 | 5 | #880000 | HPGL0651 | 0.1216 | 0.2363 | 0.1216 | 0.2363 | 0.2013 | 0.2959 | 0.0426 | 0.2285 | -0.1661 | -0.4636 | -0.2366 | -0.1518 | 0.1540 | -0.1936 | -0.3715 | 0.3999 |
chr_5397_d110 | HPGL0652 | chronic | s5397 | 4 | #880000 | HPGL0652 | 0.1230 | 0.2186 | 0.1230 | 0.2186 | 0.0309 | 0.2709 | 0.0826 | -0.4585 | 0.3342 | -0.0778 | 0.2602 | 0.2489 | 0.1789 | -0.0557 | -0.3485 | -0.4185 |
sh_1022_d110 | HPGL0655 | self_heal | s1022 | 2 | #000088 | HPGL0655 | -0.1764 | 0.2426 | -0.1764 | 0.2426 | 0.1340 | -0.3530 | -0.1543 | 0.0403 | -0.1479 | 0.2059 | -0.2395 | 0.4772 | -0.4686 | -0.1053 | -0.3085 | -0.0099 |
sh_2189_d110 | HPGL0656 | self_heal | s2189 | 3 | #000088 | HPGL0656 | -0.1843 | 0.2217 | -0.1843 | 0.2217 | 0.1698 | -0.0989 | 0.0436 | 0.0025 | -0.1623 | 0.2793 | 0.2598 | -0.4807 | 0.0207 | 0.5857 | -0.2513 | 0.0098 |
uninf_d107 | HPGL0657 | uninfected | none | 1 | #008800 | HPGL0657 | -0.3127 | 0.2392 | -0.3127 | 0.2392 | 0.4254 | -0.2098 | -0.0729 | -0.2135 | 0.2548 | -0.0023 | -0.2297 | -0.1782 | 0.2560 | -0.2288 | 0.4849 | -0.0014 |
chr_5430_d107 | HPGL0658 | chronic | s5430 | 5 | #880000 | HPGL0658 | 0.3613 | 0.1646 | 0.3613 | 0.1646 | -0.3089 | -0.0443 | 0.4910 | -0.2072 | -0.0062 | 0.4147 | -0.1336 | 0.0727 | 0.0781 | -0.0375 | 0.1597 | 0.4075 |
chr_5397_d107 | HPGL0659 | chronic | s5397 | 4 | #880000 | HPGL0659 | 0.3638 | 0.1841 | 0.3638 | 0.1841 | -0.2330 | 0.1358 | -0.4841 | 0.1654 | -0.3187 | 0.2230 | -0.1400 | -0.1832 | 0.1213 | -0.1518 | 0.1675 | -0.4031 |
sh_1022_d107 | HPGL0662 | self_heal | s1022 | 2 | #000088 | HPGL0662 | 0.0509 | 0.1493 | 0.0509 | 0.1493 | 0.0980 | 0.2892 | 0.1331 | 0.4228 | 0.2770 | -0.0073 | 0.4084 | -0.0190 | -0.5072 | -0.1039 | 0.3267 | -0.0001 |
sh_2189_d107 | HPGL0663 | self_heal | s2189 | 3 | #000088 | HPGL0663 | 0.0603 | 0.1511 | 0.0603 | 0.1511 | -0.2620 | -0.2420 | -0.0752 | -0.0512 | -0.1339 | -0.5765 | 0.0861 | 0.2899 | 0.0422 | 0.4665 | 0.3310 | 0.0279 |
As per a conversation with Maria Adelaida on Skype, let's remove all samples except those for one patient, then see if some aspect of the data jumps out (strain:strain variation, for example).
## There were 12, now there are 4 samples.
## Use the infection state as the batch factor for the single-patient subset,
## then run the log2/quantile/cpm normalization with filtering and plot the PCA.
single_patient <- set_expt_batches(single_patient, fact = "state")
single_norm <- sm(
  normalize_expt(
    single_patient,
    filter = TRUE,
    convert = "cpm",
    norm = "quant",
    transform = "log2"))
single_norm_pca <- plot_pca(single_norm)
single_norm_pca[["plot"]]
## There were 12, now there are 4 samples.
## Use the infection state as the batch factor for the single-patient subset,
## then run the log2/quantile/cpm normalization with filtering and plot the PCA.
single_patient <- set_expt_batches(single_patient, fact = "state")
single_norm <- sm(
  normalize_expt(
    single_patient,
    filter = TRUE,
    convert = "cpm",
    norm = "quant",
    transform = "log2"))
single_norm_pca <- plot_pca(single_norm)
single_norm_pca[["plot"]]
## There were 12, now there are 4 samples.
## Use the infection state as the batch factor for the single-patient subset,
## then run the log2/quantile/cpm normalization with filtering and plot the PCA.
single_patient <- set_expt_batches(single_patient, fact = "state")
single_norm <- sm(
  normalize_expt(
    single_patient,
    filter = TRUE,
    convert = "cpm",
    norm = "quant",
    transform = "log2"))
single_norm_pca <- plot_pca(single_norm)
single_norm_pca[["plot"]]
sampleid | condition | batch | batch_int | colors | labels | PC1 | PC2 | pc_1 | pc_2 | pc_3 | |
---|---|---|---|---|---|---|---|---|---|---|---|
chr_5430_d110 | HPGL0651 | chr | chronic | 1 | #990000 | HPGL0651 | -0.4139 | 0.3802 | -0.4139 | 0.3802 | -0.6589 |
chr_5397_d110 | HPGL0652 | chr | chronic | 1 | #990000 | HPGL0652 | -0.5581 | -0.4864 | -0.5581 | -0.4864 | 0.4493 |
sh_1022_d110 | HPGL0655 | sh | self_heal | 2 | #000099 | HPGL0655 | 0.6357 | -0.5006 | 0.6357 | -0.5006 | -0.3088 |
sh_2189_d110 | HPGL0656 | sh | self_heal | 2 | #000099 | HPGL0656 | 0.3363 | 0.6068 | 0.3363 | 0.6068 | 0.5183 |
In our previous discussion, Hector suggested that sample ‘HPGL0635’ is sufficiently dissimilar to its cohort samples that it might actually be a member of strain ‘2504’ rather than ‘1022’. Let us look and see what happens if that is changed.
I am going to leave out the uninfected samples to avoid the confusion they generate.
## Baseline for the comparison: normalize and plot the infected samples with
## their original condition assignments.
hs_lqcf_noswitch <- sm(
  normalize_expt(
    hs_inf,
    filter = TRUE,
    convert = "cpm",
    norm = "quant",
    transform = "log2"))
plot_pca(hs_lqcf_noswitch)[["plot"]]
## For the record: sample 635 was originally drawn in orange, the color used
## for strain '1022'.
##switch_one <- set_expt_condition(no_uninfected, ids=c("sHPGL0635"), fact="ch2504")
## Reassign sample sh_1022_d108 to the 'chr' condition and color that one
## sample pink so it is easy to pick out.
switcher <- list("sh_1022_d108" = "pink")
switch_one <- set_expt_conditions(
  hs_inf,
  ids = c("sh_1022_d108"),
  fact = "chr")
switch_one <- set_expt_colors(
  expt = switch_one,
  colors = switcher,
  change_by = "sample")
## The new colors are a list, changing according to sampleID.
## Normalize the relabeled experiment. Unlike the baseline normalization, this
## call also requests the 'combat_scale' batch method.
lqcf_switch <- sm(
  normalize_expt(
    switch_one,
    filter = TRUE,
    convert = "cpm",
    norm = "quant",
    transform = "log2",
    batch = "combat_scale"))
##plot_pca(lqcf_switch)$plot
## Note that now it is pink, matching 'ch2504'
I may be biased, but I think this suggests that the samples were not switched.
One query was to see if there is a reversal of two samples.
## Build a per-sample label of the form <state>_<donor>.
combined_condition <- paste0(
  hs_inf[["design"]][["state"]],
  "_",
  hs_inf[["design"]][["donor"]])
## with_uninfected_combined <- set_expt_factors(with_uninfected,
##                                              batch="pathogenstrain",
##                                              condition=combined_condition)
## Condition is the infection state, batch is the donor.
hs_inf_combined <- set_expt_factors(
  hs_inf,
  condition = "state",
  batch = "donor")
## Peek at the first rows of the filtered, cpm-converted expression values.
head(exprs(normalize_expt(hs_inf, filter = TRUE, convert = "cpm")))
## This function will replace the expt$expressionset slot with:
## cpm(hpgl(data))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: hpgl
## Removing 37656 low-count genes (13385 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
## chr_5430_d108 chr_5397_d108 sh_1022_d108 sh_2189_d108
## ENSG00000000419 19.93 19.095 17.839 17.857
## ENSG00000000457 25.97 30.211 23.498 28.006
## ENSG00000000460 13.89 11.027 9.207 8.197
## ENSG00000000938 531.27 522.552 402.052 435.895
## ENSG00000000971 4.43 4.931 4.700 3.415
## ENSG00000001036 38.86 37.203 40.378 44.009
## chr_5430_d110 chr_5397_d110 sh_1022_d110 sh_2189_d110
## ENSG00000000419 16.117 17.215 12.681 15.250
## ENSG00000000457 18.314 20.265 17.558 18.094
## ENSG00000000460 7.265 7.117 4.633 4.459
## ENSG00000000938 618.721 552.993 477.564 452.209
## ENSG00000000971 2.564 1.762 1.544 2.973
## ENSG00000001036 46.030 39.311 35.604 35.929
## chr_5430_d107 chr_5397_d107 sh_1022_d107 sh_2189_d107
## ENSG00000000419 16.318 14.829 14.626 15.874
## ENSG00000000457 27.501 26.048 24.619 27.976
## ENSG00000000460 9.928 8.473 7.449 6.905
## ENSG00000000938 461.352 332.347 391.544 298.729
## ENSG00000000971 4.222 7.846 3.725 3.808
## ENSG00000001036 31.723 23.616 30.524 21.000
## Filter, cpm-convert and apply the 'pca' batch method to the state/donor
## experiment, then give the conditions their colors.
combined_pca1 <- sm(
  normalize_expt(
    hs_inf_combined,
    convert = "cpm",
    batch = "pca",
    filter = TRUE))
combined_pca1 <- set_expt_colors(
  combined_pca1,
  colors = c("#880000", "#000088"))
## The new colors are a character, changing according to condition.
## Plot the batch-adjusted data after the usual log2/quantile/cpm normalization.
plot_pca(
  sm(
    normalize_expt(
      combined_pca1,
      filter = TRUE,
      convert = "cpm",
      norm = "quant",
      transform = "log2")))[["plot"]]
## Same data, but now with the pathogen strain acting as the batch factor.
combined_pca2 <- set_expt_factors(
  combined_pca1,
  condition = "state",
  batch = "pathogenstrain")
combined_pca2 <- set_expt_colors(
  combined_pca2,
  colors = c("#880000", "#000088"))
## The new colors are a character, changing according to condition.
## Apply the 'pca' batch method again, this time with strain as the batch,
## then normalize and plot.
combined_pca3 <- sm(
  normalize_expt(
    combined_pca2,
    batch = "pca",
    filter = TRUE))
l2cq_combined_pca3 <- sm(
  normalize_expt(
    combined_pca3,
    filter = TRUE,
    convert = "cpm",
    norm = "quant",
    transform = "log2"))
plot_pca(l2cq_combined_pca3)[["plot"]]
## Variance partition over condition, pathogen strain and donor.
donor_strain_varpart <- sm(
  varpart(
    expt = hs_inf,
    predictor = NULL,
    factors = c("condition", "pathogenstrain", "donor")))
donor_strain_varpart[["percent_plot"]]
## Going to write the image to: images/varpart_donor_strain.png when dev.off() is called.
## png
## 2
## Going to write the image to: images/varpart_donor_strain_pct.png when dev.off() is called.
## png
## 2
The experimental design does not fully support interaction models, but I want to see how it looks.
## Exploratory limma/voom comparison of chronic vs. self-healing samples with
## donor and pathogen strain as additional model terms.
##
## voom() estimates its mean-variance trend from counts and performs the
## quantile normalization itself (normalize.method below), so it is handed the
## low-count-filtered counts instead of values that were already cpm-converted
## and quantile-normalized.
test_data <- sm(normalize_expt(hs_inf_combined, filter = TRUE))
## Kept for reference only: this string is not what is passed to model.matrix().
query_model_string <- "~ condition:pathogenstrain + donor"
query_design <- hs_inf_combined[["design"]]
query_conditions <- as.factor(query_design[["condition"]])
##query_batches <- as.factor(query_design[["anotherbatch"]])
## Read the batch labels from the design so there is exactly one per sample;
## the previous hard-coded vector had 18 entries for a 12-sample experiment.
## NOTE(review): assumes the design carries a 'batch' column -- confirm.
query_batches <- as.factor(query_design[["batch"]])
query_strains <- as.factor(query_design[["pathogenstrain"]])
query_donors <- as.factor(query_design[["donor"]])
data_mtrx <- as.data.frame(exprs(test_data))
## NOTE(review): in these samples each strain appears under a single condition,
## so presumably not every strain coefficient is estimable next to condition --
## confirm that limma's handling of this is what is wanted.
query_model <- model.matrix(
  ~ 0 + query_conditions + query_donors + query_strains,
  data = query_design)
combined_voom <- limma::voom(
  counts = data_mtrx,
  design = query_model,
  normalize.method = "quantile")
combined_fit <- limma::lmFit(combined_voom, query_model, robust = TRUE)
## Single contrast of interest: chronic minus self-healing.
combined_contrast <- limma::makeContrasts(
  chsh = query_conditionschronic - query_conditionsself_heal,
  levels = query_model)
combined_cfit <- limma::contrasts.fit(combined_fit, combined_contrast)
combined_bayes <- limma::eBayes(combined_cfit, robust = TRUE)
## Full table (every gene), ordered by log fold change.
combined_table <- limma::topTable(
  combined_bayes,
  number = nrow(combined_bayes),
  resort.by = "logFC")
## Quick looks at the adjusted p-value distribution and its minimum.
hist(combined_table[["adj.P.Val"]])
min(combined_table[["adj.P.Val"]])
test_ma <- plot_ma_de(
  table = combined_table,
  expr_col = "AveExpr",
  fc_col = "logFC",
  p_col = "adj.P.Val",
  logfc_cutoff = 0.6)
test_ma[["plot"]]
head(combined_table)
“Changes during infection hpgl0630-0636 and hpgl0650-hpgl0663”
Start out by creating the expt and poking at it to see how well/badly behaved the data is.
## Reread the sample sheet because I am fiddling with other possible surrogates (like strain)
## In fact, copy it to a separate sheet because these samples are a mess
## Keep only the parasite samples that belong to the 'infection' experiment.
lp_inf <- subset_expt(parasite_expt, subset = "experimentname=='infection'")
## There were 25, now there are 12 samples.
## One named color per condition ('chr' and 'sh').
chosen_colors <- c(chr = "#990000", sh = "#000099")
lp_inf <- set_expt_colors(lp_inf, colors = chosen_colors)
## The new colors are a character, changing according to condition.
The following creates all the metric plots of the raw data.
Now visualize some relevant metrics.
## Write the parasite experiment to an xlsx file whose name carries the run
## date and version; violin plots are requested.
excel_file <- glue::glue("excel/{rundate}_infection_parasite_data-v{ver}.xlsx")
lp_inf_written <- sm(write_expt(lp_inf, excel = excel_file, violin = TRUE))
Now perform the ‘default’ normalization we use in the lab and look again.
In this section, try out some normalizations/batch corrections and see the effect in PCA plots. Start out by taking the parasite data and doing the default normalization and see what there is to see.
## Default lab normalization of the parasite data, followed by a PCA.
lp_l2qcpm <- sm(
  normalize_expt(
    lp_inf,
    filter = TRUE,
    convert = "cpm",
    norm = "quant",
    transform = "log2"))
lp_l2qcpm_pca <- sm(plot_pca(lp_l2qcpm))
lp_l2qcpm_pca[["plot"]]
## The coloring hides it somewhat, but the samples separate very cleanly by
## strain -- and clearly not by chronic/healing.
knitr::kable(lp_l2qcpm_pca[["table"]])
sampleid | condition | batch | batch_int | colors | labels | PC1 | PC2 | pc_1 | pc_2 | pc_3 | pc_4 | pc_5 | pc_6 | pc_7 | pc_8 | pc_9 | pc_10 | pc_11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
hpgl0631 | HPGL0631 | chr | d108 | 1 | #990000 | HPGL0631 | -0.4834 | -0.1571 | -0.4834 | -0.1571 | -0.0722 | -0.0559 | 0.0081 | -0.0893 | 0.0068 | -0.1928 | 0.2552 | -0.0563 | -0.7324 |
hpgl0632 | HPGL0632 | chr | d108 | 1 | #990000 | HPGL0632 | 0.2127 | -0.1613 | 0.2127 | -0.1613 | -0.1133 | -0.4873 | 0.2562 | -0.2970 | 0.2275 | 0.5394 | 0.2876 | -0.1083 | 0.0640 |
hpgl0635 | HPGL0635 | sh | d108 | 1 | #000099 | HPGL0635 | 0.0091 | 0.5149 | 0.0091 | 0.5149 | -0.1514 | -0.0644 | -0.0006 | -0.2383 | 0.6117 | -0.3718 | -0.2237 | 0.0021 | 0.0717 |
hpgl0636 | HPGL0636 | sh | d108 | 1 | #000099 | HPGL0636 | 0.2346 | -0.1982 | 0.2346 | -0.1982 | -0.4736 | 0.6504 | 0.4162 | 0.0327 | -0.0028 | 0.0064 | 0.0043 | 0.0251 | -0.0030 |
hpgl0651 | HPGL0651 | chr | d110 | 2 | #990000 | HPGL0651 | -0.4435 | -0.1404 | -0.4435 | -0.1404 | 0.2364 | 0.1450 | 0.0070 | 0.2201 | 0.2264 | 0.4698 | -0.5464 | 0.0213 | 0.0619 |
hpgl0652 | HPGL0652 | chr | d110 | 2 | #990000 | HPGL0652 | 0.2555 | -0.1521 | 0.2555 | -0.1521 | 0.5531 | -0.0630 | 0.3166 | 0.3276 | 0.0498 | -0.3492 | 0.0504 | -0.4274 | 0.0345 |
hpgl0655 | HPGL0655 | sh | d110 | 2 | #000099 | HPGL0655 | 0.0465 | 0.4722 | 0.0465 | 0.4722 | 0.3609 | 0.1887 | 0.0624 | 0.1523 | -0.0920 | 0.1990 | 0.4112 | 0.5292 | -0.0367 |
hpgl0656 | HPGL0656 | sh | d110 | 2 | #000099 | HPGL0656 | 0.2658 | -0.1949 | 0.2658 | -0.1949 | 0.3006 | 0.3171 | -0.5482 | -0.5461 | -0.0754 | 0.0283 | -0.0398 | -0.0980 | -0.0277 |
hpgl0658 | HPGL0658 | chr | d107 | 3 | #990000 | HPGL0658 | -0.4910 | -0.1772 | -0.4910 | -0.1772 | -0.0827 | -0.0363 | -0.0344 | -0.1057 | -0.1744 | -0.2587 | 0.2901 | 0.0352 | 0.6640 |
hpgl0659 | HPGL0659 | chr | d107 | 3 | #990000 | HPGL0659 | 0.1896 | -0.1541 | 0.1896 | -0.1541 | -0.0289 | -0.3762 | 0.1575 | -0.1017 | -0.3967 | -0.2285 | -0.4631 | 0.4996 | -0.0759 |
hpgl0662 | HPGL0662 | sh | d107 | 3 | #000099 | HPGL0662 | -0.0270 | 0.5097 | -0.0270 | 0.5097 | -0.2156 | -0.0618 | -0.0703 | 0.0590 | -0.5411 | 0.1763 | -0.1379 | -0.5042 | -0.0205 |
hpgl0663 | HPGL0663 | sh | d107 | 3 | #000099 | HPGL0663 | 0.2311 | -0.1617 | 0.2311 | -0.1617 | -0.3133 | -0.1563 | -0.5705 | 0.5864 | 0.1600 | -0.0181 | 0.1122 | 0.0819 | 0.0000 |
Now repeat the same thing, but let sva minimize surrogate variables.
## Same normalization as before, with the 'sva' batch method added.
lp_l2qcpm_normbatch <- sm(
  normalize_expt(
    lp_inf,
    filter = TRUE,
    convert = "cpm",
    norm = "quant",
    transform = "log2",
    batch = "sva"))
lp_l2qcpm_normbatch_pca <- plot_pca(lp_l2qcpm_normbatch)
Now plot the result and see if things make more sense.
Adding SVA to the normalization does not help much.
sampleid | condition | batch | batch_int | colors | labels | PC1 | PC2 | pc_1 | pc_2 | pc_3 | pc_4 | pc_5 | pc_6 | pc_7 | pc_8 | pc_9 | pc_10 | pc_11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
hpgl0631 | HPGL0631 | chr | d108 | 1 | #990000 | HPGL0631 | -0.0527 | 0.1296 | -0.0527 | 0.1296 | 0.0128 | -0.0996 | 0.0054 | -0.1964 | -0.2518 | 0.0458 | -0.7325 | 0.3060 | 0.3908 |
hpgl0632 | HPGL0632 | chr | d108 | 1 | #990000 | HPGL0632 | -0.1677 | 0.5102 | -0.1677 | 0.5102 | 0.1896 | -0.2647 | -0.2426 | 0.5221 | -0.3122 | 0.1044 | 0.0761 | -0.2665 | -0.0751 |
hpgl0635 | HPGL0635 | sh | d108 | 1 | #000099 | HPGL0635 | 0.2048 | 0.0813 | 0.2048 | 0.0813 | -0.0165 | -0.2151 | -0.6266 | -0.3626 | 0.1899 | -0.0199 | 0.0840 | 0.3076 | -0.3992 |
hpgl0636 | HPGL0636 | sh | d108 | 1 | #000099 | HPGL0636 | 0.6715 | -0.2705 | 0.6715 | -0.2705 | 0.5034 | -0.0033 | 0.0216 | 0.0125 | 0.0030 | -0.0065 | -0.0155 | -0.3237 | 0.1829 |
hpgl0651 | HPGL0651 | chr | d110 | 2 | #990000 | HPGL0651 | -0.1976 | -0.2089 | -0.1976 | -0.2089 | 0.0251 | 0.2322 | -0.2492 | 0.4708 | 0.5229 | 0.0018 | 0.0682 | 0.2748 | 0.3770 |
hpgl0652 | HPGL0652 | chr | d110 | 2 | #990000 | HPGL0652 | -0.5132 | -0.2049 | -0.5132 | -0.2049 | 0.2930 | 0.3293 | -0.0529 | -0.3731 | -0.0576 | 0.4210 | 0.0395 | -0.2803 | -0.1200 |
hpgl0655 | HPGL0655 | sh | d110 | 2 | #000099 | HPGL0655 | -0.1283 | -0.3735 | -0.1283 | -0.3735 | 0.0729 | 0.1816 | 0.1058 | 0.2083 | -0.4016 | -0.5505 | -0.0375 | 0.2168 | -0.3938 |
hpgl0656 | HPGL0656 | sh | d110 | 2 | #000099 | HPGL0656 | -0.0954 | -0.4679 | -0.0954 | -0.4679 | -0.5002 | -0.5689 | 0.0675 | 0.0442 | 0.0396 | 0.1077 | -0.0311 | -0.3065 | 0.0164 |
hpgl0658 | HPGL0658 | chr | d107 | 3 | #990000 | HPGL0658 | -0.0216 | 0.1083 | -0.0216 | 0.1083 | -0.0269 | -0.1194 | 0.2156 | -0.2629 | -0.2729 | -0.0507 | 0.6593 | 0.2909 | 0.4213 |
hpgl0659 | HPGL0659 | chr | d107 | 3 | #990000 | HPGL0659 | -0.1794 | 0.3524 | -0.1794 | 0.3524 | 0.1061 | -0.1025 | 0.3549 | -0.2117 | 0.5055 | -0.4896 | -0.0802 | -0.2479 | -0.0683 |
hpgl0662 | HPGL0662 | sh | d107 | 3 | #000099 | HPGL0662 | 0.2234 | 0.1274 | 0.2234 | 0.1274 | -0.0662 | 0.0588 | 0.5280 | 0.1886 | 0.1458 | 0.4985 | -0.0260 | 0.3261 | -0.3893 |
hpgl0663 | HPGL0663 | sh | d107 | 3 | #000099 | HPGL0663 | 0.2562 | 0.2165 | 0.2562 | 0.2165 | -0.5932 | 0.5715 | -0.1275 | -0.0397 | -0.1106 | -0.0620 | -0.0042 | -0.2975 | 0.0574 |
No, not really, so let's change things by putting the ‘snp status’ as the “batch” factor and minimize it with sva/combat.
## Condition comes from 'state' and batch from 'snpclade'; the batch method
## requested is 'fsva'.
## NOTE(review): despite the 'l2qcpm' in the name, this call does not pass
## convert="cpm" as the other l2qcpm normalizations here do -- confirm that
## is intentional.
lp_infv2 <- set_expt_conditions(lp_inf, fact = "state")
lp_infv2 <- set_expt_batches(lp_infv2, fact = "snpclade")
lp_l2qcpm_snpbatch_straincond_sva <- sm(
  normalize_expt(
    lp_infv2,
    filter = TRUE,
    norm = "quant",
    transform = "log2",
    batch = "fsva"))
lp_l2qcpm_snpbatch_straincond_pca <- plot_pca(lp_l2qcpm_snpbatch_straincond_sva)
lp_l2qcpm_snpbatch_straincond_pca[["plot"]]
SNP status does not clarify things.
## Separating strain 5430 from the rest gives something of a split.
knitr::kable(lp_l2qcpm_snpbatch_straincond_pca[["table"]])
sampleid | condition | batch | batch_int | colors | labels | PC1 | PC2 | pc_1 | pc_2 | pc_3 | pc_4 | pc_5 | pc_6 | pc_7 | pc_8 | pc_9 | pc_10 | pc_11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
hpgl0631 | HPGL0631 | chronic | red | 3 | #1B9E77 | HPGL0631 | -0.2233 | -0.1180 | -0.2233 | -0.1180 | -0.1626 | -0.0133 | -0.1030 | 0.1025 | 0.2177 | -0.1736 | 0.2353 | 0.7027 | -0.4225 |
hpgl0632 | HPGL0632 | chronic | yellow | 4 | #1B9E77 | HPGL0632 | 0.2409 | -0.2099 | 0.2409 | -0.2099 | -0.4783 | -0.1831 | -0.1319 | 0.0033 | -0.5194 | -0.3886 | 0.2618 | -0.1343 | 0.1659 |
hpgl0635 | HPGL0635 | self_heal | blue_self | 1 | #7570B3 | HPGL0635 | -0.3690 | 0.2797 | -0.3690 | 0.2797 | -0.1049 | -0.0056 | -0.6731 | -0.3156 | 0.1194 | 0.1592 | -0.0395 | -0.0669 | 0.3049 |
hpgl0636 | HPGL0636 | self_heal | pink | 2 | #7570B3 | HPGL0636 | 0.3640 | 0.6219 | 0.3640 | 0.6219 | 0.2171 | -0.5507 | 0.0326 | 0.0468 | 0.0088 | -0.0331 | -0.0228 | 0.0129 | -0.2046 |
hpgl0651 | HPGL0651 | chronic | red | 3 | #1B9E77 | HPGL0651 | -0.2033 | -0.2060 | -0.2033 | -0.2060 | 0.2704 | -0.0040 | 0.0978 | -0.4131 | -0.5420 | 0.2952 | -0.1442 | -0.0437 | -0.4194 |
hpgl0652 | HPGL0652 | chronic | yellow | 4 | #1B9E77 | HPGL0652 | 0.2441 | -0.4701 | 0.2441 | -0.4701 | 0.2249 | -0.2550 | 0.0779 | -0.1868 | 0.4005 | 0.3029 | 0.4120 | -0.0866 | 0.2239 |
hpgl0655 | HPGL0655 | self_heal | blue_self | 1 | #7570B3 | HPGL0655 | -0.2868 | -0.1165 | -0.2868 | -0.1165 | 0.4408 | -0.0683 | 0.2135 | -0.0201 | 0.0462 | -0.6017 | -0.2838 | 0.0532 | 0.3583 |
hpgl0656 | HPGL0656 | self_heal | pink | 2 | #7570B3 | HPGL0656 | 0.3083 | -0.0153 | 0.3083 | -0.0153 | 0.4184 | 0.5021 | -0.3521 | 0.4751 | -0.1790 | 0.0751 | 0.0716 | 0.0292 | 0.0282 |
hpgl0658 | HPGL0658 | chronic | red | 3 | #1B9E77 | HPGL0658 | -0.2178 | -0.0966 | -0.2178 | -0.0966 | -0.1519 | 0.0273 | -0.0103 | 0.2805 | 0.3243 | -0.1313 | -0.0699 | -0.6577 | -0.4443 |
hpgl0659 | HPGL0659 | chronic | yellow | 4 | #1B9E77 | HPGL0659 | 0.2056 | -0.2091 | 0.2056 | -0.2091 | -0.3343 | -0.1000 | 0.0628 | 0.1968 | 0.0768 | 0.3108 | -0.7093 | 0.1944 | 0.1512 |
hpgl0662 | HPGL0662 | self_heal | blue_self | 1 | #7570B3 | HPGL0662 | -0.3874 | 0.2922 | -0.3874 | 0.2922 | -0.1469 | 0.0817 | 0.4930 | 0.3180 | -0.1611 | 0.3321 | 0.2996 | 0.0054 | 0.2877 |
hpgl0663 | HPGL0663 | self_heal | pink | 2 | #7570B3 | HPGL0663 | 0.3247 | 0.2477 | 0.3247 | 0.2477 | -0.1927 | 0.5689 | 0.2927 | -0.4873 | 0.2079 | -0.1470 | -0.0107 | -0.0086 | -0.0295 |
Ok, so let us remove the healing state with combat and see if that allows us to see a split on some other factor.
## Swap the roles: pathogen strain becomes the condition and the healing state
## becomes the batch, which is then passed to the 'combat_scale' batch method.
lp_inf_strain <- set_expt_conditions(lp_inf, fact = "pathogenstrain")
lp_inf_strain <- set_expt_batches(lp_inf_strain, fact = "state")
lp_l2qcpm_strain <- sm(
  normalize_expt(
    lp_inf_strain,
    filter = TRUE,
    convert = "cpm",
    norm = "quant",
    transform = "log2",
    batch = "combat_scale"))
hmm ok, I think I quit for today.
hmm ok, I think I quit for today.
sampleid | condition | batch | batch_int | colors | labels | PC1 | PC2 | pc_1 | pc_2 | pc_3 | pc_4 | pc_5 | pc_6 | pc_7 | pc_8 | pc_9 | pc_10 | pc_11 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
hpgl0631 | HPGL0631 | s5430 | chronic | 1 | #1B9E77 | HPGL0631 | -0.4841 | 0.1044 | -0.4841 | 0.1044 | -0.0995 | 0.0189 | -0.0847 | 0.0967 | 0.2492 | 0.1920 | -0.0028 | 0.7299 | -0.1137 |
hpgl0632 | HPGL0632 | s5397 | chronic | 1 | #D95F02 | HPGL0632 | 0.0172 | -0.3172 | 0.0172 | -0.3172 | -0.3354 | 0.0868 | -0.2829 | -0.5622 | -0.1517 | 0.3593 | -0.0033 | -0.0471 | 0.3811 |
hpgl0635 | HPGL0635 | s1022 | self_heal | 2 | #7570B3 | HPGL0635 | 0.2343 | 0.4341 | 0.2343 | 0.4341 | -0.1534 | 0.0157 | -0.1824 | -0.2487 | 0.5802 | -0.4389 | -0.0304 | -0.0938 | 0.1243 |
hpgl0636 | HPGL0636 | s2189 | self_heal | 2 | #E7298A | HPGL0636 | 0.2099 | -0.2127 | 0.2099 | -0.2127 | -0.2481 | -0.7816 | 0.0074 | 0.0299 | -0.0298 | -0.0193 | 0.0149 | -0.0067 | -0.3903 |
hpgl0651 | HPGL0651 | s5430 | chronic | 1 | #1B9E77 | HPGL0651 | -0.4547 | 0.0895 | -0.4547 | 0.0895 | 0.2866 | -0.0701 | 0.2007 | -0.4370 | -0.4048 | -0.4468 | -0.0750 | -0.0666 | -0.1000 |
hpgl0652 | HPGL0652 | s5397 | chronic | 1 | #D95F02 | HPGL0652 | 0.0498 | -0.3352 | 0.0498 | -0.3352 | 0.4981 | -0.1558 | 0.3307 | 0.0988 | 0.3541 | 0.1199 | -0.3471 | -0.0296 | 0.3864 |
hpgl0655 | HPGL0655 | s1022 | self_heal | 2 | #7570B3 | HPGL0655 | 0.2551 | 0.3907 | 0.2551 | 0.3907 | 0.3821 | -0.0974 | 0.0740 | -0.0266 | -0.1562 | 0.3296 | 0.6245 | 0.0516 | 0.1079 |
hpgl0656 | HPGL0656 | s2189 | self_heal | 2 | #E7298A | HPGL0656 | 0.2370 | -0.2186 | 0.2370 | -0.2186 | 0.4112 | 0.2995 | -0.5931 | 0.0924 | -0.0808 | -0.0534 | -0.1483 | 0.0199 | -0.4023 |
hpgl0658 | HPGL0658 | s5430 | chronic | 1 | #1B9E77 | HPGL0658 | -0.4940 | 0.0937 | -0.4940 | 0.0937 | -0.1019 | 0.0411 | -0.1086 | 0.2997 | 0.1651 | 0.2539 | 0.0853 | -0.6625 | -0.1106 |
hpgl0659 | HPGL0659 | s5397 | chronic | 1 | #D95F02 | HPGL0659 | 0.0009 | -0.2999 | 0.0009 | -0.2999 | -0.2018 | 0.1018 | -0.0402 | 0.4744 | -0.1989 | -0.4800 | 0.3474 | 0.0876 | 0.3882 |
hpgl0662 | HPGL0662 | s1022 | self_heal | 2 | #7570B3 | HPGL0662 | 0.2094 | 0.4507 | 0.2094 | 0.4507 | -0.2060 | 0.0536 | 0.0859 | 0.2788 | -0.4250 | 0.1354 | -0.5677 | 0.0315 | 0.1306 |
hpgl0663 | HPGL0663 | s2189 | self_heal | 2 | #E7298A | HPGL0663 | 0.2191 | -0.1794 | 0.2191 | -0.1794 | -0.2319 | 0.4875 | 0.5932 | -0.0961 | 0.0985 | 0.0483 | 0.1025 | -0.0144 | -0.4015 |