## Location of the xlsx sample sheet; it is handed to create_expt() further
## down as the source of the sample metadata.
sample_sheet <- glue::glue("sample_sheets/tmrc2_samples_20210610.xlsx")

1 Introduction

This document is intended to provide a general overview of the TMRC2 samples which have thus far been sequenced. In some cases, this includes only those samples starting in 2019; in other instances I am including our previous (2015-2016) samples.

In all cases the processing performed was:

  1. Default trimming was performed.
  2. Hisat2 was used to map the remaining reads against the Leishmania panamensis genome revision 36.
  3. The alignments from hisat2 were used to count reads/gene against the revision 36 annotations with htseq.
  4. These alignments were also passed to the pileup functionality of samtools and the vcf/bcf utilities in order to make a matrix of all observed differences between each sample with respect to the reference.

The analyses in this document use the matrices of counts/gene from #3 and variants/position from #4 in order to provide some images and metrics describing the samples we have sequenced so far.

2 Annotations

Everything which follows depends on the existing TriTrypDB annotations, revision 46, circa 2019. The following block loads a database of these annotations and turns it into a matrix where the rows are genes and the columns are all the annotation types provided by TriTrypDB.

The same database was used to create a matrix of orthologous genes between L.panamensis and all of the other species in the TriTrypDB.

## Attach EuPathDB and the L. panamensis v46 OrgDb package.  The value
## returned by library() is only captured so that it is not printed
## (sm() presumably silences the startup messages -- confirm in hpgltools).
tt <- sm(library(EuPathDB))
tt <- sm(library(org.Lpanamensis.MHOMCOL81L13.v46.eg.db))
pan_db <- org.Lpanamensis.MHOMCOL81L13.v46.eg.db
all_fields <- columns(pan_db)

## Gene-level annotation table keyed by "gid"; only the $genes element of
## the returned object is kept.
all_lp_annot <- sm(
  load_orgdb_annotations(
    pan_db,
    keytype = "gid",
    fields = c(
      "annot_gene_entrez_id",
      "annot_gene_name",
      "annot_strand",
      "annot_chromosome",
      "annot_cds_length",
      "annot_gene_product"
    )
  )
)$genes
## Lower-case the product descriptions (presumably so that the lower-case
## patterns passed to semantic_expt_filter() later match -- confirm).
all_lp_annot[["annot_gene_product"]] <- tolower(all_lp_annot[["annot_gene_product"]])

## Two-column ID/length table derived from the annotations.
lp_lengths <- all_lp_annot[, c("gid", "annot_cds_length")]
colnames(lp_lengths) <- c("ID", "length")

## GO annotations and the ortholog table come from the same database.
lp_go <- sm(load_orgdb_go(pan_db))
orthos <- sm(EuPathDB::extract_eupath_orthologs(db = pan_db))

## The annotations handed to create_expt() as gene_info.
hisat_annot <- all_lp_annot
## rownames(hisat_annot) <- paste0("exon_", rownames(hisat_annot), ".E1")

3 TODO:

Resequence samples: TMRC20002, TMRC20006, TMRC20004 (maybe TMRC20008 and TMRC20029)

4 Generate Expressionsets and Sample Estimation

The process of sample estimation takes two primary inputs:

  1. The sample sheet, which contains all the metadata we currently have on hand, including filenames for the outputs of #3 and #4 above.
  2. The gene annotations.

An expressionset is a data structure used in R to examine RNASeq data. It is comprised of annotations, metadata, and expression data. In the case of our processing pipeline, the location of the expression data is provided by the filenames in the metadata.

The first lines of the following block create the Expressionset. All of the following lines perform various normalizations and generate plots from it.

4.1 Notes

The following samples are much lower coverage:

  • TMRC20002
  • TMRC20006
  • TMRC20007
  • TMRC20008

20210610: I made some manual changes to the sample sheet which I downloaded, filling in some zymodeme with ‘unknown’

4.2 TODO:

  1. Do the multi-gene family removal right here instead of way down at the bottom
  2. Add zymodeme snps to the annotation later.
  3. Start phylogenetic analysis of variant table.
## Metadata columns which are sanitized and then converted to factors.
sanitize_columns <- c(
  "passagenumber",
  "clinicalresponse",
  "clinicalcategorical",
  "zymodemecategorical",
  "phenotypiccharacteristics"
)

## Build the expressionset from the sample sheet and annotations, then apply
## each preparation step in turn: zymodeme as the condition, the
## nonzero = 8600 sample filter, removal of genes whose product matches
## amastin/gp63/leishmanolysin, and metadata cleanup.
lp_expt <- sm(create_expt(sample_sheet,
                          gene_info = hisat_annot,
                          id_column = "hpglidentifier",
                          file_column = "lpanamensisv36hisatfile"))
lp_expt <- set_expt_conditions(lp_expt, fact = "zymodemecategorical")
lp_expt <- subset_expt(lp_expt, nonzero = 8600)
lp_expt <- semantic_expt_filter(lp_expt,
                                semantic = c("amastin", "gp63", "leishmanolysin"),
                                semantic_column = "annot_gene_product")
lp_expt <- sanitize_expt_metadata(lp_expt, columns = sanitize_columns)
lp_expt <- set_expt_factors(lp_expt, columns = sanitize_columns, class = "factor")
## The samples (and read coverage) removed when filtering 8600 non-zero genes are:
## TMRC20002 TMRC20004 TMRC20006 TMRC20029 TMRC20008 
##  11681227    564812   6670348   1658096   6249790
## subset_expt(): There were 68, now there are 63 samples.
## semantic_expt_filter(): Removed 68 genes.

## Library sizes, saved as an image through pp().
libsizes <- plot_libsize(lp_expt)
pp(file = "images/lp_expt_libsizes.png",
   image = libsizes$plot,
   width = 12,
   height = 9)

## I think samples 7,10 should be removed at minimum, probably also 9,11
nonzero <- plot_nonzero(lp_expt)
nonzero$plot
## Warning: ggrepel: 37 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## Per-sample boxplot, saved as an image through pp().
lp_box <- plot_boxplot(lp_expt)
## 4524 entries are 0.  We are on a log scale, adding 1 to the data.
pp(file = "images/lp_expt_boxplot.png",
   image = lp_box,
   width = 12,
   height = 9)

## Pre/post filtering comparison; both of its plots are printed.
filter_plot <- plot_libsize_prepost(lp_expt)
filter_plot$lowgene_plot
## Warning: Using alpha for a discrete variable is not advised.

filter_plot$count_plot

4.3 Distribution Visualization

Najib’s favorite plots are of course the PCA/t-SNE. These are nice to look at in order to get a sense of the relationships between samples. They also provide a good opportunity to see what happens when one applies different normalizations, surrogate analyses, filters, etc. In addition, one may set different experimental factors as the primary ‘condition’ (usually the color of plots) and surrogate ‘batches’.

4.4 By Susceptibility

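The following block takes column ‘Q’ of the sample sheet and makes a categorical version of it with these parameters (written here as percentages; the code compares against the equivalent fractions, e.g. 0.35):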

  • 0 <= x <= 35 is resistant
  • 36 <= x <= 48 is ambiguous
  • 49 <= x is sensitive
## Turn the numeric susceptibility column into a categorical label:
##   x <= 0.35          -> "resistant"
##   0.35 < x < 0.49    -> "ambiguous"
##   x >= 0.49          -> "sensitive"
##   NA (missing or not parseable as a number) -> "unknown"
## Every non-missing value receives a label.  Earlier the ambiguous band was
## defined as 0.36 <= x <= 0.48, so a value such as 0.355 or 0.485 matched no
## rule and was left in the vector as its numeric text.
starting <- as.numeric(pData(lp_expt)[["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]])
na_idx <- is.na(starting)
## The index vectors exclude NA explicitly so they are plain TRUE/FALSE masks.
resist_idx <- !na_idx & starting <= 0.35
susceptible_idx <- !na_idx & starting >= 0.49
indeterminant_idx <- !na_idx & !resist_idx & !susceptible_idx

sus_categorical <- rep("unknown", length(starting))
sus_categorical[resist_idx] <- "resistant"
sus_categorical[indeterminant_idx] <- "ambiguous"
sus_categorical[susceptible_idx] <- "sensitive"

## Store the labels in the metadata and use them as the batch factor while
## keeping the existing condition of lp_expt.
pData(lp_expt$expressionset)[["sus_category"]] <- sus_categorical
clinical_samples <- lp_expt %>%
  set_expt_batches(fact = sus_categorical)

## Quantile/cpm/log2 normalization without batch adjustment, then PCA.
clinical_norm <- sm(normalize_expt(clinical_samples, norm = "quant", transform = "log2",
                                   convert = "cpm", batch = FALSE, filter = TRUE))
zymo_pca <- plot_pca(clinical_norm, plot_title = "PCA of parasite expression values")
pp(file = "images/zymo_pca_sus_shape.png", image = zymo_pca$plot)

zymo_3dpca <- plot_3d_pca(zymo_pca)
zymo_3dpca$plot
## Same as above but without quantile normalization, used for the TSNE.
clinical_n <- sm(normalize_expt(clinical_samples, transform = "log2",
                                convert = "cpm", batch = FALSE, filter = TRUE))
zymo_tsne <- plot_tsne(clinical_n, plot_title = "TSNE of parasite expression values")
zymo_tsne$plot

## Repeat with batch = "svaseq" and redraw the PCA/TSNE.
clinical_nb <- normalize_expt(clinical_samples, convert = "cpm", transform = "log2",
                         filter = TRUE, batch = "svaseq")
## Removing 144 low-count genes (8566 remaining).
## batch_counts: Before batch/surrogate estimation, 827 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 2903 entries are 0<x<1: 1%.
## Setting 277 low elements to zero.
## transform_counts: Found 277 values equal to 0, adding 1 to the matrix.
clinical_nb_pca <- plot_pca(clinical_nb, plot_title = "PCA of parasite expression values")
pp(file = "images/clinical_nb_pca_sus_shape.png", image = clinical_nb_pca$plot)

clinical_nb_tsne <- plot_tsne(clinical_nb, plot_title = "TSNE of parasite expression values")
clinical_nb_tsne$plot

## NOTE(review): the title string below contains a line break and leading
## spaces; it is left untouched here -- confirm whether that is intended.
corheat <- plot_corheat(clinical_norm, plot_title = "Correlation heatmap of parasite
                 expression values
")
corheat$plot

plot_sm(clinical_norm)$plot
## Performing correlation.

4.5 By Cure/Fail status

## Cure/fail view of the data: the clinical outcome becomes the condition and
## the susceptibility category (sus_categorical, computed earlier) the batch.
cf_expt <- set_expt_conditions(lp_expt, fact = "clinicalcategorical") %>%
  set_expt_batches(fact = sus_categorical)

## Quantile/cpm/log2 normalization without surrogate estimation.  This
## object is used for the PCA here and again for pca_information() below.
cf_norm <- normalize_expt(cf_expt, convert = "cpm", transform = "log2",
                          norm = "quant", filter = TRUE)
## Removing 144 low-count genes (8566 remaining).
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
start_cf <- plot_pca(cf_norm, plot_title = "PCA of parasite expression values")
pp(file = "images/cf_sus_shape.png", image = start_cf$plot)

## The same normalization with batch = "svaseq" added.
cf_nb <- normalize_expt(cf_expt, convert = "cpm", transform = "log2",
                        norm = "quant", filter = TRUE, batch = "svaseq")
## Warning in normalize_expt(cf_expt, convert = "cpm", transform = "log2", :
## Quantile normalization and sva do not always play well together.
## Removing 144 low-count genes (8566 remaining).
## batch_counts: Before batch/surrogate estimation, 2 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 3534 entries are 0<x<1: 1%.
## Setting 134 low elements to zero.
## transform_counts: Found 134 values equal to 0, adding 1 to the matrix.
cf_nb_pca <- plot_pca(cf_nb, plot_title = "PCA of parasite expression values")
pp(file = "images/cf_sus_share_nb.png", image = cf_nb_pca$plot)

## cf_norm is reused as-is: it used to be rebuilt at this point with exactly
## the same arguments (only their order differed), which repeated the work
## without changing the result.
test <- pca_information(cf_norm,
                        expt_factors = c("clinicalcategorical", "zymodemecategorical",
                                         "pathogenstrain", "passagenumber"),
                        num_components = 6, plot_pcas = TRUE)
test$anova_p
##                           PC1       PC2       PC3    PC4     PC5     PC6
## clinicalcategorical 7.056e-02 4.047e-01 2.052e-02 0.1541 0.48989 0.46854
## zymodemecategorical 8.474e-06 6.051e-01 2.480e-02 0.8261 0.31641 0.31532
## pathogenstrain      4.381e-01 4.212e-01 1.353e-05 0.6289 0.02871 0.70783
## passagenumber       5.646e-01 4.494e-05 1.468e-01 0.1060 0.16486 0.08373
test$cor_heatmap

## Susceptibility view of the data: the sus_category metadata column is the
## condition and the zymodeme is the batch.
sus_expt <- set_expt_batches(
  set_expt_conditions(lp_expt, fact = "sus_category"),
  fact = "zymodemecategorical"
)

## Quantile/cpm/log2 normalization followed by a PCA which is only printed.
sus_norm <- normalize_expt(
  sus_expt,
  transform = "log2",
  convert = "cpm",
  norm = "quant",
  filter = TRUE
)
## Removing 144 low-count genes (8566 remaining).
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
sus_pca <- plot_pca(sus_norm, plot_title = "PCA of parasite expression values")
sus_pca$plot

## cpm/log2 with batch = "svaseq" (no quantile step); this PCA is saved
## through pp().
sus_nb <- normalize_expt(
  sus_expt,
  transform = "log2",
  convert = "cpm",
  batch = "svaseq",
  filter = TRUE
)
## Removing 144 low-count genes (8566 remaining).
## batch_counts: Before batch/surrogate estimation, 827 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 2903 entries are 0<x<1: 1%.
## Setting 166 low elements to zero.
## transform_counts: Found 166 values equal to 0, adding 1 to the matrix.
sus_nb_pca <- plot_pca(sus_nb, plot_title = "PCA of parasite expression values")
pp(file = "images/sus_nb_pca.png", image = sus_nb_pca$plot)
## Warning: ggrepel: 16 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 20 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

At this time, we do not have very many samples, so the set of metrics/plots is fairly limited. There is really only one factor in the metadata which we can use for performing differential expression analyses, the ‘zymodeme’.

5 Zymodeme analyses

The following sections perform a series of analyses which seek to elucidate differences between the zymodemes 2.2 and 2.3 either through differential expression or variant profiles.

5.1 Differential expression

5.1.1 With respect to zymodeme attribution

TODO: Do this with and without sva and compare the results.

## Restrict the data to the samples whose condition is z2.2 or z2.3.
zy_expt <- subset_expt(lp_expt, subset = "condition=='z2.2'|condition=='z2.3'")
## subset_expt(): There were 63, now there are 29 samples.
zy_norm <- normalize_expt(zy_expt, filter = TRUE, convert = "cpm", norm = "quant")
## Removing 166 low-count genes (8544 remaining).
## Two runs of the pairwise comparison.  zy_de_nobatch used to be called with
## model_batch = "svaseq" as well, which made it a copy of zy_de; it now runs
## with model_batch = FALSE so the two results can actually be compared.
zy_de_nobatch <- sm(all_pairwise(zy_expt, filter = TRUE, model_batch = FALSE))
zy_de <- sm(all_pairwise(zy_expt, filter = TRUE, model_batch = "svaseq"))
## Combine the svaseq results into tables and pull out the significant genes;
## both steps also write an xlsx file whose name includes `ver` (defined
## outside this section).
zy_table <- sm(combine_de_tables(zy_de, excel = glue::glue("excel/zy_tables-v{ver}.xlsx")))
zy_sig <- sm(extract_significant_genes(zy_table, excel = glue::glue("excel/zy_sig-v{ver}.xlsx")))

5.1.2 Images of zymodeme DE

## Print the plot stored under deseq_ma_plots for the z23_vs_z22 contrast
## of the combined zymodeme table.
zy_table[["plots"]][["z23_vs_z22"]][["deseq_ma_plots"]][["plot"]]

5.2 With respect to cure/failure

In contrast, we can search for genes which are differentially expressed with respect to cure/failure status.

## Pairwise comparison over the cure/fail conditions with svaseq in the model.
cf_de <- sm(
  all_pairwise(cf_expt, filter = TRUE, model_batch = "svaseq")
)

## Output workbooks; `ver` is defined outside this section.
cf_tables_xlsx <- glue::glue("excel/cf_tables-v{ver}.xlsx")
cf_sig_xlsx <- glue::glue("excel/cf_sig-v{ver}.xlsx")

## Combined tables first, then the significant subset taken from them.
cf_table <- sm(combine_de_tables(cf_de, excel = cf_tables_xlsx))
cf_sig <- sm(extract_significant_genes(cf_table, excel = cf_sig_xlsx))

5.3 With respect to susceptibility

Finally, we can use our category of susceptibility and look for genes which change from sensitive to resistant. Keep in mind, though, that for the moment we have a lot of ambiguous and unknown strains.

## Pairwise comparison over the susceptibility categories with svaseq in the
## model.
sus_de <- sm(
  all_pairwise(sus_expt, filter = TRUE, model_batch = "svaseq")
)

## Output workbooks; `ver` is defined outside this section.
sus_tables_xlsx <- glue::glue("excel/sus_tables-v{ver}.xlsx")
sus_sig_xlsx <- glue::glue("excel/sus_sig-v{ver}.xlsx")

## Combined tables first, then the significant subset taken from them.
sus_table <- sm(combine_de_tables(sus_de, excel = sus_tables_xlsx))
sus_sig <- sm(extract_significant_genes(sus_table, excel = sus_sig_xlsx))

## Show the first 20 rows of the deseq "ups" table for sensitive_vs_resistant.
sus_ups_head <- head(sus_sig$deseq$ups$sensitive_vs_resistant, n = 20)
knitr::kable(sus_ups_head)
gid annotgeneproduct annotgenetype chromosome start end strand annotgeneentrezid annotgenename annotstrand annotchromosome annotcdslength length deseq_logfc deseq_adjp edger_logfc edger_adjp limma_logfc limma_adjp basic_nummed basic_denmed basic_numvar basic_denvar basic_logfc basic_t basic_p basic_adjp deseq_basemean deseq_lfcse deseq_stat deseq_p ebseq_fc ebseq_logfc ebseq_c1mean ebseq_c2mean ebseq_mean ebseq_var ebseq_postfc ebseq_ppee ebseq_ppde ebseq_adjp edger_logcpm edger_lr edger_p limma_ave limma_t limma_b limma_p limma_adjp_ihw deseq_adjp_ihw edger_adjp_ihw ebseq_adjp_ihw basic_adjp_ihw lfc_meta lfc_var lfc_varbymed p_meta p_var
LPAL13_000044900 LPAL13_000044900 actin-related protein 2, putative protein coding LPAL13_SCAF000645 507 1685 - reverse Not Assigned 1179.0 1178 28.010 0.0000 13.380 0.0000 8.2610 0.3530 3.6410 -4.2250 18.173 0.1055 7.865 8.404 0.0000 0.0000 870.40 1.3260 21.120 0e+00 120487.21 16.878 0.0000 1204.862 816.197 5.866e+05 296.832 1.0000 0e+00 1.0000 4.9370 48.24 0e+00 1.1310 1.3320 -4.511 0.1877 4.045e-01 4.406e-95 6.316e-09 0.000e+00 6.311e-06 13.800 8.923e+00 6.466e-01 6.257e-02 1.174e-02
LPAL13_000035800 LPAL13_000035800 hypothetical protein protein coding LPAL13_SCAF000500 737 1006 - reverse Not Assigned 270.0 269 14.540 0.0000 13.800 0.0000 9.8980 0.3753 4.7850 -3.9610 16.123 0.4540 8.746 9.699 0.0000 0.0000 2548.00 1.2930 11.250 0e+00 21845.10 14.415 0.1641 3803.389 2576.542 1.302e+07 946.975 0.0000 0e+00 0.0000 6.4810 73.28 0e+00 1.9270 1.2800 -4.547 0.2053 5.017e-01 1.134e-25 6.169e-14 0.000e+00 1.067e-06 15.610 7.949e+00 5.093e-01 6.843e-02 1.405e-02
LPAL13_320026300 LPAL13_320026300 hypothetical protein, conserved protein coding LpaL13_32 754268 755485 - reverse 32 1218.0 1217 13.980 0.0000 13.200 0.0000 9.1380 0.4066 4.4360 -4.0340 21.011 0.7598 8.469 8.163 0.0000 0.0000 1558.00 1.2680 11.020 0e+00 14426.16 13.816 0.1534 2357.791 1597.263 2.438e+06 561.074 0.0000 0e+00 0.0000 5.7730 63.40 0e+00 1.7870 1.2060 -4.600 0.2325 4.633e-01 8.686e-25 4.207e-12 0.000e+00 5.982e-06 11.990 9.684e-02 8.075e-03 7.750e-02 1.802e-02
LPAL13_000053200 LPAL13_000053200 hypothetical protein protein coding LPAL13_SCAF000804 5037 5249 - reverse Not Assigned 213.0 212 8.660 0.0000 10.060 0.0000 5.2360 0.0907 0.6563 -4.2250 10.158 0.1055 4.881 6.943 0.0000 0.0000 73.21 1.2520 6.916 0e+00 11962.59 13.546 0.0000 119.616 81.030 7.350e+03 30.186 1.0000 0e+00 1.0000 1.4230 39.59 0e+00 -1.1420 2.2900 -3.625 0.0255 1.234e-01 3.513e-09 1.142e-07 0.000e+00 3.370e-05 7.953 1.106e-01 1.391e-02 8.497e-03 2.166e-04
LPAL13_000051300 LPAL13_000051300 hypothetical protein, conserved protein coding LPAL13_SCAF000772 11 2344 + forward Not Assigned 2334.0 2333 8.421 0.0000 9.051 0.0000 2.9220 0.4366 -0.0057 -4.0020 11.186 0.6120 3.996 5.186 0.0000 0.0006 140.30 1.4020 6.004 0e+00 1405.97 10.457 0.1125 172.174 116.671 7.380e+04 47.638 0.0000 0e+00 0.0000 2.3610 26.08 0e+00 -1.2640 1.1420 -4.640 0.2578 4.942e-01 4.447e-07 2.656e-05 0.000e+00 5.676e-04 6.422 3.951e+00 6.152e-01 8.593e-02 2.215e-02
LPAL13_300029400 LPAL13_300029400 hypothetical protein, conserved protein coding LpaL13_30 853953 854150 - reverse 30 198.0 197 6.350 0.0000 6.260 0.0000 4.9050 0.0096 1.6480 -2.4720 1.706 1.8110 4.120 8.045 0.0000 0.0000 91.71 0.8385 7.573 0e+00 70.71 6.144 1.8871 134.139 91.477 1.183e+04 23.273 0.0000 0e+00 0.0000 1.6890 41.62 0e+00 -0.0924 3.4720 -1.678 0.0010 1.085e-02 3.899e-11 8.495e-08 0.000e+00 2.466e-05 5.921 3.575e-01 6.038e-02 3.172e-04 3.018e-07
LPAL13_000017600 LPAL13_000017600 hypothetical protein, conserved protein coding LPAL13_SCAF000146 359 586 + forward Not Assigned 228.0 227 6.343 0.0000 6.329 0.0000 5.7160 0.1190 4.2350 -0.9343 4.891 2.3477 5.170 7.560 0.0000 0.0000 615.40 0.7654 8.287 0e+00 67.17 6.070 14.2982 961.055 655.649 4.213e+05 53.926 0.0000 1e+00 0.0000 4.4350 43.58 0e+00 2.2210 2.1230 -3.643 0.0378 1.639e-01 4.969e-13 4.327e-08 1.000e+00 9.841e-06 6.530 2.115e+00 3.239e-01 1.261e-02 4.768e-04
LPAL13_000040700 LPAL13_000040700 hypothetical protein, conserved protein coding LPAL13_SCAF000598 54 1067 + forward Not Assigned 1014.0 1013 6.086 0.0002 7.545 0.0001 2.2580 0.2699 -1.5860 -4.2250 7.064 0.1055 2.639 4.480 0.0002 0.0024 21.07 1.3450 4.526 0e+00 2462.97 11.266 0.0000 24.620 16.678 4.281e+02 6.722 1.0000 0e+00 1.0000 -0.1668 22.03 0e+00 -2.5800 1.5540 -4.342 0.1254 3.637e-01 1.656e-04 1.006e-04 0.000e+00 2.020e-03 5.070 2.889e+00 5.697e-01 4.180e-02 5.241e-03
LPAL13_080010600 LPAL13_080010600 hypothetical protein, conserved protein coding LpaL13_08 195555 195749 - reverse 8 195.0 194 5.761 0.0002 7.165 0.0001 1.9980 0.1654 -2.1240 -4.2250 5.295 0.1055 2.101 4.099 0.0005 0.0045 10.31 1.2670 4.549 0e+00 1874.51 10.872 0.0000 18.735 12.691 5.733e+02 5.646 1.0000 0e+00 1.0000 -1.0520 21.83 0e+00 -3.3450 1.9120 -4.054 0.0606 1.910e-01 2.170e-04 1.080e-04 0.000e+00 3.759e-03 4.548 3.206e+00 7.050e-01 2.020e-02 1.224e-03
LPAL13_000011700 LPAL13_000011700 hypothetical protein protein coding LPAL13_SCAF000076 101 364 - reverse Not Assigned 264.0 263 5.750 0.0012 7.244 0.0003 2.5290 0.1359 -1.6090 -4.2250 7.713 0.1055 2.615 4.255 0.0003 0.0036 14.55 1.4560 3.949 1e-04 2510.78 11.294 0.0000 25.098 17.002 5.103e+02 6.951 1.0000 0e+00 1.0000 -0.6106 19.16 0e+00 -3.0780 2.0420 -3.913 0.0454 1.568e-01 1.191e-03 4.100e-04 0.000e+00 3.650e-03 4.970 1.691e+00 3.403e-01 1.517e-02 6.860e-04
LPAL13_040019400 LPAL13_040019400 hypothetical protein protein coding LpaL13_04 440768 441127 - reverse 4 360.0 359 5.537 0.0000 5.376 0.0000 3.6830 0.0383 -0.3080 -3.4550 1.856 1.2917 3.147 6.746 0.0000 0.0001 38.43 1.0170 5.443 0e+00 48.48 5.599 0.8221 40.331 27.586 2.402e+03 8.993 0.0000 0e+00 0.0000 0.4513 25.47 0e+00 -1.6730 2.7650 -3.064 0.0075 4.461e-02 4.997e-06 2.600e-05 0.000e+00 4.499e-05 4.932 1.457e-01 2.954e-02 2.501e-03 1.875e-05
LPAL13_350011800 LPAL13_350011800 hypothetical protein, conserved protein coding LpaL13_35 171009 171242 + forward 35 234.0 233 5.073 0.0000 5.050 0.0000 4.2060 0.0165 2.7180 -0.8548 2.746 0.1697 3.573 9.297 0.0000 0.0000 176.30 0.6435 7.884 0e+00 32.10 5.004 9.6863 311.202 213.939 7.486e+04 23.658 0.0000 1e+00 0.0000 2.6270 44.16 0e+00 1.0650 3.2010 -1.704 0.0022 2.243e-02 4.770e-12 3.061e-08 9.654e-01 1.067e-06 4.875 6.786e-01 1.392e-01 7.233e-04 1.570e-06
LPAL13_170014500 LPAL13_170014500 hypothetical protein, conserved protein coding LpaL13_17 361708 362040 + forward 17 333.0 332 4.837 0.0003 4.724 0.0027 2.1550 0.1443 -1.0540 -3.1350 7.247 1.7316 2.081 2.890 0.0072 0.0313 19.12 1.1180 4.326 0e+00 32.96 5.043 1.2101 40.206 27.627 1.665e+03 8.351 0.0000 0e+00 0.0000 -0.4863 13.64 2e-04 -2.6780 2.0050 -3.918 0.0494 1.662e-01 4.988e-04 2.706e-03 0.000e+00 3.142e-02 3.710 7.737e-01 2.086e-01 1.655e-02 8.096e-04
LPAL13_080010800 LPAL13_080010800 hypothetical protein protein coding LpaL13_08 199409 199792 - reverse 8 384.0 383 4.734 0.0014 6.247 0.0004 1.4160 0.3958 -2.3890 -4.2250 4.791 0.1055 1.836 3.757 0.0011 0.0084 11.62 1.2100 3.911 1e-04 1177.41 10.201 0.0000 11.764 7.969 1.362e+02 3.580 1.0000 0e+00 1.0000 -0.8327 18.69 0e+00 -3.1550 1.2300 -4.565 0.2233 4.546e-01 1.351e-03 4.919e-04 0.000e+00 9.317e-03 3.666 2.812e+00 7.671e-01 7.447e-02 1.661e-02
LPAL13_200050100 LPAL13_200050100 hypothetical protein protein coding LpaL13_20.1 1627529 1627717 + forward 20.1 189.0 188 4.612 0.0000 4.582 0.0000 4.7730 0.0082 2.4290 -2.0220 1.076 2.6699 4.452 7.891 0.0000 0.0001 121.70 0.6825 6.758 0e+00 25.40 4.667 8.1538 207.323 143.075 2.629e+04 17.527 0.0000 0e+00 0.0000 2.1250 33.62 0e+00 0.7007 3.5480 -1.353 0.0007 1.100e-02 1.324e-08 1.116e-06 0.000e+00 1.433e-04 4.649 1.629e+00 3.504e-01 2.497e-04 1.870e-07
LPAL13_000011800 LPAL13_000011800 hypothetical protein, conserved protein coding LPAL13_SCAF000076 446 640 - reverse Not Assigned 195.0 194 4.323 0.0034 4.677 0.0051 0.6508 0.6867 -2.5750 -3.9900 4.063 0.6650 1.416 2.776 0.0096 0.0385 12.47 1.1990 3.604 3e-04 51.35 5.682 0.1807 9.781 6.684 8.693e+01 2.840 0.9994 6e-04 0.9994 -0.8096 12.06 5e-04 -3.1110 0.6322 -4.846 0.5296 7.726e-01 3.336e-03 6.549e-03 1.369e-02 3.854e-02 2.849 3.412e+00 1.198e+00 1.768e-01 9.335e-02
LPAL13_000035500 LPAL13_000035500 hypothetical protein, conserved protein coding LPAL13_SCAF000492 7045 7410 + forward Not Assigned 366.0 365 4.245 0.0000 4.234 0.0000 3.8840 0.0982 4.4620 0.7273 2.742 0.4752 3.735 8.851 0.0000 0.0000 522.10 0.6605 6.427 0e+00 21.24 4.409 45.2429 961.289 665.790 4.244e+05 20.247 0.0000 1e+00 0.0000 4.2070 31.17 0e+00 2.6740 2.2390 -3.501 0.0288 1.141e-01 4.845e-08 3.871e-06 9.654e-01 6.768e-07 4.227 9.038e-01 2.138e-01 9.597e-03 2.763e-04
LPAL13_000014000 LPAL13_000014000 hypothetical protein protein coding LPAL13_SCAF000119 655 942 + forward Not Assigned 288.0 287 4.081 0.0000 4.066 0.0000 3.4080 0.0504 2.3010 -0.9661 1.686 1.4648 3.268 6.862 0.0000 0.0001 129.20 0.5882 6.937 0e+00 16.85 4.074 11.5358 194.496 135.477 1.427e+04 12.824 0.0000 1e+00 0.0000 2.2120 37.32 0e+00 1.0250 2.6200 -2.833 0.0111 5.709e-02 3.426e-09 2.680e-07 9.374e-01 8.178e-05 4.060 7.553e-01 1.860e-01 3.683e-03 4.070e-05
LPAL13_000026500 LPAL13_000026500 hypothetical protein protein coding LPAL13_SCAF000301 144 494 - reverse Not Assigned 351.0 350 3.991 0.0002 3.931 0.0008 1.8810 0.3831 -0.0450 -2.3340 6.192 2.0999 2.289 3.222 0.0033 0.0179 47.30 0.8778 4.546 0e+00 18.72 4.227 3.0706 57.668 40.056 1.815e+03 8.614 0.0000 0e+00 0.0000 0.8996 16.58 0e+00 -0.9806 1.2610 -4.595 0.2121 5.126e-01 1.558e-04 8.214e-04 0.000e+00 1.797e-02 3.084 3.055e-01 9.906e-02 7.072e-02 1.499e-02
LPAL13_220019500 LPAL13_220019500 hypothetical protein protein coding LpaL13_22 578260 578538 + forward 22 279.0 278 3.728 0.0000 3.718 0.0000 3.0580 0.1006 3.4430 0.4137 2.469 0.5415 3.029 7.310 0.0000 0.0000 293.40 0.5593 6.666 0e+00 14.01 3.808 32.1428 450.373 315.460 8.620e+04 12.863 0.0000 1e+00 0.0000 3.3810 35.24 0e+00 2.2920 2.2240 -3.505 0.0298 1.381e-01 1.324e-08 8.333e-07 9.374e-01 5.624e-06 3.660 5.323e-01 1.454e-01 9.943e-03 2.966e-04
## Show the first 20 rows of the deseq "downs" table for the
## sensitive_vs_resistant contrast.
knitr::kable(head(sus_sig$deseq$downs$sensitive_vs_resistant, n = 20))
gid annotgeneproduct annotgenetype chromosome start end strand annotgeneentrezid annotgenename annotstrand annotchromosome annotcdslength length deseq_logfc deseq_adjp edger_logfc edger_adjp limma_logfc limma_adjp basic_nummed basic_denmed basic_numvar basic_denvar basic_logfc basic_t basic_p basic_adjp deseq_basemean deseq_lfcse deseq_stat deseq_p ebseq_fc ebseq_logfc ebseq_c1mean ebseq_c2mean ebseq_mean ebseq_var ebseq_postfc ebseq_ppee ebseq_ppde ebseq_adjp edger_logcpm edger_lr edger_p limma_ave limma_t limma_b limma_p limma_adjp_ihw deseq_adjp_ihw edger_adjp_ihw ebseq_adjp_ihw basic_adjp_ihw lfc_meta lfc_var lfc_varbymed p_meta p_var
LPAL13_000033300 LPAL13_000033300 hypothetical protein, conserved protein coding LPAL13_SCAF000463 551 811 + forward Not Assigned 261.0 260 -4.504 0.0070 -4.456 0.0091 -5.418 0.0008 -3.3890 3.4650 11.9880 0.0692 -6.854 -9.018 0e+00 0.0000 145.00 1.3510 -3.334 0.0009 0.1473 -2.763 334.10 49.211 141.11 2.626e+04 0.1572 0.0000 0.0000 0.0000 2.3450 10.580 0.0011 -0.8790 -4.596 1.8400 0.0000 8.912e-04 7.024e-03 9.134e-03 0.000e+00 2.773e-06 -4.793 0.000e+00 0.000e+00 6.730e-04 3.382e-07
LPAL13_350063000 LPAL13_350063000 hypothetical protein protein coding LpaL13_35 1964328 1964543 - reverse 35 216.0 215 -2.512 0.0000 -2.508 0.0000 -3.193 0.0000 -2.1690 1.1700 1.8625 0.2316 -3.339 -9.984 0e+00 0.0000 22.70 0.5013 -5.011 0.0000 0.1525 -2.713 57.00 8.684 24.27 6.867e+02 0.1773 0.0000 1.0000 0.0000 -0.3296 24.650 0.0000 -1.4050 -5.953 4.9380 0.0000 5.034e-05 2.800e-05 3.518e-05 9.654e-01 3.167e-07 -2.765 4.934e-03 -1.784e-03 4.554e-07 8.173e-14
LPAL13_000038400 LPAL13_000038400 expression-site associated gene (esag3), putative protein coding LPAL13_SCAF000573 101 1360 + forward Not Assigned 1260.0 1259 -2.423 0.0004 -2.436 0.0003 -3.096 0.0009 4.8300 8.2430 3.4691 0.0311 -3.413 -8.320 0e+00 0.0000 4056.00 0.5625 -4.308 0.0000 0.1984 -2.333 9497.20 1884.665 4340.32 1.809e+07 0.2036 0.0041 0.9959 0.0041 7.1510 19.200 0.0000 5.9840 -4.556 2.2780 0.0000 8.078e-04 3.599e-04 2.776e-04 1.000e+00 5.556e-06 -2.668 7.259e-04 -2.720e-04 1.785e-05 4.702e-11
LPAL13_140019300 LPAL13_140019300 bt1 family, putative protein coding LpaL13_14 530784 531350 + forward 14 567.0 566 -2.404 0.0000 -2.412 0.0000 -2.304 0.0001 4.7300 7.1060 0.5266 1.2139 -2.377 -6.210 0e+00 0.0007 2043.00 0.4233 -5.680 0.0000 0.1739 -2.524 5153.16 896.019 2269.29 6.616e+06 0.1790 0.0000 1.0000 0.0000 6.1610 36.940 0.0000 5.4500 -5.675 6.2410 0.0000 6.916e-05 2.037e-06 2.981e-07 9.721e-01 6.951e-04 -2.426 1.344e-01 -5.541e-02 1.382e-07 5.141e-14
LPAL13_310039200 LPAL13_310039200 hypothetical protein protein coding LpaL13_31 1301745 1301972 - reverse 31 228.0 227 -2.365 0.0000 -2.374 0.0000 -2.221 0.0009 1.2770 3.6780 1.5657 0.1132 -2.401 -8.193 0e+00 0.0000 206.20 0.4475 -5.286 0.0000 0.2907 -1.782 388.76 113.020 201.97 3.130e+04 0.3039 0.6977 0.3023 0.6977 2.8640 30.930 0.0000 2.0860 -4.529 2.3810 0.0000 1.047e-03 1.525e-05 3.270e-06 3.606e-01 2.773e-06 -2.358 1.602e-01 -6.795e-02 9.304e-06 2.555e-10
LPAL13_000012000 LPAL13_000012000 hypothetical protein protein coding LPAL13_SCAF000080 710 1159 - reverse Not Assigned 450.0 449 -2.230 0.0045 -2.239 0.0024 -2.507 0.0229 0.5477 3.9890 7.6848 0.1885 -3.441 -5.548 0e+00 0.0004 233.40 0.6365 -3.504 0.0005 0.2564 -1.964 497.60 127.578 246.94 5.850e+04 0.2678 0.2261 0.7739 0.2261 3.0330 13.970 0.0002 1.6770 -3.036 -1.8890 0.0035 2.624e-02 4.497e-03 3.182e-03 7.867e-01 3.726e-04 -2.325 4.070e-02 -1.750e-02 1.385e-03 3.406e-06
LPAL13_310031000 LPAL13_310031000 hypothetical protein, conserved protein coding LpaL13_31 1075172 1075459 - reverse 31 288.0 287 -2.130 0.0001 -2.318 0.0000 -2.622 0.0005 -1.6880 0.9482 3.1681 0.5642 -2.637 -5.791 0e+00 0.0001 28.86 0.4397 -4.843 0.0000 0.3251 -1.621 57.85 18.798 31.40 1.200e+03 0.3516 0.6385 0.3615 0.6385 0.0980 27.610 0.0000 -1.0560 -4.824 2.2340 0.0000 6.313e-04 5.455e-05 1.116e-05 3.844e-01 1.199e-04 -2.347 2.712e-02 -1.155e-02 3.690e-06 2.689e-11
LPAL13_140019100 LPAL13_140019100 bt1 family, putative protein coding LpaL13_14 525164 525514 + forward 14 351.0 350 -1.931 0.0000 -1.938 0.0000 -1.988 0.0000 3.8960 6.0140 0.4121 0.6077 -2.118 -7.470 0e+00 0.0001 942.10 0.3337 -5.788 0.0000 0.2279 -2.133 2123.48 484.016 1012.87 8.904e+05 0.2321 0.0000 1.0000 0.0000 5.0450 40.060 0.0000 4.6010 -6.309 8.6430 0.0000 2.849e-05 1.111e-06 1.031e-07 9.669e-01 8.680e-05 -1.980 1.012e-01 -5.111e-02 1.384e-08 3.207e-16
LPAL13_000012100 LPAL13_000012100 hypothetical protein protein coding LPAL13_SCAF000080 1637 1894 - reverse Not Assigned 258.0 257 -1.915 0.0334 -1.922 0.0324 -2.822 0.0053 -1.9250 1.1810 6.1476 0.7484 -3.106 -5.123 0e+00 0.0005 34.08 0.7101 -2.697 0.0070 0.3103 -1.688 70.75 21.945 37.69 2.259e+03 0.3428 0.1419 0.8581 0.1419 0.2875 7.434 0.0064 -1.2630 -3.757 -0.4155 0.0004 5.953e-03 3.345e-02 3.191e-02 8.560e-01 5.444e-04 -2.232 2.045e-02 -9.161e-03 4.594e-03 1.338e-05
LPAL13_340039600 LPAL13_340039600 hypothetical protein protein coding LpaL13_34 1247554 1247757 - reverse 34 204.0 203 -1.888 0.0022 -1.898 0.0012 -2.235 0.0084 1.4450 4.2360 3.3827 0.0499 -2.791 -6.848 0e+00 0.0000 245.70 0.5037 -3.749 0.0002 0.2426 -2.043 569.47 138.142 277.28 5.427e+04 0.2480 0.0000 1.0000 0.0000 3.0980 15.690 0.0001 2.1550 -3.532 -0.6309 0.0008 9.586e-03 2.198e-03 1.165e-03 9.654e-01 4.562e-05 -2.034 1.216e-02 -5.979e-03 3.468e-04 1.487e-07
LPAL13_310031300 LPAL13_310031300 hypothetical protein, conserved protein coding LpaL13_31 1084772 1085059 - reverse 31 288.0 287 -1.740 0.0189 -1.745 0.0185 -2.897 0.0034 -1.0220 1.9180 4.2414 0.5013 -2.940 -5.855 0e+00 0.0001 64.63 0.5905 -2.947 0.0032 0.3268 -1.613 114.26 37.337 62.15 3.847e+03 0.3505 0.3354 0.6646 0.3354 1.2040 8.861 0.0029 -0.2008 -3.946 0.2801 0.0002 3.931e-03 2.683e-02 2.372e-02 6.860e-01 1.203e-04 -2.098 1.102e-01 -5.252e-02 2.111e-03 2.743e-06
LPAL13_050005000 LPAL13_050005000 hypothetical protein protein coding LpaL13_05 3394 3612 - reverse 5 219.0 218 -1.729 0.0368 -1.738 0.0286 -2.611 0.0027 0.1637 2.6120 2.4370 0.1404 -2.448 -6.788 0e+00 0.0000 96.84 0.6522 -2.650 0.0080 0.3127 -1.677 180.74 56.508 96.58 6.519e+03 0.3232 0.0055 0.9945 0.0055 1.7580 7.761 0.0053 0.4879 -4.068 0.8488 0.0001 3.099e-03 3.679e-02 3.807e-02 9.791e-01 3.283e-05 -1.980 2.139e-02 -1.080e-02 4.505e-03 1.613e-05
LPAL13_340039700 LPAL13_340039700 snare domain containing protein, putative protein coding LpaL13_34 1248192 1248947 - reverse 34 756.0 755 -1.557 0.0001 -1.566 0.0000 -1.742 0.0001 4.6900 6.6660 0.7805 0.0554 -1.977 -9.565 0e+00 0.0000 1492.00 0.3243 -4.802 0.0000 0.3017 -1.729 3050.36 920.141 1607.31 1.346e+06 0.3058 0.0001 0.9999 0.0001 5.7080 27.180 0.0000 5.3190 -5.454 5.4250 0.0000 1.000e-04 6.443e-05 1.300e-05 9.506e-01 5.982e-07 -1.665 1.815e-02 -1.090e-02 8.951e-07 4.816e-13
LPAL13_040007800 LPAL13_040007800 hypothetical protein, conserved protein coding LpaL13_04 77524 78306 + forward 4 783.0 782 -1.472 0.0000 -1.481 0.0000 -1.327 0.0001 6.4970 7.7730 0.1173 0.4170 -1.275 -5.865 1e-04 0.0014 4344.00 0.2563 -5.745 0.0000 0.3607 -1.471 7519.30 2712.403 4263.02 9.689e+06 0.3659 0.0002 0.9998 0.0002 7.2500 41.510 0.0000 7.0160 -5.559 5.7890 0.0000 8.891e-05 1.403e-06 8.301e-08 1.000e+00 1.713e-03 -1.475 9.708e-02 -6.583e-02 2.102e-07 1.267e-13
LPAL13_170015400 LPAL13_170015400 hypothetical protein, conserved protein coding LpaL13_17 395975 396307 + forward 17 333.0 332 -1.471 0.0001 -1.480 0.0001 -1.588 0.0024 1.3290 3.1710 1.0588 0.0350 -1.842 -7.933 0e+00 0.0000 162.10 0.3162 -4.652 0.0000 0.3712 -1.430 264.81 98.292 152.01 1.093e+04 0.3763 0.0000 1.0000 0.0000 2.5070 21.670 0.0000 2.1060 -4.114 1.0820 0.0001 2.769e-03 1.792e-04 1.151e-04 9.374e-01 5.982e-06 -1.579 1.517e-02 -9.610e-03 4.128e-05 4.334e-09
LPAL13_350073400 LPAL13_350073400 hypothetical protein protein coding LpaL13_35 2342701 2342883 + forward 35 183.0 182 -1.422 0.0237 -1.428 0.0286 -2.011 0.0031 -0.0868 1.8030 1.0264 0.9772 -1.890 -4.936 1e-04 0.0015 51.13 0.4994 -2.847 0.0044 0.3131 -1.675 124.94 39.114 66.80 8.632e+03 0.3400 0.0019 0.9981 0.0019 0.7997 7.760 0.0053 -0.0785 -3.994 0.5267 0.0002 3.520e-03 3.385e-02 2.831e-02 1.000e+00 1.541e-03 -1.627 5.880e-04 -3.615e-04 3.309e-03 7.577e-06
LPAL13_340016200 LPAL13_340016200 nadh-dependent fumarate reductase, putative protein coding LpaL13_34 396984 398009 + forward 34 1026.0 1025 -1.392 0.0000 -1.399 0.0000 -1.468 0.0000 4.5730 5.9380 0.2396 0.2268 -1.365 -7.391 0e+00 0.0000 1000.00 0.2302 -6.048 0.0000 0.3960 -1.337 1890.93 748.759 1117.20 5.033e+05 0.3998 0.0000 1.0000 0.0000 5.1310 39.150 0.0000 4.8980 -6.903 10.8900 0.0000 8.385e-06 5.056e-07 1.808e-07 9.489e-01 3.350e-05 -1.498 1.307e-02 -8.722e-03 1.710e-09 2.123e-18
LPAL13_320038700 LPAL13_320038700 hypothetical protein, conserved protein coding LpaL13_32 1175024 1175257 + forward 32 234.0 233 -1.389 0.0000 -1.398 0.0000 -1.403 0.0002 2.5630 3.9230 0.4803 0.1093 -1.361 -7.402 0e+00 0.0000 277.00 0.2560 -5.425 0.0000 0.4313 -1.213 459.38 198.144 282.41 2.456e+04 0.4357 0.0073 0.9927 0.0073 3.2820 30.670 0.0000 3.0340 -5.189 4.6000 0.0000 2.364e-04 7.194e-06 3.557e-06 9.634e-01 5.822e-06 -1.425 2.173e-02 -1.526e-02 8.675e-07 2.033e-12
LPAL13_230021300 LPAL13_230021300 hypothetical protein, conserved protein coding LpaL13_23 513359 513691 + forward 23 333.0 332 -1.364 0.0046 -1.367 0.0030 -1.623 0.0228 0.9943 2.5460 0.5807 0.3004 -1.551 -6.459 0e+00 0.0001 143.40 0.3898 -3.498 0.0005 0.4141 -1.272 173.67 71.917 104.74 5.745e+03 0.4215 0.0000 1.0000 0.0000 2.3520 13.340 0.0003 1.7190 -3.040 -1.8510 0.0035 2.610e-02 4.558e-03 3.927e-03 9.374e-01 4.499e-05 -1.455 8.533e-05 -5.863e-05 1.402e-03 3.238e-06
LPAL13_140019200 LPAL13_140019200 inositol-3-phosphate synthase protein coding LpaL13_14 527711 529291 + INO1 forward 14 1581.0 1580 -1.356 0.0001 -1.365 0.0000 -1.426 0.0001 8.8430 10.4600 0.1893 0.4665 -1.619 -6.861 0e+00 0.0004 20630.00 0.2895 -4.684 0.0000 0.3429 -1.544 40652.04 13937.657 22555.20 2.313e+08 0.3457 0.0000 1.0000 0.0000 9.4970 32.430 0.0000 9.2670 -5.524 5.7440 0.0000 9.605e-05 9.872e-05 1.943e-06 1.000e+00 3.611e-04 -1.383 8.392e-03 -6.066e-03 1.178e-06 2.124e-12
## Pull the 'deseq_ma_plots' plot stored for the sensitive_vs_resistant
## contrast out of sus_table and hand it to pp() with the png filename to use.
sus_ma <- sus_table[["plots"]][["sensitive_vs_resistant"]][["deseq_ma_plots"]][["plot"]]
pp(file = "images/sus_ma.png", image = sus_ma)

## Disabled experiment: pass the same plot to ggplt().
## test <- ggplt(sus_ma)

5.4 Ontology searches

Now let us look for ontology categories which are increased in the 2.3 samples followed by the 2.2 samples.

## goseq search using the first 'ups' table of the DESeq2 significant genes:
## gene categories more represented in the 2.3 group.
zy_go_up <- sm(
  simple_goseq(
    sig_genes = zy_sig[["deseq"]][["ups"]][[1]],
    length_db = lp_lengths,
    go_db = lp_go))

## The same search using the first 'downs' table: gene categories more
## represented in the 2.2 group.
zy_go_down <- sm(
  simple_goseq(
    sig_genes = zy_sig[["deseq"]][["downs"]][[1]],
    length_db = lp_lengths,
    go_db = lp_go))

5.4.1 A couple plots from the differential expression

5.4.1.1 Number of genes in agreement among DE methods, 2.3 more than 2.2

One of the tasks performed by ‘combined_de_tables()’ above is to examine the agreement among DESeq2, limma, and edgeR. The following Venn diagrams show that agreement for the set of genes observed with |log2 fold-change| >= 1 (i.e. at least a 2-fold change) and adjusted p-value <= 0.05.

## Print the 'up_noweight' entry of the 'p_lfc1' Venn set for the first contrast.
zy_table[["venns"]][[1]][["p_lfc1"]][["up_noweight"]]

5.4.1.2 Number of genes in agreement among DE methods, 2.2 more than 2.3

## Print the 'down_noweight' entry of the 'p_lfc1' Venn set for the first contrast.
zy_table[["venns"]][[1]][["p_lfc1"]][["down_noweight"]]

5.4.1.3 goseq ontology plots of groups of genes, 2.3 more than 2.2

## Print the 'bpp_plot_over' p-value plot from the goseq result for the genes
## increased in 2.3 (presumably the biological process ontology -- confirm
## against simple_goseq()).
zy_go_up$pvalue_plots$bpp_plot_over

5.4.1.4 goseq ontology plots of groups of genes, 2.2 more than 2.3

## Print the 'bpp_plot_over' p-value plot from the goseq result for the genes
## increased in 2.2 (presumably the biological process ontology -- confirm
## against simple_goseq()).
zy_go_down$pvalue_plots$bpp_plot_over

5.5 Look for agreement between sensitivity and zymodemes

Remind myself, the data structures are (zy|sus)_(de|table|sig).

## Full result tables of the zymodeme and the sensitivity contrasts.
zy_df <- zy_table[["data"]][["z23_vs_z22"]]
sus_df <- sus_table[["data"]][["sensitive_vs_resistant"]]

## Join the two tables on gene ID; the zymodeme columns receive the suffix
## .x and the sensitivity columns the suffix .y.
both_df <- merge(zy_df, sus_df, by = "row.names")
## Keep only the two DESeq2 log fold-change columns, named by contrast and
## indexed by gene ID.
plot_df <- both_df[, c("deseq_logfc.x", "deseq_logfc.y")]
dimnames(plot_df) <- list(
  both_df[["Row.names"]],
  c("z23_vs_z22", "sensitive_vs_resistant"))

## Scatter plot of one contrast against the other, written out via pp().
compare <- plot_linear_scatter(plot_df)
## Warning in plot_multihistogram(df): NAs introduced by coercion
pp(image = compare$scatter, file = "images/compare_sus_zy.png")

## Print the correlation test stored alongside the scatter plot.
compare$cor
## 
##  Pearson's product-moment correlation
## 
## data:  df[, 1] and df[, 2]
## t = -174, df = 8542, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.8881 -0.8788
## sample estimates:
##     cor 
## -0.8836

5.6 Zymodeme enzyme gene IDs

Najib read me an email listing off the gene names associated with the zymodeme classification. I took those names and cross referenced them against the Leishmania panamensis gene annotations and found the following:

They are:

  1. ALAT: LPAL13_120010900 – alanine aminotransferase
  2. ASAT: LPAL13_340013000 – aspartate aminotransferase
  3. G6PD: LPAL13_000054100 – glucose-6-phosphate 1-dehydrogenase
  4. NH: LPAL13_140006100, LPAL13_180018500 – inosine-guanine nucleoside hydrolase
  5. MPI: LPAL13_320022300 (maybe) – mannose phosphate isomerase (I chose phosphomannose isomerase)

Given these 6 gene IDs (NH has two gene IDs associated with it), I can do some looking for specific differences among the various samples.

5.6.1 Expression levels of zymodeme genes

The following creates a colorspace (red to green) heatmap showing the observed expression of these genes in every sample.

## Gene IDs of the zymodeme enzymes and, in the same order, the enzyme
## abbreviations used to label them.  Both vectors end with the entry "other".
my_genes <- c("LPAL13_120010900", "LPAL13_340013000", "LPAL13_000054100",
              "LPAL13_140006100", "LPAL13_180018500", "LPAL13_320022300",
              "other")
my_names <- c("ALAT", "ASAT", "G6PD", "NHv1", "NHv2", "MPI", "other")

## Keep only the rows of the normalized data whose IDs are in my_genes.
zymo_expt <- exclude_genes_expt(zy_norm, ids = my_genes, method = "keep")
## Before removal, there were 8544 genes, now there are 6.
## There are 29 samples which kept less than 90 percent counts.
## TMRC20001 TMRC20065 TMRC20005 TMRC20066 TMRC20039 TMRC20037 TMRC20038 TMRC20067 
##    0.1309    0.1246    0.1318    0.1057    0.1298    0.1099    0.1127    0.1162 
## TMRC20068 TMRC20041 TMRC20015 TMRC20009 TMRC20010 TMRC20016 TMRC20011 TMRC20012 
##    0.1152    0.1178    0.1145    0.1134    0.1097    0.1058    0.1100    0.1204 
## TMRC20013 TMRC20017 TMRC20014 TMRC20018 TMRC20021 TMRC20022 TMRC20053 TMRC20052 
##    0.1203    0.1062    0.1088    0.1144    0.1060    0.1304    0.1180    0.1103 
## TMRC20064 TMRC20051 TMRC20050 TMRC20062 TMRC20054 
##    0.1137    0.1279    0.1151    0.1282    0.1275
## Label every row by looking its gene ID up in my_genes.  Passing my_names
## as-is relies on the subset having the same order and length as my_genes,
## which is not guaranteed: my_names has 7 entries but only 6 genes are kept.
zymo_heatmap <- plot_sample_heatmap(
  zymo_expt,
  row_label = my_names[match(rownames(exprs(zymo_expt)), my_genes)])
zymo_heatmap

5.7 Empirically observed Zymodeme genes from differential expression analysis

In contrast, the following plots take the set of genes which are shared among all differential expression methods (|lfc| >= 1.0 and adjp <= 0.05) and use them to make categories of genes which are increased in 2.3 or 2.2.

## Collect the significant genes shared among the differential expression
## methods in zy_table; this also rewrites excel/intersect_significant.xlsx.
shared_zymo <- intersect_significant(zy_table)
## Deleting the file excel/intersect_significant.xlsx before writing the tables.
## The 'all' table of the first 'ups' contrast and its gene IDs.
up_shared <- shared_zymo[["ups"]][[1]][["data"]][["all"]]
rownames(up_shared)
##  [1] "LPAL13_000033300" "LPAL13_000012000" "LPAL13_310031300" "LPAL13_000038400"
##  [5] "LPAL13_000038500" "LPAL13_000012100" "LPAL13_340039600" "LPAL13_050005000"
##  [9] "LPAL13_310031000" "LPAL13_310039200" "LPAL13_210015500" "LPAL13_350063000"
## [13] "LPAL13_140019300" "LPAL13_270034100" "LPAL13_340039700" "LPAL13_180013900"
## [17] "LPAL13_170015400" "LPAL13_350013200" "LPAL13_140019100" "LPAL13_330021800"
## [21] "LPAL13_240009700" "LPAL13_140019200" "LPAL13_330021900" "LPAL13_250025700"
## [25] "LPAL13_320038700" "LPAL13_210005000" "LPAL13_000052700" "LPAL13_350073200"
## [29] "LPAL13_230011400" "LPAL13_310028500" "LPAL13_230011200" "LPAL13_310032500"
## [33] "LPAL13_230011500" "LPAL13_040007800" "LPAL13_300031600" "LPAL13_230011300"
## [37] "LPAL13_000010600" "LPAL13_110015700" "LPAL13_230011600"
## Keep only those genes in the normalized data.
upshared_expt <- exclude_genes_expt(zy_norm, ids = rownames(up_shared), method = "keep")
## Before removal, there were 8544 genes, now there are 39.
## There are 29 samples which kept less than 90 percent counts.
## TMRC20001 TMRC20065 TMRC20005 TMRC20066 TMRC20039 TMRC20037 TMRC20038 TMRC20067 
##    0.3545    0.4356    0.1178    0.4012    0.1655    0.4374    0.5572    0.3396 
## TMRC20068 TMRC20041 TMRC20015 TMRC20009 TMRC20010 TMRC20016 TMRC20011 TMRC20012 
##    0.3829    0.1645    0.4265    0.1429    0.4065    0.3056    0.1531    0.1272 
## TMRC20013 TMRC20017 TMRC20014 TMRC20018 TMRC20021 TMRC20022 TMRC20053 TMRC20052 
##    0.3744    0.1659    0.1624    0.3562    0.3943    0.1420    0.1856    0.4531 
## TMRC20064 TMRC20051 TMRC20050 TMRC20062 TMRC20054 
##    0.4197    0.6309    0.1624    0.6503    0.5515

We can plot a quick heatmap to get a sense of the differences observed between the genes which are different between the two zymodemes.

5.7.1 Heatmap of zymodeme gene expression increased in 2.3 vs. 2.2

## Take the row labels from the subset which is actually plotted rather than
## from the differential expression table, whose row order need not match it.
## The SNP heatmap elsewhere in this document labels its rows the same way.
high_23_heatmap <- plot_sample_heatmap(
  upshared_expt,
  row_label = rownames(exprs(upshared_expt)))
high_23_heatmap

5.7.2 Heatmap of zymodeme gene expression increased in 2.2 vs. 2.3

## The 'all' table of the first 'downs' contrast shared among the methods,
## followed by the subset of the normalized data containing only those genes.
down_shared <- shared_zymo[["downs"]][[1]][["data"]][["all"]]
downshared_expt <- exclude_genes_expt(zy_norm, ids = rownames(down_shared), method = "keep")
## Before removal, there were 8544 genes, now there are 68.
## There are 29 samples which kept less than 90 percent counts.
## TMRC20001 TMRC20065 TMRC20005 TMRC20066 TMRC20039 TMRC20037 TMRC20038 TMRC20067 
##    0.1901    0.1825    0.6478    0.2105    0.6455    0.1951    0.1864    0.2296 
## TMRC20068 TMRC20041 TMRC20015 TMRC20009 TMRC20010 TMRC20016 TMRC20011 TMRC20012 
##    0.1877    0.6764    0.1808    0.6169    0.1643    0.2087    0.5629    0.5490 
## TMRC20013 TMRC20017 TMRC20014 TMRC20018 TMRC20021 TMRC20022 TMRC20053 TMRC20052 
##    0.1598    0.6316    0.6411    0.1508    0.1488    0.6381    0.5372    0.1660 
## TMRC20064 TMRC20051 TMRC20050 TMRC20062 TMRC20054 
##    0.1832    0.1744    0.5981    0.1720    0.1829
## Take the row labels from the subset which is actually plotted rather than
## from the differential expression table, whose row order need not match it.
## The SNP heatmap elsewhere in this document labels its rows the same way.
high_22_heatmap <- plot_sample_heatmap(
  downshared_expt,
  row_label = rownames(exprs(downshared_expt)))
high_22_heatmap

6 SNP profiles

Now I will combine our previous samples and our new samples in the hopes of finding variant positions which help elucidate currently unknown aspects of either group via their clustering to known samples from the other group. In other words, we do not know the zymodeme annotations for the old samples nor the strain identities (or the shortcut ‘chronic vs. self-healing’) for the new samples. I hope to make educated guesses given the variant profiles. There are some differences in how the previous and current data sets were analyzed (though I have since redone the old samples so it should be trivial to remove those differences now).

I added our 2016 data to a specific TMRC2 sample sheet, dated 20191203. Thus I will load the data here. That previous data was mapped using tophat, so I will also need to make some changes to the gene names to accommodate the two mappings.

## Load the 2016 samples from the 20191203 sample sheet, taking the count
## files from its 'tophat2file' column.
old_expt <- sm(create_expt("sample_sheets/tmrc2_samples_20191203.xlsx",
                           file_column = "tophat2file"))

## Bring the gene IDs of the two experiments into the same form.  For the new
## data: strip a leading 'exon_' and a trailing '.E1'.
tt <- lp_expt[["expressionset"]]
rownames(tt) <- gsub(pattern = "^exon_", replacement = "", x = rownames(tt))
rownames(tt) <- gsub(pattern = "\\.E1$", replacement = "", x = rownames(tt))
lp_expt$expressionset <- tt

## For the old data the trailing suffix to strip is '.1' rather than '.E1'
## (presumably a difference between the two mapping/counting runs).
tt <- old_expt$expressionset
rownames(tt) <- gsub(pattern = "^exon_", replacement = "", x = rownames(tt))
rownames(tt) <- gsub(pattern = "\\.1$", replacement = "", x = rownames(tt))
old_expt$expressionset <- tt
## tt was only a scratch copy.
rm(tt)

6.1 Create the SNP expressionset

One other important caveat: we have a group of new samples which have not yet been run through the variant search pipeline, so I need to remove them from consideration. Though it looks like they finished overnight…

## The next line drops the samples which are missing the SNP pipeline.
lp_snp <- subset_expt(lp_expt, subset="!is.na(pData(lp_expt)[['bcftable']])")
## subset_expt(): There were 63, now there are 46 samples.
## Count the variants using the files named in the 'bcftable' metadata column;
## the old samples additionally pass snp_column = 2.
new_snps <- sm(count_expt_snps(lp_snp, annot_column = "bcftable"))
old_snps <- sm(count_expt_snps(old_expt, annot_column = "bcftable", snp_column = 2))

## Merge the new and old variant data, then normalize the combination
## (filter, cpm conversion, log2 transformation).
both_snps <- combine_expts(new_snps, old_snps)
both_norm <- sm(normalize_expt(both_snps, transform = "log2", convert = "cpm", filter = TRUE))

## strains <- both_norm[["design"]][["strain"]]
## A copy of both_norm whose conditions are taken from the 'strain' factor.
both_strain <- set_expt_conditions(both_norm, fact = "strain")

The data structure ‘both_norm’ now contains our 2016 data along with the newer data collected since 2019.

6.2 Plot of SNP profiles for zymodemes

The following plot shows the SNP profiles of all samples (old and new) where the colors at the top show either the 2.2 strains (orange), 2.3 strains (green), the previous samples (purple), or the various lab strains (pink etc).

## Heatmap produced by plot_disheat() from the normalized variant data of all
## samples, handed to pp() with height = 12 and width = 12.
old_new_variant_heatmap <- plot_disheat(both_norm)
pp(file = "images/raw_snp_disheat.png", image = old_new_variant_heatmap,
   height = 12, width = 12)

The function get_snp_sets() takes the provided metadata factor (in this case ‘condition’) and looks for variants which are exclusive to each element in it. In this case, this is looking for differences between 2.2 and 2.3, as well as the set shared among them.

## Group the variants of the old and new samples by the 'condition' factor.
snp_sets <- get_snp_sets(both_snps, factor = "condition")
## The factor z2.3 has 14 rows.
## The factor z2.2 has 11 rows.
## The factor unknown has 21 rows.
## The factor sh has 13 rows.
## The factor chr has 14 rows.
## The factor inf has 6 rows.
## Iterating over 727 elements.
## Combine the two expression experiments as well; the result is passed along
## with the variant data to the two gene-level helpers below.
both_expt <- combine_expts(lp_expt, old_expt)

## Cross reference the variant sets against the genes.
snp_genes <- sm(snps_vs_genes(both_expt, snp_sets, expt_name_col = "chromosome"))
## I think we have some metrics here we can plot...
## Restrict the variants to the six zymodeme enzyme genes and plot them,
## labelling the rows with the row names of the subset itself.
snp_subset <- sm(snp_subset_genes(
  both_expt, both_snps,
  genes = c("LPAL13_120010900", "LPAL13_340013000", "LPAL13_000054100",
            "LPAL13_140006100", "LPAL13_180018500", "LPAL13_320022300")))
zymo_heat <- plot_sample_heatmap(snp_subset, row_label = rownames(exprs(snp_subset)))
zymo_heat

Didn’t I create a set of densities by chromosome? Oh I think they come in from get_snp_sets()

6.3 SNPs associated with clinical response in the TMRC samples

## Group the variants of the new samples by the 'clinicalresponse' factor.
clinical_sets <- get_snp_sets(new_snps, factor = "clinicalresponse")
## The factor cure has 17 rows.
## The factor failure has 15 rows.
## The factor laboratory line has only 1 row.
## The factor nd has 3 rows.
## The factor reference strain has 3 rows.
## The factor unknown has 7 rows.
## Iterating over 693 elements.
## Keep the density entries whose names contain 'LpaL' and arrange them as a
## two column table: the density and the name it was stored under.
density_vec <- clinical_sets[["density"]]
chromosome_idx <- grep("LpaL", names(density_vec))
density_df <- as.data.frame(density_vec[chromosome_idx])
names(density_df) <- "density_vec"
density_df[["chr"]] <- rownames(density_df)
## One bar per name, with the x-axis labels rotated so they remain legible.
ggplot(data = density_df, mapping = aes_string(y = "density_vec", x = "chr")) +
  ggplot2::geom_col() +
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5),
    axis.text = ggplot2::element_text(size = 10, colour = "black"))

## clinical_written <- write_variants(new_snps)

6.3.1 Cross reference these variants by gene

## Cross reference the clinical response variant sets against the genes.
clinical_genes <- sm(snps_vs_genes(lp_expt, clinical_sets, expt_name_col = "chromosome"))

## Join the per-gene summary to the gene annotations on gene ID.
snp_density <- merge(as.data.frame(clinical_genes[["summary_by_gene"]]),
                     as.data.frame(fData(lp_expt)),
                     by = "row.names")
## NOTE(review): the columns are chosen by position and then named
## name/snps/product/length -- confirm that positions 1, 2, 4 and 15 still hold
## those values whenever the annotation columns change.
snp_density <- snp_density[, c(1, 2, 4, 15)]
colnames(snp_density) <- c("name", "snps", "product", "length")
snp_density[["product"]] <- tolower(snp_density[["product"]])
snp_density[["length"]] <- as.numeric(snp_density[["length"]])
## Variants per unit of length, sorted from the densest gene downwards.
snp_density[["density"]] <- snp_density[["snps"]] / snp_density[["length"]]
snp_idx <- order(snp_density[["density"]], decreasing = TRUE)
snp_density <- snp_density[snp_idx, ]

## Drop every gene whose lower-cased product matches amastin, gp63 or
## leishmanolysin.
removers <- c("amastin", "gp63", "leishmanolysin")
for (r in removers) {
  drop_idx <- grepl(pattern = r, x = snp_density[["product"]])
  snp_density <- snp_density[!drop_idx, ]
}
## Intersect the clinical response variant sets, using the 'chromosome'
## annotation column.
clinical_snps <- snps_intersections(lp_expt, clinical_sets, chr_column = "chromosome")

## Variant positions stored under the 'failure, reference strain' and the
## 'cure' intersections.
fail_ref_snps <- as.data.frame(clinical_snps[["inters"]][["failure, reference strain"]])
cure_snps <- as.data.frame(clinical_snps[["inters"]][["cure"]])

head(fail_ref_snps)
##                                       seqnames  start    end width strand
## chr_LpaL13-10_pos_233490_ref_C_alt_G LpaL13-10 233490 233491     2      +
## chr_LpaL13-15_pos_42885_ref_A_alt_G  LpaL13-15  42885  42886     2      +
## chr_LpaL13-24_pos_163196_ref_C_alt_A LpaL13-24 163196 163197     2      +
## chr_LpaL13-31_pos_852703_ref_C_alt_A LpaL13-31 852703 852704     2      +
head(cure_snps)
##                                       seqnames  start    end width strand
## chr_LpaL13-01_pos_169299_ref_A_alt_G LpaL13-01 169299 169300     2      +
## chr_LpaL13-08_pos_184791_ref_T_alt_A LpaL13-08 184791 184792     2      +
## chr_LpaL13-10_pos_347757_ref_A_alt_C LpaL13-10 347757 347758     2      +
## chr_LpaL13-11_pos_433123_ref_C_alt_T LpaL13-11 433123 433124     2      +
## chr_LpaL13-15_pos_47170_ref_G_alt_C  LpaL13-15  47170  47171     2      +
## chr_LpaL13-16_pos_456493_ref_A_alt_G LpaL13-16 456493 456494     2      +
## Put the per-gene summaries of the two intersections side by side, indexed by
## gene ID, with the columns cure_snps and fail_snps.
annot <- fData(lp_expt)
clinical_interest <- as.data.frame(clinical_snps[["gene_summaries"]][["cure"]])
clinical_interest <- merge(clinical_interest,
                           as.data.frame(clinical_snps[["gene_summaries"]][["failure, reference strain"]]),
                           by = "row.names")
rownames(clinical_interest) <- clinical_interest[["Row.names"]]
clinical_interest[["Row.names"]] <- NULL
colnames(clinical_interest) <- c("cure_snps","fail_snps")
## Append those two columns to the gene annotations and store the result as
## the feature data of lp_expt.
## NOTE(review): merge() keeps only the IDs present in both inputs and sorts by
## them by default; confirm that no genes are dropped or reordered with respect
## to the expressionset before this is written back.
annot <- merge(annot, clinical_interest, by = "row.names")
rownames(annot) <- annot[["Row.names"]]
annot[["Row.names"]] <- NULL
fData(lp_expt$expressionset) <- annot

7 Zymodeme for new samples

The heatmap produced here should show the variants only for the zymodeme genes.

7.1 Hunt for snp clusters

I am thinking that if we find clusters of locations which are variant, that might provide some PCR testing possibilities.

## Group the variants of the new samples by the zymodeme annotation stored in
## the 'phenotypiccharacteristics' factor.
new_sets <- get_snp_sets(new_snps, factor = "phenotypiccharacteristics")
## The factor 22 has 11 rows.
## The factor 23 has 14 rows.
## The factor laboratory line has only 1 row.
## The factor notapplicable has 17 rows.
## The factor reference strain has 3 rows.
## Iterating over 693 elements.
summary(new_sets)
##               Length Class      Mode     
## medians         6    data.frame list     
## possibilities   5    -none-     character
## intersections  31    -none-     list     
## chr_data      693    -none-     list     
## set_names      32    -none-     list     
## invert_names   32    -none-     list     
## density       693    -none-     numeric
## The intersections are keyed by one digit per factor level (5 levels here):
## 10000: 2.2
## 01000: 2.3

## Number of variants stored under the 2.2 key and under the 2.3 key.
summary(new_sets[["intersections"]][["10000"]])
##    Length     Class      Mode 
##       511 character character
summary(new_sets[["intersections"]][["01000"]])
##    Length     Class      Mode 
##     49790 character character

Thus we see that there are 511 variants associated with 2.2 and 49,790 associated with 2.3.

7.1.1 A small function for searching for potential PCR primers

The following function uses the positional data to look for sequential mismatches associated with zymodeme in the hopes that there will be some regions which would provide good potential targets for a PCR-based assay.

## Find runs of closely spaced variants within one intersection of a SNP set.
##
## The variants of the chosen intersection are sorted by chromosome and
## position, and the distance from every variant to its predecessor on the same
## chromosome is computed.  A variant is 'sequential' when that distance is at
## most maximum_separation; each run of consecutive sequential variants is
## recorded at the last variant of the run.
##
## Arguments:
##   snp_sets: list containing 'intersections' (character vectors of variant
##     names of the form chr_<chromosome>_pos_<position>_ref_..., one vector
##     per intersection) and 'set_names' (the label of every intersection,
##     under the same names as 'intersections').
##   conditions: either a numeric index into 'intersections' or the label of
##     the wanted intersection as it appears in 'set_names'.  NULL chooses the
##     first intersection.
##   minimum: smallest run length to report.
##   maximum_separation: largest distance between two neighbouring variants for
##     the second one to count as sequential.
##
## Returns a data.frame with the columns 'chr' and 'pos' and the variant names
## as row names, containing the last variant of every run of at least 'minimum'
## sequential variants.  An empty intersection yields a table without rows.
## An error is raised when a label does not identify exactly one intersection.
sequential_variants <- function(snp_sets, conditions = NULL, minimum = 3, maximum_separation = 3) {
  if (is.null(conditions)) {
    conditions <- 1
  }
  intersection_sets <- snp_sets[["intersections"]]
  intersection_names <- snp_sets[["set_names"]]
  if (is.numeric(conditions)) {
    chosen_intersection <- conditions
  } else {
    intersection_idx <- which(intersection_names == conditions)
    chosen_intersection <- names(intersection_names)[intersection_idx]
    ## Fail with a readable message instead of an obscure subscript error.
    if (length(chosen_intersection) != 1) {
      stop("Expected exactly one intersection labelled '",
           paste(conditions, collapse = ", "), "' but found ",
           length(chosen_intersection), ".", call. = FALSE)
    }
  }

  possible_positions <- intersection_sets[[chosen_intersection]]
  if (length(possible_positions) == 0) {
    ## Nothing to search; return a table of the usual shape without rows.
    message("There are 0 candidate regions.")
    return(data.frame("chr" = character(0), "pos" = numeric(0),
                      stringsAsFactors = FALSE))
  }

  ## Split the variant names into chromosome and position, then sort them.
  position_table <- data.frame(row.names = possible_positions)
  pat <- "^chr_(.+)_pos_(.+)_ref_.*$"
  position_table[["chr"]] <- gsub(pattern = pat, replacement = "\\1", x = rownames(position_table))
  position_table[["pos"]] <- as.numeric(gsub(pattern = pat, replacement = "\\2", x = rownames(position_table)))
  position_idx <- order(position_table[["chr"]], position_table[["pos"]])
  position_table <- position_table[position_idx, ]

  ## Distance to the previous variant.  The first variant of every chromosome
  ## has no predecessor, so its distance is infinite; using its position instead
  ## would mark variants near a chromosome start as sequential and allow a run
  ## to continue from one chromosome onto the next.
  variant_count <- nrow(position_table)
  chromosomes <- position_table[["chr"]]
  starts_chromosome <- c(TRUE, chromosomes[-1] != chromosomes[-variant_count])
  distances <- c(Inf, diff(position_table[["pos"]]))
  distances[starts_chromosome] <- Inf
  position_table[["dist"]] <- distances

  sequentials <- !is.na(distances) & distances <= maximum_separation
  message("There are ", sum(sequentials), " candidate regions.")

  ## Run length encoding provides the length of every run of sequential
  ## variants; the cumulative lengths provide the row on which each run ends.
  runs <- rle(sequentials)
  run_ends <- cumsum(runs[["lengths"]])
  sequential_runs <- which(runs[["values"]])
  last_sequential <- numeric(variant_count)
  last_sequential[run_ends[sequential_runs]] <- runs[["lengths"]][sequential_runs]
  position_table[["last_sequential"]] <- last_sequential
  message("The maximum sequential set is: ", max(last_sequential), ".")

  wanted_idx <- last_sequential >= minimum
  position_table[wanted_idx, c("chr", "pos")]
}

## Search the 2.2 variants with the default thresholds (minimum = 3,
## maximum_separation = 3).
zymo22_sequentials <- sequential_variants(new_sets, conditions = "22")
## There are 75 candidate regions.
## The maximum sequential set is: 3.
dim(zymo22_sequentials)
## [1] 7 2
## 7 candidate regions for zymodeme 2.2 -- thus I am betting that the reference strain is a 2.2
## Search the 2.3 variants.  Note that this call lowers minimum to 1, so the
## counts of the two searches were not obtained with the same threshold.
zymo23_sequentials <- sequential_variants(new_sets, conditions = "23",
                                          minimum = 1, maximum_separation = 3)
## There are 587 candidate regions.
## The maximum sequential set is: 1.
dim(zymo23_sequentials)
## [1] 587   2
## In contrast, there are lots (587) of interesting regions for 2.3!

7.2 Make a heatmap describing the clustering of variants

We can cross reference the variants against the zymodeme status and plot a heatmap of the results and hopefully see how they separate.

## Cross reference the zymodeme variant sets against the genes; this replaces
## the snp_genes created earlier from the combined old and new samples.
snp_genes <- sm(snps_vs_genes(lp_expt, new_sets, expt_name_col = "chromosome"))
## Normalize the variants of the new samples (filter, cpm, quantile, transform).
## NOTE(review): transform = TRUE is passed here while the other call in this
## document passes "log2" -- confirm which transformation TRUE selects.
new_zymo_norm  <- normalize_expt(new_snps, filter = TRUE, convert = "cpm", norm = "quant", transform = TRUE)
## Removing 0 low-count genes (558524 remaining).
## transform_counts: Found 11978651 values equal to 0, adding 1 to the matrix.
## Use the zymodeme annotation as the experimental condition.
new_zymo_norm <- set_expt_conditions(new_zymo_norm, fact = "phenotypiccharacteristics")

## Heatmap from plot_disheat(); this replaces the zymo_heat created earlier.
zymo_heat <- plot_disheat(new_zymo_norm)
zymo_heat[["plot"]]

7.2.1 Annotated heatmap of variants

Now let us try to make a heatmap which includes some of the annotation data.

## Work on a copy of the experimental design of the combined variant data and
## label the samples without a strain as "unknown".
des <- both_norm[["design"]]
undef_idx <- is.na(des[["strain"]])
des[undef_idx, "strain"] <- "unknown"

##hmcols <- colorRampPalette(c("yellow","black","darkblue"))(256)
## The matrix to plot: hpgl_cor() applied to the normalized variant data.
correlations <- hpgl_cor(exprs(both_norm))

## Convert the two annotation columns to character; missing zymodemes are
## located before the conversion and set to "unknown" after it.
zymo_missing_idx <- is.na(des[["phenotypiccharacteristics"]])
des[["phenotypiccharacteristics"]] <- as.character(des[["phenotypiccharacteristics"]])
des[["clinicalcategorical"]] <- as.character(des[["clinicalcategorical"]])
des[zymo_missing_idx, "phenotypiccharacteristics"] <- "unknown"
## Dendrogram options: cluster with hclust and draw with line width 2.
mydendro <- list(
  "clustfun" = hclust,
  "lwd" = 2.0)
## Column annotations: zymodeme and clinical outcome of every sample.
col_data <- as.data.frame(des[, c("phenotypiccharacteristics", "clinicalcategorical")])

## Missing outcomes are located before the columns are renamed and are then
## set to "undefined".  The row annotation is the strain of every sample.
unknown_clinical <- is.na(col_data[["clinicalcategorical"]])
row_data <- as.data.frame(des[, c("strain")])
colnames(col_data) <- c("zymodeme", "outcome")
col_data[unknown_clinical, "outcome"] <- "undefined"

colnames(row_data) <- c("strain")
## Options for annHeatmap2(): annotation tables, cluster colouring
## (presumably 'cuth' is the height at which the dendrogram is cut -- confirm),
## label layout, and a 240 step palette from dark blue to beige.
myannot <- list(
  "Col" = list("data" = col_data),
  "Row" = list("data" = row_data))
myclust <- list("cuth" = 1.0,
                "col" = BrewerClusterCol)
mylabs <- list(
  "Row" = list("nrow" = 4),
  "Col" = list("nrow" = 4))
hmcols <- colorRampPalette(c("darkblue", "beige"))(240)
map1 <- annHeatmap2(
  correlations,
  dendrogram = mydendro,
  annotation = myannot,
  cluster = myclust,
  labels = mylabs,
  ## The following controls if the picture is symmetric
  scale = "none",
  col = hmcols)
## Warning in breakColors(breaks, col): more colors than classes: ignoring 29 last
## colors
## Hand the heatmap to pp() with height = 20 and width = 20.
pp(file = "images/dendro_heatmap.png", image = map1, height = 20, width = 20)
## annotated Heatmap
## 
## Rows: 'dendrogram' with 2 branches and 79 members total, at height 5.258 
##   11  annotation variable(s)
## Cols: 'dendrogram' with 2 branches and 79 members total, at height 5.258 
##   10  annotation variable(s)

Print the larger heatmap so that all the labels appear. Keep in mind that as we get more samples, this image needs to continue getting bigger.

big heatmap

## Keep the samples with a known zymodeme (condition z2.2 or z2.3)...
pheno <- subset_expt(lp_expt, subset = "condition=='z2.2'|condition=='z2.3'")
## subset_expt(): There were 63, now there are 29 samples.
## ...and, of those, the ones which have an entry in the 'bcftable' column.
pheno <- subset_expt(pheno, subset="!is.na(pData(pheno)[['bcftable']])")
## subset_expt(): There were 29, now there are 25 samples.
pheno_snps <- sm(count_expt_snps(pheno, annot_column = "bcftable"))

## Number of samples per zymodeme, used below as the denominator of the
## per-zymodeme proportions.
xref_prop <- table(pheno_snps[["conditions"]])
pheno_snps$conditions
##  [1] "z2.3" "z2.2" "z2.2" "z2.3" "z2.3" "z2.2" "z2.3" "z2.2" "z2.3" "z2.3"
## [11] "z2.2" "z2.2" "z2.3" "z2.2" "z2.2" "z2.3" "z2.3" "z2.2" "z2.2" "z2.3"
## [21] "z2.3" "z2.3" "z2.2" "z2.3" "z2.3"
## Mark every variant/sample cell with a value above 5, then compute for each
## zymodeme the proportion of its samples in which each variant is marked.
idx_tbl <- exprs(pheno_snps) > 5
new_tbl <- data.frame(row.names = rownames(exprs(pheno_snps)))
for (n in names(xref_prop)) {
  new_tbl[[n]] <- 0
  idx_cols <- which(pheno_snps[["conditions"]] == n)
  ## drop = FALSE keeps a matrix even when a condition has a single sample;
  ## without it the subset collapses to a vector and rowSums() fails.
  prop_col <- rowSums(idx_tbl[, idx_cols, drop = FALSE]) / xref_prop[n]
  new_tbl[n] <- prop_col
}
## Only keep the variants whose names contain 'LpaL13'.
keepers <- grepl(x = rownames(new_tbl), pattern = "LpaL13")
new_tbl <- new_tbl[keepers, ]
## Turn the proportions into scores where small means 'observed in most samples
## of the zymodeme'.  The 0.001 offset keeps the score above zero when every
## sample carries the variant; scores above 1 are capped at 1.
new_tbl[["strong22"]] <- 1.001 - new_tbl[["z2.2"]]
new_tbl[["strong23"]] <- 1.001 - new_tbl[["z2.3"]]
s22_na <- new_tbl[["strong22"]] > 1
new_tbl[s22_na, "strong22"] <- 1
s23_na <- new_tbl[["strong23"]] > 1
new_tbl[s23_na, "strong23"] <- 1

## Arrange the table as variant name, chromosome and position (both parsed
## from the variant name) followed by the two score columns.
new_tbl[["SNP"]] <- rownames(new_tbl)
new_tbl[["Chromosome"]] <- gsub(x = new_tbl[["SNP"]], pattern = "chr_(.*)_pos_.*", replacement = "\\1")
new_tbl[["Position"]] <- gsub(x = new_tbl[["SNP"]], pattern = ".*_pos_(\\d+)_.*", replacement = "\\1")
new_tbl <- new_tbl[, c("SNP", "Chromosome", "Position", "strong22", "strong23")]


library(CMplot)
## Much appreciate for using CMplot.
## Full description, Bug report, Suggestion and the latest codes:
## https://github.com/YinLiLin/CMplot
## Default CMplot run over the SNP/Chromosome/Position/score table with a bin
## size of 100000; the plots are written to the working directory (see the
## paths reported below).
CMplot(new_tbl, bin.size = 100000)
##  SNP-Density Plotting.
##  Circular-Manhattan Plotting strong22.
##  Circular-Manhattan Plotting strong23.
##  Rectangular-Manhattan Plotting strong22.
##  Rectangular-Manhattan Plotting strong23.
##  QQ Plotting strong22.
##  QQ Plotting strong23.
##  Plots are stored in: /mnt/cbcb/fs01_abelew/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_tmrc_2019
## Second run: plot.type = "m" with multracks = TRUE, threshold lines at 0.01
## and 0.05, written as 300 dpi jpg files.
## NOTE(review): the strong22/strong23 columns are the scores derived from
## per-zymodeme proportions, not p-values from a test -- keep that in mind when
## reading the thresholds and the QQ plots.
CMplot(new_tbl, plot.type="m", multracks=TRUE, threshold = c(0.01, 0.05),
       threshold.lwd=c(1,1), threshold.col=c("black","grey"),
       amplify=TRUE, bin.size=1e5,
       chr.den.col=c("darkgreen", "yellow", "red"),
       signal.col=c("red", "green", "blue"),
       signal.cex=1, file="jpg", memo="", dpi=300, file.output=TRUE, verbose=TRUE)
##  Multracks-Manhattan Plotting strong22.
##  Multracks-Manhattan Plotting strong23.
##  Multraits-Rectangular Plotting...(finished 78%)
 Multraits-Rectangular Plotting...(finished 79%)
 Multraits-Rectangular Plotting...(finished 80%)
 Multraits-Rectangular Plotting...(finished 81%)
 Multraits-Rectangular Plotting...(finished 82%)
 Multraits-Rectangular Plotting...(finished 83%)
 Multraits-Rectangular Plotting...(finished 84%)
 Multraits-Rectangular Plotting...(finished 85%)
 Multraits-Rectangular Plotting...(finished 86%)
 Multraits-Rectangular Plotting...(finished 87%)
 Multraits-Rectangular Plotting...(finished 88%)
 Multraits-Rectangular Plotting...(finished 89%)
 Multraits-Rectangular Plotting...(finished 90%)
 Multraits-Rectangular Plotting...(finished 91%)
 Multraits-Rectangular Plotting...(finished 92%)
 Multraits-Rectangular Plotting...(finished 93%)
 Multraits-Rectangular Plotting...(finished 94%)
 Multraits-Rectangular Plotting...(finished 95%)
 Multraits-Rectangular Plotting...(finished 96%)
 Multraits-Rectangular Plotting...(finished 97%)
 Multraits-Rectangular Plotting...(finished 98%)
 Multraits-Rectangular Plotting...(finished 99%)
 Multraits-Rectangular Plotting...(finished 100%)
##  Plots are stored in: /mnt/cbcb/fs01_abelew/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_tmrc_2019

SNP Density Circular Manhattan Rectangular Manhattan QQ

## Unless a variable named skip_load exists and is TRUE: print the session
## information, report the hpgltools commit in use, and save to 'savefile'.
if (!isTRUE(get0("skip_load"))) {
  pander::pander(sessionInfo())
  message(paste0("This is hpgltools commit: ", get_git_commit()))
  message(paste0("Saving to ", savefile))
  tmp <- sm(saveme(filename = savefile))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 72947fcc6afe09da22d71967059edd84e3063341
## This is hpgltools commit: Tue Jun 1 15:57:56 2021 -0400: 72947fcc6afe09da22d71967059edd84e3063341
## Saving to tmrc2_02sample_estimation_v202106.rda.xz
## NOTE(review): this reloads the file which was just written, and does so even
## when skip_load is TRUE -- confirm that both are intended.
tmp <- loadme(filename = savefile)
