1 Annotation version: 20200320

The following section loads the microbesonline and genbank annotations for Mycobacterium tuberculosis.

## Looks like it is taxon ID 83332
mtb_annotations <- as.data.frame(load_microbesonline_annotations(species="Mycobacterium tuberculosis H37Rv"))
## Found 1 entry.
##                                Genome         Phylum Paper     Loaded Complete
## 2178 Mycobacterium tuberculosis H37Rv Actinobacteria   yes 2007-05-08      yes
##      #Chr. #Plasmids #Genes tax_id
## 2178     1         0   4047  83332
## The species being downloaded is: Mycobacterium tuberculosis H37Rv
## Downloading: http://www.microbesonline.org/cgi-bin/genomeInfo.cgi?tId=83332;export=tab
knitr::kable(head(mtb_annotations))
locusId accession GI scaffoldId start stop strand sysName name desc COG COGFun COGDesc TIGRFam TIGRRoles GO EC ECDesc
31772 NP_214515.1 15607143 7022 1 1524 + Rv0001 dnaA chromosomal replication initiation protein (NCBI) COG593 L ATPase involved in DNA replication initiation TIGR00362 chromosomal replication initiator protein DnaA [dnaA] DNA metabolism:DNA replication, recombination, and repair GO:0006270,GO:0006275,GO:0003688,GO:0017111,GO:0005524 NA NA
31773 NP_214516.1 15607144 7022 2052 3260 + Rv0002 dnaN DNA polymerase III subunit beta (NCBI) COG592 L DNA polymerase sliding clamp subunit (PCNA homolog) TIGR00663 DNA polymerase III, beta subunit [dnaN] DNA metabolism:DNA replication, recombination, and repair GO:0006260,GO:0003677,GO:0003893,GO:0008408,GO:0016449,GO:0019984,GO:0003889,GO:0003894,GO:0015999,GO:0016450,GO:0003890,GO:0003895,GO:0016000,GO:0016451,GO:0003891,GO:0016448,GO:0016452 2.7.7.7 DNA-directed DNA polymerase.
31774 NP_214517.1 15607145 7022 3280 4437 + Rv0003 recF recombination protein F (NCBI) COG1195 L Recombinational DNA repair ATPase (RecF pathway) TIGR00611 DNA replication and repair protein RecF [recF] DNA metabolism:DNA replication, recombination, and repair GO:0006281,GO:0005694,GO:0003697,GO:0005524 NA NA
31775 NP_214518.1 15607146 7022 4434 4997 + Rv0004 Rv0004 hypothetical protein (NCBI) COG5512 R Zn-ribbon-containing, possibly RNA-binding protein and truncated derivatives NA NA NA NA NA
31776 NP_214519.1 15607147 7022 5123 7267 + Rv0005 gyrB DNA topoisomerase IV subunit B (NCBI) COG187 L Type IIA topoisomerase (DNA gyrase/topo II, topoisomerase IV), B subunit TIGR01059 DNA gyrase, B subunit [gyrB] DNA metabolism:DNA replication, recombination, and repair GO:0006304,GO:0006265,GO:0005694,GO:0003918,GO:0005524 5.99.1.3 DNA topoisomerase (ATP-hydrolyzing).
31777 NP_214520.1 15607148 7022 7302 9818 + Rv0006 gyrA DNA gyrase subunit A (NCBI) COG188 L Type IIA topoisomerase (DNA gyrase/topo II, topoisomerase IV), A subunit TIGR01063 DNA gyrase, A subunit [gyrA] DNA metabolism:DNA replication, recombination, and repair GO:0006265,GO:0006268,GO:0005694,GO:0003918,GO:0005509,GO:0005524 5.99.1.3 DNA topoisomerase (ATP-hydrolyzing).
mtb_go <- load_microbesonline_go(species="Mycobacterium tuberculosis H37Rv", id_column="sysName")
## Found 1 entry.
##                                Genome         Phylum Paper     Loaded Complete
## 2178 Mycobacterium tuberculosis H37Rv Actinobacteria   yes 2007-05-08      yes
##      #Chr. #Plasmids #Genes tax_id
## 2178     1         0   4047  83332
## The species being downloaded is: Mycobacterium tuberculosis H37Rv and is being downloaded as 83332.tab.
colnames(mtb_go) <- c("ID", "GO")

mtb_gff <- load_gff_annotations(gff="~/scratch/libraries/genome/mtuberculosis_h37rv.gff")
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=TRUE)
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=FALSE)
## Had a successful gff import with rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=FALSE)
## Returning a df with 15 columns and 4008 rows.
rownames(mtb_gff) <- mtb_gff[["locus_tag"]]

mtb_annot <- merge(mtb_gff, mtb_annotations, by.x="row.names", by.y="sysName", all.x=TRUE)
rownames(mtb_annot) <- mtb_annot[["Row.names"]]
mtb_annot[["Row.names"]] <- NULL

2 Local Samples Estimation

This is the group of samples which were collected by the Briken lab and previously analyzed by members of the El-Sayed lab.

local_expt <- sm(create_expt(metadata="sample_sheets/local_samples.xlsx",
                             file_column="mtbfile",
                             gene_info=mtb_annot))

2.1 Just take the samples of immediate interest

Najib and Volker would like to focus for the moment on only hpgl IDs: 130-132, 330-332.

few_expt <- subset_expt(local_expt, subset="condition=='in_vitro'|condition=='thp1'")
## Using a subset expression.
## There were 44, now there are 6 samples.
few_filt <- normalize_expt(few_expt, filter=TRUE)
## This function will replace the expt$expressionset slot with:
## cbcb(data)
## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
##  some metrics are easier to see when the data is log2 transformed, but
##  EdgeR/DESeq do not accept transformed data.
## Leaving the data unconverted.  It is often advisable to cpm/rpkm
##  the data to normalize for sampling differences, keep in mind though that rpkm
##  has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
##  will try to detect this).
## Leaving the data unnormalized.  This is necessary for DESeq, but
##  EdgeR/limma might benefit from normalization.  Good choices include quantile,
##  size-factor, tmm, etc.
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 0 low-count genes (4008 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
few_write <- write_expt(few_expt, excel="excel/few_written.xlsx")
## Writing the first sheet, containing a legend and some summary data.
## Writing the raw reads.
## Graphing the raw reads.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning in doTryCatch(return(expr), name, parentenv, handler): display list
## redraw incomplete

## Warning in doTryCatch(return(expr), name, parentenv, handler): display list
## redraw incomplete
## varpart sees only 1 batch, adjusting the model accordingly.
## Attempting mixed linear model with: ~  (1|condition)
## Fitting the expressionset to the model, this is slow.
## Dividing work into 100 chunks...
## 
## Total:18 s
## Placing factor: condition at the beginning of the model.
## Writing the normalized reads.
## Error in density.default(x, adjust = adj) : 'x' contains missing values
## Error in density.default(x, adjust = adj) : 'x' contains missing values
## Graphing the normalized reads.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning in doTryCatch(return(expr), name, parentenv, handler): display list
## redraw incomplete

## Warning in doTryCatch(return(expr), name, parentenv, handler): display list
## redraw incomplete
## varpart sees only 1 batch, adjusting the model accordingly.
## Attempting mixed linear model with: ~  (1|condition)
## Fitting the expressionset to the model, this is slow.
## Dividing work into 100 chunks...
## 
## Total:24 s
## Placing factor: condition at the beginning of the model.
## Writing the median reads by factor.
few_de <- all_pairwise(few_filt)
## Plotting a PCA before surrogates/batch inclusion.
## Not putting labels on the plot.
## Using limma's removeBatchEffect to visualize with(out) batch inclusion.
## Error in density.default(x, adjust = adj) : 'x' contains missing values
## Error in density.default(x, adjust = adj) : 'x' contains missing values
## Not putting labels on the plot.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
few_table <- combine_de_tables(few_de, excel="excel/few_samples_table.xlsx")
## Deleting the file excel/few_samples_table.xlsx before writing the tables.
## Writing a legend of columns.
## Printing a pca plot before/after surrogates/batch estimation.
## Working on table 1/1: thp1_vs_in_vitro
## Adding venn plots for thp1_vs_in_vitro.
## Limma expression coefficients for thp1_vs_in_vitro; R^2: 0.612; equation: y = 0.671x + 1.72
## Deseq expression coefficients for thp1_vs_in_vitro; R^2: 0.641; equation: y = 0.831x + 0.579
## Edger expression coefficients for thp1_vs_in_vitro; R^2: 0.581; equation: y = 1.15x - 1.67
## Writing summary information, compare_plot is: TRUE.
## Performing save of excel/few_samples_table.xlsx.
few_sig <- extract_significant_genes(few_table,
                                     excel="excel/few_samples_sig.xlsx")
## Writing a legend of columns.
## Printing significant genes to the file: excel/few_samples_sig.xlsx
## 1/1: Creating significant table up_limma_thp1_vs_in_vitro
## Printing significant genes to the file: excel/few_samples_sig.xlsx
## 1/1: Creating significant table up_edger_thp1_vs_in_vitro
## Printing significant genes to the file: excel/few_samples_sig.xlsx
## 1/1: Creating significant table up_deseq_thp1_vs_in_vitro
## Printing significant genes to the file: excel/few_samples_sig.xlsx
## 1/1: Creating significant table up_ebseq_thp1_vs_in_vitro
## Printing significant genes to the file: excel/few_samples_sig.xlsx
## 1/1: Creating significant table up_basic_thp1_vs_in_vitro
## Adding significance bar plots.
mtb_lengths <- mtb_annot[, c("seqnames", "width")]
mtb_lengths[["seqnames"]] <- rownames(mtb_lengths)
colnames(mtb_lengths) <- c("ID", "length")

up_genes <- few_sig[["deseq"]][["ups"]][[1]]
up_go <- simple_goseq(sig_genes=up_genes, go_db=mtb_go, length_db=mtb_lengths,
                      excel="excel/up_goseq.xlsx")
## Using the row names of your table.
## Found 210 genes out of 390 from the sig_genes in the go_db.
## Found 390 genes out of 390 from the sig_genes in the length_db.
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## simple_goseq(): Calculating q-values
## simple_goseq(): Filling godata with terms, this is slow.
## Testing that go categories are defined.
## Removing undefined categories.
## Gathering synonyms.
## Gathering category definitions.
## simple_goseq(): Making pvalue plots for the ontologies.
## Writing data to: excel/up_goseq.xlsx.
down_genes <- few_sig[["deseq"]][["downs"]][[1]]
down <- rownames(down_genes)
down_go <- simple_goseq(sig_genes=down, go_db=mtb_go, length_db=mtb_lengths)
## Found 319 genes out of 457 from the sig_genes in the go_db.
## Found 457 genes out of 457 from the sig_genes in the length_db.
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## simple_goseq(): Calculating q-values
## simple_goseq(): Filling godata with terms, this is slow.
## Testing that go categories are defined.
## Removing undefined categories.
## Gathering synonyms.
## Gathering category definitions.
## simple_goseq(): Making pvalue plots for the ontologies.
few_write[["norm_pca"]]

few_table[["plots"]][[1]][["deseq_ma_plots"]][["plot"]]

up_go$pvalue_plots[[1]]

down_go$pvalue_plots[[1]]

3 Exogenous Samples Estimation

In this context, exogenous just means samples which were not created here. E.g. samples I downloaded from SRA.

exo_annot <- mtb_annot
rownames(exo_annot) <- exo_annot[["db_xref"]]
exo_expt <- create_expt(metadata="sample_sheets/exo_samples.xlsx",
                           file_column="mtbfile",
                           gene_info=exo_annot)
## Reading the sample metadata.
## Did not find the batch column in the sample sheet.
## Filling it in as undefined.
## The sample definitions comprises: 19 rows(samples) and 12 columns(metadata fields).
## Reading count tables.
## Reading count files with read.table().
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/SRR9214125/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows.
## preprocessing/SRR9214126/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214127/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214128/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214129/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214130/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214131/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214132/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214133/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214134/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214135/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214136/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214137/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214138/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214139/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214140/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214141/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214142/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214143/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## Finished reading count data.
## Matched 4008 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 4008 rows and 19 columns.

3.1 Create some plots of the new data

The following blocks will plot and print a few common metrics of the new data.

exo_plots <- sm(graph_metrics(exo_expt))
exo_norm <- sm(normalize_expt(exo_expt, transform="log2", norm="quant", filter=TRUE))
exon_plots <- sm(graph_metrics(exo_norm))

3.2 Now show some plots!

exo_plots$libsize

exo_plots$density

tn <- normalize_expt(exo_expt, transform="log2")
## This function will replace the expt$expressionset slot with:
## log2(data)
## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Filter is false, this should likely be set to something, good
##  choices include cbcb, kofa, pofa (anything but FALSE).  If you want this to
##  stay FALSE, keep in mind that if other normalizations are performed, then the
##  resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data unconverted.  It is often advisable to cpm/rpkm
##  the data to normalize for sampling differences, keep in mind though that rpkm
##  has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
##  will try to detect this).
## Leaving the data unnormalized.  This is necessary for DESeq, but
##  EdgeR/limma might benefit from normalization.  Good choices include quantile,
##  size-factor, tmm, etc.
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: not doing count filtering.
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: transforming the data with log2.
## transform_counts: Found 1020 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
tnp <- plot_density(tn)

tmp_ggstats <- ggstatsplot::ggbetweenstats(
                                data=tnp$table, x=sample, y=counts,
                                notch=TRUE, mean.ci=TRUE, k=3,
                                pairwise.comparisons=FALSE)
## Registered S3 methods overwritten by 'broom.mixed':
##   method         from 
##   augment.lme    broom
##   augment.merMod broom
##   glance.lme     broom
##   glance.merMod  broom
##   glance.stanreg broom
##   tidy.brmsfit   broom
##   tidy.gamlss    broom
##   tidy.lme       broom
##   tidy.merMod    broom
##   tidy.rjags     broom
##   tidy.stanfit   broom
##   tidy.stanreg   broom
## Registered S3 methods overwritten by 'car':
##   method                          from
##   influence.merMod                lme4
##   cooks.distance.influence.merMod lme4
##   dfbeta.influence.merMod         lme4
##   dfbetas.influence.merMod        lme4
## Registered S3 method overwritten by 'DescTools':
##   method         from 
##   reorder.factor gdata
## Warning:  Number of labels is greater than default palette color count.
##  Try using another color `palette` (and/or `package`).
## 
tmp_ggstats

tmp_ggstats <- ggstatsplot::grouped_ggbetweenstats(
                                grouping.var=condition,
                                data=tnp$table, x=sample, y=counts,
                                notch=TRUE, mean.ci=TRUE, k=3,
                                pairwise.comparisons=FALSE)
tmp_ggstats

## Quick PCA
exon_pc_expt <- normalize_expt(exo_expt, transform="log2", filter=TRUE, convert="cpm",
                               norm="quant", batch="svaseq")
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(quant(cbcb(data)))))
## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Warning in normalize_expt(exo_expt, transform = "log2", filter = TRUE, convert =
## "cpm", : Quantile normalization and sva do not always play well together.
## Step 1: performing count filter with option: cbcb
## Removing 20 low-count genes (3988 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 11 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with svaseq.
## Note to self:  If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 75205 entries are x>1: 99%.
## batch_counts: Before batch/surrogate estimation, 11 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 556 entries are 0<x<1: 1%.
## The be method chose 2 surrogate variable(s).
## Attempting svaseq estimation with 2 surrogates.
## There are 13 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
pp(file="images/exo_pc.png", image=plot_pca(exon_pc_expt)$plot)
## Writing the image to: images/exo_pc.png and calling dev.off().

pander::pander(sessionInfo())

R version 3.6.1 (2019-07-05)

Platform: x86_64-pc-linux-gnu (64-bit)

locale: LC_CTYPE=en_US.UTF-8, LC_NUMERIC=C, LC_TIME=en_US.UTF-8, LC_COLLATE=en_US.UTF-8, LC_MONETARY=en_US.UTF-8, LC_MESSAGES=en_US.UTF-8, LC_PAPER=en_US.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.UTF-8 and LC_IDENTIFICATION=C

attached base packages: grid, parallel, stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: Rgraphviz(v.2.30.0), graph(v.1.64.0), SparseM(v.1.78), topGO(v.2.38.1), ruv(v.0.9.7.1), BiocParallel(v.1.20.1), variancePartition(v.1.16.1), hpgltools(v.1.0), Biobase(v.2.46.0) and BiocGenerics(v.0.32.0)

loaded via a namespace (and not attached): PMCMRplus(v.1.4.4), rcompanion(v.2.3.25), TMB(v.1.7.16), pander(v.0.6.3), graphlayouts(v.0.7.0), pbapply(v.1.4-2), lattice(v.0.20-41), haven(v.2.3.1), vctrs(v.0.3.1), expm(v.0.999-4), usethis(v.1.6.1), mgcv(v.1.8-31), gmp(v.0.6-0), blob(v.1.2.1), survival(v.3.2-3), RBGL(v.1.62.1), later(v.1.1.0.1), nloptr(v.1.2.2.1), DBI(v.1.1.0), R.utils(v.2.9.2), rappdirs(v.0.3.1), selectr(v.0.4-2), jpeg(v.0.1-8.1), zlibbioc(v.1.32.0), MatrixModels(v.0.4-1), htmlwidgets(v.1.5.1), mvtnorm(v.1.1-1), inline(v.0.3.15), broomExtra(v.4.0.2), pairwiseComparisons(v.1.0.0), pbkrtest(v.0.4-8.6), ez(v.4.4-0), DEoptimR(v.1.0-8), tidygraph(v.1.2.0), Rcpp(v.1.0.4.6), readr(v.1.3.1), KernSmooth(v.2.23-17), promises(v.1.1.1), kSamples(v.1.2-9), gdata(v.2.18.0), DelayedArray(v.0.12.3), limma(v.3.42.2), pkgload(v.1.1.0), statsExpressions(v.0.4.1), RcppParallel(v.5.0.1), Hmisc(v.4.4-0), geneLenDataBase(v.1.22.0), fs(v.1.4.1), fastmatch(v.1.1-0), fastGHQuad(v.1.0), digest(v.0.6.25), png(v.0.1-7), cowplot(v.1.0.0), DOSE(v.3.12.0), ggraph(v.2.0.3), pkgconfig(v.2.0.3), GO.db(v.3.10.0), iterators(v.1.0.12), minqa(v.1.2.4), dunn.test(v.1.3.5), clusterProfiler(v.3.14.3), SummarizedExperiment(v.1.16.1), rstan(v.2.19.3), modeltools(v.0.2-23), nortest(v.1.0-4), xfun(v.0.14), zoo(v.1.8-8), tidyselect(v.1.1.0), performance(v.0.4.7), reshape2(v.1.4.4), purrr(v.0.3.4), viridisLite(v.0.3.0), rtracklayer(v.1.46.0), pkgbuild(v.1.0.8), rlang(v.0.4.6), groupedstats(v.1.0.1), Rmpfr(v.0.8-1), glue(v.1.4.1), RColorBrewer(v.1.1-2), matrixStats(v.0.56.0), ggcorrplot(v.0.1.3), stringr(v.1.4.0), multcompView(v.0.1-8), europepmc(v.0.4), sva(v.3.34.0), ggsignif(v.0.6.0), bayestestR(v.0.6.0), DESeq2(v.1.26.0), LaplacesDemon(v.16.1.4), labeling(v.0.3), httpuv(v.1.5.4), preprocessCore(v.1.48.0), TH.data(v.1.0-10), corpcor(v.1.6.9), DO.db(v.2.9), annotate(v.1.64.0), jsonlite(v.1.6.1), XVector(v.0.26.0), bit(v.1.1-15.2), mime(v.0.9), gridExtra(v.2.3), gplots(v.3.0.3), Rsamtools(v.2.2.3), stringi(v.1.4.6), insight(v.0.8.5), BWStest(v.0.2.2), processx(v.3.4.2), logspline(v.2.1.16), quadprog(v.1.5-8), bitops(v.1.0-6), cli(v.2.0.2), RSQLite(v.2.2.0), tidyr(v.1.1.0), libcoin(v.1.0-5), broom.mixed(v.0.2.6), data.table(v.1.12.8), correlation(v.0.2.1), rstudioapi(v.0.11), GenomicAlignments(v.1.22.1), nlme(v.3.1-148), qvalue(v.2.18.0), fastcluster(v.1.1.25), locfit(v.1.5-9.4), miniUI(v.0.1.1.1), gridGraphics(v.0.5-0), R.oo(v.1.23.0), dbplyr(v.1.4.4), sessioninfo(v.1.1.1), readxl(v.1.3.1), lifecycle(v.0.2.0), munsell(v.0.5.0), cellranger(v.1.1.0), R.methodsS3(v.1.8.0), caTools(v.1.18.0), codetools(v.0.2-16), coda(v.0.19-3), GenomeInfoDb(v.1.22.1), lmtest(v.0.9-37), htmlTable(v.1.13.3), triebeard(v.0.3.0), rstantools(v.2.0.0), tidyBF(v.0.2.0), xtable(v.1.8-4), BiocManager(v.1.30.10), directlabels(v.2020.1.31), StanHeaders(v.2.21.0-5), abind(v.1.4-5), farver(v.2.0.3), ggExtra(v.0.9), askpass(v.1.1), SuppDists(v.1.1-9.5), GenomicRanges(v.1.38.0), tibble(v.3.0.1), cluster(v.2.1.0), zeallot(v.0.1.0), Matrix(v.1.2-18), ellipsis(v.0.3.1), prettyunits(v.1.1.1), ggridges(v.0.5.2), goseq(v.1.38.0), prismatic(v.0.2.0), colorRamps(v.2.3), EMT(v.1.1), igraph(v.1.2.5), fgsea(v.1.12.0), remotes(v.2.1.1), paletteer(v.1.2.0), Vennerable(v.3.1.0.9000), parameters(v.0.8.0), testthat(v.2.3.2), mc2d(v.0.1-18), htmltools(v.0.4.0), BiocFileCache(v.1.10.2), yaml(v.2.2.1), ipmisc(v.3.0.0), GenomicFeatures(v.1.38.2), loo(v.2.2.0), XML(v.3.99-0.3), foreign(v.0.8-76), withr(v.2.2.0), BayesFactor(v.0.9.12-4.2), bit64(v.0.9-7), BiasedUrn(v.1.07), effectsize(v.0.3.1), multcomp(v.1.4-13), foreach(v.1.5.0), robustbase(v.0.93-6), Biostrings(v.2.54.0), GOSemSim(v.2.12.1), devtools(v.2.3.0), memoise(v.1.1.0), evaluate(v.0.14), forcats(v.0.5.0), rio(v.0.5.16), geneplotter(v.1.64.0), callr(v.3.4.3), ps(v.1.3.3), curl(v.4.3), metafor(v.2.4-0), fansi(v.0.4.1), highr(v.0.8), urltools(v.1.7.3), acepack(v.1.4.1), edgeR(v.3.28.1), checkmate(v.2.0.0), desc(v.1.2.0), ggplot2(v.3.3.1), openxlsx(v.4.1.5), ggrepel(v.0.8.2), rprojroot(v.1.3-2), tools(v.3.6.1), sandwich(v.2.5-1), magrittr(v.1.5), RCurl(v.1.98-1.2), car(v.3.0-8), ggplotify(v.0.0.5), xml2(v.1.3.2), httr(v.1.4.1), assertthat(v.0.2.1), rmarkdown(v.2.2), boot(v.1.3-25), R6(v.2.4.1), nnet(v.7.3-14), progress(v.1.2.2), genefilter(v.1.68.0), Brobdingnag(v.1.2-6), gtools(v.3.8.2), statmod(v.1.4.34), coin(v.1.3-1), repr(v.1.1.0), rematch2(v.2.1.2), splines(v.3.6.1), carData(v.3.0-4), colorspace(v.1.4-1), generics(v.0.0.2), stats4(v.3.6.1), base64enc(v.0.1-3), metaBMA(v.0.6.3), pillar(v.1.4.4), tweenr(v.1.0.1), WRS2(v.1.0-0), rvcheck(v.0.1.8), GenomeInfoDbData(v.1.2.2), plyr(v.1.8.6), gtable(v.0.3.0), bdsmatrix(v.1.3-4), rvest(v.0.3.5), zip(v.2.0.4), knitr(v.1.28), latticeExtra(v.0.6-29), biomaRt(v.2.42.1), IRanges(v.2.20.2), fastmap(v.1.0.1), metaplus(v.0.7-11), doParallel(v.1.0.15), AnnotationDbi(v.1.48.0), broom(v.0.5.6), openssl(v.1.4.1), scales(v.1.1.1), backports(v.1.1.7), S4Vectors(v.0.24.4), lme4(v.1.1-23), enrichplot(v.1.6.1), hms(v.0.5.3), ggforce(v.0.3.1), Rtsne(v.0.15), dplyr(v.1.0.0), shiny(v.1.4.0.2), polyclip(v.1.10-0), numDeriv(v.2016.8-1.1), ggstatsplot(v.0.5.0), bbmle(v.1.0.23.1), DescTools(v.0.99.36), Formula(v.1.2-3), crayon(v.1.3.4), MASS(v.7.3-51.6), skimr(v.2.1.1), viridis(v.0.5.1), reshape(v.0.8.8), bridgesampling(v.1.0-0), rpart(v.4.1-15) and compiler(v.3.6.1)

message(paste0("This is hpgltools commit: ", get_git_commit()))
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 5f94c6aed834674e1567e1992ac0bdefab60ead1
## This is hpgltools commit: Tue Jun 9 09:50:10 2020 -0400: 5f94c6aed834674e1567e1992ac0bdefab60ead1
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
## Saving to 01_mtb_analyses_20200320-v20200320.rda.xz
tmp <- sm(saveme(filename=this_save))
LS0tCnRpdGxlOiAiMjAyMDAzMjAgTXRiLiBhbmFseXNlcy4iCmF1dGhvcjogImF0YiBhYmVsZXdAZ21haWwuY29tIgpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiCm91dHB1dDoKIGh0bWxfZG9jdW1lbnQ6CiAgY29kZV9kb3dubG9hZDogdHJ1ZQogIGNvZGVfZm9sZGluZzogc2hvdwogIGZpZ19jYXB0aW9uOiB0cnVlCiAgZmlnX2hlaWdodDogNwogIGZpZ193aWR0aDogNwogIGhpZ2hsaWdodDogZGVmYXVsdAogIGtlZXBfbWQ6IGZhbHNlCiAgbW9kZTogc2VsZmNvbnRhaW5lZAogIG51bWJlcl9zZWN0aW9uczogdHJ1ZQogIHNlbGZfY29udGFpbmVkOiB0cnVlCiAgdGhlbWU6IHJlYWRhYmxlCiAgdG9jOiB0cnVlCiAgdG9jX2Zsb2F0OgogICAgY29sbGFwc2VkOiBmYWxzZQogICAgc21vb3RoX3Njcm9sbDogZmFsc2UKLS0tCgo8c3R5bGU+CiAgYm9keSAubWFpbi1jb250YWluZXIgewogICAgbWF4LXdpZHRoOiAxNjAwcHg7CiAgfQo8L3N0eWxlPgoKYGBge3Igb3B0aW9ucywgaW5jbHVkZT1GQUxTRX0KbGlicmFyeShocGdsdG9vbHMpCnR0IDwtIGRldnRvb2xzOjpsb2FkX2FsbCgifi9ocGdsdG9vbHMiKQprbml0cjo6b3B0c19rbml0JHNldChwcm9ncmVzcz1UUlVFLCB2ZXJib3NlPVRSVUUsIHdpZHRoPTkwLCBlY2hvPVRSVUUpCmtuaXRyOjpvcHRzX2NodW5rJHNldChlcnJvcj1UUlVFLCBmaWcud2lkdGg9OCwgZmlnLmhlaWdodD04LCBkcGk9OTYpCm9sZF9vcHRpb25zIDwtIG9wdGlvbnMoZGlnaXRzPTQsIHN0cmluZ3NBc0ZhY3RvcnM9RkFMU0UsIGtuaXRyLmR1cGxpY2F0ZS5sYWJlbD0iYWxsb3ciKQpnZ3Bsb3QyOjp0aGVtZV9zZXQoZ2dwbG90Mjo6dGhlbWVfYncoYmFzZV9zaXplPTEwKSkKdmVyIDwtICIyMDIwMDMyMCIKcHJldmlvdXNfZmlsZSA8LSAiaW5kZXguUm1kIgoKdG1wIDwtIHRyeShzbShsb2FkbWUoZmlsZW5hbWU9cGFzdGUwKGdzdWIocGF0dGVybj0iXFwuUm1kIiwgcmVwbGFjZT0iIiwgeD1wcmV2aW91c19maWxlKSwgIi12IiwgdmVyLCAiLnJkYS54eiIpKSkpCnJtZF9maWxlIDwtICIwMV9tdGJfYW5hbHlzZXNfMjAyMDAzMjAuUm1kIgpgYGAKCiMgQW5ub3RhdGlvbiB2ZXJzaW9uOiBgciB2ZXJgCgpUaGUgZm9sbG93aW5nIHNlY3Rpb24gbG9hZHMgdGhlIG1pY3JvYmVzb25saW5lIGFuZCBnZW5iYW5rIGFubm90YXRpb25zIGZvciBNeWNvYmFjdGVyaXVtIHR1YmVyY3Vsb3Npcy4KCmBgYHtyIGFubm90YXRpb259CiMjIExvb2tzIGxpa2UgaXQgaXMgdGF4b24gSUQgODMzMzIKbXRiX2Fubm90YXRpb25zIDwtIGFzLmRhdGEuZnJhbWUobG9hZF9taWNyb2Jlc29ubGluZV9hbm5vdGF0aW9ucyhzcGVjaWVzPSJNeWNvYmFjdGVyaXVtIHR1YmVyY3Vsb3NpcyBIMzdSdiIpKQprbml0cjo6a2FibGUoaGVhZChtdGJfYW5ub3RhdGlvbnMpKQoKbXRiX2dvIDwtIGxvYWRfbWljcm9iZXNvbmxpbmVfZ28oc3BlY2llcz0iTXljb2JhY3Rlcml1bSB0dWJlcmN1bG9zaXMgSDM3UnYiLCBpZF9jb2x1bW49InN5c05hbWUiKQpjb2xuYW1lcyhtdGJfZ28pIDwtIGMoIklEIiwgIkdPIikKCm10Yl9nZmYgPC0gbG9hZF9nZmZfYW5ub3RhdGlvbnMoZ2ZmPSJ+L3NjcmF0Y2gvbGlicmFyaWVzL2dlbm9tZS9tdHViZXJjdWxvc2lzX2gzN3J2LmdmZiIpCnJvd25hbWVzKG10Yl9nZmYpIDwtIG10Yl9nZmZbWyJsb2N1c190YWciXV0KCm10Yl9hbm5vdCA8LSBtZXJnZShtdGJfZ2ZmLCBtdGJfYW5ub3RhdGlvbnMsIGJ5Lng9InJvdy5uYW1lcyIsIGJ5Lnk9InN5c05hbWUiLCBhbGwueD1UUlVFKQpyb3duYW1lcyhtdGJfYW5ub3QpIDwtIG10Yl9hbm5vdFtbIlJvdy5uYW1lcyJdXQptdGJfYW5ub3RbWyJSb3cubmFtZXMiXV0gPC0gTlVMTApgYGAKCiMgTG9jYWwgU2FtcGxlcyBFc3RpbWF0aW9uCgpUaGlzIGlzIHRoZSBncm91cCBvZiBzYW1wbGVzIHdoaWNoIHdlcmUgY29sbGVjdGVkIGJ5IHRoZSBCcmlrZW4gbGFiIGFuZApwcmV2aW91c2x5IGFuYWx5emVkIGJ5IG1lbWJlcnMgb2YgdGhlIEVsLVNheWVkIGxhYi4KCmBgYHtyIGxvY2FsX2V4cHR9CmxvY2FsX2V4cHQgPC0gc20oY3JlYXRlX2V4cHQobWV0YWRhdGE9InNhbXBsZV9zaGVldHMvbG9jYWxfc2FtcGxlcy54bHN4IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICBmaWxlX2NvbHVtbj0ibXRiZmlsZSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZ2VuZV9pbmZvPW10Yl9hbm5vdCkpCmBgYAoKIyMgSnVzdCB0YWtlIHRoZSBzYW1wbGVzIG9mIGltbWVkaWF0ZSBpbnRlcmVzdAoKTmFqaWIgYW5kIFZvbGtlciB3b3VsZCBsaWtlIHRvIGZvY3VzIGZvciB0aGUgbW9tZW50IG9uIG9ubHkgaHBnbCBJRHM6IDEzMC0xMzIsIDMzMC0zMzIuCgpgYGB7ciBhX2ZldywgZmlnLnNob3c9ImhpZGUifQpmZXdfZXhwdCA8LSBzdWJzZXRfZXhwdChsb2NhbF9leHB0LCBzdWJzZXQ9ImNvbmRpdGlvbj09J2luX3ZpdHJvJ3xjb25kaXRpb249PSd0aHAxJyIpCgpmZXdfZmlsdCA8LSBub3JtYWxpemVfZXhwdChmZXdfZXhwdCwgZmlsdGVyPVRSVUUpCmZld193cml0ZSA8LSB3cml0ZV9leHB0KGZld19leHB0LCBleGNlbD0iZXhjZWwvZmV3X3dyaXR0ZW4ueGxzeCIpCmZld19kZSA8LSBhbGxfcGFpcndpc2UoZmV3X2ZpbHQpCmZld190YWJsZSA8LSBjb21iaW5lX2RlX3RhYmxlcyhmZXdfZGUsIGV4Y2VsPSJleGNlbC9mZXdfc2FtcGxlc190YWJsZS54bHN4IikKZmV3X3NpZyA8LSBleHRyYWN0X3NpZ25pZmljYW50X2dlbmVzKGZld190YWJsZSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGV4Y2VsPSJleGNlbC9mZXdfc2FtcGxlc19zaWcueGxzeCIpCgptdGJfbGVuZ3RocyA8LSBtdGJfYW5ub3RbLCBjKCJzZXFuYW1lcyIsICJ3aWR0aCIpXQptdGJfbGVuZ3Roc1tbInNlcW5hbWVzIl1dIDwtIHJvd25hbWVzKG10Yl9sZW5ndGhzKQpjb2xuYW1lcyhtdGJfbGVuZ3RocykgPC0gYygiSUQiLCAibGVuZ3RoIikKCnVwX2dlbmVzIDwtIGZld19zaWdbWyJkZXNlcSJdXVtbInVwcyJdXVtbMV1dCnVwX2dvIDwtIHNpbXBsZV9nb3NlcShzaWdfZ2VuZXM9dXBfZ2VuZXMsIGdvX2RiPW10Yl9nbywgbGVuZ3RoX2RiPW10Yl9sZW5ndGhzLAogICAgICAgICAgICAgICAgICAgICAgZXhjZWw9ImV4Y2VsL3VwX2dvc2VxLnhsc3giKQoKZG93bl9nZW5lcyA8LSBmZXdfc2lnW1siZGVzZXEiXV1bWyJkb3ducyJdXVtbMV1dCmRvd24gPC0gcm93bmFtZXMoZG93bl9nZW5lcykKZG93bl9nbyA8LSBzaW1wbGVfZ29zZXEoc2lnX2dlbmVzPWRvd24sIGdvX2RiPW10Yl9nbywgbGVuZ3RoX2RiPW10Yl9sZW5ndGhzKQpgYGAKCmBgYHtyIGdvX3Bsb3RzfQpmZXdfd3JpdGVbWyJub3JtX3BjYSJdXQpmZXdfdGFibGVbWyJwbG90cyJdXVtbMV1dW1siZGVzZXFfbWFfcGxvdHMiXV1bWyJwbG90Il1dCgp1cF9nbyRwdmFsdWVfcGxvdHNbWzFdXQpkb3duX2dvJHB2YWx1ZV9wbG90c1tbMV1dCmBgYAoKIyBFeG9nZW5vdXMgU2FtcGxlcyBFc3RpbWF0aW9uCgpJbiB0aGlzIGNvbnRleHQsIGV4b2dlbm91cyBqdXN0IG1lYW5zIHNhbXBsZXMgd2hpY2ggd2VyZSBub3QgY3JlYXRlZCBoZXJlLgpFLmcuIHNhbXBsZXMgSSBkb3dubG9hZGVkIGZyb20gU1JBLgoKYGBge3IgY3JlYXRlX2V4cHR9CmV4b19hbm5vdCA8LSBtdGJfYW5ub3QKcm93bmFtZXMoZXhvX2Fubm90KSA8LSBleG9fYW5ub3RbWyJkYl94cmVmIl1dCmV4b19leHB0IDwtIGNyZWF0ZV9leHB0KG1ldGFkYXRhPSJzYW1wbGVfc2hlZXRzL2V4b19zYW1wbGVzLnhsc3giLAogICAgICAgICAgICAgICAgICAgICAgICAgICBmaWxlX2NvbHVtbj0ibXRiZmlsZSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgIGdlbmVfaW5mbz1leG9fYW5ub3QpCmBgYAoKIyMgQ3JlYXRlIHNvbWUgcGxvdHMgb2YgdGhlIG5ldyBkYXRhCgpUaGUgZm9sbG93aW5nIGJsb2NrcyB3aWxsIHBsb3QgYW5kIHByaW50IGEgZmV3IGNvbW1vbiBtZXRyaWNzIG9mIHRoZSBuZXcgZGF0YS4KCmBgYHtyIG5ld19kYXRhLCBmaWcuc2hvdz0iaGlkZSJ9CmV4b19wbG90cyA8LSBzbShncmFwaF9tZXRyaWNzKGV4b19leHB0KSkKZXhvX25vcm0gPC0gc20obm9ybWFsaXplX2V4cHQoZXhvX2V4cHQsIHRyYW5zZm9ybT0ibG9nMiIsIG5vcm09InF1YW50IiwgZmlsdGVyPVRSVUUpKQpleG9uX3Bsb3RzIDwtIHNtKGdyYXBoX21ldHJpY3MoZXhvX25vcm0pKQpgYGAKCiMjIE5vdyBzaG93IHNvbWUgcGxvdHMhCgpgYGB7ciBzaG93X2RhdGF9CmV4b19wbG90cyRsaWJzaXplCmV4b19wbG90cyRkZW5zaXR5Cgp0biA8LSBub3JtYWxpemVfZXhwdChleG9fZXhwdCwgdHJhbnNmb3JtPSJsb2cyIikKdG5wIDwtIHBsb3RfZGVuc2l0eSh0bikKCnRtcF9nZ3N0YXRzIDwtIGdnc3RhdHNwbG90OjpnZ2JldHdlZW5zdGF0cygKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkYXRhPXRucCR0YWJsZSwgeD1zYW1wbGUsIHk9Y291bnRzLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG5vdGNoPVRSVUUsIG1lYW4uY2k9VFJVRSwgaz0zLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHBhaXJ3aXNlLmNvbXBhcmlzb25zPUZBTFNFKQp0bXBfZ2dzdGF0cwoKdG1wX2dnc3RhdHMgPC0gZ2dzdGF0c3Bsb3Q6Omdyb3VwZWRfZ2diZXR3ZWVuc3RhdHMoCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZ3JvdXBpbmcudmFyPWNvbmRpdGlvbiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkYXRhPXRucCR0YWJsZSwgeD1zYW1wbGUsIHk9Y291bnRzLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG5vdGNoPVRSVUUsIG1lYW4uY2k9VFJVRSwgaz0zLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHBhaXJ3aXNlLmNvbXBhcmlzb25zPUZBTFNFKQp0bXBfZ2dzdGF0cwoKIyMgUXVpY2sgUENBCmV4b25fcGNfZXhwdCA8LSBub3JtYWxpemVfZXhwdChleG9fZXhwdCwgdHJhbnNmb3JtPSJsb2cyIiwgZmlsdGVyPVRSVUUsIGNvbnZlcnQ9ImNwbSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBub3JtPSJxdWFudCIsIGJhdGNoPSJzdmFzZXEiKQpwcChmaWxlPSJpbWFnZXMvZXhvX3BjLnBuZyIsIGltYWdlPXBsb3RfcGNhKGV4b25fcGNfZXhwdCkkcGxvdCkKYGBgCgpgYGB7ciBzYXZlbWV9CnBhbmRlcjo6cGFuZGVyKHNlc3Npb25JbmZvKCkpCm1lc3NhZ2UocGFzdGUwKCJUaGlzIGlzIGhwZ2x0b29scyBjb21taXQ6ICIsIGdldF9naXRfY29tbWl0KCkpKQp0aGlzX3NhdmUgPC0gcGFzdGUwKGdzdWIocGF0dGVybj0iXFwuUm1kIiwgcmVwbGFjZT0iIiwgeD1ybWRfZmlsZSksICItdiIsIHZlciwgIi5yZGEueHoiKQptZXNzYWdlKHBhc3RlMCgiU2F2aW5nIHRvICIsIHRoaXNfc2F2ZSkpCnRtcCA8LSBzbShzYXZlbWUoZmlsZW5hbWU9dGhpc19zYXZlKSkKYGBgCg==