1 Annotation version: 20200320

The following section loads the microbesonline and genbank annotations for Mycobacterium tuberculosis.

## Looks like it is taxon ID 83332
mtb_annotations <- as.data.frame(load_microbesonline_annotations(species="Mycobacterium tuberculosis H37Rv"))

## Found 1 entry.

##                                Genome         Phylum Paper     Loaded Complete
## 2178 Mycobacterium tuberculosis H37Rv Actinobacteria   yes 2007-05-08      yes
##      #Chr. #Plasmids #Genes tax_id
## 2178     1         0   4047  83332

## The species being downloaded is: Mycobacterium tuberculosis H37Rv

## Downloading: http://www.microbesonline.org/cgi-bin/genomeInfo.cgi?tId=83332;export=tab

knitr::kable(head(mtb_annotations))

locusId	accession	GI	scaffoldId	start	stop	strand	sysName	name	desc	COG	COGFun	COGDesc	TIGRFam	TIGRRoles	GO	EC	ECDesc
31772	NP_214515.1	15607143	7022	1	1524	+	Rv0001	dnaA	chromosomal replication initiation protein (NCBI)	COG593	L	ATPase involved in DNA replication initiation	TIGR00362 chromosomal replication initiator protein DnaA [dnaA]	DNA metabolism:DNA replication, recombination, and repair	GO:0006270,GO:0006275,GO:0003688,GO:0017111,GO:0005524	NA	NA
31773	NP_214516.1	15607144	7022	2052	3260	+	Rv0002	dnaN	DNA polymerase III subunit beta (NCBI)	COG592	L	DNA polymerase sliding clamp subunit (PCNA homolog)	TIGR00663 DNA polymerase III, beta subunit [dnaN]	DNA metabolism:DNA replication, recombination, and repair	GO:0006260,GO:0003677,GO:0003893,GO:0008408,GO:0016449,GO:0019984,GO:0003889,GO:0003894,GO:0015999,GO:0016450,GO:0003890,GO:0003895,GO:0016000,GO:0016451,GO:0003891,GO:0016448,GO:0016452	2.7.7.7	DNA-directed DNA polymerase.
31774	NP_214517.1	15607145	7022	3280	4437	+	Rv0003	recF	recombination protein F (NCBI)	COG1195	L	Recombinational DNA repair ATPase (RecF pathway)	TIGR00611 DNA replication and repair protein RecF [recF]	DNA metabolism:DNA replication, recombination, and repair	GO:0006281,GO:0005694,GO:0003697,GO:0005524	NA	NA
31775	NP_214518.1	15607146	7022	4434	4997	+	Rv0004	Rv0004	hypothetical protein (NCBI)	COG5512	R	Zn-ribbon-containing, possibly RNA-binding protein and truncated derivatives	NA	NA	NA	NA	NA
31776	NP_214519.1	15607147	7022	5123	7267	+	Rv0005	gyrB	DNA topoisomerase IV subunit B (NCBI)	COG187	L	Type IIA topoisomerase (DNA gyrase/topo II, topoisomerase IV), B subunit	TIGR01059 DNA gyrase, B subunit [gyrB]	DNA metabolism:DNA replication, recombination, and repair	GO:0006304,GO:0006265,GO:0005694,GO:0003918,GO:0005524	5.99.1.3	DNA topoisomerase (ATP-hydrolyzing).
31777	NP_214520.1	15607148	7022	7302	9818	+	Rv0006	gyrA	DNA gyrase subunit A (NCBI)	COG188	L	Type IIA topoisomerase (DNA gyrase/topo II, topoisomerase IV), A subunit	TIGR01063 DNA gyrase, A subunit [gyrA]	DNA metabolism:DNA replication, recombination, and repair	GO:0006265,GO:0006268,GO:0005694,GO:0003918,GO:0005509,GO:0005524	5.99.1.3	DNA topoisomerase (ATP-hydrolyzing).

mtb_go <- load_microbesonline_go(species="Mycobacterium tuberculosis H37Rv", id_column="sysName")

## Found 1 entry.

##                                Genome         Phylum Paper     Loaded Complete
## 2178 Mycobacterium tuberculosis H37Rv Actinobacteria   yes 2007-05-08      yes
##      #Chr. #Plasmids #Genes tax_id
## 2178     1         0   4047  83332

## The species being downloaded is: Mycobacterium tuberculosis H37Rv and is being downloaded as 83332.tab.

colnames(mtb_go) <- c("ID", "GO")

mtb_gff <- load_gff_annotations(gff="~/scratch/libraries/genome/mtuberculosis_h37rv.gff")

## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=TRUE)

## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=FALSE)

## Had a successful gff import with rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=FALSE)

## Returning a df with 15 columns and 4008 rows.

rownames(mtb_gff) <- mtb_gff[["locus_tag"]]

mtb_annot <- merge(mtb_gff, mtb_annotations, by.x="row.names", by.y="sysName", all.x=TRUE)
rownames(mtb_annot) <- mtb_annot[["Row.names"]]
mtb_annot[["Row.names"]] <- NULL

2 Local Samples Estimation

This is the group of samples which were collected by the Briken lab and previously analyzed by members of the El-Sayed lab.

local_expt <- sm(create_expt(metadata="sample_sheets/local_samples.xlsx",
                             file_column="mtbfile",
                             gene_info=mtb_annot))

2.1 Just take the samples of immediate interest

Najib and Volker would like to focus for the moment on only hpgl IDs: 130-132, 330-332.

few_expt <- subset_expt(local_expt, subset="condition=='in_vitro'|condition=='thp1'")

## Using a subset expression.

## There were 44, now there are 6 samples.

few_filt <- normalize_expt(few_expt, filter=TRUE)

## This function will replace the expt$expressionset slot with:

## cbcb(data)

## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize

## Leaving the data in its current base format, keep in mind that
##  some metrics are easier to see when the data is log2 transformed, but
##  EdgeR/DESeq do not accept transformed data.

## Leaving the data unconverted.  It is often advisable to cpm/rpkm
##  the data to normalize for sampling differences, keep in mind though that rpkm
##  has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
##  will try to detect this).

## Leaving the data unnormalized.  This is necessary for DESeq, but
##  EdgeR/limma might benefit from normalization.  Good choices include quantile,
##  size-factor, tmm, etc.

## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.

## Step 1: performing count filter with option: cbcb

## Removing 0 low-count genes (4008 remaining).

## Step 2: not normalizing the data.

## Step 3: not converting the data.

## Step 4: not transforming the data.

## Step 5: not doing batch correction.

few_write <- write_expt(few_expt, excel="excel/few_written.xlsx")

## Writing the first sheet, containing a legend and some summary data.

## Writing the raw reads.

## Graphing the raw reads.

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

## Warning in doTryCatch(return(expr), name, parentenv, handler): display list
## redraw incomplete

## Warning in doTryCatch(return(expr), name, parentenv, handler): display list
## redraw incomplete

## varpart sees only 1 batch, adjusting the model accordingly.

## Attempting mixed linear model with: ~  (1|condition)

## Fitting the expressionset to the model, this is slow.

## Dividing work into 100 chunks...

## 
## Total:18 s

## Placing factor: condition at the beginning of the model.

## Writing the normalized reads.

## Error in density.default(x, adjust = adj) : 'x' contains missing values
## Error in density.default(x, adjust = adj) : 'x' contains missing values

## Graphing the normalized reads.

## `geom_smooth()` using formula 'y ~ x'

## `geom_smooth()` using formula 'y ~ x'

## Warning in doTryCatch(return(expr), name, parentenv, handler): display list
## redraw incomplete

## Warning in doTryCatch(return(expr), name, parentenv, handler): display list
## redraw incomplete

## varpart sees only 1 batch, adjusting the model accordingly.

## Attempting mixed linear model with: ~  (1|condition)

## Fitting the expressionset to the model, this is slow.

## Dividing work into 100 chunks...

## 
## Total:24 s

## Placing factor: condition at the beginning of the model.

## Writing the median reads by factor.

few_de <- all_pairwise(few_filt)

## Plotting a PCA before surrogates/batch inclusion.

## Not putting labels on the plot.

## Using limma's removeBatchEffect to visualize with(out) batch inclusion.

## Error in density.default(x, adjust = adj) : 'x' contains missing values
## Error in density.default(x, adjust = adj) : 'x' contains missing values

## Not putting labels on the plot.

## Finished running DE analyses, collecting outputs.

## Comparing analyses.

few_table <- combine_de_tables(few_de, excel="excel/few_samples_table.xlsx")

## Deleting the file excel/few_samples_table.xlsx before writing the tables.

## Writing a legend of columns.

## Printing a pca plot before/after surrogates/batch estimation.

## Working on table 1/1: thp1_vs_in_vitro

## Adding venn plots for thp1_vs_in_vitro.

## Limma expression coefficients for thp1_vs_in_vitro; R^2: 0.612; equation: y = 0.671x + 1.72

## Deseq expression coefficients for thp1_vs_in_vitro; R^2: 0.641; equation: y = 0.831x + 0.579

## Edger expression coefficients for thp1_vs_in_vitro; R^2: 0.581; equation: y = 1.15x - 1.67

## Writing summary information, compare_plot is: TRUE.

## Performing save of excel/few_samples_table.xlsx.

few_sig <- extract_significant_genes(few_table,
                                     excel="excel/few_samples_sig.xlsx")

## Writing a legend of columns.

## Printing significant genes to the file: excel/few_samples_sig.xlsx

## 1/1: Creating significant table up_limma_thp1_vs_in_vitro

## Printing significant genes to the file: excel/few_samples_sig.xlsx

## 1/1: Creating significant table up_edger_thp1_vs_in_vitro

## Printing significant genes to the file: excel/few_samples_sig.xlsx

## 1/1: Creating significant table up_deseq_thp1_vs_in_vitro

## Printing significant genes to the file: excel/few_samples_sig.xlsx

## 1/1: Creating significant table up_ebseq_thp1_vs_in_vitro

## Printing significant genes to the file: excel/few_samples_sig.xlsx

## 1/1: Creating significant table up_basic_thp1_vs_in_vitro

## Adding significance bar plots.

mtb_lengths <- mtb_annot[, c("seqnames", "width")]
mtb_lengths[["seqnames"]] <- rownames(mtb_lengths)
colnames(mtb_lengths) <- c("ID", "length")

up_genes <- few_sig[["deseq"]][["ups"]][[1]]
up_go <- simple_goseq(sig_genes=up_genes, go_db=mtb_go, length_db=mtb_lengths,
                      excel="excel/up_goseq.xlsx")

## Using the row names of your table.

## Found 210 genes out of 390 from the sig_genes in the go_db.

## Found 390 genes out of 390 from the sig_genes in the length_db.

## Using manually entered categories.

## Calculating the p-values...

## 'select()' returned 1:1 mapping between keys and columns

## simple_goseq(): Calculating q-values

## simple_goseq(): Filling godata with terms, this is slow.

## Testing that go categories are defined.

## Removing undefined categories.

## Gathering synonyms.

## Gathering category definitions.

## simple_goseq(): Making pvalue plots for the ontologies.

## Writing data to: excel/up_goseq.xlsx.

down_genes <- few_sig[["deseq"]][["downs"]][[1]]
down <- rownames(down_genes)
down_go <- simple_goseq(sig_genes=down, go_db=mtb_go, length_db=mtb_lengths)

## Found 319 genes out of 457 from the sig_genes in the go_db.

## Found 457 genes out of 457 from the sig_genes in the length_db.

## Using manually entered categories.

## Calculating the p-values...

## 'select()' returned 1:1 mapping between keys and columns

## simple_goseq(): Calculating q-values

## simple_goseq(): Filling godata with terms, this is slow.

## Testing that go categories are defined.

## Removing undefined categories.

## Gathering synonyms.

## Gathering category definitions.

## simple_goseq(): Making pvalue plots for the ontologies.

few_write[["norm_pca"]]

few_table[["plots"]][[1]][["deseq_ma_plots"]][["plot"]]

up_go$pvalue_plots[[1]]

down_go$pvalue_plots[[1]]

3 Exogenous Samples Estimation

In this context, exogenous just means samples which were not created here. E.g. samples I downloaded from SRA.

exo_annot <- mtb_annot
rownames(exo_annot) <- exo_annot[["db_xref"]]
exo_expt <- create_expt(metadata="sample_sheets/exo_samples.xlsx",
                           file_column="mtbfile",
                           gene_info=exo_annot)

## Reading the sample metadata.

## Did not find the batch column in the sample sheet.

## Filling it in as undefined.

## The sample definitions comprises: 19 rows(samples) and 12 columns(metadata fields).

## Reading count tables.

## Reading count files with read.table().

## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/SRR9214125/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows.

## preprocessing/SRR9214126/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.

## preprocessing/SRR9214127/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.

## preprocessing/SRR9214128/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.

## preprocessing/SRR9214129/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.

## preprocessing/SRR9214130/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.

## preprocessing/SRR9214131/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.

## preprocessing/SRR9214132/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.

## preprocessing/SRR9214133/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.

## preprocessing/SRR9214134/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.

## preprocessing/SRR9214135/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.

## preprocessing/SRR9214136/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.

## preprocessing/SRR9214137/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.

## preprocessing/SRR9214138/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.

## preprocessing/SRR9214139/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.

## preprocessing/SRR9214140/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.

## preprocessing/SRR9214141/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.

## preprocessing/SRR9214142/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.

## preprocessing/SRR9214143/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.

## Finished reading count data.

## Matched 4008 annotations and counts.

## Bringing together the count matrix and gene information.

## Some annotations were lost in merging, setting them to 'undefined'.

## Saving the expressionset to 'expt.rda'.

## The final expressionset has 4008 rows and 19 columns.

3.1 Create some plots of the new data

The following blocks will plot and print a few common metrics of the new data.

exo_plots <- sm(graph_metrics(exo_expt))

exo_norm <- sm(normalize_expt(exo_expt, transform="log2", norm="quant", filter=TRUE))
exon_plots <- sm(graph_metrics(exo_norm))

3.2 Now show some plots!

exo_plots$libsize

exo_plots$density

tn <- normalize_expt(exo_expt, transform="log2")

## This function will replace the expt$expressionset slot with:

## log2(data)

## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize

## Filter is false, this should likely be set to something, good
##  choices include cbcb, kofa, pofa (anything but FALSE).  If you want this to
##  stay FALSE, keep in mind that if other normalizations are performed, then the
##  resulting libsizes are likely to be strange (potentially negative!)

## Leaving the data unconverted.  It is often advisable to cpm/rpkm
##  the data to normalize for sampling differences, keep in mind though that rpkm
##  has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
##  will try to detect this).

## Leaving the data unnormalized.  This is necessary for DESeq, but
##  EdgeR/limma might benefit from normalization.  Good choices include quantile,
##  size-factor, tmm, etc.

## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.

## Step 1: not doing count filtering.

## Step 2: not normalizing the data.

## Step 3: not converting the data.

## Step 4: transforming the data with log2.

## transform_counts: Found 1020 values equal to 0, adding 1 to the matrix.

## Step 5: not doing batch correction.

tnp <- plot_density(tn)

tmp_ggstats <- ggstatsplot::ggbetweenstats(
                                data=tnp$table, x=sample, y=counts,
                                notch=TRUE, mean.ci=TRUE, k=3,
                                pairwise.comparisons=FALSE)

## Registered S3 methods overwritten by 'broom.mixed':
##   method         from 
##   augment.lme    broom
##   augment.merMod broom
##   glance.lme     broom
##   glance.merMod  broom
##   glance.stanreg broom
##   tidy.brmsfit   broom
##   tidy.gamlss    broom
##   tidy.lme       broom
##   tidy.merMod    broom
##   tidy.rjags     broom
##   tidy.stanfit   broom
##   tidy.stanreg   broom

## Registered S3 methods overwritten by 'car':
##   method                          from
##   influence.merMod                lme4
##   cooks.distance.influence.merMod lme4
##   dfbeta.influence.merMod         lme4
##   dfbetas.influence.merMod        lme4

## Registered S3 method overwritten by 'DescTools':
##   method         from 
##   reorder.factor gdata

## Warning:  Number of labels is greater than default palette color count.
##  Try using another color `palette` (and/or `package`).

##

tmp_ggstats

tmp_ggstats <- ggstatsplot::grouped_ggbetweenstats(
                                grouping.var=condition,
                                data=tnp$table, x=sample, y=counts,
                                notch=TRUE, mean.ci=TRUE, k=3,
                                pairwise.comparisons=FALSE)
tmp_ggstats

## Quick PCA
exon_pc_expt <- normalize_expt(exo_expt, transform="log2", filter=TRUE, convert="cpm",
                               norm="quant", batch="svaseq")

## This function will replace the expt$expressionset slot with:

## log2(svaseq(cpm(quant(cbcb(data)))))

## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize

## Warning in normalize_expt(exo_expt, transform = "log2", filter = TRUE, convert =
## "cpm", : Quantile normalization and sva do not always play well together.

## Step 1: performing count filter with option: cbcb

## Removing 20 low-count genes (3988 remaining).

## Step 2: normalizing the data with quant.

## Step 3: converting the data with cpm.

## Step 4: transforming the data with log2.

## transform_counts: Found 11 values equal to 0, adding 1 to the matrix.

## Step 5: doing batch correction with svaseq.

## Note to self:  If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.

## Passing off to all_adjusters.

## batch_counts: Before batch/surrogate estimation, 75205 entries are x>1: 99%.

## batch_counts: Before batch/surrogate estimation, 11 entries are x==0: 0%.

## batch_counts: Before batch/surrogate estimation, 556 entries are 0<x<1: 1%.

## The be method chose 2 surrogate variable(s).

## Attempting svaseq estimation with 2 surrogates.

## There are 13 (0%) elements which are < 0 after batch correction.

## Setting low elements to zero.

pp(file="images/exo_pc.png", image=plot_pca(exon_pc_expt)$plot)

## Writing the image to: images/exo_pc.png and calling dev.off().

pander::pander(sessionInfo())

R version 3.6.1 (2019-07-05)

Platform: x86_64-pc-linux-gnu (64-bit)

locale: LC_CTYPE=en_US.UTF-8, LC_NUMERIC=C, LC_TIME=en_US.UTF-8, LC_COLLATE=en_US.UTF-8, LC_MONETARY=en_US.UTF-8, LC_MESSAGES=en_US.UTF-8, LC_PAPER=en_US.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.UTF-8 and LC_IDENTIFICATION=C

attached base packages: grid, parallel, stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: Rgraphviz(v.2.30.0), graph(v.1.64.0), SparseM(v.1.78), topGO(v.2.38.1), ruv(v.0.9.7.1), BiocParallel(v.1.20.1), variancePartition(v.1.16.1), hpgltools(v.1.0), Biobase(v.2.46.0) and BiocGenerics(v.0.32.0)

loaded via a namespace (and not attached): PMCMRplus(v.1.4.4), rcompanion(v.2.3.25), TMB(v.1.7.16), pander(v.0.6.3), graphlayouts(v.0.7.0), pbapply(v.1.4-2), lattice(v.0.20-41), haven(v.2.3.1), vctrs(v.0.3.1), expm(v.0.999-4), usethis(v.1.6.1), mgcv(v.1.8-31), gmp(v.0.6-0), blob(v.1.2.1), survival(v.3.2-3), RBGL(v.1.62.1), later(v.1.1.0.1), nloptr(v.1.2.2.1), DBI(v.1.1.0), R.utils(v.2.9.2), rappdirs(v.0.3.1), selectr(v.0.4-2), jpeg(v.0.1-8.1), zlibbioc(v.1.32.0), MatrixModels(v.0.4-1), htmlwidgets(v.1.5.1), mvtnorm(v.1.1-1), inline(v.0.3.15), broomExtra(v.4.0.2), pairwiseComparisons(v.1.0.0), pbkrtest(v.0.4-8.6), ez(v.4.4-0), DEoptimR(v.1.0-8), tidygraph(v.1.2.0), Rcpp(v.1.0.4.6), readr(v.1.3.1), KernSmooth(v.2.23-17), promises(v.1.1.1), kSamples(v.1.2-9), gdata(v.2.18.0), DelayedArray(v.0.12.3), limma(v.3.42.2), pkgload(v.1.1.0), statsExpressions(v.0.4.1), RcppParallel(v.5.0.1), Hmisc(v.4.4-0), geneLenDataBase(v.1.22.0), fs(v.1.4.1), fastmatch(v.1.1-0), fastGHQuad(v.1.0), digest(v.0.6.25), png(v.0.1-7), cowplot(v.1.0.0), DOSE(v.3.12.0), ggraph(v.2.0.3), pkgconfig(v.2.0.3), GO.db(v.3.10.0), iterators(v.1.0.12), minqa(v.1.2.4), dunn.test(v.1.3.5), clusterProfiler(v.3.14.3), SummarizedExperiment(v.1.16.1), rstan(v.2.19.3), modeltools(v.0.2-23), nortest(v.1.0-4), xfun(v.0.14), zoo(v.1.8-8), tidyselect(v.1.1.0), performance(v.0.4.7), reshape2(v.1.4.4), purrr(v.0.3.4), viridisLite(v.0.3.0), rtracklayer(v.1.46.0), pkgbuild(v.1.0.8), rlang(v.0.4.6), groupedstats(v.1.0.1), Rmpfr(v.0.8-1), glue(v.1.4.1), RColorBrewer(v.1.1-2), matrixStats(v.0.56.0), ggcorrplot(v.0.1.3), stringr(v.1.4.0), multcompView(v.0.1-8), europepmc(v.0.4), sva(v.3.34.0), ggsignif(v.0.6.0), bayestestR(v.0.6.0), DESeq2(v.1.26.0), LaplacesDemon(v.16.1.4), labeling(v.0.3), httpuv(v.1.5.4), preprocessCore(v.1.48.0), TH.data(v.1.0-10), corpcor(v.1.6.9), DO.db(v.2.9), annotate(v.1.64.0), jsonlite(v.1.6.1), XVector(v.0.26.0), bit(v.1.1-15.2), mime(v.0.9), gridExtra(v.2.3), gplots(v.3.0.3), Rsamtools(v.2.2.3), stringi(v.1.4.6), insight(v.0.8.5), BWStest(v.0.2.2), processx(v.3.4.2), logspline(v.2.1.16), quadprog(v.1.5-8), bitops(v.1.0-6), cli(v.2.0.2), RSQLite(v.2.2.0), tidyr(v.1.1.0), libcoin(v.1.0-5), broom.mixed(v.0.2.6), data.table(v.1.12.8), correlation(v.0.2.1), rstudioapi(v.0.11), GenomicAlignments(v.1.22.1), nlme(v.3.1-148), qvalue(v.2.18.0), fastcluster(v.1.1.25), locfit(v.1.5-9.4), miniUI(v.0.1.1.1), gridGraphics(v.0.5-0), R.oo(v.1.23.0), dbplyr(v.1.4.4), sessioninfo(v.1.1.1), readxl(v.1.3.1), lifecycle(v.0.2.0), munsell(v.0.5.0), cellranger(v.1.1.0), R.methodsS3(v.1.8.0), caTools(v.1.18.0), codetools(v.0.2-16), coda(v.0.19-3), GenomeInfoDb(v.1.22.1), lmtest(v.0.9-37), htmlTable(v.1.13.3), triebeard(v.0.3.0), rstantools(v.2.0.0), tidyBF(v.0.2.0), xtable(v.1.8-4), BiocManager(v.1.30.10), directlabels(v.2020.1.31), StanHeaders(v.2.21.0-5), abind(v.1.4-5), farver(v.2.0.3), ggExtra(v.0.9), askpass(v.1.1), SuppDists(v.1.1-9.5), GenomicRanges(v.1.38.0), tibble(v.3.0.1), cluster(v.2.1.0), zeallot(v.0.1.0), Matrix(v.1.2-18), ellipsis(v.0.3.1), prettyunits(v.1.1.1), ggridges(v.0.5.2), goseq(v.1.38.0), prismatic(v.0.2.0), colorRamps(v.2.3), EMT(v.1.1), igraph(v.1.2.5), fgsea(v.1.12.0), remotes(v.2.1.1), paletteer(v.1.2.0), Vennerable(v.3.1.0.9000), parameters(v.0.8.0), testthat(v.2.3.2), mc2d(v.0.1-18), htmltools(v.0.4.0), BiocFileCache(v.1.10.2), yaml(v.2.2.1), ipmisc(v.3.0.0), GenomicFeatures(v.1.38.2), loo(v.2.2.0), XML(v.3.99-0.3), foreign(v.0.8-76), withr(v.2.2.0), BayesFactor(v.0.9.12-4.2), bit64(v.0.9-7), BiasedUrn(v.1.07), effectsize(v.0.3.1), multcomp(v.1.4-13), foreach(v.1.5.0), robustbase(v.0.93-6), Biostrings(v.2.54.0), GOSemSim(v.2.12.1), devtools(v.2.3.0), memoise(v.1.1.0), evaluate(v.0.14), forcats(v.0.5.0), rio(v.0.5.16), geneplotter(v.1.64.0), callr(v.3.4.3), ps(v.1.3.3), curl(v.4.3), metafor(v.2.4-0), fansi(v.0.4.1), highr(v.0.8), urltools(v.1.7.3), acepack(v.1.4.1), edgeR(v.3.28.1), checkmate(v.2.0.0), desc(v.1.2.0), ggplot2(v.3.3.1), openxlsx(v.4.1.5), ggrepel(v.0.8.2), rprojroot(v.1.3-2), tools(v.3.6.1), sandwich(v.2.5-1), magrittr(v.1.5), RCurl(v.1.98-1.2), car(v.3.0-8), ggplotify(v.0.0.5), xml2(v.1.3.2), httr(v.1.4.1), assertthat(v.0.2.1), rmarkdown(v.2.2), boot(v.1.3-25), R6(v.2.4.1), nnet(v.7.3-14), progress(v.1.2.2), genefilter(v.1.68.0), Brobdingnag(v.1.2-6), gtools(v.3.8.2), statmod(v.1.4.34), coin(v.1.3-1), repr(v.1.1.0), rematch2(v.2.1.2), splines(v.3.6.1), carData(v.3.0-4), colorspace(v.1.4-1), generics(v.0.0.2), stats4(v.3.6.1), base64enc(v.0.1-3), metaBMA(v.0.6.3), pillar(v.1.4.4), tweenr(v.1.0.1), WRS2(v.1.0-0), rvcheck(v.0.1.8), GenomeInfoDbData(v.1.2.2), plyr(v.1.8.6), gtable(v.0.3.0), bdsmatrix(v.1.3-4), rvest(v.0.3.5), zip(v.2.0.4), knitr(v.1.28), latticeExtra(v.0.6-29), biomaRt(v.2.42.1), IRanges(v.2.20.2), fastmap(v.1.0.1), metaplus(v.0.7-11), doParallel(v.1.0.15), AnnotationDbi(v.1.48.0), broom(v.0.5.6), openssl(v.1.4.1), scales(v.1.1.1), backports(v.1.1.7), S4Vectors(v.0.24.4), lme4(v.1.1-23), enrichplot(v.1.6.1), hms(v.0.5.3), ggforce(v.0.3.1), Rtsne(v.0.15), dplyr(v.1.0.0), shiny(v.1.4.0.2), polyclip(v.1.10-0), numDeriv(v.2016.8-1.1), ggstatsplot(v.0.5.0), bbmle(v.1.0.23.1), DescTools(v.0.99.36), Formula(v.1.2-3), crayon(v.1.3.4), MASS(v.7.3-51.6), skimr(v.2.1.1), viridis(v.0.5.1), reshape(v.0.8.8), bridgesampling(v.1.0-0), rpart(v.4.1-15) and compiler(v.3.6.1)

message(paste0("This is hpgltools commit: ", get_git_commit()))

## If you wish to reproduce this exact build of hpgltools, invoke the following:

## > git clone http://github.com/abelew/hpgltools.git

## > git reset 5f94c6aed834674e1567e1992ac0bdefab60ead1

## This is hpgltools commit: Tue Jun 9 09:50:10 2020 -0400: 5f94c6aed834674e1567e1992ac0bdefab60ead1

this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))

## Saving to 01_mtb_analyses_20200320-v20200320.rda.xz

tmp <- sm(saveme(filename=this_save))

LS0tCnRpdGxlOiAiMjAyMDAzMjAgTXRiLiBhbmFseXNlcy4iCmF1dGhvcjogImF0YiBhYmVsZXdAZ21haWwuY29tIgpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiCm91dHB1dDoKIGh0bWxfZG9jdW1lbnQ6CiAgY29kZV9kb3dubG9hZDogdHJ1ZQogIGNvZGVfZm9sZGluZzogc2hvdwogIGZpZ19jYXB0aW9uOiB0cnVlCiAgZmlnX2hlaWdodDogNwogIGZpZ193aWR0aDogNwogIGhpZ2hsaWdodDogZGVmYXVsdAogIGtlZXBfbWQ6IGZhbHNlCiAgbW9kZTogc2VsZmNvbnRhaW5lZAogIG51bWJlcl9zZWN0aW9uczogdHJ1ZQogIHNlbGZfY29udGFpbmVkOiB0cnVlCiAgdGhlbWU6IHJlYWRhYmxlCiAgdG9jOiB0cnVlCiAgdG9jX2Zsb2F0OgogICAgY29sbGFwc2VkOiBmYWxzZQogICAgc21vb3RoX3Njcm9sbDogZmFsc2UKLS0tCgo8c3R5bGU+CiAgYm9keSAubWFpbi1jb250YWluZXIgewogICAgbWF4LXdpZHRoOiAxNjAwcHg7CiAgfQo8L3N0eWxlPgoKYGBge3Igb3B0aW9ucywgaW5jbHVkZT1GQUxTRX0KbGlicmFyeShocGdsdG9vbHMpCnR0IDwtIGRldnRvb2xzOjpsb2FkX2FsbCgifi9ocGdsdG9vbHMiKQprbml0cjo6b3B0c19rbml0JHNldChwcm9ncmVzcz1UUlVFLCB2ZXJib3NlPVRSVUUsIHdpZHRoPTkwLCBlY2hvPVRSVUUpCmtuaXRyOjpvcHRzX2NodW5rJHNldChlcnJvcj1UUlVFLCBmaWcud2lkdGg9OCwgZmlnLmhlaWdodD04LCBkcGk9OTYpCm9sZF9vcHRpb25zIDwtIG9wdGlvbnMoZGlnaXRzPTQsIHN0cmluZ3NBc0ZhY3RvcnM9RkFMU0UsIGtuaXRyLmR1cGxpY2F0ZS5sYWJlbD0iYWxsb3ciKQpnZ3Bsb3QyOjp0aGVtZV9zZXQoZ2dwbG90Mjo6dGhlbWVfYncoYmFzZV9zaXplPTEwKSkKdmVyIDwtICIyMDIwMDMyMCIKcHJldmlvdXNfZmlsZSA8LSAiaW5kZXguUm1kIgoKdG1wIDwtIHRyeShzbShsb2FkbWUoZmlsZW5hbWU9cGFzdGUwKGdzdWIocGF0dGVybj0iXFwuUm1kIiwgcmVwbGFjZT0iIiwgeD1wcmV2aW91c19maWxlKSwgIi12IiwgdmVyLCAiLnJkYS54eiIpKSkpCnJtZF9maWxlIDwtICIwMV9tdGJfYW5hbHlzZXNfMjAyMDAzMjAuUm1kIgpgYGAKCiMgQW5ub3RhdGlvbiB2ZXJzaW9uOiBgciB2ZXJgCgpUaGUgZm9sbG93aW5nIHNlY3Rpb24gbG9hZHMgdGhlIG1pY3JvYmVzb25saW5lIGFuZCBnZW5iYW5rIGFubm90YXRpb25zIGZvciBNeWNvYmFjdGVyaXVtIHR1YmVyY3Vsb3Npcy4KCmBgYHtyIGFubm90YXRpb259CiMjIExvb2tzIGxpa2UgaXQgaXMgdGF4b24gSUQgODMzMzIKbXRiX2Fubm90YXRpb25zIDwtIGFzLmRhdGEuZnJhbWUobG9hZF9taWNyb2Jlc29ubGluZV9hbm5vdGF0aW9ucyhzcGVjaWVzPSJNeWNvYmFjdGVyaXVtIHR1YmVyY3Vsb3NpcyBIMzdSdiIpKQprbml0cjo6a2FibGUoaGVhZChtdGJfYW5ub3RhdGlvbnMpKQoKbXRiX2dvIDwtIGxvYWRfbWljcm9iZXNvbmxpbmVfZ28oc3BlY2llcz0iTXljb2JhY3Rlcml1bSB0dWJlcmN1bG9zaXMgSDM3UnYiLCBpZF9jb2x1bW49InN5c05hbWUiKQpjb2xuYW1lcyhtdGJfZ28pIDwtIGMoIklEIiwgIkdPIikKCm10Yl9nZmYgPC0gbG9hZF9nZmZfYW5ub3RhdGlvbnMoZ2ZmPSJ+L3NjcmF0Y2gvbGlicmFyaWVzL2dlbm9tZS9tdHViZXJjdWxvc2lzX2gzN3J2LmdmZiIpCnJvd25hbWVzKG10Yl9nZmYpIDwtIG10Yl9nZmZbWyJsb2N1c190YWciXV0KCm10Yl9hbm5vdCA8LSBtZXJnZShtdGJfZ2ZmLCBtdGJfYW5ub3RhdGlvbnMsIGJ5Lng9InJvdy5uYW1lcyIsIGJ5Lnk9InN5c05hbWUiLCBhbGwueD1UUlVFKQpyb3duYW1lcyhtdGJfYW5ub3QpIDwtIG10Yl9hbm5vdFtbIlJvdy5uYW1lcyJdXQptdGJfYW5ub3RbWyJSb3cubmFtZXMiXV0gPC0gTlVMTApgYGAKCiMgTG9jYWwgU2FtcGxlcyBFc3RpbWF0aW9uCgpUaGlzIGlzIHRoZSBncm91cCBvZiBzYW1wbGVzIHdoaWNoIHdlcmUgY29sbGVjdGVkIGJ5IHRoZSBCcmlrZW4gbGFiIGFuZApwcmV2aW91c2x5IGFuYWx5emVkIGJ5IG1lbWJlcnMgb2YgdGhlIEVsLVNheWVkIGxhYi4KCmBgYHtyIGxvY2FsX2V4cHR9CmxvY2FsX2V4cHQgPC0gc20oY3JlYXRlX2V4cHQobWV0YWRhdGE9InNhbXBsZV9zaGVldHMvbG9jYWxfc2FtcGxlcy54bHN4IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICBmaWxlX2NvbHVtbj0ibXRiZmlsZSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZ2VuZV9pbmZvPW10Yl9hbm5vdCkpCmBgYAoKIyMgSnVzdCB0YWtlIHRoZSBzYW1wbGVzIG9mIGltbWVkaWF0ZSBpbnRlcmVzdAoKTmFqaWIgYW5kIFZvbGtlciB3b3VsZCBsaWtlIHRvIGZvY3VzIGZvciB0aGUgbW9tZW50IG9uIG9ubHkgaHBnbCBJRHM6IDEzMC0xMzIsIDMzMC0zMzIuCgpgYGB7ciBhX2ZldywgZmlnLnNob3c9ImhpZGUifQpmZXdfZXhwdCA8LSBzdWJzZXRfZXhwdChsb2NhbF9leHB0LCBzdWJzZXQ9ImNvbmRpdGlvbj09J2luX3ZpdHJvJ3xjb25kaXRpb249PSd0aHAxJyIpCgpmZXdfZmlsdCA8LSBub3JtYWxpemVfZXhwdChmZXdfZXhwdCwgZmlsdGVyPVRSVUUpCmZld193cml0ZSA8LSB3cml0ZV9leHB0KGZld19leHB0LCBleGNlbD0iZXhjZWwvZmV3X3dyaXR0ZW4ueGxzeCIpCmZld19kZSA8LSBhbGxfcGFpcndpc2UoZmV3X2ZpbHQpCmZld190YWJsZSA8LSBjb21iaW5lX2RlX3RhYmxlcyhmZXdfZGUsIGV4Y2VsPSJleGNlbC9mZXdfc2FtcGxlc190YWJsZS54bHN4IikKZmV3X3NpZyA8LSBleHRyYWN0X3NpZ25pZmljYW50X2dlbmVzKGZld190YWJsZSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGV4Y2VsPSJleGNlbC9mZXdfc2FtcGxlc19zaWcueGxzeCIpCgptdGJfbGVuZ3RocyA8LSBtdGJfYW5ub3RbLCBjKCJzZXFuYW1lcyIsICJ3aWR0aCIpXQptdGJfbGVuZ3Roc1tbInNlcW5hbWVzIl1dIDwtIHJvd25hbWVzKG10Yl9sZW5ndGhzKQpjb2xuYW1lcyhtdGJfbGVuZ3RocykgPC0gYygiSUQiLCAibGVuZ3RoIikKCnVwX2dlbmVzIDwtIGZld19zaWdbWyJkZXNlcSJdXVtbInVwcyJdXVtbMV1dCnVwX2dvIDwtIHNpbXBsZV9nb3NlcShzaWdfZ2VuZXM9dXBfZ2VuZXMsIGdvX2RiPW10Yl9nbywgbGVuZ3RoX2RiPW10Yl9sZW5ndGhzLAogICAgICAgICAgICAgICAgICAgICAgZXhjZWw9ImV4Y2VsL3VwX2dvc2VxLnhsc3giKQoKZG93bl9nZW5lcyA8LSBmZXdfc2lnW1siZGVzZXEiXV1bWyJkb3ducyJdXVtbMV1dCmRvd24gPC0gcm93bmFtZXMoZG93bl9nZW5lcykKZG93bl9nbyA8LSBzaW1wbGVfZ29zZXEoc2lnX2dlbmVzPWRvd24sIGdvX2RiPW10Yl9nbywgbGVuZ3RoX2RiPW10Yl9sZW5ndGhzKQpgYGAKCmBgYHtyIGdvX3Bsb3RzfQpmZXdfd3JpdGVbWyJub3JtX3BjYSJdXQpmZXdfdGFibGVbWyJwbG90cyJdXVtbMV1dW1siZGVzZXFfbWFfcGxvdHMiXV1bWyJwbG90Il1dCgp1cF9nbyRwdmFsdWVfcGxvdHNbWzFdXQpkb3duX2dvJHB2YWx1ZV9wbG90c1tbMV1dCmBgYAoKIyBFeG9nZW5vdXMgU2FtcGxlcyBFc3RpbWF0aW9uCgpJbiB0aGlzIGNvbnRleHQsIGV4b2dlbm91cyBqdXN0IG1lYW5zIHNhbXBsZXMgd2hpY2ggd2VyZSBub3QgY3JlYXRlZCBoZXJlLgpFLmcuIHNhbXBsZXMgSSBkb3dubG9hZGVkIGZyb20gU1JBLgoKYGBge3IgY3JlYXRlX2V4cHR9CmV4b19hbm5vdCA8LSBtdGJfYW5ub3QKcm93bmFtZXMoZXhvX2Fubm90KSA8LSBleG9fYW5ub3RbWyJkYl94cmVmIl1dCmV4b19leHB0IDwtIGNyZWF0ZV9leHB0KG1ldGFkYXRhPSJzYW1wbGVfc2hlZXRzL2V4b19zYW1wbGVzLnhsc3giLAogICAgICAgICAgICAgICAgICAgICAgICAgICBmaWxlX2NvbHVtbj0ibXRiZmlsZSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgIGdlbmVfaW5mbz1leG9fYW5ub3QpCmBgYAoKIyMgQ3JlYXRlIHNvbWUgcGxvdHMgb2YgdGhlIG5ldyBkYXRhCgpUaGUgZm9sbG93aW5nIGJsb2NrcyB3aWxsIHBsb3QgYW5kIHByaW50IGEgZmV3IGNvbW1vbiBtZXRyaWNzIG9mIHRoZSBuZXcgZGF0YS4KCmBgYHtyIG5ld19kYXRhLCBmaWcuc2hvdz0iaGlkZSJ9CmV4b19wbG90cyA8LSBzbShncmFwaF9tZXRyaWNzKGV4b19leHB0KSkKZXhvX25vcm0gPC0gc20obm9ybWFsaXplX2V4cHQoZXhvX2V4cHQsIHRyYW5zZm9ybT0ibG9nMiIsIG5vcm09InF1YW50IiwgZmlsdGVyPVRSVUUpKQpleG9uX3Bsb3RzIDwtIHNtKGdyYXBoX21ldHJpY3MoZXhvX25vcm0pKQpgYGAKCiMjIE5vdyBzaG93IHNvbWUgcGxvdHMhCgpgYGB7ciBzaG93X2RhdGF9CmV4b19wbG90cyRsaWJzaXplCmV4b19wbG90cyRkZW5zaXR5Cgp0biA8LSBub3JtYWxpemVfZXhwdChleG9fZXhwdCwgdHJhbnNmb3JtPSJsb2cyIikKdG5wIDwtIHBsb3RfZGVuc2l0eSh0bikKCnRtcF9nZ3N0YXRzIDwtIGdnc3RhdHNwbG90OjpnZ2JldHdlZW5zdGF0cygKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkYXRhPXRucCR0YWJsZSwgeD1zYW1wbGUsIHk9Y291bnRzLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG5vdGNoPVRSVUUsIG1lYW4uY2k9VFJVRSwgaz0zLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHBhaXJ3aXNlLmNvbXBhcmlzb25zPUZBTFNFKQp0bXBfZ2dzdGF0cwoKdG1wX2dnc3RhdHMgPC0gZ2dzdGF0c3Bsb3Q6Omdyb3VwZWRfZ2diZXR3ZWVuc3RhdHMoCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZ3JvdXBpbmcudmFyPWNvbmRpdGlvbiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkYXRhPXRucCR0YWJsZSwgeD1zYW1wbGUsIHk9Y291bnRzLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG5vdGNoPVRSVUUsIG1lYW4uY2k9VFJVRSwgaz0zLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHBhaXJ3aXNlLmNvbXBhcmlzb25zPUZBTFNFKQp0bXBfZ2dzdGF0cwoKIyMgUXVpY2sgUENBCmV4b25fcGNfZXhwdCA8LSBub3JtYWxpemVfZXhwdChleG9fZXhwdCwgdHJhbnNmb3JtPSJsb2cyIiwgZmlsdGVyPVRSVUUsIGNvbnZlcnQ9ImNwbSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBub3JtPSJxdWFudCIsIGJhdGNoPSJzdmFzZXEiKQpwcChmaWxlPSJpbWFnZXMvZXhvX3BjLnBuZyIsIGltYWdlPXBsb3RfcGNhKGV4b25fcGNfZXhwdCkkcGxvdCkKYGBgCgpgYGB7ciBzYXZlbWV9CnBhbmRlcjo6cGFuZGVyKHNlc3Npb25JbmZvKCkpCm1lc3NhZ2UocGFzdGUwKCJUaGlzIGlzIGhwZ2x0b29scyBjb21taXQ6ICIsIGdldF9naXRfY29tbWl0KCkpKQp0aGlzX3NhdmUgPC0gcGFzdGUwKGdzdWIocGF0dGVybj0iXFwuUm1kIiwgcmVwbGFjZT0iIiwgeD1ybWRfZmlsZSksICItdiIsIHZlciwgIi5yZGEueHoiKQptZXNzYWdlKHBhc3RlMCgiU2F2aW5nIHRvICIsIHRoaXNfc2F2ZSkpCnRtcCA8LSBzbShzYXZlbWUoZmlsZW5hbWU9dGhpc19zYXZlKSkKYGBgCg==

20200320 Mtb. analyses.

atb abelew@gmail.com

2020-06-15