1 Introduction

This dataset contains multiple experiments.

2 Annotations

pa14_gff <- load_gff_annotations("reference/paeruginosa_pa14.gff", id_col="gene_id")
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo = TRUE)
## Had a successful gff import with rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo = TRUE)
## Returning a df with 16 columns and 11946 rows.
rownames(pa14_gff) <- pa14_gff[["gene_id"]]
## The Alias column has PA14_00010

pa14_microbes <- as.data.frame(load_microbesonline_annotations("PA14"))
## Found 1 entry.
## Pseudomonas aeruginosa UCBPP-PA14Proteobacteria2006-11-22yes105972208963
## The species being downloaded is: Pseudomonas aeruginosa UCBPP-PA14
## Downloading: http://www.microbesonline.org/cgi-bin/genomeInfo.cgi?tId=208963;export=tab
## The sysName column has PA14_0010

pa14_annot <- merge(pa14_gff, pa14_microbes, by.x="Alias", by.y="sysName")
rownames(pa14_annot) <- pa14_annot[["gene_id"]]

## The identifiers are a bit odd, so we need to do a little work
## Build two lookup tables keyed by gene_id: pa14_go (ID, GO) and pa14_length (ID, width).
pa14_length <- pa14_annot[, c("gene_id", "width", "Alias")]
pa14_go <- load_microbesonline_go(species="PA14", id_column="sysName")
## Found 1 entry.
## Pseudomonas aeruginosa UCBPP-PA14Proteobacteria2006-11-22yes105972208963
## The species being downloaded is: Pseudomonas aeruginosa UCBPP-PA14 and is being downloaded as 208963.tab.
pa14_go_length <- merge(pa14_go, pa14_length, by.x="sysName", by.y="Alias")
pa14_go <- pa14_go_length[, c("gene_id", "GO")]
colnames(pa14_go) <- c("ID", "GO")
## merge() gives its result automatic (integer) rownames, so the gene ids cannot
## be used as a row index here: doing so returned only NA rows.  Collapse the
## repeated gene/width pairs (presumably one per GO term) with unique() instead.
pa14_length <- unique(pa14_go_length[, c("gene_id", "width")])
pa14_length_ids <- pa14_length[["gene_id"]]
rownames(pa14_length) <- make.names(pa14_length[["gene_id"]], unique=TRUE)
colnames(pa14_length) <- c("ID", "width")

3 Make the expressionset

Given the above annotations, now lets pull in the counts.

I am switching to the sheet all_samples_modified_gcd_202206.xlsx for the moment because I added a space in the strain name for the gcd samples.

pa14_expt <- create_expt("sample_sheets/all_samples_modified_gcd_202206.xlsx",
                         gene_info=pa14_annot, file_column="hisatcounttablereverse")
## Reading the sample metadata.
## Did not find the condition column in the sample sheet.
## Filling it in as undefined.
## Did not find the batch column in the sample sheet.
## Filling it in as undefined.
## The sample definitions comprises: 105 rows(samples) and 32 columns(metadata fields).
## Matched 5972 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 5979 features and 105 samples.

4 Quick global look at the data

While we are at it, let's drop the sad samples: the four with fewer than 5,500 non-zero genes.

pa14_libsize <- plot_libsize(pa14_expt)
pa14_libsize$plot

pa14_nonzero <- plot_nonzero(pa14_expt)
pa14_nonzero$plot
## Warning: ggrepel: 100 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

pa14_expt <- subset_expt(pa14_expt, nonzero=5500)
## The samples (and read coverage) removed when filtering 5500 non-zero genes are:
##  SM040  SM046  SM048  SM053 
##  41248 787576 301043 844448
## subset_expt(): There were 105, now there are 101 samples.

4.1 A few queries

I know a priori that April is interested to see how the 4 library preparations look with respect to each other. Let us therefore create a data structure to look explicitly at that.

pa14_libprep <- set_expt_conditions(pa14_expt, fact="libraryprepbatch") %>%
  set_expt_batches(fact="organisms")

pa14_lib_norm <- normalize_expt(pa14_libprep, filter=TRUE,
                                convert="cpm", norm="quant", transform="log2")
## Removing 13 low-count genes (5966 remaining).
## transform_counts: Found 29 values equal to 0, adding 1 to the matrix.
plot_pca(pa14_lib_norm)$plot
## plot labels was not set and there are more than 100 samples, disabling it.
## Error: Continuous value supplied to discrete scale

plot_corheat(pa14_lib_norm)$plot

libprep_pca_info <- pca_information(
    pa14_lib_norm, plot_pcas=TRUE,
    expt_factors=c("libraryprepbatch", "organisms", "strains", "media", "bioreplicate"))
## plot labels was not set and there are more than 100 samples, disabling it.
libprep_pca_info$anova_f_heatmap

libprep_pca_info$pca_plots[[2]]
## Error: Continuous value supplied to discrete scale
libprep_pca_info$pca_plots[[3]]
## Error: Continuous value supplied to discrete scale
libprep_pca_info$pca_plots[[4]]
## Error: Continuous value supplied to discrete scale
libprep_pca_info$pca_plots[[5]]
## Error: Continuous value supplied to discrete scale

To my eyes they look reasonably mixed, suggesting that library prep batch is not a dominant factor in the data.

At this point, I am thinking that we should separate the data by the experiments, but I will first just show the relationships among all the data.

5 Looking at other factors

As far as I see, there are three factors which are of primary interest:

  1. PA14 strain
  2. Media used
  3. Bioreplicate

The last will be used as batch in the following plots.

pa14_media <- set_expt_conditions(pa14_expt, fact="media") %>%
  set_expt_batches(fact="bioreplicate")
pa14_media_norm <- normalize_expt(pa14_media, transform="log2", convert="cpm",
                                  filter=TRUE, norm="quant")
## Removing 13 low-count genes (5966 remaining).
## transform_counts: Found 29 values equal to 0, adding 1 to the matrix.
plot_pca(pa14_media_norm)$plot
## plot labels was not set and there are more than 100 samples, disabling it.

pa14_strains <- set_expt_conditions(pa14_expt, fact="strains") %>%
  set_expt_batches(fact="bioreplicate")
pa14_strains_norm <- normalize_expt(pa14_strains, transform="log2", convert="cpm",
                                  filter=TRUE, norm="quant")
## Removing 13 low-count genes (5966 remaining).
## transform_counts: Found 29 values equal to 0, adding 1 to the matrix.
plot_pca(pa14_strains_norm)$plot
## plot labels was not set and there are more than 100 samples, disabling it.

Disregarding the various experiments performed, I think we can state that media separates the data in a fashion which is more interesting than strain. Given the number of (what I assume are) closely related strains, I am thinking it might prove to be a good idea to perform my variant search tool on this data and see how well they held up with respect to the reference strain.

6 Separate the experiments

I have been told repeatedly that there are multiple experiments in this data, but apparently I have not paid proper attention because I cannot remember which is which, and to my eyes it is not obvious in the sample sheet.

With this in mind, I spoke with Solomon briefly and have an idea of the 3 logical groups in his data. Let us therefore separate and examine those first.

6.1 Metabolism and infection

For the moment, I am going to call Solomon’s samples ‘metabolism and infection.’ I will also complicate the ‘condition’ of the data by combining the media and strain, but shortly thereafter will split that back. I think the reason why will become clear.

initials_factor <- gsub(x=rownames(pData(pa14_expt)), pattern="^(..).*$", replacement="\\1")
pData(pa14_expt)[["initials"]] <- as.factor(initials_factor)
strain_media <- paste0(pData(pa14_expt)[["strains"]], "_",
                       pData(pa14_expt)[["media"]])
pData(pa14_expt)[["strain_media"]] <- strain_media

## Lets set some colors
## WT: grayscale, eda: blue, edd: green, gcd: purple, pgl: red, zwf: yellow
colors_by_strain <- list(
    "PA14 WT" = "#000000",
    "PA14 eda" = "#0000dd",
    "PA14 edd" = "#00dd00",
    "PA14 gcd" = "#dd00dd",
    "PA14 pgl" = "#dd0000",
    "PA14 zwf" = "#dddd00")

infect_metabolism <- subset_expt(pa14_expt, subset="initials=='SM'") %>%
  set_expt_conditions(fact="strains") %>%
  set_expt_batches(fact="media")
## subset_expt(): There were 101, now there are 56 samples.
plot_legend(infect_metabolism)$plot

written <- write_expt(infect_metabolism, excel="excel/pa14_sm_written.xlsx")
## Deleting the file excel/pa14_sm_written.xlsx before writing the tables.
## Writing the first sheet, containing a legend and some summary data.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:S4Vectors':
## 
##     expand
## 
## Total:65 s
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## 
## Total:58 s
metabolism_control <- subset_expt(infect_metabolism,
                                  subset="media=='LB'|media=='LB + 0.5 M urea'") %>%
  set_expt_conditions(fact="media") %>%
  set_expt_batches("bioreplicate")
## subset_expt(): There were 56, now there are 6 samples.
metabolism_starvation <- subset_expt(infect_metabolism,
                                     subset="media=='PBST'|media=='Urine'") %>%
  set_expt_colors(colors=colors_by_strain) %>%
  set_expt_conditions(fact="media") %>%
  set_expt_batches(fact="strains")
## subset_expt(): There were 56, now there are 35 samples.
metabolism_starvation_strain <- set_expt_conditions(metabolism_starvation, fact="strains") %>%
  set_expt_batches(fact="media")

metabolism_exudate <- subset_expt(infect_metabolism,
                                  subset="media=='Instilled'")
## subset_expt(): There were 56, now there are 15 samples.

6.2 Glance at these 4 subsets

As a whole group, these samples are a bit confusing. The mouse-instilled samples are pretty obvious, but the other sources of variance remain a bit of a mystery to me.

global_norm <- normalize_expt(infect_metabolism, filter=TRUE, convert="cpm",
                              norm="quant", transform="log2") %>%
  set_expt_conditions(fact="media")
## Removing 40 low-count genes (5939 remaining).
## transform_counts: Found 6 values equal to 0, adding 1 to the matrix.
plot_pca(global_norm)$plot
## Warning: ggrepel: 13 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

tmp <- global_norm %>%
  set_expt_conditions(fact="strains")
plot_pca(tmp)$plot
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
## Warning: ggrepel: 47 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

7 The smallest experiment: LB vs. LB+urea

This is a group of 6 samples, 3 in LB and 3 in LB+urea. This should therefore be the most straightforward comparison.

mc_norm <- normalize_expt(metabolism_control, transform="log2",
                          convert="cpm", norm="quant", filter=TRUE)
## Removing 253 low-count genes (5726 remaining).
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
plot_pca(mc_norm)$plot

7.1 Metabolism control experiment, DE

mc_san <- sanitize_expt(metabolism_control)
mc_de <- all_pairwise(mc_san, model_batch=TRUE, filter=TRUE)
## Using limma's removeBatchEffect to visualize with(out) batch inclusion.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
mc_tables <- combine_de_tables(
    mc_de,
    excel=glue::glue("excel/metabolism_control_tables-v{ver}.xlsx"))
mc_sig <- extract_significant_genes(
    mc_tables,
    excel=glue::glue("excel/metabolism_control_sig-v{ver}.xlsx"))

8 Starving strains

The second group is a little more complex: it seeks to simultaneously compare the strains (WT vs. mutants) and the environment (PBST vs. urine).

This design is complex enough that I think we need to choose colors more carefully.

Here is a query from Solomon:

Could you please generate me a table for the metabolism starvation strain analyses where the urine is the numerator and the PBST is the denominator?

I am interpreting this to mean we should have an experimental design in which we ignore the strains and just compare all urine and PBST samples. As far as I can tell, the following block already returns exactly that:

ms_norm <- normalize_expt(metabolism_starvation, filter=TRUE, norm="quant",
                          convert="cpm", transform="log2")
## Removing 70 low-count genes (5909 remaining).
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
plot_pca(ms_norm)$plot

ms_de <- all_pairwise(metabolism_starvation, model_batch=TRUE)
## Using limma's removeBatchEffect to visualize with(out) batch inclusion.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
ms_de$comparison$comp
##                Urine_vs_PBST
## limma_vs_deseq        0.9846
## limma_vs_edger        0.9848
## limma_vs_ebseq        0.9800
## limma_vs_basic        0.9993
## deseq_vs_edger        0.9999
## deseq_vs_ebseq        0.9958
## deseq_vs_basic        0.9851
## edger_vs_ebseq        0.9964
## edger_vs_basic        0.9856
## ebseq_vs_basic        0.9828
ms_tables <- combine_de_tables(
    ms_de,
    excel=glue::glue("excel/metabolism_starvation_tables-v{ver}.xlsx"))
ms_sig <- extract_significant_genes(
    ms_tables,
    excel=glue::glue("excel/metabolism_starvation_sig-v{ver}.xlsx"))

9 Metabolism Starvation GO

## Take the DESeq2 up/down gene sets for the first (only) contrast from the
## extract_significant_genes() result.  The previous lookup went through
## ms_tables[["significant"]], which evidently returned NULL: the earlier render
## failed with "attempt to set 'rownames' on an object with no dimensions" and
## every later step in this section failed as a consequence.
ms_up <- ms_sig[["deseq"]][["ups"]][[1]]
ms_down <- ms_sig[["deseq"]][["downs"]][[1]]

## The go data from microbesonline is keyed by the gene name
## (e.g. dnaA), not gene ID or PA id or whatever.
## NOTE(review): pa14_go as built in the annotation section takes its ID column
## from gene_id, while the lengths below are keyed by name.x -- confirm that the
## GO table, the length table and the rownames set here all use the same key.
pa14_lengths <- pa14_annot[, c("name.x", "width")]
colnames(pa14_lengths) <- c("ID", "width")

## NOTE(review): the annotation column is 'name.x' in pa14_annot but is read as
## 'namex' from the DE tables -- confirm the column name in ms_up/ms_down.
rownames(ms_up) <- make.names(ms_up[["namex"]], unique=TRUE)
ms_up_goseq <- simple_goseq(ms_up, go_db=pa14_go, length_db=pa14_lengths)
ms_up_goseq[["pvalue_plots"]][["bpp_plot_over"]]
ms_up_goseq[["pvalue_plots"]][["mfp_plot_over"]]
rownames(ms_down) <- make.names(ms_down[["namex"]], unique=TRUE)
ms_down_goseq <- simple_goseq(ms_down, go_db=pa14_go, length_db=pa14_lengths)
ms_down_goseq[["pvalue_plots"]][["bpp_plot_over"]]
ms_down_goseq[["pvalue_plots"]][["mfp_plot_over"]]

9.1 Compare strains

This time let us compare the strains and lower the variance from media.

mss_norm <- normalize_expt(metabolism_starvation_strain, filter=TRUE, convert="cpm", norm="quant",
                           transform="log2")
## Removing 70 low-count genes (5909 remaining).
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
plot_pca(mss_norm)$plot

mss_urine <- subset_expt(metabolism_starvation_strain, subset="batch=='Urine'")
## subset_expt(): There were 35, now there are 18 samples.
mss_urine_norm <- normalize_expt(mss_urine, filter=TRUE, convert="cpm", norm="quant",
                                 batch="svaseq", transform="log2")
## Warning in normalize_expt(mss_urine, filter = TRUE, convert = "cpm", norm =
## "quant", : Quantile normalization and sva do not always play well together.
## Removing 92 low-count genes (5887 remaining).
## Setting 19 low elements to zero.
## transform_counts: Found 19 values equal to 0, adding 1 to the matrix.
plot_pca(mss_urine_norm)$plot

mss_pbst <- subset_expt(metabolism_starvation_strain, subset="batch=='PBST'")
## subset_expt(): There were 35, now there are 17 samples.
mss_pbst_norm <- normalize_expt(mss_pbst, filter=TRUE, convert="cpm",
                                batch="svaseq", transform="log2")
## Removing 177 low-count genes (5802 remaining).
## Setting 50 low elements to zero.
## transform_counts: Found 50 values equal to 0, adding 1 to the matrix.
plot_pca(mss_pbst_norm)$plot

## Contrasts of interest, later passed as 'keepers' to combine_de_tables().
## Each entry pairs a mutant with PA14WT; presumably the first element is the
## numerator and the second the denominator -- confirm against hpgltools.
## NOTE(review): the last key is spelled "zfw_vs_wt" although the strain is zwf.
## Later table lookups use the same "zfw_vs_wt" spelling, so rename them
## together or not at all.
interesting <- list(
    "eda_vs_wt" = c("PA14eda", "PA14WT"),
    "edd_vs_wt" = c("PA14edd", "PA14WT"),
    "gcd_vs_wt" = c("PA14gcd", "PA14WT"),
    "pgl_vs_wt" = c("PA14pgl", "PA14WT"),
    "zfw_vs_wt" = c("PA14zwf", "PA14WT"))
mss_urine_de <- all_pairwise(mss_urine, model_batch="svaseq", filter=TRUE)
## Removing 0 low-count genes (5887 remaining).
## Setting 40 low elements to zero.
## transform_counts: Found 40 values equal to 0, adding 1 to the matrix.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.

mss_urine_table <- combine_de_tables(
    mss_urine_de, keepers=interesting,
    excel=glue::glue("excel/metabolism_starvation_strain_tables-v{ver}.xlsx"))
mss_urine_sig <- extract_significant_genes(
    mss_urine_table,
    excel=glue::glue("excel/metabolism_starvation_strain_sig-v{ver}.xlsx"))

Given that the strains are so similar, we can comfortably compare them across media (PBST/urine).

msm <- set_expt_conditions(metabolism_starvation_strain, fact="batch") %>%
  set_expt_batches(fact="bioreplicate")
msm_norm <- normalize_expt(msm, filter=TRUE, convert="cpm", norm="quant",
                           transform="log2")
## Removing 70 low-count genes (5909 remaining).
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
plot_pca(msm_norm)$plot

10 Exudate

Compare the strains during the instillation process

exudate_norm <- normalize_expt(metabolism_exudate, filter=TRUE, convert="cpm",
                               norm="quant", transform="log2")
## Removing 227 low-count genes (5752 remaining).
## transform_counts: Found 285 values equal to 0, adding 1 to the matrix.
pp(file="images/compare_strains_exudate.pdf", image=plot_pca(exudate_norm)$plot)

exudate_nb <- normalize_expt(metabolism_exudate, filter=TRUE, convert="cpm",
                             transform="log2", batch="svaseq")
## Removing 227 low-count genes (5752 remaining).
## Setting 460 low elements to zero.
## transform_counts: Found 460 values equal to 0, adding 1 to the matrix.
plot_pca(exudate_nb)$plot

pp(file="images/compare_strains_exudate_sva.pdf", image=plot_pca(exudate_nb)$plot)

10.1 Exudate DE

exudate_de <- all_pairwise(metabolism_exudate, model_batch=TRUE, filter=TRUE)
## Using limma's removeBatchEffect to visualize with(out) batch inclusion.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.

exudate_tables <- combine_de_tables(
    exudate_de, keepers=interesting,
    excel=glue::glue("excel/exudate_tables-v{ver}.xlsx"))
exudate_sig <- extract_significant_genes(
    exudate_tables,
    excel=glue::glue("excel/exudate_sig-v{ver}.xlsx"))
## Warning in max(newdf[["avg"]]): no non-missing arguments to max; returning -Inf
wanted_table <- exudate_tables[["data"]][["eda_vs_wt"]]
eda_wt_volcano <- plot_volcano_de(wanted_table, logfc=2, fc_col="deseq_logfc", p_col="deseq_adjp")
pp(file="images/wt_vs_eda_de_volcano_instilled.pdf",
   image=eda_wt_volcano$plot)

11 Instilled vs. PBST/Urine

Vince and Najib are interested in a slightly different question:

instilled_vs <- subset_expt(
    pa14_expt,
    subset='media=="PBST"|media=="Urine"|media=="Instilled"') %>%
  set_expt_conditions(fact="media") %>%
  set_expt_batches(fact="bioreplicate")
## subset_expt(): There were 101, now there are 50 samples.
instilled_vs_norm <- normalize_expt(instilled_vs, transform="log2", convert="cpm",
                                    norm="quant", filter=TRUE)
## Removing 41 low-count genes (5938 remaining).
## transform_counts: Found 6 values equal to 0, adding 1 to the matrix.
inst_pca <- plot_pca(instilled_vs_norm)$plot
pp(file="images/instilled_vs_pca.pdf", image=inst_pca)
## Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## ggrepel: 12 unlabeled data points (too many overlaps). Consider increasing max.overlaps

inst_vs_de <- all_pairwise(instilled_vs, filter=TRUE)
## Using limma's removeBatchEffect to visualize with(out) batch inclusion.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.

inst_vs_tables <- combine_de_tables(
    inst_vs_de,
    excel=glue::glue("excel/instilled_vs_tables-v{ver}.xlsx"))
inst_vs_sig <- extract_significant_genes(
    inst_vs_tables,
    excel=glue::glue("excel/instilled_vs_sig-v{ver}.xlsx"))
pp(file="images/pbst_vs_instilled_de_volcano.pdf",
   image=inst_vs_tables[["plots"]][["PBST_vs_Instilled"]][["deseq_vol_plots"]][["plot"]])

12 Check mouse counts

## Build a second expressionset from the 'mousetable' column of a different
## sample sheet (all_samples_modified2.xlsx).
## NOTE(review): gene_info is still the PA14 annotation.  The render log below
## reports that its rownames do not match the gene:ENSMUSG... ids of the count
## tables, so no useful annotation is attached -- confirm whether a mouse
## annotation should be supplied instead.
mm_expt <- create_expt("sample_sheets/all_samples_modified2.xlsx",
                       gene_info=pa14_annot, file_column="mousetable")
## Reading the sample metadata.
## Did not find the condition column in the sample sheet.
## Filling it in as undefined.
## Did not find the batch column in the sample sheet.
## Filling it in as undefined.
## The sample definitions comprises: 105 rows(samples) and 32 columns(metadata fields).
## Warning in create_expt("sample_sheets/all_samples_modified2.xlsx", gene_info
## = pa14_annot, : Some samples were removed when cross referencing the samples
## against the count data.
## Warning in create_expt("sample_sheets/all_samples_modified2.xlsx", gene_info =
## pa14_annot, : Even after changing the rownames in gene info, they do not match
## the count table.
## Even after changing the rownames in gene info, they do not match the count table.
## Here are the first few rownames from the count tables:
## gene:ENSMUSG00000000001, gene:ENSMUSG00000000003, gene:ENSMUSG00000000028, gene:ENSMUSG00000000037, gene:ENSMUSG00000000049, gene:ENSMUSG00000000056
## Here are the first few rownames from the gene information table:
## gene1650835, gene1650837, gene1650839, gene1650841, gene1650843, gene1650845
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Warning in create_expt("sample_sheets/all_samples_modified2.xlsx", gene_info =
## pa14_annot, : The following samples have no counts! SM029SM032SM038SM040
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 25753 features and 35 samples.
plot_libsize(mm_expt)$plot
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 4 rows containing missing values (geom_bar).

13 Fun with circos

control_table <- mc_tables[["data"]][["LB05Murea_vs_LB"]]
urine_eda <- mss_urine_table[["data"]][["eda_vs_wt"]]
urine_edd <- mss_urine_table[["data"]][["edd_vs_wt"]]
urine_gcd <- mss_urine_table[["data"]][["gcd_vs_wt"]]
urine_pgl <- mss_urine_table[["data"]][["pgl_vs_wt"]]
urine_zfw <- mss_urine_table[["data"]][["zfw_vs_wt"]]

exudate_eda <- exudate_tables[["data"]][["eda_vs_wt"]]
exudate_edd <- exudate_tables[["data"]][["edd_vs_wt"]]
exudate_gcd <- exudate_tables[["data"]][["gcd_vs_wt"]]
exudate_pgl <- exudate_tables[["data"]][["pgl_vs_wt"]]
exudate_zfw <- exudate_tables[["data"]][["zfw_vs_wt"]]

pa14_annot[["chromosome"]] <- "Pseudomonas_aeruginosa_UCBPP_PA14"

sm_cfg <- circos_prefix(pa14_annot, name="sm", cog_column = "COGFun",
                        start_column="start.x", end_column="end", strand_column="strand.x",
                        chr_column="chromosome", id_column="gene_id")
## This assumes you have a colors.conf in circos/colors/ and fonts.conf in circos/fonts/
## It also assumes you have conf/ideogram.conf, conf/ticks.conf, and conf/housekeeping.conf
## It will write circos/conf/sm.conf with a reasonable first approximation config file.
## Wrote karyotype to circos/conf/ideograms/sm.conf
## This should match the ideogram= line in sm.conf
## Wrote ticks to circos/conf/ticks_sm.conf
## Write the karyotype from the reference genome sequence.
## NOTE(review): the previous render failed here with "cannot open file
## 'reference/paeruginosah_pa14.fasta'".  The stray 'h' is removed so the name
## matches the stem of reference/paeruginosa_pa14.gff -- confirm the fasta file
## name on disk.
sm_kary <- circos_karyotype(sm_cfg, fasta="reference/paeruginosa_pa14.fasta")
sm_plus_minus <- circos_plus_minus(sm_cfg, width=0.06, thickness=40)
## Writing data file: circos/data/sm_plus_go.txt with the + strand GO data.
## Writing data file: circos/data/sm_minus_go.txt with the - strand GO data.
## Wrote the +/- config files.  Appending their inclusion to the master file.
## Returning the inner width: 0.88.  Use it as the outer for the next ring.
## Put the plots here
sm_first_heat <- circos_heatmap(sm_cfg, control_table, colname="deseq_logfc",
                                basename="control", outer=sm_plus_minus, width=0.05)
## Assuming the input is a dataframe.
## Writing data file: circos/data/smdeseq_logfc_heatmap.txt with the controldeseq_logfc column.
## Returning the inner width: 0.81.  Use it as the outer for the next ring.
##sm_eda_hist <- circos_hist(sm_cfg, urine_eda, colname="deseq_logfc",
##                           basename="ureda", outer=sm_first_heat, spacing=-0.05)
##sm_edd_hist <- circos_hist(sm_cfg, urine_edd, colname="deseq_logfc",
##                           basename="uredd", outer=sm_eda_hist, spacing=-0.05)
##sm_gcd_hist <- circos_hist(sm_cfg, urine_gcd, colname="deseq_logfc",
##                           basename="urgcd", outer=sm_edd_hist, spacing=-0.05)
##sm_pgl_hist <- circos_hist(sm_cfg, urine_pgl, colname="deseq_logfc",
##                           basename="urpgl", outer=sm_gcd_hist, spacing=-0.05)
##sm_zfw_hist <- circos_hist(sm_cfg, urine_zfw, colname="deseq_logfc",
##                           basename="urzfw", outer=sm_pgl_hist, spacing=-0.05)
##ex_eda_hist <- circos_hist(sm_cfg, exudate_eda, colname="deseq_logfc",
##                           basename="exeda", outer=sm_zfw_hist, spacing=-0.05)
##ex_edd_hist <- circos_hist(sm_cfg, exudate_edd, colname="deseq_logfc",
##                           basename="exedd", outer=ex_eda_hist, spacing=-0.05)
##ex_gcd_hist <- circos_hist(sm_cfg, exudate_gcd, colname="deseq_logfc",
##                           basename="exgcd", outer=ex_edd_hist, spacing=-0.05)
##ex_pgl_hist <- circos_hist(sm_cfg, exudate_pgl, colname="deseq_logfc",
##                           basename="expgl", outer=ex_gcd_hist, spacing=-0.05)
##ex_zfw_hist <- circos_hist(sm_cfg, exudate_zfw, colname="deseq_logfc",
##                           basename="exzfw", outer=ex_pgl_hist, spacing=-0.05)
sm_finish <- circos_suffix(sm_cfg)
sm_made <- circos_make(sm_cfg, target="sm")
## Record the session and the hpgltools commit, then save the workspace to a
## file named after the Rmd file and the version stamp.
pander::pander(sessionInfo())
message(paste0("This is hpgltools commit: ", get_git_commit()))
## Spell out gsub()'s 'replacement' argument; 'replace=' only worked through
## partial argument matching.
this_save <- paste0(gsub(pattern="\\.Rmd", replacement="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
tmp <- sm(saveme(filename=this_save))
---
title: "ED Project: Pseudomonas RNASeq dataset."
author: "atb abelew@gmail.com"
date: "`r Sys.Date()`"
output:
  html_document:
    code_download: true
    code_folding: show
    fig_caption: true
    fig_height: 7
    fig_width: 7
    highlight: tango
    keep_md: false
    mode: selfcontained
    number_sections: true
    self_contained: true
    theme: readable
    toc: true
    toc_float:
      collapsed: false
      smooth_scroll: false
  rmdformats::readthedown:
    code_download: true
    code_folding: show
    df_print: paged
    fig_caption: true
    fig_height: 7
    fig_width: 7
    highlight: tango
    width: 300
    keep_md: false
    mode: selfcontained
    toc_float: true
  BiocStyle::html_document:
    code_download: true
    code_folding: show
    fig_caption: true
    fig_height: 7
    fig_width: 7
    highlight: tango
    keep_md: false
    mode: selfcontained
    toc_float: true
---

<style type="text/css">
body, td {
  font-size: 16px;
}
code.r{
  font-size: 16px;
}
pre {
  font-size: 16px
}
</style>

```{r options, include=FALSE}
## Attach hpgltools, then load the copy under ~/hpgltools through devtools
## (presumably a development checkout -- confirm it is still wanted).
library("hpgltools")
tt <- devtools::load_all("~/hpgltools")
## Package-level knitr options.
knitr::opts_knit$set(width=120,
                     progress=TRUE,
                     verbose=TRUE)
## Chunk-level defaults.  'echo' is a chunk option, so it is set here rather
## than through opts_knit$set(), where it was not applied to any chunk.
## error=TRUE lets the render continue past chunks which raise errors.
knitr::opts_chunk$set(echo=TRUE,
                      error=TRUE,
                      dpi=96)
old_options <- options(digits=4,
                       stringsAsFactors=FALSE,
                       knitr.duplicate.label="allow")
ggplot2::theme_set(ggplot2::theme_bw(base_size=10))
## Date stamps used in the names of the output files written by this document.
rundate <- format(Sys.Date(), format="%Y%m%d")
previous_file <- ""
ver <- format(Sys.Date(), "%Y%m%d")

##tmp <- sm(loadme(filename=paste0(gsub(pattern="\\.Rmd", replacement="", x=previous_file), "-v", ver, ".rda.xz")))
rmd_file <- "index_sm.Rmd"
```

# Introduction

This dataset contains multiple experiments.

# Annotations

```{r annotation}
## Gene annotations from the PA14 gff, with gene_id as the rownames.
pa14_gff <- load_gff_annotations("reference/paeruginosa_pa14.gff", id_col="gene_id")
rownames(pa14_gff) <- pa14_gff[["gene_id"]]
## The Alias column has PA14_00010

pa14_microbes <- as.data.frame(load_microbesonline_annotations("PA14"))
## The sysName column has PA14_0010

## Join the two annotation sources on the PA14_ identifier and key the result
## by gene_id again.
pa14_annot <- merge(pa14_gff, pa14_microbes, by.x="Alias", by.y="sysName")
rownames(pa14_annot) <- pa14_annot[["gene_id"]]

## The identifiers are a bit odd, so we need to do a little work
## Build two lookup tables keyed by gene_id: pa14_go (ID, GO) and pa14_length (ID, width).
pa14_length <- pa14_annot[, c("gene_id", "width", "Alias")]
pa14_go <- load_microbesonline_go(species="PA14", id_column="sysName")
pa14_go_length <- merge(pa14_go, pa14_length, by.x="sysName", by.y="Alias")
pa14_go <- pa14_go_length[, c("gene_id", "GO")]
colnames(pa14_go) <- c("ID", "GO")
## merge() gives its result automatic (integer) rownames, so the gene ids cannot
## be used as a row index here: doing so returned only NA rows.  Collapse the
## repeated gene/width pairs (presumably one per GO term) with unique() instead.
pa14_length <- unique(pa14_go_length[, c("gene_id", "width")])
pa14_length_ids <- pa14_length[["gene_id"]]
rownames(pa14_length) <- make.names(pa14_length[["gene_id"]], unique=TRUE)
colnames(pa14_length) <- c("ID", "width")
```

# Make the expressionset

Given the above annotations, now lets pull in the counts.

I am switching to the sheet all_samples_modified_gcd_202206.xlsx for the
moment because I added a space in the strain name for the gcd samples.

```{r expressionset}
## Build the expressionset from the sample sheet, attaching pa14_annot as the
## gene annotations.  file_column names the sample-sheet column presumed to
## hold the path of each sample's count table.
pa14_expt <- create_expt("sample_sheets/all_samples_modified_gcd_202206.xlsx",
                         gene_info=pa14_annot, file_column="hisatcounttablereverse")
```


# Quick global look at the data

While we are at it, let's drop the sad samples: those with fewer than 5,500 non-zero genes.

```{r metrics}
## Library size of every sample.
pa14_libsize <- plot_libsize(pa14_expt)
pa14_libsize$plot

## Non-zero gene counts per sample; this plot motivates the cutoff used below.
pa14_nonzero <- plot_nonzero(pa14_expt)
pa14_nonzero$plot

## Overwrite pa14_expt with the samples which pass the nonzero=5500 filter;
## everything downstream works from this reduced set.
pa14_expt <- subset_expt(pa14_expt, nonzero=5500)
```

## A few queries

I know a priori that April is interested to see how the 4 library
preparations look with respect to each other.  Let us therefore create
a data structure to look explicitly at that.

```{r libprep}
## Use the library preparation batch as the condition and the organism as the batch.
## NOTE(review): the rendered report shows "Continuous value supplied to discrete
## scale" for the PCA plots made from this object; presumably one of these two
## factors is stored as a number rather than a factor/character -- confirm.
pa14_libprep <- set_expt_conditions(pa14_expt, fact="libraryprepbatch") %>%
  set_expt_batches(fact="organisms")

## Low-count filter, cpm conversion, quantile normalization and log2 transform.
pa14_lib_norm <- normalize_expt(pa14_libprep, filter=TRUE,
                                convert="cpm", norm="quant", transform="log2")
plot_pca(pa14_lib_norm)$plot
plot_corheat(pa14_lib_norm)$plot

## Relate the principal components to each listed metadata factor, then print
## the resulting heatmap and the stored PCA plots 2 through 5.
libprep_pca_info <- pca_information(
    pa14_lib_norm, plot_pcas=TRUE,
    expt_factors=c("libraryprepbatch", "organisms", "strains", "media", "bioreplicate"))
libprep_pca_info$anova_f_heatmap
libprep_pca_info$pca_plots[[2]]
libprep_pca_info$pca_plots[[3]]
libprep_pca_info$pca_plots[[4]]
libprep_pca_info$pca_plots[[5]]
```

To my eyes they look reasonably mixed, suggesting that library prep
batch is not a dominant factor in the data.

At this point, I am thinking that we should separate the data by the
experiments, but I will first just show the relationships among all
the data.

# Looking at other factors

As far as I see, there are three factors which are of primary
interest:

1. PA14 strain
2. Media used
3. Bioreplicate

The last will be used as batch in the following plots.

```{r other_factors}
## View 1: media as the condition, bioreplicate as the batch.
pa14_media <- set_expt_conditions(pa14_expt, fact="media") %>%
  set_expt_batches(fact="bioreplicate")
pa14_media_norm <- normalize_expt(pa14_media, transform="log2", convert="cpm",
                                  filter=TRUE, norm="quant")
plot_pca(pa14_media_norm)$plot

## View 2: strain as the condition, bioreplicate as the batch; the
## normalization settings are the same as for the media view.
pa14_strains <- set_expt_conditions(pa14_expt, fact="strains") %>%
  set_expt_batches(fact="bioreplicate")
pa14_strains_norm <- normalize_expt(pa14_strains, transform="log2", convert="cpm",
                                  filter=TRUE, norm="quant")
plot_pca(pa14_strains_norm)$plot
```

Disregarding the various experiments performed, I think we can state
that media separates the data in a fashion which is more interesting
than strain.  Given the number of (what I assume are) closely related
strains, I am thinking it might prove to be a good idea to perform my
variant search tool on this data and see how well they held up with
respect to the reference strain.

# Separate the experiments

I have been told repeatedly that there are multiple experiments in
this data, but apparently I have not paid proper attention because I
cannot remember which is which, and to my eyes it is not obvious in
the sample sheet.

With this in mind, I spoke with Solomon briefly and have an idea of
the 3 logical groups in his data.  Let us therefore separate and
examine those first.

## Metabolism and infection

For the moment, I am going to call Solomon's samples 'metabolism and
infection.'  I will also complicate the 'condition' of the data by
combining the media and strain, but shortly thereafter will split that
back.  I think the reason why will become clear.

```{r metabolism}
## Tag every sample with the first two characters of its sample ID (stored
## as 'initials') and with a combined strain_media label.
initials_factor <- gsub(pattern="^(..).*$", replacement="\\1",
                        x=rownames(pData(pa14_expt)))
pData(pa14_expt)[["initials"]] <- as.factor(initials_factor)
strain_media <- paste0(pData(pa14_expt)[["strains"]],
                       "_",
                       pData(pa14_expt)[["media"]])
pData(pa14_expt)[["strain_media"]] <- strain_media

## Strain -> color mapping.
## WT: grayscale, eda: blue, edd: green, gcd: purple, pgl: red, zwf: yellow
colors_by_strain <- list(
    "PA14 WT" = "#000000",
    "PA14 eda" = "#0000dd",
    "PA14 edd" = "#00dd00",
    "PA14 gcd" = "#dd00dd",
    "PA14 pgl" = "#dd0000",
    "PA14 zwf" = "#dddd00")

## The 'SM' samples, with strain as condition and media as batch.
infect_metabolism <- subset_expt(pa14_expt, subset="initials=='SM'")
infect_metabolism <- set_expt_conditions(infect_metabolism, fact="strains")
infect_metabolism <- set_expt_batches(infect_metabolism, fact="media")
plot_legend(infect_metabolism)[["plot"]]

written <- write_expt(infect_metabolism, excel="excel/pa14_sm_written.xlsx")

## LB vs. LB + urea: media as condition, bioreplicate as batch.
metabolism_control <- subset_expt(infect_metabolism,
                                  subset="media=='LB'|media=='LB + 0.5 M urea'")
metabolism_control <- set_expt_conditions(metabolism_control, fact="media")
metabolism_control <- set_expt_batches(metabolism_control, fact="bioreplicate")

## PBST vs. urine: media as condition, strain as batch.
## NOTE(review): the strain colors are applied while the condition is still
## 'strains' and the condition is changed to 'media' immediately afterwards;
## confirm that the colors survive that change as intended.
metabolism_starvation <- subset_expt(infect_metabolism,
                                     subset="media=='PBST'|media=='Urine'")
metabolism_starvation <- set_expt_colors(metabolism_starvation, colors=colors_by_strain)
metabolism_starvation <- set_expt_conditions(metabolism_starvation, fact="media")
metabolism_starvation <- set_expt_batches(metabolism_starvation, fact="strains")

## The same samples with the roles swapped: strain as condition, media as batch.
metabolism_starvation_strain <- set_expt_batches(
    set_expt_conditions(metabolism_starvation, fact="strains"),
    fact="media")

## The instilled samples keep the condition/batch inherited from
## infect_metabolism.
metabolism_exudate <- subset_expt(infect_metabolism, subset="media=='Instilled'")
```

## Glance at these 4 subsets

As a whole group, these samples are a bit confusing.  The mouse
instilled samples are pretty obvious, but the other sources of variance
remain a bit of a mystery to me.

```{r metabolism_subsets}
## Normalize all of the 'SM' samples together, then view them by media.
global_norm <- set_expt_conditions(
    normalize_expt(infect_metabolism,
                   transform="log2", norm="quant", convert="cpm", filter=TRUE),
    fact="media")
plot_pca(global_norm)[["plot"]]

## The same normalized data, viewed by strain.
tmp <- set_expt_conditions(global_norm, fact="strains")
plot_pca(tmp)[["plot"]]
```

# The smallest experiment: LB vs. LB+urea

This is a group of 6 samples, 3 in LB and 3 in LB+urea.  This
should therefore be the most straightforward comparison.

```{r control}
## Normalized view of the LB vs. LB+urea samples.
mc_norm <- normalize_expt(
    metabolism_control,
    filter=TRUE, norm="quant", convert="cpm", transform="log2")
plot_pca(mc_norm)[["plot"]]
```

## Metabolism control experiment, DE

```{r mc_de}
## Sanitize the metadata, run the pairwise comparisons with batch in the
## model, and write the combined and significant tables to versioned xlsx files.
mc_san <- sanitize_expt(metabolism_control)
mc_de <- all_pairwise(mc_san, filter=TRUE, model_batch=TRUE)
mc_tables <- combine_de_tables(mc_de,
                               excel=glue::glue("excel/metabolism_control_tables-v{ver}.xlsx"))
mc_sig <- extract_significant_genes(mc_tables,
                                    excel=glue::glue("excel/metabolism_control_sig-v{ver}.xlsx"))
```

# Starving strains

The second group is a little more complex; it seeks to simultaneously
compare the strains (WT vs. mutants) and the environment (PBST
vs. urine).

This design is complex enough that I think we need to choose colors
more carefully.

Here is a query from Solomon:

Could you please generate me a table for the metabolism starvation strain analyses where
the urine is the numerator and the PBST is the denominator?

I am interpreting this to mean we should have an experimental design in which we ignore the strains
and just compare all urine and PBST samples.  When I ran it, it looked to me like the following
block already returns this comparison:

```{r starving_strains}
## Normalized view of the PBST/urine samples (condition: media, batch: strain).
ms_norm <- normalize_expt(
    metabolism_starvation,
    transform="log2", convert="cpm", norm="quant", filter=TRUE)
plot_pca(ms_norm)[["plot"]]

## Pairwise comparison with batch in the model.
## NOTE(review): unlike the other all_pairwise() calls in this document,
## this one does not pass filter=TRUE -- confirm that is deliberate.
ms_de <- all_pairwise(metabolism_starvation,
                      model_batch=TRUE)
ms_de$comparison$comp
ms_tables <- combine_de_tables(ms_de,
                               excel=glue::glue("excel/metabolism_starvation_tables-v{ver}.xlsx"))
ms_sig <- extract_significant_genes(ms_tables,
                                    excel=glue::glue("excel/metabolism_starvation_sig-v{ver}.xlsx"))
```

# Metabolism Starvation GO

```{r ms_go}
## Take the first up/down tables of significant genes according to DESeq2.
## The significant genes were extracted separately into ms_sig by
## extract_significant_genes(); combine_de_tables() was not asked to produce
## them, so ms_tables has no usable "significant" element.
ms_up <- ms_sig[["deseq"]][["ups"]][[1]]
ms_down <- ms_sig[["deseq"]][["downs"]][[1]]
## Fail early with a clear message instead of an obscure rownames<- error.
if (is.null(ms_up) || is.null(ms_down)) {
  stop("No significant up/down tables were found in ms_sig[['deseq']].")
}

## The go data from microbesonline is keyed by the gene name
## (e.g. dnaA), not gene ID or PA id or whatever.
## NOTE(review): pa14_go was re-keyed by gene_id when it was created at the
## top of this document; confirm that go_db, length_db and the row names set
## below all use the same identifiers.
pa14_lengths <- pa14_annot[, c("name.x", "width")]
colnames(pa14_lengths) <- c("ID", "width")

## Over-representation among the up-regulated genes.
rownames(ms_up) <- make.names(ms_up[["namex"]], unique=TRUE)
ms_up_goseq <- simple_goseq(ms_up, go_db=pa14_go, length_db=pa14_lengths)
ms_up_goseq[["pvalue_plots"]][["bpp_plot_over"]]
ms_up_goseq[["pvalue_plots"]][["mfp_plot_over"]]

## Over-representation among the down-regulated genes.
rownames(ms_down) <- make.names(ms_down[["namex"]], unique=TRUE)
ms_down_goseq <- simple_goseq(ms_down, go_db=pa14_go, length_db=pa14_lengths)
ms_down_goseq[["pvalue_plots"]][["bpp_plot_over"]]
ms_down_goseq[["pvalue_plots"]][["mfp_plot_over"]]
```

## Compare strains

This time let us compare the strains and lower the variance from
media.

```{r starvation_by_strain}
## All PBST/urine samples with strain as the condition and media as the batch.
mss_norm <- normalize_expt(metabolism_starvation_strain, filter=TRUE, convert="cpm", norm="quant",
                           transform="log2")
plot_pca(mss_norm)$plot

## Urine-only samples; batch="svaseq" is requested during normalization.
mss_urine <- subset_expt(metabolism_starvation_strain, subset="batch=='Urine'")
mss_urine_norm <- normalize_expt(mss_urine, filter=TRUE, convert="cpm", norm="quant",
                                 batch="svaseq", transform="log2")
plot_pca(mss_urine_norm)$plot

## PBST-only samples.
## NOTE(review): unlike the urine normalization above, this call does not
## pass norm="quant" -- confirm whether that difference is intended.
mss_pbst <- subset_expt(metabolism_starvation_strain, subset="batch=='PBST'")
mss_pbst_norm <- normalize_expt(mss_pbst, filter=TRUE, convert="cpm",
                                batch="svaseq", transform="log2")
plot_pca(mss_pbst_norm)$plot

## Contrasts of interest, each written as c(numerator, denominator): every
## mutant vs. WT.  The condition names here have no space, unlike the keys
## of colors_by_strain.
## NOTE(review): the key "zfw_vs_wt" looks like a transposition of "zwf";
## later chunks look the tables up by this exact key, so it must be renamed
## everywhere at once if it is changed.
interesting <- list(
    "eda_vs_wt" = c("PA14eda", "PA14WT"),
    "edd_vs_wt" = c("PA14edd", "PA14WT"),
    "gcd_vs_wt" = c("PA14gcd", "PA14WT"),
    "pgl_vs_wt" = c("PA14pgl", "PA14WT"),
    "zfw_vs_wt" = c("PA14zwf", "PA14WT"))
## Only the urine subset is passed to the DE analysis here; the output file
## names do not mention urine.
mss_urine_de <- all_pairwise(mss_urine, model_batch="svaseq", filter=TRUE)
mss_urine_table <- combine_de_tables(
    mss_urine_de, keepers=interesting,
    excel=glue::glue("excel/metabolism_starvation_strain_tables-v{ver}.xlsx"))
mss_urine_sig <- extract_significant_genes(
    mss_urine_table,
    excel=glue::glue("excel/metabolism_starvation_strain_sig-v{ver}.xlsx"))
```

Given that the strains are so similar, we can comfortably compare them across media (PBST/urine).

```{r compare_pbst_urine}
## Use the current batch (media) as the condition and the bioreplicate as
## the new batch, then look at the PCA.
msm <- set_expt_batches(
    set_expt_conditions(metabolism_starvation_strain, fact="batch"),
    fact="bioreplicate")
msm_norm <- normalize_expt(
    msm,
    transform="log2", norm="quant", convert="cpm", filter=TRUE)
plot_pca(msm_norm)[["plot"]]
```

# Exudate

Compare the strains during the instillation process.

```{r exudate_experiment}
## Quantile-normalized view of the instilled samples; the PCA is handed
## straight to pp() with a pdf file name.
exudate_norm <- normalize_expt(metabolism_exudate, filter=TRUE, convert="cpm",
                               norm="quant", transform="log2")
pp(file="images/compare_strains_exudate.pdf", image=plot_pca(exudate_norm)$plot)

## The same samples with batch="svaseq" requested (no norm= argument is given).
exudate_nb <- normalize_expt(metabolism_exudate, filter=TRUE, convert="cpm",
                             transform="log2", batch="svaseq")
## Compute the PCA once and reuse it for both the inline figure and the pdf,
## rather than running plot_pca() twice on the same data.
exudate_nb_pca <- plot_pca(exudate_nb)
exudate_nb_pca$plot
pp(file="images/compare_strains_exudate_sva.pdf", image=exudate_nb_pca$plot)
```

## Exudate DE

```{r exudate_de}
## Pairwise comparisons among the instilled samples.
## NOTE(review): metabolism_exudate was subset to media=='Instilled' while
## media is its batch factor; confirm model_batch=TRUE is meaningful here.
exudate_de <- all_pairwise(metabolism_exudate, filter=TRUE, model_batch=TRUE)

## Keep only the mutant vs. WT contrasts listed in 'interesting'.
exudate_tables <- combine_de_tables(exudate_de,
                                    keepers=interesting,
                                    excel=glue::glue("excel/exudate_tables-v{ver}.xlsx"))
exudate_sig <- extract_significant_genes(exudate_tables,
                                         excel=glue::glue("excel/exudate_sig-v{ver}.xlsx"))

## Volcano plot of the eda vs. WT table using the DESeq2 columns.
wanted_table <- exudate_tables[["data"]][["eda_vs_wt"]]
eda_wt_volcano <- plot_volcano_de(wanted_table,
                                  fc_col="deseq_logfc", p_col="deseq_adjp", logfc=2)
pp(file="images/wt_vs_eda_de_volcano_instilled.pdf", image=eda_wt_volcano$plot)
```

# Instilled vs. PBST/Urine

Vince and Najib are interested in a slightly different question:

```{r instilled_vs_others}
## Take the PBST, urine and instilled samples from the full experiment,
## with media as the condition and bioreplicate as the batch.
instilled_vs <- subset_expt(pa14_expt,
                            subset='media=="PBST"|media=="Urine"|media=="Instilled"')
instilled_vs <- set_expt_conditions(instilled_vs, fact="media")
instilled_vs <- set_expt_batches(instilled_vs, fact="bioreplicate")

instilled_vs_norm <- normalize_expt(
    instilled_vs,
    filter=TRUE, norm="quant", convert="cpm", transform="log2")
inst_pca <- plot_pca(instilled_vs_norm)$plot
pp(file="images/instilled_vs_pca.pdf", image=inst_pca)

## Differential expression among the three media, plus one volcano plot.
inst_vs_de <- all_pairwise(instilled_vs, filter=TRUE)
inst_vs_tables <- combine_de_tables(inst_vs_de,
                                    excel=glue::glue("excel/instilled_vs_tables-v{ver}.xlsx"))
inst_vs_sig <- extract_significant_genes(inst_vs_tables,
                                         excel=glue::glue("excel/instilled_vs_sig-v{ver}.xlsx"))
pp(image=inst_vs_tables[["plots"]][["PBST_vs_Instilled"]][["deseq_vol_plots"]][["plot"]],
   file="images/pbst_vs_instilled_de_volcano.pdf")
```

# Check mouse counts

```{r mouse_counts}
## Build a second expressionset using the 'mousetable' column of the sample
## sheet and plot its library sizes.
## NOTE(review): this passes the PA14 annotations as gene_info and reads
## all_samples_modified2.xlsx rather than the sheet used for pa14_expt --
## confirm both are intended.
mm_expt <- create_expt("sample_sheets/all_samples_modified2.xlsx",
                       gene_info=pa14_annot, file_column="mousetable")
plot_libsize(mm_expt)$plot
```

# Fun with circos

```{r sm_circos}
## Collect the DE tables which might be drawn as circos tracks: the LB+urea
## control contrast, then each mutant vs. WT contrast in urine and in exudate.
## The '*_vs_wt' keys (including the 'zfw' spelling) are the names given in
## the 'interesting' list.
control_table <- mc_tables[["data"]][["LB05Murea_vs_LB"]]
urine_eda <- mss_urine_table[["data"]][["eda_vs_wt"]]
urine_edd <- mss_urine_table[["data"]][["edd_vs_wt"]]
urine_gcd <- mss_urine_table[["data"]][["gcd_vs_wt"]]
urine_pgl <- mss_urine_table[["data"]][["pgl_vs_wt"]]
urine_zfw <- mss_urine_table[["data"]][["zfw_vs_wt"]]

exudate_eda <- exudate_tables[["data"]][["eda_vs_wt"]]
exudate_edd <- exudate_tables[["data"]][["edd_vs_wt"]]
exudate_gcd <- exudate_tables[["data"]][["gcd_vs_wt"]]
exudate_pgl <- exudate_tables[["data"]][["pgl_vs_wt"]]
exudate_zfw <- exudate_tables[["data"]][["zfw_vs_wt"]]

## Give every gene the same chromosome name; it is passed as chr_column below.
pa14_annot[["chromosome"]] <- "Pseudomonas_aeruginosa_UCBPP_PA14"

## Start the circos configuration.  The '.x' suffixes are the column names
## left by merging the gff and microbesonline annotations.
sm_cfg <- circos_prefix(pa14_annot, name="sm", cog_column = "COGFun",
                        start_column="start.x", end_column="end", strand_column="strand.x",
                        chr_column="chromosome", id_column="gene_id")
## NOTE(review): 'paeruginosah_pa14.fasta' is spelled differently from the
## 'paeruginosa_pa14.gff' loaded at the top of this document -- confirm the
## file name.
sm_kary <- circos_karyotype(sm_cfg, fasta="reference/paeruginosah_pa14.fasta")
sm_plus_minus <- circos_plus_minus(sm_cfg, width=0.06, thickness=40)
## Put the plots here
## Only the control heatmap is currently active; it is passed the
## plus/minus result as 'outer'.
sm_first_heat <- circos_heatmap(sm_cfg, control_table, colname="deseq_logfc",
                                basename="control", outer=sm_plus_minus, width=0.05)
## The histogram tracks below are disabled.  Each one passes the previous
## track as 'outer'.
## NOTE(review): the first of them refers to sm_first_hist, which is not
## defined in this chunk (the active object is sm_first_heat); fix that
## before re-enabling.
##sm_eda_hist <- circos_hist(sm_cfg, urine_eda, colname="deseq_logfc",
##                           basename="ureda", outer=sm_first_hist, spacing=-0.05)
##sm_edd_hist <- circos_hist(sm_cfg, urine_edd, colname="deseq_logfc",
##                           basename="uredd", outer=sm_eda_hist, spacing=-0.05)
##sm_gcd_hist <- circos_hist(sm_cfg, urine_gcd, colname="deseq_logfc",
##                           basename="urgcd", outer=sm_edd_hist, spacing=-0.05)
##sm_pgl_hist <- circos_hist(sm_cfg, urine_pgl, colname="deseq_logfc",
##                           basename="urpgl", outer=sm_gcd_hist, spacing=-0.05)
##sm_zfw_hist <- circos_hist(sm_cfg, urine_zfw, colname="deseq_logfc",
##                           basename="urzfw", outer=sm_pgl_hist, spacing=-0.05)
##ex_eda_hist <- circos_hist(sm_cfg, exudate_eda, colname="deseq_logfc",
##                           basename="exeda", outer=sm_zfw_hist, spacing=-0.05)
##ex_edd_hist <- circos_hist(sm_cfg, exudate_edd, colname="deseq_logfc",
##                           basename="exedd", outer=ex_eda_hist, spacing=-0.05)
##ex_gcd_hist <- circos_hist(sm_cfg, exudate_gcd, colname="deseq_logfc",
##                           basename="exgcd", outer=ex_edd_hist, spacing=-0.05)
##ex_pgl_hist <- circos_hist(sm_cfg, exudate_pgl, colname="deseq_logfc",
##                           basename="expgl", outer=ex_gcd_hist, spacing=-0.05)
##ex_zfw_hist <- circos_hist(sm_cfg, exudate_zfw, colname="deseq_logfc",
##                           basename="exzfw", outer=ex_pgl_hist, spacing=-0.05)
## Close the configuration and build the 'sm' target.
sm_finish <- circos_suffix(sm_cfg)
sm_made <- circos_make(sm_cfg, target="sm")
```

```{r saveme, eval=FALSE}
## Record the session and the hpgltools commit, then save the session to a
## versioned .rda.xz file named after this document.
pander::pander(sessionInfo())
message("This is hpgltools commit: ", get_git_commit())
## Remove only a trailing ".Rmd" extension, and spell out 'replacement'
## instead of relying on partial argument matching.
this_save <- paste0(sub(pattern="\\.Rmd$", replacement="", x=rmd_file), "-v", ver, ".rda.xz")
message("Saving to ", this_save)
tmp <- sm(saveme(filename=this_save))
```
