1 S.pyogenes RNASeq version: 20190916

1.1 Collect annotation information

Same two primary annotation sources, the gff file used for mapping/counting, and microbesonline.org. Note that since I moved to just downloading the material from the web interface, I no longer have a handy method to get the taxon ID, so I go there and hunt down the taxId manually.

In addition, the IDs in my count tables are ‘gene0’, so they come from the gff fields of type ‘gene’.

gff_annot <- load_gff_annotations("reference/spyogenes_5005.gff", type="gene")

## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=TRUE)

## Had a successful gff import with rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=TRUE)

## Returning a df with 25 columns and 1950 rows.

rownames(gff_annot) <- gff_annot[["ID"]]
head(gff_annot)

##        seqnames start  end width strand source type score phase    ID
## gene0 NC_007297   202 1557  1356      + RefSeq gene    NA    NA gene0
## gene1 NC_007297  1712 2848  1137      + RefSeq gene    NA    NA gene1
## gene2 NC_007297  2923 3120   198      + RefSeq gene    NA    NA gene2
## gene3 NC_007297  3450 4565  1116      + RefSeq gene    NA    NA gene3
## gene4 NC_007297  4635 5204   570      + RefSeq gene    NA    NA gene4
## gene5 NC_007297  5207 8710  3504      + RefSeq gene    NA    NA gene5
##               Dbxref Is_circular gbkey genome mol_type strain
## gene0 GeneID:3571011        <NA>  Gene   <NA>     <NA>   <NA>
## gene1 GeneID:3571012        <NA>  Gene   <NA>     <NA>   <NA>
## gene2 GeneID:3571013        <NA>  Gene   <NA>     <NA>   <NA>
## gene3 GeneID:3571014        <NA>  Gene   <NA>     <NA>   <NA>
## gene4 GeneID:3571015        <NA>  Gene   <NA>     <NA>   <NA>
## gene5 GeneID:3571016        <NA>  Gene   <NA>     <NA>   <NA>
##                 Name          Note gene      locus_tag Parent product
## gene0           dnaA M5005_Spy0001 dnaA M5005_Spy_0001           <NA>
## gene1           dnaN M5005_Spy0002 dnaN M5005_Spy_0002           <NA>
## gene2 M5005_Spy_0003 M5005_Spy0003 <NA> M5005_Spy_0003           <NA>
## gene3           ychF M5005_Spy0004 ychF M5005_Spy_0004           <NA>
## gene4            pth M5005_Spy0005  pth M5005_Spy_0005           <NA>
## gene5           trcF M5005_Spy0006 trcF M5005_Spy_0006           <NA>
##       protein_id transl_table gene_synonym
## gene0       <NA>         <NA>             
## gene1       <NA>         <NA>             
## gene2       <NA>         <NA>             
## gene3       <NA>         <NA>             
## gene4       <NA>         <NA>             
## gene5       <NA>         <NA>

mic_annot <- load_microbesonline_annotations("293653")

## The species being downloaded is: Streptococcus pyogenes MGAS5005

head(mic_annot)

## # A tibble: 6 x 18
##   locusId accession     GI scaffoldId start  stop strand sysName name 
##     <dbl> <chr>      <dbl>      <dbl> <dbl> <dbl> <chr>  <chr>   <chr>
## 1  922226 YP_28136… 7.19e7       2067   202  1557 +      M5005_… dnaA 
## 2  922227 YP_28136… 7.19e7       2067  1712  2848 +      M5005_… dnaN 
## 3  922228 YP_28136… 7.19e7       2067  2923  3120 +      M5005_… M500…
## 4  922229 YP_28136… 7.19e7       2067  3450  4565 +      M5005_… M500…
## 5  922230 YP_28136… 7.19e7       2067  4635  5204 +      M5005_… pth  
## 6  922231 YP_28137… 7.19e7       2067  5207  8710 +      M5005_… trcF 
## # … with 9 more variables: desc <chr>, COG <chr>, COGFun <chr>,
## #   COGDesc <chr>, TIGRFam <chr>, TIGRRoles <chr>, GO <chr>, EC <chr>,
## #   ECDesc <chr>

all_annot <- merge(gff_annot, mic_annot, by.x="locus_tag", by.y="sysName")
rownames(all_annot) <- all_annot[["ID"]]

mic_go <- load_microbesonline_go(table_df=all_annot, id_column="ID")
length_db <- all_annot[, c("ID", "width")]

1.2 Create expressionSet

all_expt <- create_expt(metadata="sample_sheets/all_samples_201909.xlsx",
                        gene_info=all_annot, file_column="bt2_file")

## Reading the sample metadata.

## Did not find the batch column in the sample sheet.

## Filling it in as undefined.

## The sample definitions comprises: 24 rows(samples) and 25 columns(metadata fields).

## Reading count tables.

## Reading count tables with read.table().

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1175/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1176/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1177/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1178/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1179/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1180/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1181/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1182/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1183/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1184/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1185/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1186/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1187/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1188/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1189/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1190/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1191/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1192/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1193/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1194/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1195/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1196/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1197/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1198/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.

## Finished reading count tables.

## Matched 1950 annotations and counts.

## Bringing together the count matrix and gene information.

## Some annotations were lost in merging, setting them to 'undefined'.

## The final expressionset has 1950 rows and 24 columns.

1.3 Poke expressionSet

written <- write_expt(all_expt, excel=glue::glue("all_expt-v{ver}.xlsx"))

## Writing the first sheet, containing a legend and some summary data.

## Writing the raw reads.

## Graphing the raw reads.

## Writing the normalized reads.

## Graphing the normalized reads.

## Writing the median reads by factor.

## The factor wt_ll_thy has 4 rows.

## The factor scfd_ll_thy has 4 rows.

## The factor scfab_ll_thy has 4 rows.

## The factor wt_ll_cdm has 4 rows.

## The factor scfd_ll_cdm has 4 rows.

## The factor scfab_ll_cdm has 4 rows.

## Note: zip::zip() is deprecated, please use zip::zipr() instead

all_plots <- graph_metrics(all_expt)

## Graphing number of non-zero genes with respect to CPM by library.

## Graphing library sizes.

## The scale difference between the smallest and largest
## libraries is > 10. Assuming a log10 scale is better, set scale=FALSE if not.

## Graphing a boxplot.

## This data will benefit from being displayed on the log scale.

## If this is not desired, set scale='raw'

## Some entries are 0.  We are on log scale, adding 1 to the data.

## Changed 1402 zero count features.

## Graphing a correlation heatmap.

## Graphing a standard median correlation.

## Performing correlation.

## Graphing a distance heatmap.

## Graphing a standard median distance.

## Performing distance.

## Graphing a PCA plot.

## Graphing a T-SNE plot.

## Plotting a density plot.

## This data will benefit from being displayed on the log scale.

## If this is not desired, set scale='raw'

## Some entries are 0.  We are on log scale, setting them to 0.5.

## Changed 1402 zero count features.

## Plotting a CV plot.

## Naively calculating coefficient of variation/dispersion with respect to condition.

## Finished calculating dispersion estimates.

## Plotting the representation of the top-n genes.

## Plotting the expression of the top-n PC loaded genes.

## Printing a color to condition legend.

all_norm <- normalize_expt(all_expt, filter=TRUE,
                           norm="quant",convert="cpm", transform="log2")

## This function will replace the expt$expressionset slot with:

## log2(cpm(quant(cbcb(data))))

## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize

## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.

## Step 1: performing count filter with option: cbcb

## Removing 136 low-count genes (1814 remaining).

## Step 2: normalizing the data with quant.

## Step 3: converting the data with cpm.

## Step 4: transforming the data with log2.

## Step 5: not doing batch correction.

all_normbatch <- normalize_expt(all_expt, filter="pofa",
                                batch="svaseq", norm="quant",
                                convert="cpm", transform="log2")

## This function will replace the expt$expressionset slot with:

## log2(svaseq(cpm(quant(pofa(data)))))

## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize

## Warning in normalize_expt(all_expt, filter = "pofa", batch = "svaseq",
## norm = "quant", : Quantile normalization and sva do not always play well
## together.

## Step 1: performing count filter with option: pofa

## Removing 39 low-count genes (1911 remaining).

## Step 2: normalizing the data with quant.

## Step 3: converting the data with cpm.

## Step 4: transforming the data with log2.

## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.

## Step 5: doing batch correction with svaseq.

## Note to self:  If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.

## Passing off to all_adjusters.

## batch_counts: Before batch/surrogate estimation, 43283 entries are x>1: 94.4%.

## batch_counts: Before batch/surrogate estimation, 2 entries are x==0: 0.00436%.

## batch_counts: Before batch/surrogate estimation, 2579 entries are 0<x<1: 5.62%.

## The be method chose 7 surrogate variable(s).

## Attempting svaseq estimation with 7 surrogates.

## There are 36 (0.0785%) elements which are < 0 after batch correction.

norm_plots <- graph_metrics(all_norm)

## Graphing number of non-zero genes with respect to CPM by library.

## Graphing library sizes.

## Graphing a boxplot.

## Graphing a correlation heatmap.

## Graphing a standard median correlation.

## Performing correlation.

## Graphing a distance heatmap.

## Graphing a standard median distance.

## Performing distance.

## Graphing a PCA plot.

## Graphing a T-SNE plot.

## Plotting a density plot.

## Plotting a CV plot.

## Naively calculating coefficient of variation/dispersion with respect to condition.

## Finished calculating dispersion estimates.

## Plotting the representation of the top-n genes.

## Plotting the expression of the top-n PC loaded genes.

## Printing a color to condition legend.

normbatch_plots <- graph_metrics(all_normbatch)

## Graphing number of non-zero genes with respect to CPM by library.

## Graphing library sizes.

## Graphing a boxplot.

## Graphing a correlation heatmap.

## Graphing a standard median correlation.

## Performing correlation.

## Graphing a distance heatmap.

## Graphing a standard median distance.

## Performing distance.

## Graphing a PCA plot.

## Graphing a T-SNE plot.

## Plotting a density plot.

## Plotting a CV plot.

## Naively calculating coefficient of variation/dispersion with respect to condition.

## Finished calculating dispersion estimates.

## Plotting the representation of the top-n genes.

## Plotting the expression of the top-n PC loaded genes.

## Printing a color to condition legend.

all_plots$libsize

## Crap in a hat.
all_plots$nonzero

norm_plots$corheat

norm_plots$pc_plot

1.4 Simple pairwise

de_result <- all_pairwise(all_expt, model_batch=FALSE)

## This function will replace the expt$expressionset slot with:

## log2(cpm(quant(cbcb(data))))

## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize

## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.

## Step 1: performing count filter with option: cbcb

## Removing 136 low-count genes (1814 remaining).

## Step 2: normalizing the data with quant.

## Step 3: converting the data with cpm.

## Step 4: transforming the data with log2.

## Step 5: not doing batch correction.

## Plotting a PCA before surrogates/batch inclusion.

## Assuming no batch in model for testing pca.

## Finished running DE analyses, collecting outputs.

## Comparing analyses.

keepers <- list(
  "thy_scfd_vs_thy_wt" = c("scfd_ll_thy", "wt_ll_thy"),
  "thy_scfab_vs_thy_wt" = c("scfab_ll_thy", "wt_ll_thy"),
  "cdm_scfd_vs_cdm_wt" = c("scfd_ll_cdm", "wt_ll_cdm"),
  "cdm_scfab_vs_cdm_wt" = c("scfab_ll_cdm", "wt_ll_cdm"),
  "wt_cdm_thy" = c("wt_ll_cdm", "wt_ll_thy"),
  "scfd_cdm_thy" = c("scfd_ll_cdm", "scfd_ll_thy"),
  "scfab_cdm_thy" = c("scfab_ll_cdm", "scfab_ll_thy"))
de_tables <- combine_de_tables(de_result, keepers=keepers,
                               excel=glue::glue("excel/de_tables-v{ver}.xlsx"))

## Deleting the file excel/de_tables-v20190916.xlsx before writing the tables.

## Writing a legend of columns.

## Printing a pca plot before/after surrogates/batch estimation.

## Working on 1/7: thy_scfd_vs_thy_wt which is: scfd_ll_thy/wt_ll_thy.

## Found inverse table with wt_ll_thy_vs_scfd_ll_thy

## The ebseq table is null.

## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.

## Working on 2/7: thy_scfab_vs_thy_wt which is: scfab_ll_thy/wt_ll_thy.

## Found inverse table with wt_ll_thy_vs_scfab_ll_thy

## The ebseq table is null.

## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.

## Working on 3/7: cdm_scfd_vs_cdm_wt which is: scfd_ll_cdm/wt_ll_cdm.

## Found inverse table with wt_ll_cdm_vs_scfd_ll_cdm

## The ebseq table is null.

## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.

## Working on 4/7: cdm_scfab_vs_cdm_wt which is: scfab_ll_cdm/wt_ll_cdm.

## Found inverse table with wt_ll_cdm_vs_scfab_ll_cdm

## The ebseq table is null.

## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.

## Working on 5/7: wt_cdm_thy which is: wt_ll_cdm/wt_ll_thy.

## Found inverse table with wt_ll_thy_vs_wt_ll_cdm

## The ebseq table is null.

## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.

## Working on 6/7: scfd_cdm_thy which is: scfd_ll_cdm/scfd_ll_thy.

## Found inverse table with scfd_ll_thy_vs_scfd_ll_cdm

## The ebseq table is null.

## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.

## Working on 7/7: scfab_cdm_thy which is: scfab_ll_cdm/scfab_ll_thy.

## Found inverse table with scfab_ll_thy_vs_scfab_ll_cdm

## The ebseq table is null.

## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.

## Adding venn plots for thy_scfd_vs_thy_wt.

## Limma expression coefficients for thy_scfd_vs_thy_wt; R^2: 0.976; equation: y = 0.988x + 0.00655

## Edger expression coefficients for thy_scfd_vs_thy_wt; R^2: 0.978; equation: y = 0.985x + 0.147

## DESeq2 expression coefficients for thy_scfd_vs_thy_wt; R^2: 0.971; equation: y = 0.977x + 0.259

## Adding venn plots for thy_scfab_vs_thy_wt.

## Limma expression coefficients for thy_scfab_vs_thy_wt; R^2: 0.978; equation: y = 0.983x + 0.0594

## Edger expression coefficients for thy_scfab_vs_thy_wt; R^2: 0.979; equation: y = 0.958x + 0.445

## DESeq2 expression coefficients for thy_scfab_vs_thy_wt; R^2: 0.972; equation: y = 0.947x + 0.605

## Adding venn plots for cdm_scfd_vs_cdm_wt.

## Limma expression coefficients for cdm_scfd_vs_cdm_wt; R^2: 0.953; equation: y = 0.973x + 0.113

## Edger expression coefficients for cdm_scfd_vs_cdm_wt; R^2: 0.955; equation: y = 0.947x + 0.505

## DESeq2 expression coefficients for cdm_scfd_vs_cdm_wt; R^2: 0.943; equation: y = 0.928x + 0.801

## Adding venn plots for cdm_scfab_vs_cdm_wt.

## Limma expression coefficients for cdm_scfab_vs_cdm_wt; R^2: 0.988; equation: y = 0.995x + 0.0121

## Edger expression coefficients for cdm_scfab_vs_cdm_wt; R^2: 0.988; equation: y = 1.02x - 0.264

## DESeq2 expression coefficients for cdm_scfab_vs_cdm_wt; R^2: 0.985; equation: y = 1.03x - 0.419

## Adding venn plots for wt_cdm_thy.

## Limma expression coefficients for wt_cdm_thy; R^2: 0.962; equation: y = 0.969x + 0.213

## Edger expression coefficients for wt_cdm_thy; R^2: 0.964; equation: y = 0.967x + 0.324

## DESeq2 expression coefficients for wt_cdm_thy; R^2: 0.952; equation: y = 0.959x + 0.435

## Adding venn plots for scfd_cdm_thy.

## Limma expression coefficients for scfd_cdm_thy; R^2: 0.92; equation: y = 0.956x + 0.314

## Edger expression coefficients for scfd_cdm_thy; R^2: 0.924; equation: y = 0.928x + 0.705

## DESeq2 expression coefficients for scfd_cdm_thy; R^2: 0.898; equation: y = 0.911x + 0.972

## Adding venn plots for scfab_cdm_thy.

## Limma expression coefficients for scfab_cdm_thy; R^2: 0.924; equation: y = 0.954x + 0.341

## Edger expression coefficients for scfab_cdm_thy; R^2: 0.927; equation: y = 1x - 0.0572

## DESeq2 expression coefficients for scfab_cdm_thy; R^2: 0.907; equation: y = 1.01x - 0.164

## Writing summary information.

## Performing save of excel/de_tables-v20190916.xlsx.

de_sig <- extract_significant_genes(de_tables,
                                    excel=glue::glue("excel/de_sig-v{ver}.xlsx"))

## Writing a legend of columns.

## Did not find the ebseq_logfc, skipping ebseq.

## Writing excel data according to limma for thy_scfd_vs_thy_wt: 1/28.

## After (adj)p filter, the up genes table has 380 genes.

## After (adj)p filter, the down genes table has 441 genes.

## After fold change filter, the up genes table has 136 genes.

## After fold change filter, the down genes table has 37 genes.

## Writing excel data according to limma for thy_scfab_vs_thy_wt: 2/28.

## After (adj)p filter, the up genes table has 392 genes.

## After (adj)p filter, the down genes table has 451 genes.

## After fold change filter, the up genes table has 132 genes.

## After fold change filter, the down genes table has 33 genes.

## Writing excel data according to limma for cdm_scfd_vs_cdm_wt: 3/28.

## After (adj)p filter, the up genes table has 564 genes.

## After (adj)p filter, the down genes table has 659 genes.

## After fold change filter, the up genes table has 226 genes.

## After fold change filter, the down genes table has 171 genes.

## Writing excel data according to limma for cdm_scfab_vs_cdm_wt: 4/28.

## After (adj)p filter, the up genes table has 280 genes.

## After (adj)p filter, the down genes table has 330 genes.

## After fold change filter, the up genes table has 44 genes.

## After fold change filter, the down genes table has 16 genes.

## Writing excel data according to limma for wt_cdm_thy: 5/28.

## After (adj)p filter, the up genes table has 508 genes.

## After (adj)p filter, the down genes table has 503 genes.

## After fold change filter, the up genes table has 137 genes.

## After fold change filter, the down genes table has 208 genes.

## Writing excel data according to limma for scfd_cdm_thy: 6/28.

## After (adj)p filter, the up genes table has 663 genes.

## After (adj)p filter, the down genes table has 643 genes.

## After fold change filter, the up genes table has 313 genes.

## After fold change filter, the down genes table has 324 genes.

## Writing excel data according to limma for scfab_cdm_thy: 7/28.

## After (adj)p filter, the up genes table has 761 genes.

## After (adj)p filter, the down genes table has 630 genes.

## After fold change filter, the up genes table has 286 genes.

## After fold change filter, the down genes table has 326 genes.

## Printing significant genes to the file: excel/de_sig-v20190916.xlsx

## 1/7: Creating significant table up_limma_thy_scfd_vs_thy_wt

## 2/7: Creating significant table up_limma_thy_scfab_vs_thy_wt

## 3/7: Creating significant table up_limma_cdm_scfd_vs_cdm_wt

## 4/7: Creating significant table up_limma_cdm_scfab_vs_cdm_wt

## 5/7: Creating significant table up_limma_wt_cdm_thy

## 6/7: Creating significant table up_limma_scfd_cdm_thy

## 7/7: Creating significant table up_limma_scfab_cdm_thy

## Writing excel data according to edger for thy_scfd_vs_thy_wt: 1/28.

## After (adj)p filter, the up genes table has 411 genes.

## After (adj)p filter, the down genes table has 354 genes.

## After fold change filter, the up genes table has 174 genes.

## After fold change filter, the down genes table has 36 genes.

## Writing excel data according to edger for thy_scfab_vs_thy_wt: 2/28.

## After (adj)p filter, the up genes table has 449 genes.

## After (adj)p filter, the down genes table has 369 genes.

## After fold change filter, the up genes table has 173 genes.

## After fold change filter, the down genes table has 18 genes.

## Writing excel data according to edger for cdm_scfd_vs_cdm_wt: 3/28.

## After (adj)p filter, the up genes table has 566 genes.

## After (adj)p filter, the down genes table has 597 genes.

## After fold change filter, the up genes table has 251 genes.

## After fold change filter, the down genes table has 160 genes.

## Writing excel data according to edger for cdm_scfab_vs_cdm_wt: 4/28.

## After (adj)p filter, the up genes table has 262 genes.

## After (adj)p filter, the down genes table has 284 genes.

## After fold change filter, the up genes table has 38 genes.

## After fold change filter, the down genes table has 48 genes.

## Writing excel data according to edger for wt_cdm_thy: 5/28.

## After (adj)p filter, the up genes table has 512 genes.

## After (adj)p filter, the down genes table has 501 genes.

## After fold change filter, the up genes table has 155 genes.

## After fold change filter, the down genes table has 234 genes.

## Writing excel data according to edger for scfd_cdm_thy: 6/28.

## After (adj)p filter, the up genes table has 659 genes.

## After (adj)p filter, the down genes table has 657 genes.

## After fold change filter, the up genes table has 315 genes.

## After fold change filter, the down genes table has 341 genes.

## Writing excel data according to edger for scfab_cdm_thy: 7/28.

## After (adj)p filter, the up genes table has 655 genes.

## After (adj)p filter, the down genes table has 666 genes.

## After fold change filter, the up genes table has 249 genes.

## After fold change filter, the down genes table has 381 genes.

## Printing significant genes to the file: excel/de_sig-v20190916.xlsx

## 1/7: Creating significant table up_edger_thy_scfd_vs_thy_wt

## 2/7: Creating significant table up_edger_thy_scfab_vs_thy_wt

## 3/7: Creating significant table up_edger_cdm_scfd_vs_cdm_wt

## 4/7: Creating significant table up_edger_cdm_scfab_vs_cdm_wt

## 5/7: Creating significant table up_edger_wt_cdm_thy

## 6/7: Creating significant table up_edger_scfd_cdm_thy

## 7/7: Creating significant table up_edger_scfab_cdm_thy

## Writing excel data according to deseq for thy_scfd_vs_thy_wt: 1/28.

## After (adj)p filter, the up genes table has 454 genes.

## After (adj)p filter, the down genes table has 379 genes.

## After fold change filter, the up genes table has 176 genes.

## After fold change filter, the down genes table has 32 genes.

## Writing excel data according to deseq for thy_scfab_vs_thy_wt: 2/28.

## After (adj)p filter, the up genes table has 464 genes.

## After (adj)p filter, the down genes table has 430 genes.

## After fold change filter, the up genes table has 166 genes.

## After fold change filter, the down genes table has 20 genes.

## Writing excel data according to deseq for cdm_scfd_vs_cdm_wt: 3/28.

## After (adj)p filter, the up genes table has 615 genes.

## After (adj)p filter, the down genes table has 583 genes.

## After fold change filter, the up genes table has 257 genes.

## After fold change filter, the down genes table has 136 genes.

## Writing excel data according to deseq for cdm_scfab_vs_cdm_wt: 4/28.

## After (adj)p filter, the up genes table has 306 genes.

## After (adj)p filter, the down genes table has 335 genes.

## After fold change filter, the up genes table has 38 genes.

## After fold change filter, the down genes table has 47 genes.

## Writing excel data according to deseq for wt_cdm_thy: 5/28.

## After (adj)p filter, the up genes table has 540 genes.

## After (adj)p filter, the down genes table has 527 genes.

## After fold change filter, the up genes table has 148 genes.

## After fold change filter, the down genes table has 232 genes.

## Writing excel data according to deseq for scfd_cdm_thy: 6/28.

## After (adj)p filter, the up genes table has 682 genes.

## After (adj)p filter, the down genes table has 664 genes.

## After fold change filter, the up genes table has 315 genes.

## After fold change filter, the down genes table has 332 genes.

## Writing excel data according to deseq for scfab_cdm_thy: 7/28.

## After (adj)p filter, the up genes table has 680 genes.

## After (adj)p filter, the down genes table has 672 genes.

## After fold change filter, the up genes table has 251 genes.

## After fold change filter, the down genes table has 373 genes.

## Printing significant genes to the file: excel/de_sig-v20190916.xlsx

## 1/7: Creating significant table up_deseq_thy_scfd_vs_thy_wt

## 2/7: Creating significant table up_deseq_thy_scfab_vs_thy_wt

## 3/7: Creating significant table up_deseq_cdm_scfd_vs_cdm_wt

## 4/7: Creating significant table up_deseq_cdm_scfab_vs_cdm_wt

## 5/7: Creating significant table up_deseq_wt_cdm_thy

## 6/7: Creating significant table up_deseq_scfd_cdm_thy

## 7/7: Creating significant table up_deseq_scfab_cdm_thy

## Writing excel data according to basic for thy_scfd_vs_thy_wt: 1/28.

## After (adj)p filter, the up genes table has 307 genes.

## After (adj)p filter, the down genes table has 330 genes.

## After fold change filter, the up genes table has 111 genes.

## After fold change filter, the down genes table has 19 genes.

## Writing excel data according to basic for thy_scfab_vs_thy_wt: 2/28.

## After (adj)p filter, the up genes table has 260 genes.

## After (adj)p filter, the down genes table has 324 genes.

## After fold change filter, the up genes table has 89 genes.

## After fold change filter, the down genes table has 20 genes.

## Writing excel data according to basic for cdm_scfd_vs_cdm_wt: 3/28.

## After (adj)p filter, the up genes table has 421 genes.

## After (adj)p filter, the down genes table has 502 genes.

## After fold change filter, the up genes table has 204 genes.

## After fold change filter, the down genes table has 162 genes.

## Writing excel data according to basic for cdm_scfab_vs_cdm_wt: 4/28.

## After (adj)p filter, the up genes table has 2 genes.

## After (adj)p filter, the down genes table has 4 genes.

## After fold change filter, the up genes table has 0 genes.

## After fold change filter, the down genes table has 3 genes.

## Writing excel data according to basic for wt_cdm_thy: 5/28.

## After (adj)p filter, the up genes table has 387 genes.

## After (adj)p filter, the down genes table has 393 genes.

## After fold change filter, the up genes table has 115 genes.

## After fold change filter, the down genes table has 188 genes.

## Writing excel data according to basic for scfd_cdm_thy: 6/28.

## After (adj)p filter, the up genes table has 592 genes.

## After (adj)p filter, the down genes table has 585 genes.

## After fold change filter, the up genes table has 291 genes.

## After fold change filter, the down genes table has 315 genes.

## Writing excel data according to basic for scfab_cdm_thy: 7/28.

## After (adj)p filter, the up genes table has 641 genes.

## After (adj)p filter, the down genes table has 550 genes.

## After fold change filter, the up genes table has 260 genes.

## After fold change filter, the down genes table has 306 genes.

## Printing significant genes to the file: excel/de_sig-v20190916.xlsx

## 1/7: Creating significant table up_basic_thy_scfd_vs_thy_wt

## 2/7: Creating significant table up_basic_thy_scfab_vs_thy_wt

## 3/7: Creating significant table up_basic_cdm_scfd_vs_cdm_wt

## The up table cdm_scfab_vs_cdm_wt is empty.

## 5/7: Creating significant table up_basic_wt_cdm_thy

## 6/7: Creating significant table up_basic_scfd_cdm_thy

## 7/7: Creating significant table up_basic_scfab_cdm_thy

## Adding significance bar plots.

1.5 Quick ontology searches

thy_scfd_wt_sig <- de_sig[["deseq"]][["ups"]][[1]]
thy_scfab_wt_sig <- de_sig[["deseq"]][["ups"]][[2]]
cdm_scfd_wt_sig <- de_sig[["deseq"]][["ups"]][[3]]
cdm_scfab_wt_sig <- de_sig[["deseq"]][["ups"]][[4]]
wt_cdm_thy <- de_sig[["deseq"]][["ups"]][[5]]
scfd_cdm_thy <- de_sig[["deseq"]][["ups"]][[6]]
scfab_cdm_thy <- de_sig[["deseq"]][["ups"]][[7]]

thy_scfd_wt_goseq <- simple_goseq(sig_genes=thy_scfd_wt_sig, go_db=mic_go, length_db=length_db)

## Using the row names of your table.

## Found 146 genes out of 176 from the sig_genes in the go_db.

## Found 176 genes out of 176 from the sig_genes in the length_db.

## Using manually entered categories.

## Calculating the p-values...

## 'select()' returned 1:1 mapping between keys and columns

## simple_goseq(): Calculating q-values

## simple_goseq(): Filling godata with terms, this is slow.

## Testing that go categories are defined.

## Removing undefined categories.

## Gathering synonyms.

## Gathering category definitions.

## simple_goseq(): Making pvalue plots for the ontologies.

thy_scfd_wt_goseq[["pvalue_plots"]][["bpp_plot_over"]]

thy_scfab_wt_goseq <- simple_goseq(sig_genes=thy_scfab_wt_sig, go_db=mic_go, length_db=length_db)

## Using the row names of your table.

## Found 119 genes out of 166 from the sig_genes in the go_db.

## Found 166 genes out of 166 from the sig_genes in the length_db.

## Using manually entered categories.

## Calculating the p-values...

## 'select()' returned 1:1 mapping between keys and columns

## simple_goseq(): Calculating q-values

## simple_goseq(): Filling godata with terms, this is slow.

## Testing that go categories are defined.

## Removing undefined categories.

## Gathering synonyms.

## Gathering category definitions.

## simple_goseq(): Making pvalue plots for the ontologies.

thy_scfab_wt_goseq[["pvalue_plots"]][["bpp_plot_over"]]

cdm_scfd_wt_goseq <- simple_goseq(sig_genes=cdm_scfd_wt_sig, go_db=mic_go, length_db=length_db)

## Using the row names of your table.

## Found 160 genes out of 257 from the sig_genes in the go_db.

## Found 257 genes out of 257 from the sig_genes in the length_db.

## Using manually entered categories.

## Calculating the p-values...

## 'select()' returned 1:1 mapping between keys and columns

## simple_goseq(): Calculating q-values

## simple_goseq(): Filling godata with terms, this is slow.

## Testing that go categories are defined.

## Removing undefined categories.

## Gathering synonyms.

## Gathering category definitions.

## simple_goseq(): Making pvalue plots for the ontologies.

cdm_scfd_wt_goseq[["pvalue_plots"]][["bpp_plot_over"]]

cdm_scfab_wt_goseq <- simple_goseq(sig_genes=cdm_scfab_wt_sig, go_db=mic_go, length_db=length_db)

## Using the row names of your table.

## Found 20 genes out of 38 from the sig_genes in the go_db.

## Found 38 genes out of 38 from the sig_genes in the length_db.

## Using manually entered categories.

## Calculating the p-values...

## 'select()' returned 1:1 mapping between keys and columns

## simple_goseq(): Calculating q-values

## simple_goseq(): Filling godata with terms, this is slow.

## Testing that go categories are defined.

## Removing undefined categories.

## Gathering synonyms.

## Gathering category definitions.

## simple_goseq(): Making pvalue plots for the ontologies.

cdm_scfab_wt_goseq[["pvalue_plots"]][["bpp_plot_over"]]

wt_cdm_thy_goseq <- simple_goseq(sig_genes=wt_cdm_thy, go_db=mic_go, length_db=length_db)

## Using the row names of your table.

## Found 88 genes out of 148 from the sig_genes in the go_db.

## Found 148 genes out of 148 from the sig_genes in the length_db.

## Using manually entered categories.

## Calculating the p-values...

## 'select()' returned 1:1 mapping between keys and columns

## simple_goseq(): Calculating q-values

## simple_goseq(): Filling godata with terms, this is slow.

## Testing that go categories are defined.

## Removing undefined categories.

## Gathering synonyms.

## Gathering category definitions.

## simple_goseq(): Making pvalue plots for the ontologies.

wt_cdm_thy_goseq[["pvalue_plots"]][["bpp_plot_over"]]

scfd_cdm_thy_goseq <- simple_goseq(sig_genes=scfd_cdm_thy, go_db=mic_go, length_db=length_db)

## Using the row names of your table.

## Found 159 genes out of 315 from the sig_genes in the go_db.

## Found 315 genes out of 315 from the sig_genes in the length_db.

## Using manually entered categories.

## Calculating the p-values...

## 'select()' returned 1:1 mapping between keys and columns

## simple_goseq(): Calculating q-values

## simple_goseq(): Filling godata with terms, this is slow.

## Testing that go categories are defined.

## Removing undefined categories.

## Gathering synonyms.

## Gathering category definitions.

## simple_goseq(): Making pvalue plots for the ontologies.

scfd_cdm_thy_goseq[["pvalue_plots"]][["bpp_plot_over"]]

scfab_cdm_thy_goseq <- simple_goseq(sig_genes=scfab_cdm_thy, go_db=mic_go, length_db=length_db)

## Using the row names of your table.

## Found 148 genes out of 251 from the sig_genes in the go_db.

## Found 251 genes out of 251 from the sig_genes in the length_db.

## Using manually entered categories.

## Calculating the p-values...

## 'select()' returned 1:1 mapping between keys and columns

## simple_goseq(): Calculating q-values

## simple_goseq(): Filling godata with terms, this is slow.

## Testing that go categories are defined.

## Removing undefined categories.

## Gathering synonyms.

## Gathering category definitions.

## simple_goseq(): Making pvalue plots for the ontologies.

scfab_cdm_thy_goseq[["pvalue_plots"]][["bpp_plot_over"]]

if (!isTRUE(get0("skip_load"))) {
  pander::pander(sessionInfo())
  message(paste0("This is hpgltools commit: ", get_git_commit()))
  this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
  message(paste0("Saving to ", this_save))
  tmp <- sm(saveme(filename=this_save))
}

## If you wish to reproduce this exact build of hpgltools, invoke the following:

## > git clone http://github.com/abelew/hpgltools.git

## > git reset f3c1e03852c87dc60c7e72e726bb640572e695ff

## This is hpgltools commit: Thu Aug 22 15:32:44 2019 -0400: f3c1e03852c87dc60c7e72e726bb640572e695ff

## Saving to index-v20190916.rda.xz

LS0tCnRpdGxlOiAiUy5weW9nZW5lcyA1MDA1IHJuYXNlcTogMyBnZW5vdHlwZXMsIDIgbWVkaWEgdHlwZXMuIgphdXRob3I6ICJhdGIgYWJlbGV3QGdtYWlsLmNvbSIKZGF0ZTogImByIFN5cy5EYXRlKClgIgpvdXRwdXQ6CiBodG1sX2RvY3VtZW50OgogIGNvZGVfZG93bmxvYWQ6IHRydWUKICBjb2RlX2ZvbGRpbmc6IHNob3cKICBmaWdfY2FwdGlvbjogdHJ1ZQogIGZpZ19oZWlnaHQ6IDcKICBmaWdfd2lkdGg6IDcKICBoaWdobGlnaHQ6IGRlZmF1bHQKICBrZWVwX21kOiBmYWxzZQogIG1vZGU6IHNlbGZjb250YWluZWQKICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICBzZWxmX2NvbnRhaW5lZDogdHJ1ZQogIHRoZW1lOiByZWFkYWJsZQogIHRvYzogdHJ1ZQogIHRvY19mbG9hdDoKICAgIGNvbGxhcHNlZDogZmFsc2UKICAgIHNtb290aF9zY3JvbGw6IGZhbHNlCi0tLQoKPHN0eWxlPgogIGJvZHkgLm1haW4tY29udGFpbmVyIHsKICAgIG1heC13aWR0aDogMTYwMHB4OwogIH0KPC9zdHlsZT4KCmBgYHtyIG9wdGlvbnMsIGluY2x1ZGU9RkFMU0V9CmxpYnJhcnkoaHBnbHRvb2xzKQp0dCA8LSBkZXZ0b29sczo6bG9hZF9hbGwoIi9kYXRhL2hwZ2x0b29scyIpCmtuaXRyOjpvcHRzX2tuaXQkc2V0KHByb2dyZXNzPVRSVUUsCiAgICAgICAgICAgICAgICAgICAgIHZlcmJvc2U9VFJVRSwKICAgICAgICAgICAgICAgICAgICAgd2lkdGg9OTAsCiAgICAgICAgICAgICAgICAgICAgIGVjaG89VFJVRSkKa25pdHI6Om9wdHNfY2h1bmskc2V0KGVycm9yPVRSVUUsCiAgICAgICAgICAgICAgICAgICAgICBmaWcud2lkdGg9OCwKICAgICAgICAgICAgICAgICAgICAgIGZpZy5oZWlnaHQ9OCwKICAgICAgICAgICAgICAgICAgICAgIGRwaT05NikKb2xkX29wdGlvbnMgPC0gb3B0aW9ucyhkaWdpdHM9NCwKICAgICAgICAgICAgICAgICAgICAgICBzdHJpbmdzQXNGYWN0b3JzPUZBTFNFLAogICAgICAgICAgICAgICAgICAgICAgIGtuaXRyLmR1cGxpY2F0ZS5sYWJlbD0iYWxsb3ciKQpnZ3Bsb3QyOjp0aGVtZV9zZXQoZ2dwbG90Mjo6dGhlbWVfYncoYmFzZV9zaXplPTEwKSkKc2V0LnNlZWQoMSkKdmVyIDwtICIyMDE5MDkxNiIKcm1kX2ZpbGUgPC0gImluZGV4LlJtZCIKYGBgCgojIFMucHlvZ2VuZXMgUk5BU2VxIHZlcnNpb246IGByIHZlcmAKCiMjIENvbGxlY3QgYW5ub3RhdGlvbiBpbmZvcm1hdGlvbgoKU2FtZSB0d28gcHJpbWFyeSBhbm5vdGF0aW9uIHNvdXJjZXMsIHRoZSBnZmYgZmlsZSB1c2VkIGZvciBtYXBwaW5nL2NvdW50aW5nLAphbmQgbWljcm9iZXNvbmxpbmUub3JnLiAgTm90ZSB0aGF0IHNpbmNlIEkgbW92ZWQgdG8ganVzdCBkb3dubG9hZGluZyB0aGUKbWF0ZXJpYWwgZnJvbSB0aGUgd2ViIGludGVyZmFjZSwgSSBubyBsb25nZXIgaGF2ZSBhIGhhbmR5IG1ldGhvZCB0byBnZXQgdGhlCnRheG9uIElELCBzbyBJIGdvIHRoZXJlIGFuZCBodW50IGRvd24gdGhlIHRheElkIG1hbnVhbGx5LgoKSW4gYWRkaXRpb24sIHRoZSBJRHMgaW4gbXkgY291bnQgdGFibGVzIGFyZSAnZ2VuZTAnLCBzbyB0aGV5IGNvbWUgZnJvbSB0aGUKZ2ZmIGZpZWxkcyBvZiB0eXBlICdnZW5lJy4KCmBgYHtyIGFubm90YXRpb259CmdmZl9hbm5vdCA8LSBsb2FkX2dmZl9hbm5vdGF0aW9ucygicmVmZXJlbmNlL3NweW9nZW5lc181MDA1LmdmZiIsIHR5cGU9ImdlbmUiKQpyb3duYW1lcyhnZmZfYW5ub3QpIDwtIGdmZl9hbm5vdFtbIklEIl1dCmhlYWQoZ2ZmX2Fubm90KQptaWNfYW5ub3QgPC0gbG9hZF9taWNyb2Jlc29ubGluZV9hbm5vdGF0aW9ucygiMjkzNjUzIikKaGVhZChtaWNfYW5ub3QpCmFsbF9hbm5vdCA8LSBtZXJnZShnZmZfYW5ub3QsIG1pY19hbm5vdCwgYnkueD0ibG9jdXNfdGFnIiwgYnkueT0ic3lzTmFtZSIpCnJvd25hbWVzKGFsbF9hbm5vdCkgPC0gYWxsX2Fubm90W1siSUQiXV0KCm1pY19nbyA8LSBsb2FkX21pY3JvYmVzb25saW5lX2dvKHRhYmxlX2RmPWFsbF9hbm5vdCwgaWRfY29sdW1uPSJJRCIpCmxlbmd0aF9kYiA8LSBhbGxfYW5ub3RbLCBjKCJJRCIsICJ3aWR0aCIpXQpgYGAKCiMjIENyZWF0ZSBleHByZXNzaW9uU2V0CgpgYGB7ciBleHB0fQphbGxfZXhwdCA8LSBjcmVhdGVfZXhwdChtZXRhZGF0YT0ic2FtcGxlX3NoZWV0cy9hbGxfc2FtcGxlc18yMDE5MDkueGxzeCIsCiAgICAgICAgICAgICAgICAgICAgICAgIGdlbmVfaW5mbz1hbGxfYW5ub3QsIGZpbGVfY29sdW1uPSJidDJfZmlsZSIpCmBgYAoKIyMgUG9rZSBleHByZXNzaW9uU2V0CgpgYGB7ciBwb2tlLCBmaWcuc2hvdz0iaGlkZSJ9CndyaXR0ZW4gPC0gd3JpdGVfZXhwdChhbGxfZXhwdCwgZXhjZWw9Z2x1ZTo6Z2x1ZSgiYWxsX2V4cHQtdnt2ZXJ9Lnhsc3giKSkKYWxsX3Bsb3RzIDwtIGdyYXBoX21ldHJpY3MoYWxsX2V4cHQpCmFsbF9ub3JtIDwtIG5vcm1hbGl6ZV9leHB0KGFsbF9leHB0LCBmaWx0ZXI9VFJVRSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgbm9ybT0icXVhbnQiLGNvbnZlcnQ9ImNwbSIsIHRyYW5zZm9ybT0ibG9nMiIpCmFsbF9ub3JtYmF0Y2ggPC0gbm9ybWFsaXplX2V4cHQoYWxsX2V4cHQsIGZpbHRlcj0icG9mYSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYmF0Y2g9InN2YXNlcSIsIG5vcm09InF1YW50IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjb252ZXJ0PSJjcG0iLCB0cmFuc2Zvcm09ImxvZzIiKQpub3JtX3Bsb3RzIDwtIGdyYXBoX21ldHJpY3MoYWxsX25vcm0pCm5vcm1iYXRjaF9wbG90cyA8LSBncmFwaF9tZXRyaWNzKGFsbF9ub3JtYmF0Y2gpCmBgYAoKYGBge3IgdW5fbm9ybV9wbG90c30KYWxsX3Bsb3RzJGxpYnNpemUKIyMgQ3JhcCBpbiBhIGhhdC4KYWxsX3Bsb3RzJG5vbnplcm8KCm5vcm1fcGxvdHMkY29yaGVhdApub3JtX3Bsb3RzJHBjX3Bsb3QKYGBgCgojIyBTaW1wbGUgcGFpcndpc2UKCmBgYHtyIHBhaXJ3aXNlfQpkZV9yZXN1bHQgPC0gYWxsX3BhaXJ3aXNlKGFsbF9leHB0LCBtb2RlbF9iYXRjaD1GQUxTRSkKa2VlcGVycyA8LSBsaXN0KAogICJ0aHlfc2NmZF92c190aHlfd3QiID0gYygic2NmZF9sbF90aHkiLCAid3RfbGxfdGh5IiksCiAgInRoeV9zY2ZhYl92c190aHlfd3QiID0gYygic2NmYWJfbGxfdGh5IiwgInd0X2xsX3RoeSIpLAogICJjZG1fc2NmZF92c19jZG1fd3QiID0gYygic2NmZF9sbF9jZG0iLCAid3RfbGxfY2RtIiksCiAgImNkbV9zY2ZhYl92c19jZG1fd3QiID0gYygic2NmYWJfbGxfY2RtIiwgInd0X2xsX2NkbSIpLAogICJ3dF9jZG1fdGh5IiA9IGMoInd0X2xsX2NkbSIsICJ3dF9sbF90aHkiKSwKICAic2NmZF9jZG1fdGh5IiA9IGMoInNjZmRfbGxfY2RtIiwgInNjZmRfbGxfdGh5IiksCiAgInNjZmFiX2NkbV90aHkiID0gYygic2NmYWJfbGxfY2RtIiwgInNjZmFiX2xsX3RoeSIpKQpkZV90YWJsZXMgPC0gY29tYmluZV9kZV90YWJsZXMoZGVfcmVzdWx0LCBrZWVwZXJzPWtlZXBlcnMsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBleGNlbD1nbHVlOjpnbHVlKCJleGNlbC9kZV90YWJsZXMtdnt2ZXJ9Lnhsc3giKSkKZGVfc2lnIDwtIGV4dHJhY3Rfc2lnbmlmaWNhbnRfZ2VuZXMoZGVfdGFibGVzLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBleGNlbD1nbHVlOjpnbHVlKCJleGNlbC9kZV9zaWctdnt2ZXJ9Lnhsc3giKSkKYGBgCgojIyBRdWljayBvbnRvbG9neSBzZWFyY2hlcwoKYGBge3Igb250fQp0aHlfc2NmZF93dF9zaWcgPC0gZGVfc2lnW1siZGVzZXEiXV1bWyJ1cHMiXV1bWzFdXQp0aHlfc2NmYWJfd3Rfc2lnIDwtIGRlX3NpZ1tbImRlc2VxIl1dW1sidXBzIl1dW1syXV0KY2RtX3NjZmRfd3Rfc2lnIDwtIGRlX3NpZ1tbImRlc2VxIl1dW1sidXBzIl1dW1szXV0KY2RtX3NjZmFiX3d0X3NpZyA8LSBkZV9zaWdbWyJkZXNlcSJdXVtbInVwcyJdXVtbNF1dCnd0X2NkbV90aHkgPC0gZGVfc2lnW1siZGVzZXEiXV1bWyJ1cHMiXV1bWzVdXQpzY2ZkX2NkbV90aHkgPC0gZGVfc2lnW1siZGVzZXEiXV1bWyJ1cHMiXV1bWzZdXQpzY2ZhYl9jZG1fdGh5IDwtIGRlX3NpZ1tbImRlc2VxIl1dW1sidXBzIl1dW1s3XV0KCnRoeV9zY2ZkX3d0X2dvc2VxIDwtIHNpbXBsZV9nb3NlcShzaWdfZ2VuZXM9dGh5X3NjZmRfd3Rfc2lnLCBnb19kYj1taWNfZ28sIGxlbmd0aF9kYj1sZW5ndGhfZGIpCnRoeV9zY2ZkX3d0X2dvc2VxW1sicHZhbHVlX3Bsb3RzIl1dW1siYnBwX3Bsb3Rfb3ZlciJdXQoKdGh5X3NjZmFiX3d0X2dvc2VxIDwtIHNpbXBsZV9nb3NlcShzaWdfZ2VuZXM9dGh5X3NjZmFiX3d0X3NpZywgZ29fZGI9bWljX2dvLCBsZW5ndGhfZGI9bGVuZ3RoX2RiKQp0aHlfc2NmYWJfd3RfZ29zZXFbWyJwdmFsdWVfcGxvdHMiXV1bWyJicHBfcGxvdF9vdmVyIl1dCgpjZG1fc2NmZF93dF9nb3NlcSA8LSBzaW1wbGVfZ29zZXEoc2lnX2dlbmVzPWNkbV9zY2ZkX3d0X3NpZywgZ29fZGI9bWljX2dvLCBsZW5ndGhfZGI9bGVuZ3RoX2RiKQpjZG1fc2NmZF93dF9nb3NlcVtbInB2YWx1ZV9wbG90cyJdXVtbImJwcF9wbG90X292ZXIiXV0KCmNkbV9zY2ZhYl93dF9nb3NlcSA8LSBzaW1wbGVfZ29zZXEoc2lnX2dlbmVzPWNkbV9zY2ZhYl93dF9zaWcsIGdvX2RiPW1pY19nbywgbGVuZ3RoX2RiPWxlbmd0aF9kYikKY2RtX3NjZmFiX3d0X2dvc2VxW1sicHZhbHVlX3Bsb3RzIl1dW1siYnBwX3Bsb3Rfb3ZlciJdXQoKd3RfY2RtX3RoeV9nb3NlcSA8LSBzaW1wbGVfZ29zZXEoc2lnX2dlbmVzPXd0X2NkbV90aHksIGdvX2RiPW1pY19nbywgbGVuZ3RoX2RiPWxlbmd0aF9kYikKd3RfY2RtX3RoeV9nb3NlcVtbInB2YWx1ZV9wbG90cyJdXVtbImJwcF9wbG90X292ZXIiXV0KCnNjZmRfY2RtX3RoeV9nb3NlcSA8LSBzaW1wbGVfZ29zZXEoc2lnX2dlbmVzPXNjZmRfY2RtX3RoeSwgZ29fZGI9bWljX2dvLCBsZW5ndGhfZGI9bGVuZ3RoX2RiKQpzY2ZkX2NkbV90aHlfZ29zZXFbWyJwdmFsdWVfcGxvdHMiXV1bWyJicHBfcGxvdF9vdmVyIl1dCgpzY2ZhYl9jZG1fdGh5X2dvc2VxIDwtIHNpbXBsZV9nb3NlcShzaWdfZ2VuZXM9c2NmYWJfY2RtX3RoeSwgZ29fZGI9bWljX2dvLCBsZW5ndGhfZGI9bGVuZ3RoX2RiKQpzY2ZhYl9jZG1fdGh5X2dvc2VxW1sicHZhbHVlX3Bsb3RzIl1dW1siYnBwX3Bsb3Rfb3ZlciJdXQpgYGAKCmBgYHtyIHNhdmVtZX0KaWYgKCFpc1RSVUUoZ2V0MCgic2tpcF9sb2FkIikpKSB7CiAgcGFuZGVyOjpwYW5kZXIoc2Vzc2lvbkluZm8oKSkKICBtZXNzYWdlKHBhc3RlMCgiVGhpcyBpcyBocGdsdG9vbHMgY29tbWl0OiAiLCBnZXRfZ2l0X2NvbW1pdCgpKSkKICB0aGlzX3NhdmUgPC0gcGFzdGUwKGdzdWIocGF0dGVybj0iXFwuUm1kIiwgcmVwbGFjZT0iIiwgeD1ybWRfZmlsZSksICItdiIsIHZlciwgIi5yZGEueHoiKQogIG1lc3NhZ2UocGFzdGUwKCJTYXZpbmcgdG8gIiwgdGhpc19zYXZlKSkKICB0bXAgPC0gc20oc2F2ZW1lKGZpbGVuYW1lPXRoaXNfc2F2ZSkpCn0KYGBgCg==

S.pyogenes 5005 rnaseq: 3 genotypes, 2 media types.

atb abelew@gmail.com

2019-09-17