Same two primary annotation sources, the gff file used for mapping/counting, and microbesonline.org. Note that since I moved to just downloading the material from the web interface, I no longer have a handy method to get the taxon ID, so I go there and hunt down the taxId manually.
In addition, the IDs in my count tables are ‘gene0’, so they come from the gff fields of type ‘gene’.
gff_annot <- load_gff_annotations("reference/spyogenes_5005.gff", type="gene")
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=TRUE)
## Had a successful gff import with rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=TRUE)
## Returning a df with 25 columns and 1950 rows.
rownames(gff_annot) <- gff_annot[["ID"]]
head(gff_annot)
## seqnames start end width strand source type score phase ID
## gene0 NC_007297 202 1557 1356 + RefSeq gene NA NA gene0
## gene1 NC_007297 1712 2848 1137 + RefSeq gene NA NA gene1
## gene2 NC_007297 2923 3120 198 + RefSeq gene NA NA gene2
## gene3 NC_007297 3450 4565 1116 + RefSeq gene NA NA gene3
## gene4 NC_007297 4635 5204 570 + RefSeq gene NA NA gene4
## gene5 NC_007297 5207 8710 3504 + RefSeq gene NA NA gene5
## Dbxref Is_circular gbkey genome mol_type strain
## gene0 GeneID:3571011 <NA> Gene <NA> <NA> <NA>
## gene1 GeneID:3571012 <NA> Gene <NA> <NA> <NA>
## gene2 GeneID:3571013 <NA> Gene <NA> <NA> <NA>
## gene3 GeneID:3571014 <NA> Gene <NA> <NA> <NA>
## gene4 GeneID:3571015 <NA> Gene <NA> <NA> <NA>
## gene5 GeneID:3571016 <NA> Gene <NA> <NA> <NA>
## Name Note gene locus_tag Parent product
## gene0 dnaA M5005_Spy0001 dnaA M5005_Spy_0001 <NA>
## gene1 dnaN M5005_Spy0002 dnaN M5005_Spy_0002 <NA>
## gene2 M5005_Spy_0003 M5005_Spy0003 <NA> M5005_Spy_0003 <NA>
## gene3 ychF M5005_Spy0004 ychF M5005_Spy_0004 <NA>
## gene4 pth M5005_Spy0005 pth M5005_Spy_0005 <NA>
## gene5 trcF M5005_Spy0006 trcF M5005_Spy_0006 <NA>
## protein_id transl_table gene_synonym
## gene0 <NA> <NA>
## gene1 <NA> <NA>
## gene2 <NA> <NA>
## gene3 <NA> <NA>
## gene4 <NA> <NA>
## gene5 <NA> <NA>
mic_annot <- load_microbesonline_annotations("293653")
## The species being downloaded is: Streptococcus pyogenes MGAS5005
head(mic_annot)
## # A tibble: 6 x 18
## locusId accession GI scaffoldId start stop strand sysName name
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 922226 YP_28136… 7.19e7 2067 202 1557 + M5005_… dnaA
## 2 922227 YP_28136… 7.19e7 2067 1712 2848 + M5005_… dnaN
## 3 922228 YP_28136… 7.19e7 2067 2923 3120 + M5005_… M500…
## 4 922229 YP_28136… 7.19e7 2067 3450 4565 + M5005_… M500…
## 5 922230 YP_28136… 7.19e7 2067 4635 5204 + M5005_… pth
## 6 922231 YP_28137… 7.19e7 2067 5207 8710 + M5005_… trcF
## # … with 9 more variables: desc <chr>, COG <chr>, COGFun <chr>,
## # COGDesc <chr>, TIGRFam <chr>, TIGRRoles <chr>, GO <chr>, EC <chr>,
## # ECDesc <chr>
all_annot <- merge(gff_annot, mic_annot, by.x="locus_tag", by.y="sysName")
rownames(all_annot) <- all_annot[["ID"]]
mic_go <- load_microbesonline_go(table_df=all_annot, id_column="ID")
length_db <- all_annot[, c("ID", "width")]
all_expt <- create_expt(metadata="sample_sheets/all_samples_201909.xlsx",
gene_info=all_annot, file_column="bt2_file")
## Reading the sample metadata.
## Did not find the batch column in the sample sheet.
## Filling it in as undefined.
## The sample definitions comprises: 24 rows(samples) and 25 columns(metadata fields).
## Reading count tables.
## Reading count tables with read.table().
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1175/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1176/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1177/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1178/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1179/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1180/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1181/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1182/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1183/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1184/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1185/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1186/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1187/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1188/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1189/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1190/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1191/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1192/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1193/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1194/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1195/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1196/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1197/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/spyogenes_rezia_201908/preprocessing/hpgl1198/outputs/bowtie2_spyogenes_5005/r1_trimmed.count.xz contains 1955 rows and merges to 1955 rows.
## Finished reading count tables.
## Matched 1950 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## The final expressionset has 1950 rows and 24 columns.
written <- write_expt(all_expt, excel=glue::glue("all_expt-v{ver}.xlsx"))
## Writing the first sheet, containing a legend and some summary data.
## Writing the raw reads.
## Graphing the raw reads.
## Writing the normalized reads.
## Graphing the normalized reads.
## Writing the median reads by factor.
## The factor wt_ll_thy has 4 rows.
## The factor scfd_ll_thy has 4 rows.
## The factor scfab_ll_thy has 4 rows.
## The factor wt_ll_cdm has 4 rows.
## The factor scfd_ll_cdm has 4 rows.
## The factor scfab_ll_cdm has 4 rows.
## Note: zip::zip() is deprecated, please use zip::zipr() instead
all_plots <- graph_metrics(all_expt)
## Graphing number of non-zero genes with respect to CPM by library.
## Graphing library sizes.
## The scale difference between the smallest and largest
## libraries is > 10. Assuming a log10 scale is better, set scale=FALSE if not.
## Graphing a boxplot.
## This data will benefit from being displayed on the log scale.
## If this is not desired, set scale='raw'
## Some entries are 0. We are on log scale, adding 1 to the data.
## Changed 1402 zero count features.
## Graphing a correlation heatmap.
## Graphing a standard median correlation.
## Performing correlation.
## Graphing a distance heatmap.
## Graphing a standard median distance.
## Performing distance.
## Graphing a PCA plot.
## Graphing a T-SNE plot.
## Plotting a density plot.
## This data will benefit from being displayed on the log scale.
## If this is not desired, set scale='raw'
## Some entries are 0. We are on log scale, setting them to 0.5.
## Changed 1402 zero count features.
## Plotting a CV plot.
## Naively calculating coefficient of variation/dispersion with respect to condition.
## Finished calculating dispersion estimates.
## Plotting the representation of the top-n genes.
## Plotting the expression of the top-n PC loaded genes.
## Printing a color to condition legend.
all_norm <- normalize_expt(all_expt, filter=TRUE,
norm="quant",convert="cpm", transform="log2")
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 136 low-count genes (1814 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## Step 5: not doing batch correction.
all_normbatch <- normalize_expt(all_expt, filter="pofa",
batch="svaseq", norm="quant",
convert="cpm", transform="log2")
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(quant(pofa(data)))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Warning in normalize_expt(all_expt, filter = "pofa", batch = "svaseq",
## norm = "quant", : Quantile normalization and sva do not always play well
## together.
## Step 1: performing count filter with option: pofa
## Removing 39 low-count genes (1911 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with svaseq.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 43283 entries are x>1: 94.4%.
## batch_counts: Before batch/surrogate estimation, 2 entries are x==0: 0.00436%.
## batch_counts: Before batch/surrogate estimation, 2579 entries are 0<x<1: 5.62%.
## The be method chose 7 surrogate variable(s).
## Attempting svaseq estimation with 7 surrogates.
## There are 36 (0.0785%) elements which are < 0 after batch correction.
norm_plots <- graph_metrics(all_norm)
## Graphing number of non-zero genes with respect to CPM by library.
## Graphing library sizes.
## Graphing a boxplot.
## Graphing a correlation heatmap.
## Graphing a standard median correlation.
## Performing correlation.
## Graphing a distance heatmap.
## Graphing a standard median distance.
## Performing distance.
## Graphing a PCA plot.
## Graphing a T-SNE plot.
## Plotting a density plot.
## Plotting a CV plot.
## Naively calculating coefficient of variation/dispersion with respect to condition.
## Finished calculating dispersion estimates.
## Plotting the representation of the top-n genes.
## Plotting the expression of the top-n PC loaded genes.
## Printing a color to condition legend.
normbatch_plots <- graph_metrics(all_normbatch)
## Graphing number of non-zero genes with respect to CPM by library.
## Graphing library sizes.
## Graphing a boxplot.
## Graphing a correlation heatmap.
## Graphing a standard median correlation.
## Performing correlation.
## Graphing a distance heatmap.
## Graphing a standard median distance.
## Performing distance.
## Graphing a PCA plot.
## Graphing a T-SNE plot.
## Plotting a density plot.
## Plotting a CV plot.
## Naively calculating coefficient of variation/dispersion with respect to condition.
## Finished calculating dispersion estimates.
## Plotting the representation of the top-n genes.
## Plotting the expression of the top-n PC loaded genes.
## Printing a color to condition legend.
all_plots$libsize
## Crap in a hat.
all_plots$nonzero
norm_plots$corheat
norm_plots$pc_plot
de_result <- all_pairwise(all_expt, model_batch=FALSE)
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 136 low-count genes (1814 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## Step 5: not doing batch correction.
## Plotting a PCA before surrogates/batch inclusion.
## Assuming no batch in model for testing pca.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
keepers <- list(
"thy_scfd_vs_thy_wt" = c("scfd_ll_thy", "wt_ll_thy"),
"thy_scfab_vs_thy_wt" = c("scfab_ll_thy", "wt_ll_thy"),
"cdm_scfd_vs_cdm_wt" = c("scfd_ll_cdm", "wt_ll_cdm"),
"cdm_scfab_vs_cdm_wt" = c("scfab_ll_cdm", "wt_ll_cdm"),
"wt_cdm_thy" = c("wt_ll_cdm", "wt_ll_thy"),
"scfd_cdm_thy" = c("scfd_ll_cdm", "scfd_ll_thy"),
"scfab_cdm_thy" = c("scfab_ll_cdm", "scfab_ll_thy"))
de_tables <- combine_de_tables(de_result, keepers=keepers,
excel=glue::glue("excel/de_tables-v{ver}.xlsx"))
## Deleting the file excel/de_tables-v20190916.xlsx before writing the tables.
## Writing a legend of columns.
## Printing a pca plot before/after surrogates/batch estimation.
## Working on 1/7: thy_scfd_vs_thy_wt which is: scfd_ll_thy/wt_ll_thy.
## Found inverse table with wt_ll_thy_vs_scfd_ll_thy
## The ebseq table is null.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Working on 2/7: thy_scfab_vs_thy_wt which is: scfab_ll_thy/wt_ll_thy.
## Found inverse table with wt_ll_thy_vs_scfab_ll_thy
## The ebseq table is null.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Working on 3/7: cdm_scfd_vs_cdm_wt which is: scfd_ll_cdm/wt_ll_cdm.
## Found inverse table with wt_ll_cdm_vs_scfd_ll_cdm
## The ebseq table is null.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Working on 4/7: cdm_scfab_vs_cdm_wt which is: scfab_ll_cdm/wt_ll_cdm.
## Found inverse table with wt_ll_cdm_vs_scfab_ll_cdm
## The ebseq table is null.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Working on 5/7: wt_cdm_thy which is: wt_ll_cdm/wt_ll_thy.
## Found inverse table with wt_ll_thy_vs_wt_ll_cdm
## The ebseq table is null.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Working on 6/7: scfd_cdm_thy which is: scfd_ll_cdm/scfd_ll_thy.
## Found inverse table with scfd_ll_thy_vs_scfd_ll_cdm
## The ebseq table is null.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Working on 7/7: scfab_cdm_thy which is: scfab_ll_cdm/scfab_ll_thy.
## Found inverse table with scfab_ll_thy_vs_scfab_ll_cdm
## The ebseq table is null.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Adding venn plots for thy_scfd_vs_thy_wt.
## Limma expression coefficients for thy_scfd_vs_thy_wt; R^2: 0.976; equation: y = 0.988x + 0.00655
## Edger expression coefficients for thy_scfd_vs_thy_wt; R^2: 0.978; equation: y = 0.985x + 0.147
## DESeq2 expression coefficients for thy_scfd_vs_thy_wt; R^2: 0.971; equation: y = 0.977x + 0.259
## Adding venn plots for thy_scfab_vs_thy_wt.
## Limma expression coefficients for thy_scfab_vs_thy_wt; R^2: 0.978; equation: y = 0.983x + 0.0594
## Edger expression coefficients for thy_scfab_vs_thy_wt; R^2: 0.979; equation: y = 0.958x + 0.445
## DESeq2 expression coefficients for thy_scfab_vs_thy_wt; R^2: 0.972; equation: y = 0.947x + 0.605
## Adding venn plots for cdm_scfd_vs_cdm_wt.
## Limma expression coefficients for cdm_scfd_vs_cdm_wt; R^2: 0.953; equation: y = 0.973x + 0.113
## Edger expression coefficients for cdm_scfd_vs_cdm_wt; R^2: 0.955; equation: y = 0.947x + 0.505
## DESeq2 expression coefficients for cdm_scfd_vs_cdm_wt; R^2: 0.943; equation: y = 0.928x + 0.801
## Adding venn plots for cdm_scfab_vs_cdm_wt.
## Limma expression coefficients for cdm_scfab_vs_cdm_wt; R^2: 0.988; equation: y = 0.995x + 0.0121
## Edger expression coefficients for cdm_scfab_vs_cdm_wt; R^2: 0.988; equation: y = 1.02x - 0.264
## DESeq2 expression coefficients for cdm_scfab_vs_cdm_wt; R^2: 0.985; equation: y = 1.03x - 0.419
## Adding venn plots for wt_cdm_thy.
## Limma expression coefficients for wt_cdm_thy; R^2: 0.962; equation: y = 0.969x + 0.213
## Edger expression coefficients for wt_cdm_thy; R^2: 0.964; equation: y = 0.967x + 0.324
## DESeq2 expression coefficients for wt_cdm_thy; R^2: 0.952; equation: y = 0.959x + 0.435
## Adding venn plots for scfd_cdm_thy.
## Limma expression coefficients for scfd_cdm_thy; R^2: 0.92; equation: y = 0.956x + 0.314
## Edger expression coefficients for scfd_cdm_thy; R^2: 0.924; equation: y = 0.928x + 0.705
## DESeq2 expression coefficients for scfd_cdm_thy; R^2: 0.898; equation: y = 0.911x + 0.972
## Adding venn plots for scfab_cdm_thy.
## Limma expression coefficients for scfab_cdm_thy; R^2: 0.924; equation: y = 0.954x + 0.341
## Edger expression coefficients for scfab_cdm_thy; R^2: 0.927; equation: y = 1x - 0.0572
## DESeq2 expression coefficients for scfab_cdm_thy; R^2: 0.907; equation: y = 1.01x - 0.164
## Writing summary information.
## Performing save of excel/de_tables-v20190916.xlsx.
de_sig <- extract_significant_genes(de_tables,
excel=glue::glue("excel/de_sig-v{ver}.xlsx"))
## Writing a legend of columns.
## Did not find the ebseq_logfc, skipping ebseq.
## Writing excel data according to limma for thy_scfd_vs_thy_wt: 1/28.
## After (adj)p filter, the up genes table has 380 genes.
## After (adj)p filter, the down genes table has 441 genes.
## After fold change filter, the up genes table has 136 genes.
## After fold change filter, the down genes table has 37 genes.
## Writing excel data according to limma for thy_scfab_vs_thy_wt: 2/28.
## After (adj)p filter, the up genes table has 392 genes.
## After (adj)p filter, the down genes table has 451 genes.
## After fold change filter, the up genes table has 132 genes.
## After fold change filter, the down genes table has 33 genes.
## Writing excel data according to limma for cdm_scfd_vs_cdm_wt: 3/28.
## After (adj)p filter, the up genes table has 564 genes.
## After (adj)p filter, the down genes table has 659 genes.
## After fold change filter, the up genes table has 226 genes.
## After fold change filter, the down genes table has 171 genes.
## Writing excel data according to limma for cdm_scfab_vs_cdm_wt: 4/28.
## After (adj)p filter, the up genes table has 280 genes.
## After (adj)p filter, the down genes table has 330 genes.
## After fold change filter, the up genes table has 44 genes.
## After fold change filter, the down genes table has 16 genes.
## Writing excel data according to limma for wt_cdm_thy: 5/28.
## After (adj)p filter, the up genes table has 508 genes.
## After (adj)p filter, the down genes table has 503 genes.
## After fold change filter, the up genes table has 137 genes.
## After fold change filter, the down genes table has 208 genes.
## Writing excel data according to limma for scfd_cdm_thy: 6/28.
## After (adj)p filter, the up genes table has 663 genes.
## After (adj)p filter, the down genes table has 643 genes.
## After fold change filter, the up genes table has 313 genes.
## After fold change filter, the down genes table has 324 genes.
## Writing excel data according to limma for scfab_cdm_thy: 7/28.
## After (adj)p filter, the up genes table has 761 genes.
## After (adj)p filter, the down genes table has 630 genes.
## After fold change filter, the up genes table has 286 genes.
## After fold change filter, the down genes table has 326 genes.
## Printing significant genes to the file: excel/de_sig-v20190916.xlsx
## 1/7: Creating significant table up_limma_thy_scfd_vs_thy_wt
## 2/7: Creating significant table up_limma_thy_scfab_vs_thy_wt
## 3/7: Creating significant table up_limma_cdm_scfd_vs_cdm_wt
## 4/7: Creating significant table up_limma_cdm_scfab_vs_cdm_wt
## 5/7: Creating significant table up_limma_wt_cdm_thy
## 6/7: Creating significant table up_limma_scfd_cdm_thy
## 7/7: Creating significant table up_limma_scfab_cdm_thy
## Writing excel data according to edger for thy_scfd_vs_thy_wt: 1/28.
## After (adj)p filter, the up genes table has 411 genes.
## After (adj)p filter, the down genes table has 354 genes.
## After fold change filter, the up genes table has 174 genes.
## After fold change filter, the down genes table has 36 genes.
## Writing excel data according to edger for thy_scfab_vs_thy_wt: 2/28.
## After (adj)p filter, the up genes table has 449 genes.
## After (adj)p filter, the down genes table has 369 genes.
## After fold change filter, the up genes table has 173 genes.
## After fold change filter, the down genes table has 18 genes.
## Writing excel data according to edger for cdm_scfd_vs_cdm_wt: 3/28.
## After (adj)p filter, the up genes table has 566 genes.
## After (adj)p filter, the down genes table has 597 genes.
## After fold change filter, the up genes table has 251 genes.
## After fold change filter, the down genes table has 160 genes.
## Writing excel data according to edger for cdm_scfab_vs_cdm_wt: 4/28.
## After (adj)p filter, the up genes table has 262 genes.
## After (adj)p filter, the down genes table has 284 genes.
## After fold change filter, the up genes table has 38 genes.
## After fold change filter, the down genes table has 48 genes.
## Writing excel data according to edger for wt_cdm_thy: 5/28.
## After (adj)p filter, the up genes table has 512 genes.
## After (adj)p filter, the down genes table has 501 genes.
## After fold change filter, the up genes table has 155 genes.
## After fold change filter, the down genes table has 234 genes.
## Writing excel data according to edger for scfd_cdm_thy: 6/28.
## After (adj)p filter, the up genes table has 659 genes.
## After (adj)p filter, the down genes table has 657 genes.
## After fold change filter, the up genes table has 315 genes.
## After fold change filter, the down genes table has 341 genes.
## Writing excel data according to edger for scfab_cdm_thy: 7/28.
## After (adj)p filter, the up genes table has 655 genes.
## After (adj)p filter, the down genes table has 666 genes.
## After fold change filter, the up genes table has 249 genes.
## After fold change filter, the down genes table has 381 genes.
## Printing significant genes to the file: excel/de_sig-v20190916.xlsx
## 1/7: Creating significant table up_edger_thy_scfd_vs_thy_wt
## 2/7: Creating significant table up_edger_thy_scfab_vs_thy_wt
## 3/7: Creating significant table up_edger_cdm_scfd_vs_cdm_wt
## 4/7: Creating significant table up_edger_cdm_scfab_vs_cdm_wt
## 5/7: Creating significant table up_edger_wt_cdm_thy
## 6/7: Creating significant table up_edger_scfd_cdm_thy
## 7/7: Creating significant table up_edger_scfab_cdm_thy
## Writing excel data according to deseq for thy_scfd_vs_thy_wt: 1/28.
## After (adj)p filter, the up genes table has 454 genes.
## After (adj)p filter, the down genes table has 379 genes.
## After fold change filter, the up genes table has 176 genes.
## After fold change filter, the down genes table has 32 genes.
## Writing excel data according to deseq for thy_scfab_vs_thy_wt: 2/28.
## After (adj)p filter, the up genes table has 464 genes.
## After (adj)p filter, the down genes table has 430 genes.
## After fold change filter, the up genes table has 166 genes.
## After fold change filter, the down genes table has 20 genes.
## Writing excel data according to deseq for cdm_scfd_vs_cdm_wt: 3/28.
## After (adj)p filter, the up genes table has 615 genes.
## After (adj)p filter, the down genes table has 583 genes.
## After fold change filter, the up genes table has 257 genes.
## After fold change filter, the down genes table has 136 genes.
## Writing excel data according to deseq for cdm_scfab_vs_cdm_wt: 4/28.
## After (adj)p filter, the up genes table has 306 genes.
## After (adj)p filter, the down genes table has 335 genes.
## After fold change filter, the up genes table has 38 genes.
## After fold change filter, the down genes table has 47 genes.
## Writing excel data according to deseq for wt_cdm_thy: 5/28.
## After (adj)p filter, the up genes table has 540 genes.
## After (adj)p filter, the down genes table has 527 genes.
## After fold change filter, the up genes table has 148 genes.
## After fold change filter, the down genes table has 232 genes.
## Writing excel data according to deseq for scfd_cdm_thy: 6/28.
## After (adj)p filter, the up genes table has 682 genes.
## After (adj)p filter, the down genes table has 664 genes.
## After fold change filter, the up genes table has 315 genes.
## After fold change filter, the down genes table has 332 genes.
## Writing excel data according to deseq for scfab_cdm_thy: 7/28.
## After (adj)p filter, the up genes table has 680 genes.
## After (adj)p filter, the down genes table has 672 genes.
## After fold change filter, the up genes table has 251 genes.
## After fold change filter, the down genes table has 373 genes.
## Printing significant genes to the file: excel/de_sig-v20190916.xlsx
## 1/7: Creating significant table up_deseq_thy_scfd_vs_thy_wt
## 2/7: Creating significant table up_deseq_thy_scfab_vs_thy_wt
## 3/7: Creating significant table up_deseq_cdm_scfd_vs_cdm_wt
## 4/7: Creating significant table up_deseq_cdm_scfab_vs_cdm_wt
## 5/7: Creating significant table up_deseq_wt_cdm_thy
## 6/7: Creating significant table up_deseq_scfd_cdm_thy
## 7/7: Creating significant table up_deseq_scfab_cdm_thy
## Writing excel data according to basic for thy_scfd_vs_thy_wt: 1/28.
## After (adj)p filter, the up genes table has 307 genes.
## After (adj)p filter, the down genes table has 330 genes.
## After fold change filter, the up genes table has 111 genes.
## After fold change filter, the down genes table has 19 genes.
## Writing excel data according to basic for thy_scfab_vs_thy_wt: 2/28.
## After (adj)p filter, the up genes table has 260 genes.
## After (adj)p filter, the down genes table has 324 genes.
## After fold change filter, the up genes table has 89 genes.
## After fold change filter, the down genes table has 20 genes.
## Writing excel data according to basic for cdm_scfd_vs_cdm_wt: 3/28.
## After (adj)p filter, the up genes table has 421 genes.
## After (adj)p filter, the down genes table has 502 genes.
## After fold change filter, the up genes table has 204 genes.
## After fold change filter, the down genes table has 162 genes.
## Writing excel data according to basic for cdm_scfab_vs_cdm_wt: 4/28.
## After (adj)p filter, the up genes table has 2 genes.
## After (adj)p filter, the down genes table has 4 genes.
## After fold change filter, the up genes table has 0 genes.
## After fold change filter, the down genes table has 3 genes.
## Writing excel data according to basic for wt_cdm_thy: 5/28.
## After (adj)p filter, the up genes table has 387 genes.
## After (adj)p filter, the down genes table has 393 genes.
## After fold change filter, the up genes table has 115 genes.
## After fold change filter, the down genes table has 188 genes.
## Writing excel data according to basic for scfd_cdm_thy: 6/28.
## After (adj)p filter, the up genes table has 592 genes.
## After (adj)p filter, the down genes table has 585 genes.
## After fold change filter, the up genes table has 291 genes.
## After fold change filter, the down genes table has 315 genes.
## Writing excel data according to basic for scfab_cdm_thy: 7/28.
## After (adj)p filter, the up genes table has 641 genes.
## After (adj)p filter, the down genes table has 550 genes.
## After fold change filter, the up genes table has 260 genes.
## After fold change filter, the down genes table has 306 genes.
## Printing significant genes to the file: excel/de_sig-v20190916.xlsx
## 1/7: Creating significant table up_basic_thy_scfd_vs_thy_wt
## 2/7: Creating significant table up_basic_thy_scfab_vs_thy_wt
## 3/7: Creating significant table up_basic_cdm_scfd_vs_cdm_wt
## The up table cdm_scfab_vs_cdm_wt is empty.
## 5/7: Creating significant table up_basic_wt_cdm_thy
## 6/7: Creating significant table up_basic_scfd_cdm_thy
## 7/7: Creating significant table up_basic_scfab_cdm_thy
## Adding significance bar plots.
thy_scfd_wt_sig <- de_sig[["deseq"]][["ups"]][[1]]
thy_scfab_wt_sig <- de_sig[["deseq"]][["ups"]][[2]]
cdm_scfd_wt_sig <- de_sig[["deseq"]][["ups"]][[3]]
cdm_scfab_wt_sig <- de_sig[["deseq"]][["ups"]][[4]]
wt_cdm_thy <- de_sig[["deseq"]][["ups"]][[5]]
scfd_cdm_thy <- de_sig[["deseq"]][["ups"]][[6]]
scfab_cdm_thy <- de_sig[["deseq"]][["ups"]][[7]]
thy_scfd_wt_goseq <- simple_goseq(sig_genes=thy_scfd_wt_sig, go_db=mic_go, length_db=length_db)
## Using the row names of your table.
## Found 146 genes out of 176 from the sig_genes in the go_db.
## Found 176 genes out of 176 from the sig_genes in the length_db.
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## simple_goseq(): Calculating q-values
## simple_goseq(): Filling godata with terms, this is slow.
## Testing that go categories are defined.
## Removing undefined categories.
## Gathering synonyms.
## Gathering category definitions.
## simple_goseq(): Making pvalue plots for the ontologies.
thy_scfd_wt_goseq[["pvalue_plots"]][["bpp_plot_over"]]
thy_scfab_wt_goseq <- simple_goseq(sig_genes=thy_scfab_wt_sig, go_db=mic_go, length_db=length_db)
## Using the row names of your table.
## Found 119 genes out of 166 from the sig_genes in the go_db.
## Found 166 genes out of 166 from the sig_genes in the length_db.
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## simple_goseq(): Calculating q-values
## simple_goseq(): Filling godata with terms, this is slow.
## Testing that go categories are defined.
## Removing undefined categories.
## Gathering synonyms.
## Gathering category definitions.
## simple_goseq(): Making pvalue plots for the ontologies.
thy_scfab_wt_goseq[["pvalue_plots"]][["bpp_plot_over"]]
cdm_scfd_wt_goseq <- simple_goseq(sig_genes=cdm_scfd_wt_sig, go_db=mic_go, length_db=length_db)
## Using the row names of your table.
## Found 160 genes out of 257 from the sig_genes in the go_db.
## Found 257 genes out of 257 from the sig_genes in the length_db.
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## simple_goseq(): Calculating q-values
## simple_goseq(): Filling godata with terms, this is slow.
## Testing that go categories are defined.
## Removing undefined categories.
## Gathering synonyms.
## Gathering category definitions.
## simple_goseq(): Making pvalue plots for the ontologies.
cdm_scfd_wt_goseq[["pvalue_plots"]][["bpp_plot_over"]]
cdm_scfab_wt_goseq <- simple_goseq(sig_genes=cdm_scfab_wt_sig, go_db=mic_go, length_db=length_db)
## Using the row names of your table.
## Found 20 genes out of 38 from the sig_genes in the go_db.
## Found 38 genes out of 38 from the sig_genes in the length_db.
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## simple_goseq(): Calculating q-values
## simple_goseq(): Filling godata with terms, this is slow.
## Testing that go categories are defined.
## Removing undefined categories.
## Gathering synonyms.
## Gathering category definitions.
## simple_goseq(): Making pvalue plots for the ontologies.
cdm_scfab_wt_goseq[["pvalue_plots"]][["bpp_plot_over"]]
wt_cdm_thy_goseq <- simple_goseq(sig_genes=wt_cdm_thy, go_db=mic_go, length_db=length_db)
## Using the row names of your table.
## Found 88 genes out of 148 from the sig_genes in the go_db.
## Found 148 genes out of 148 from the sig_genes in the length_db.
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## simple_goseq(): Calculating q-values
## simple_goseq(): Filling godata with terms, this is slow.
## Testing that go categories are defined.
## Removing undefined categories.
## Gathering synonyms.
## Gathering category definitions.
## simple_goseq(): Making pvalue plots for the ontologies.
wt_cdm_thy_goseq[["pvalue_plots"]][["bpp_plot_over"]]
scfd_cdm_thy_goseq <- simple_goseq(sig_genes=scfd_cdm_thy, go_db=mic_go, length_db=length_db)
## Using the row names of your table.
## Found 159 genes out of 315 from the sig_genes in the go_db.
## Found 315 genes out of 315 from the sig_genes in the length_db.
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## simple_goseq(): Calculating q-values
## simple_goseq(): Filling godata with terms, this is slow.
## Testing that go categories are defined.
## Removing undefined categories.
## Gathering synonyms.
## Gathering category definitions.
## simple_goseq(): Making pvalue plots for the ontologies.
scfd_cdm_thy_goseq[["pvalue_plots"]][["bpp_plot_over"]]
scfab_cdm_thy_goseq <- simple_goseq(sig_genes=scfab_cdm_thy, go_db=mic_go, length_db=length_db)
## Using the row names of your table.
## Found 148 genes out of 251 from the sig_genes in the go_db.
## Found 251 genes out of 251 from the sig_genes in the length_db.
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## simple_goseq(): Calculating q-values
## simple_goseq(): Filling godata with terms, this is slow.
## Testing that go categories are defined.
## Removing undefined categories.
## Gathering synonyms.
## Gathering category definitions.
## simple_goseq(): Making pvalue plots for the ontologies.
scfab_cdm_thy_goseq[["pvalue_plots"]][["bpp_plot_over"]]
if (!isTRUE(get0("skip_load"))) {
pander::pander(sessionInfo())
message(paste0("This is hpgltools commit: ", get_git_commit()))
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
tmp <- sm(saveme(filename=this_save))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset f3c1e03852c87dc60c7e72e726bb640572e695ff
## This is hpgltools commit: Thu Aug 22 15:32:44 2019 -0400: f3c1e03852c87dc60c7e72e726bb640572e695ff
## Saving to index-v20190916.rda.xz