1 M. musculus

This will be a very minimal analysis until we get some replicates.

1.1 Annotations

I am using mm38_100.

## My load_biomart_annotations() function defaults to human, so that will be quick.
mm_annot <- load_biomart_annotations(species="mmusculus")
## The biomart annotations file already exists, loading from it.
mm_annot <- mm_annot[["annotation"]]
mm_annot[["txid"]] <- paste0(mm_annot[["ensembl_transcript_id"]], ".", mm_annot[["version"]])
rownames(mm_annot) <- make.names(mm_annot[["ensembl_gene_id"]], unique=TRUE)

tx_gene_map <- mm_annot[, c("txid", "ensembl_gene_id")]

So, I now have a table of mouse annotations.

1.2 Metadata

I am going to write a quick sample sheet in the current working directory called ‘all_samples.xlsx’ and put the names of the count tables in it.

1.3 Create expressionsets

Here I combine the metadata, count data, and annotations.

It is worth noting that the gene IDs from htseq-count probably do not match the annotations retrieved because they are likely exon-based rather than gene based. This is not really a problem, but don’t forget it!

1.4 Hisat2 expressionset

hisat_annot <- mm_annot
rownames(hisat_annot) <- paste0("gene:", rownames(hisat_annot))
mm38_hisat <- create_expt("sample_sheets/all_samples.xlsx",
                          gene_info=hisat_annot)
## Reading the sample metadata.
## Dropped 1 rows from the sample metadata because they were blank.
## The sample definitions comprises: 23 rows(samples) and 24 columns(metadata fields).
## Reading count tables.
## Reading count files with read.table().
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mmusculus_iprgc_2019/preprocessing/iprgc_01/outputs/hisat2_mm38_95/r1_trimmed.count.xz contains 25788 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mmusculus_iprgc_2019/preprocessing/iprgc_02/outputs/hisat2_mm38_95/r1_trimmed.count.xz contains 25788 rows and merges to 25788 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mmusculus_iprgc_2019/preprocessing/iprgc_03/outputs/hisat2_mm38_95/r1_trimmed.count.xz contains 25788 rows and merges to 25788 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mmusculus_iprgc_2019/preprocessing/iprgc_04/outputs/hisat2_mm38_95/r1_trimmed.count.xz contains 25788 rows and merges to 25788 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mmusculus_iprgc_2019/preprocessing/iprgc_05/outputs/hisat2_mm38_95/r1_trimmed.count.xz contains 25788 rows and merges to 25788 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mmusculus_iprgc_2019/preprocessing/iprgc_06/outputs/hisat2_mm38_95/r1_trimmed.count.xz contains 25788 rows and merges to 25788 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mmusculus_iprgc_2019/preprocessing/iprgc_07/outputs/hisat2_mm38_95/r1_trimmed.count.xz contains 25788 rows and merges to 25788 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mmusculus_iprgc_2019/preprocessing/iprgc_08/outputs/hisat2_mm38_95/r1_trimmed.count.xz contains 25788 rows and merges to 25788 rows.
## preprocessing/202009/01_wt_retina/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/02_wt_scn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/03_wt_dlgn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/04_ko_retina/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/05_ko_scn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/06_ko_dlgn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/07_het1_retina/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/08_het2_retina/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/09_het3_retina/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/10_het1_scn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/11_het2_scn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/12_het3_scn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/13_het1_dlgn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/14_het2_dlgn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/15_het3_dlgn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## Finished reading count data.
## Matched 25371 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 25598 rows and 23 columns.
plot_libsize(mm38_hisat)$plot

mm38_first <- normalize_expt(mm38_hisat, filter=TRUE, convert="cpm", norm="quant", transform="log2")
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 11029 low-count genes (14569 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 192 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
pp(file="pca_norm.png", image=plot_pca(mm38_first)$plot)
## Writing the image to: pca_norm.png and calling dev.off().

mm38_norm <- normalize_expt(mm38_hisat, filter=TRUE, convert="cpm", batch="svaseq")
## This function will replace the expt$expressionset slot with:
## svaseq(cpm(cbcb(data)))
## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
##  some metrics are easier to see when the data is log2 transformed, but
##  EdgeR/DESeq do not accept transformed data.
## Leaving the data unnormalized.  This is necessary for DESeq, but
##  EdgeR/limma might benefit from normalization.  Good choices include quantile,
##  size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 11029 low-count genes (14569 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## Step 4: not transforming the data.
## Step 5: doing batch correction with svaseq.
## Using the current state of normalization.
## Passing the data to all_adjusters using the svaseq estimate type.
## batch_counts: Before batch/surrogate estimation, 306878 entries are x>1: 92%.
## batch_counts: Before batch/surrogate estimation, 3404 entries are x==0: 1%.
## batch_counts: Before batch/surrogate estimation, 24805 entries are 0<x<1: 7%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## There are 2286 (1%) elements which are < 0 after batch correction.
## Setting low elements to zero.
mm38_norm <- normalize_expt(mm38_norm, transform="log2")
## This function will replace the expt$expressionset slot with:
## log2(data)
## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Filter is false, this should likely be set to something, good
##  choices include cbcb, kofa, pofa (anything but FALSE).  If you want this to
##  stay FALSE, keep in mind that if other normalizations are performed, then the
##  resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data unconverted.  It is often advisable to cpm/rpkm
##  the data to normalize for sampling differences, keep in mind though that rpkm
##  has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
##  will try to detect this).
## Leaving the data unnormalized.  This is necessary for DESeq, but
##  EdgeR/limma might benefit from normalization.  Good choices include quantile,
##  size-factor, tmm, etc.
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: not doing count filtering.
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: transforming the data with log2.
## transform_counts: Found 2286 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
pp(file="pca_sva.png", image=plot_pca(mm38_norm)$plot)
## Writing the image to: pca_sva.png and calling dev.off().

new <- subset_expt(mm38_hisat, subset="batch=='b202009'")
## Using a subset expression.
## There were 23, now there are 15 samples.
new_norm <- normalize_expt(new, filter=TRUE, convert="cpm", norm="quant", transform="log2")
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 11363 low-count genes (14235 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 551 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
plot_pca(new_norm)$plot

mm38_salmon <- sm(create_expt("sample_sheets/all_samples.xlsx", tx_gene_map=tx_gene_map,
                              gene_info=mm_annot, file_column="salmonfile"))

mmtx_annot <- mm_annot
rownames(mmtx_annot) <- mm_annot[["txid"]]
mm38_saltx <- sm(create_expt("sample_sheets/all_samples.xlsx",
                             gene_info=mmtx_annot, file_column="salmonfile"))

1.5 Query expressionsets

In this block I will calculate all the diagnostic plots, but not show them. I will show them next with a little annotation.

I will leave the output for the first of each invocation and silence it for the second.

1.5.1 Initial salmon plots

mm38_plots_sa <- sm(graph_metrics(mm38_salmon))
mm38_norm_sa <- normalize_expt(mm38_salmon, norm="quant", convert="cpm",
                            transform="log2", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 2794 low-count genes (3970 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 1105 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
mm38n_plots_sa <- sm(graph_metrics(mm38_norm_sa))
mm38_plots_sa$legend

mm38_plots_sa$libsize

mm38_plots_sa$nonzero

mm38n_plots_sa$density

mm38n_plots_sa$pc_plot

1.5.2 Initial hisat2 plots

mm38_plots_hi <- sm(graph_metrics(mm38_hisat))
mm38_norm_hi <- normalize_expt(mm38_hisat, norm="quant", convert="cpm",
                               transform="log2", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 11029 low-count genes (14569 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 192 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
mm38n_plots_hi <- sm(graph_metrics(mm38_norm_hi))
mm38_plots_hi$libsize

mm38_plots_hi$nonzero

mm38n_plots_hi$density

mm38n_plots_hi$pc_plot

1.6 Do a simple DE

The only interesting DE I see in this is to compare the retinas to the dlgns. I can treat them as replicates and compare.

These differential expression analyses are EXPLICITLY NOT what you care about. However, they are useful for two purposes:

  1. Seeing that the three tissue types are indeed different.
  2. Setting up the table of results with appropriate rows/columns of (rows)genes and (columns) annotations. We will therefore add to these tables the results of the expression analyses that you actually do care about.

When we receive full replicate sets, this cheater method of encapsulating the data will not longer be required.

1.6.1 With salmon

mm_sa <- set_expt_conditions(mm38_salmon, fact="celltype")
mm_norm_sa <- sm(normalize_expt(mm_sa, convert="rpkm", transform="log2", column="cds_length"))
plot_pca(mm_norm_sa)$plot
mm_de_sa <- all_pairwise(mm_sa, model_batch=FALSE)
## Plotting a PCA before surrogate/batch inclusion.
## Not putting labels on the PC plot.
## Assuming no batch in model for testing pca.
## Not putting labels on the PC plot.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.

1.6.2 With hisat2

## mm_hi <- set_expt_conditions(mm38_hisat, fact="celltype")
mm_hi <- mm38_hisat
mm_norm_hi <- sm(normalize_expt(mm_hi, convert="rpkm", transform="log2", filter=TRUE,
                                column="cds_length", batch="svaseq"))
## Error in density.default(x, adjust = adj) : 'x' contains missing values
## Error in density.default(x, adjust = adj) : 'x' contains missing values
plot_pca(mm_norm_hi)$plot
keepers <- list(
  "wt_dlgnret" = c("wt_dlgn", "wt_retina"),
  "wt_scnret" = c("wt_scn", "wt_retina"),
  "wt_dlgnscn" = c("wt_dlgn", "wt_scn"),
  "normret" = c("het_retina", "wt_retina"),
  "koret" = c("ko_retina", "wt_retina"),
  "normscn" = c("het_scn", "wt_scn"),
  "koscn" = c("ko_scn", "wt_scn"),
  "normdlgn" = c("het_dlgn", "wt_dlgn"),
  "kodlgn" = c("ko_dlgn", "wt_dlgn"),
  "normdlgn_vs_normret" = c("normdlgn", "normret"),
  "normscn_vs_normret" = c("normscn", "normret"),
  "kodlgn_vs_koret" = c("kodlgn", "koret"),
  "koscn_vs_koret" = c("koscn", "koret"),
  "koscn_vs_kodlgn" = c("koscn", "kodlgn"),
  "koret_vs_normret" = c("ko_retina", "het_retina"),
  "koscn_vs_normscn" = c("ko_scn", "het_scn"),
  "kodlgn_vs_normdlgn" = c("ko_dlgn", "het_dlgn"),
  "normko_retdlgn" = c("normdlgn_vs_normret", "kodlgn_vs_koret"),
  "normko_retscn" = c("normscn_vs_normret", "koscn_vs_koret"))
extras <- "normdlgn_vs_normret = (het_dlgn-wt_dlgn)-(het_retina-wt_retina),
           normscn_vs_normret = (het_scn-wt_scn)-(het_retina-wt_retina),
           kodlgn_vs_koret = (ko_dlgn-wt_dlgn)-(ko_retina-wt_retina),
           koscn_vs_koret = (ko_scn-wt_scn)-(ko_retina-wt_retina),
           koscn_vs_kodlgn = (ko_scn-wt_scn)-(ko_dlgn-wt_dlgn),
           normko_retdlgn = ((het_dlgn-wt_dlgn)-(het_retina-wt_retina)) - ((ko_dlgn-wt_dlgn)-(ko_retina-wt_retina)),
           normko_retscn = ((het_scn-wt_scn)-(het_retina-wt_retina)) - ((ko_scn-wt_scn)-(ko_retina-wt_retina))"
mm_filt <- normalize_expt(mm_hi, filter=TRUE)
## This function will replace the expt$expressionset slot with:
## cbcb(data)
## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
##  some metrics are easier to see when the data is log2 transformed, but
##  EdgeR/DESeq do not accept transformed data.
## Leaving the data unconverted.  It is often advisable to cpm/rpkm
##  the data to normalize for sampling differences, keep in mind though that rpkm
##  has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
##  will try to detect this).
## Leaving the data unnormalized.  This is necessary for DESeq, but
##  EdgeR/limma might benefit from normalization.  Good choices include quantile,
##  size-factor, tmm, etc.
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 11029 low-count genes (14569 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
mm_test <- limma_pairwise(mm_filt, model_batch="svaseq", extra_contrasts=extras)
## Starting limma pairwise comparison.
## libsize was not specified, this parameter has profound effects on limma's result.
## Using the libsize from expt$best_libsize.
## Limma step 1/6: choosing model.
## Extracting surrogate estimates from svaseq and adding them to the model.
## batch_counts: Before batch/surrogate estimation, 329826 entries are x>1: 98%.
## batch_counts: Before batch/surrogate estimation, 3404 entries are x==0: 1%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## Choosing the non-intercept containing model.
## Limma step 2/6: running limma::voom(), switch with the argument 'which_voom'.
## Using normalize.method=quantile for voom.
## Limma step 3/6: running lmFit with method: ls.
## Limma step 4/6: making and fitting contrasts with no intercept. (~ 0 + factors)
## Limma step 5/6: Running eBayes with robust=FALSE and trend=FALSE.
## Limma step 6/6: Writing limma outputs.
## Limma step 6/6: 1/43: Creating table: het_retina_vs_het_dlgn.  Adjust=BH
## Limma step 6/6: 2/43: Creating table: het_scn_vs_het_dlgn.  Adjust=BH
## Limma step 6/6: 3/43: Creating table: ko_dlgn_vs_het_dlgn.  Adjust=BH
## Limma step 6/6: 4/43: Creating table: ko_retina_vs_het_dlgn.  Adjust=BH
## Limma step 6/6: 5/43: Creating table: ko_scn_vs_het_dlgn.  Adjust=BH
## Limma step 6/6: 6/43: Creating table: wt_dlgn_vs_het_dlgn.  Adjust=BH
## Limma step 6/6: 7/43: Creating table: wt_retina_vs_het_dlgn.  Adjust=BH
## Limma step 6/6: 8/43: Creating table: wt_scn_vs_het_dlgn.  Adjust=BH
## Limma step 6/6: 9/43: Creating table: het_scn_vs_het_retina.  Adjust=BH
## Limma step 6/6: 10/43: Creating table: ko_dlgn_vs_het_retina.  Adjust=BH
## Limma step 6/6: 11/43: Creating table: ko_retina_vs_het_retina.  Adjust=BH
## Limma step 6/6: 12/43: Creating table: ko_scn_vs_het_retina.  Adjust=BH
## Limma step 6/6: 13/43: Creating table: wt_dlgn_vs_het_retina.  Adjust=BH
## Limma step 6/6: 14/43: Creating table: wt_retina_vs_het_retina.  Adjust=BH
## Limma step 6/6: 15/43: Creating table: wt_scn_vs_het_retina.  Adjust=BH
## Limma step 6/6: 16/43: Creating table: ko_dlgn_vs_het_scn.  Adjust=BH
## Limma step 6/6: 17/43: Creating table: ko_retina_vs_het_scn.  Adjust=BH
## Limma step 6/6: 18/43: Creating table: ko_scn_vs_het_scn.  Adjust=BH
## Limma step 6/6: 19/43: Creating table: wt_dlgn_vs_het_scn.  Adjust=BH
## Limma step 6/6: 20/43: Creating table: wt_retina_vs_het_scn.  Adjust=BH
## Limma step 6/6: 21/43: Creating table: wt_scn_vs_het_scn.  Adjust=BH
## Limma step 6/6: 22/43: Creating table: ko_retina_vs_ko_dlgn.  Adjust=BH
## Limma step 6/6: 23/43: Creating table: ko_scn_vs_ko_dlgn.  Adjust=BH
## Limma step 6/6: 24/43: Creating table: wt_dlgn_vs_ko_dlgn.  Adjust=BH
## Limma step 6/6: 25/43: Creating table: wt_retina_vs_ko_dlgn.  Adjust=BH
## Limma step 6/6: 26/43: Creating table: wt_scn_vs_ko_dlgn.  Adjust=BH
## Limma step 6/6: 27/43: Creating table: ko_scn_vs_ko_retina.  Adjust=BH
## Limma step 6/6: 28/43: Creating table: wt_dlgn_vs_ko_retina.  Adjust=BH
## Limma step 6/6: 29/43: Creating table: wt_retina_vs_ko_retina.  Adjust=BH
## Limma step 6/6: 30/43: Creating table: wt_scn_vs_ko_retina.  Adjust=BH
## Limma step 6/6: 31/43: Creating table: wt_dlgn_vs_ko_scn.  Adjust=BH
## Limma step 6/6: 32/43: Creating table: wt_retina_vs_ko_scn.  Adjust=BH
## Limma step 6/6: 33/43: Creating table: wt_scn_vs_ko_scn.  Adjust=BH
## Limma step 6/6: 34/43: Creating table: wt_retina_vs_wt_dlgn.  Adjust=BH
## Limma step 6/6: 35/43: Creating table: wt_scn_vs_wt_dlgn.  Adjust=BH
## Limma step 6/6: 36/43: Creating table: wt_scn_vs_wt_retina.  Adjust=BH
## Limma step 6/6: 37/43: Creating table: normdlgn_vs_normret.  Adjust=BH
## Limma step 6/6: 38/43: Creating table: normscn_vs_normret.  Adjust=BH
## Limma step 6/6: 39/43: Creating table: kodlgn_vs_koret.  Adjust=BH
## Limma step 6/6: 40/43: Creating table: koscn_vs_koret.  Adjust=BH
## Limma step 6/6: 41/43: Creating table: koscn_vs_kodlgn.  Adjust=BH
## Limma step 6/6: 42/43: Creating table: normko_retdlgn.  Adjust=BH
## Limma step 6/6: 43/43: Creating table: normko_retscn.  Adjust=BH
## Limma step 6/6: 1/9: Creating table: het_dlgn.  Adjust=BH
## Limma step 6/6: 2/9: Creating table: het_retina.  Adjust=BH
## Limma step 6/6: 3/9: Creating table: het_scn.  Adjust=BH
## Limma step 6/6: 4/9: Creating table: ko_dlgn.  Adjust=BH
## Limma step 6/6: 5/9: Creating table: ko_retina.  Adjust=BH
## Limma step 6/6: 6/9: Creating table: ko_scn.  Adjust=BH
## Limma step 6/6: 7/9: Creating table: wt_dlgn.  Adjust=BH
## Limma step 6/6: 8/9: Creating table: wt_retina.  Adjust=BH
## Limma step 6/6: 9/9: Creating table: wt_scn.  Adjust=BH
mm_test <- deseq_pairwise(mm_filt, model_batch="svaseq", extra_contrasts=extras)
## Hey you, use deseq2 pairwise.
## Starting DESeq2 pairwise comparisons.
## The data should be suitable for EdgeR/DESeq/EBSeq. If they freak out, check the state of the count table and ensure that it is in integer counts.
## Extracting surrogate estimates from svaseq and adding them to the model.
## batch_counts: Before batch/surrogate estimation, 329826 entries are x>1: 98%.
## batch_counts: Before batch/surrogate estimation, 3404 entries are x==0: 1%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## Choosing the non-intercept containing model.
## DESeq2 step 1/5: Including a matrix of batch estimates in the deseq model.
## converting counts to integer mode
## DESeq2 step 2/5: Estimate size factors.
## DESeq2 step 3/5: Estimate dispersions.
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## Using a parametric fitting seems to have worked.
## DESeq2 step 4/5: nbinomWaldTest.
## The contrast normdlgn is not in the results.
## If this is not an extra contrast, then this is an error.
## The contrast normscn is not in the results.
## If this is not an extra contrast, then this is an error.
## The contrast kodlgn is not in the results.
## If this is not an extra contrast, then this is an error.
## The contrast koscn is not in the results.
## If this is not an extra contrast, then this is an error.
## The contrast koscn is not in the results.
## If this is not an extra contrast, then this is an error.
## The contrast normko_retdlgn is not in the results.
## If this is not an extra contrast, then this is an error.
## The contrast normko_retscn is not in the results.
## If this is not an extra contrast, then this is an error.
mm_test <- edger_pairwise(mm_filt, model_batch="svaseq", extra_contrasts=extras)
## Starting edgeR pairwise comparisons.
## The data should be suitable for EdgeR/DESeq/EBSeq. If they freak out, check the state of the count table and ensure that it is in integer counts.
## Extracting surrogate estimates from svaseq and adding them to the model.
## batch_counts: Before batch/surrogate estimation, 329826 entries are x>1: 98%.
## batch_counts: Before batch/surrogate estimation, 3404 entries are x==0: 1%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## Choosing the non-intercept containing model.
## EdgeR step 1/9: Importing and normalizing data.
## EdgeR step 2/9: Estimating the common dispersion.
## EdgeR step 3/9: Estimating dispersion across genes.
## EdgeR step 4/9: Estimating GLM Common dispersion.
## EdgeR step 5/9: Estimating GLM Trended dispersion.
## EdgeR step 6/9: Estimating GLM Tagged dispersion.
## EdgeR step 7/9: Running glmFit, switch to glmQLFit by changing the argument 'edger_test'.
## EdgeR step 8/9: Making pairwise contrasts.
mm_test <- ebseq_pairwise(mm_filt, model_batch="svaseq", extra_contrasts=extras)
## The data should be suitable for EdgeR/DESeq/EBSeq. If they freak out, check the state of the count table and ensure that it is in integer counts.
## Starting EBSeq pairwise subset.
## Choosing the non-intercept containing model.
## Starting EBTest of het_dlgn vs. het_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_dlgn vs. het_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_dlgn vs. ko_dlgn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_dlgn vs. ko_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_dlgn vs. ko_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_dlgn vs. wt_dlgn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_dlgn vs. wt_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_dlgn vs. wt_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_retina vs. het_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_retina vs. ko_dlgn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_retina vs. ko_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_retina vs. ko_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_retina vs. wt_dlgn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_retina vs. wt_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_retina vs. wt_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_scn vs. ko_dlgn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_scn vs. ko_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_scn vs. ko_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_scn vs. wt_dlgn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_scn vs. wt_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_scn vs. wt_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_dlgn vs. ko_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_dlgn vs. ko_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_dlgn vs. wt_dlgn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_dlgn vs. wt_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_dlgn vs. wt_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_retina vs. ko_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_retina vs. wt_dlgn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_retina vs. wt_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_retina vs. wt_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_scn vs. wt_dlgn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_scn vs. wt_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_scn vs. wt_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of wt_dlgn vs. wt_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of wt_dlgn vs. wt_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of wt_retina vs. wt_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
mm_de_hi <- sm(all_pairwise(mm_filt, model_batch="svaseq", parallel=FALSE, extra_contrasts=extras))
## Error in dimnames(x) <- dn: length of 'dimnames' [2] not equal to array extent
mm_de_tables <- combine_de_tables(mm_de_hi, excel="excel/testing_202010.xlsx")
## Error in combine_de_tables(mm_de_hi, excel = "excel/testing_202010.xlsx"): object 'mm_de_hi' not found

1.7 Set up for initial analysis

Until we get full replicates, I will do simple subtractions.

1.7.1 Term definition

In an attempt to keep some clarity in the terms used, I want to define them now. There are three contexts in which we will consider the data:

  1. The individual sample type. When considering individual samples, I will use three terms in this and only this context: wild-type (wt), het, and mut.

  2. The individual translatome. These are defines as something / baseline. I will exclusively call the wt samples ‘baseline’ when speaking in this context. I will exclusively state ‘normal’ when referring to het / wt samples, and I will state ‘ko’ when referring to mut / wt samples in the translatome context.

  3. Translatome vs. translatome. Whenever comparing translatomes, I will use the names as in #2 and always put the numerator first when writing the name of a comparison.

The most complex example of the above nomenclature is:

“normko_retdlgn is defined as normret_vs_normdlgn - koret_vs_kodlgn”

This states we are examining at the translatome context: (norm(retina translatome) - norm(dlgn translatome)) - (ko(retina translatome) - ko(dlgn translatome))

Which in turn is synonymous to the following at the sample context: ((rethet - retwt) - (dlgnhet - dlgnwt)) - ((retko - retwt) - (dlgnko - dlgnwt))

Now let us associate the various variable names with the appropriate samples:

dlgnwt <- "iprgc_01"
retwt <- "iprgc_02"
scnwt <- "iprgc_03"

dlgnhet <- "iprgc_04"
rethet <- "iprgc_05"
scnhet <- NULL  ## Does not yet exist.

dlgnmut <- "iprgc_06"
retmut <- "iprgc_07"
scnmut <- "iprgc_08"

Give these variable names, now lets associate columns of the expression data with them. These are at the sample context, so the appropriate names are: ‘wt’, ‘het’, and ‘mut’. In each case I will prefix the genotype with the tissue type: ‘ret’, ‘dlgn’, and ‘scn’. Thus ‘retwt’ refers to the sample used to calculate the translatome retina baseline; in contrast ‘dlgnmut’ is the sample which provides the dlgn knockout.

## Sample context
mm38_norm <- mm_norm_sa
dlgnwt <- exprs(mm38_norm)[, dlgnwt]
retwt <- exprs(mm38_norm)[, retwt]
scnwt <- exprs(mm38_norm)[, scnwt]
dlgnhet <- exprs(mm38_norm)[, dlgnhet]
rethet <- exprs(mm38_norm)[, rethet]
dlgnmut <- exprs(mm38_norm)[, dlgnmut]
retmut <- exprs(mm38_norm)[, retmut]
scnmut <- exprs(mm38_norm)[, scnmut]

Each of the above 8 variables provides 1 column of information. We have 3 baseline comparisons available to us. In each of these we compare one wt sample to another.

## Baseline comparisons
wt_dlgnret <- dlgnwt - retwt
wt_scnret <- scnwt - retwt
wt_dlgnscn <- dlgnwt - scnwt

Simultaneously, we have 5 available translatomes. This are provided by comparing each het or mut to the associated wt. These will therefore receive names: ‘norm’ and ‘ko’ instead of ‘het’ and ‘mut’.

## Translatome context
normret <- rethet - retwt
koret <- retmut - retwt
koscn <- scnmut - scnwt
normdlgn <- dlgnhet - dlgnwt
kodlgn <- dlgnmut - dlgnwt

Given these translatomes, there are a few contrasts of likely interest. These are performed by comparing the relevant translatomes.

Will will split these into 4 separate categories: het vs het, ko vs ko, ko vs het, and ratio vs ratio.

Finally, note that we are being explicitly redundant in these definitions. I am making variable names for both the a/b ratio and the b/a ratio. Thus we have some redundantly redundant (haha) flexibility when deciding on what we want to plot.

## norm vs norm
normdlgn_vs_normret <- normdlgn - normret
normret_vs_normdlgn <- normret - normdlgn
## ko vs ko
koret_vs_kodlgn <- koret - kodlgn
kodlgn_vs_koret <- kodlgn - koret

koret_vs_koscn <- koret - koscn
koscn_vs_koret <- koscn - koret

kodlgn_vs_koscn <- kodlgn - koscn
koscn_vs_kodlgn <- koscn - kodlgn

On the other hand, I am assuming we always want the normals as denominators and kos as numerators.

## ko vs norm
koret_vs_normret <- koret - normret

kodlgn_vs_normdlgn <- kodlgn - normdlgn

Finally, here is the ratio of ratios example I printed above:

I named it ‘normko_retdlgn’ in an attempt to make clear that it is actually: (normret/normdlgn)/(koret/kodlgn)

or stated differently: “norm divided by ko for ret divided by dlgn.”

## ratio of ratios
normko_retdlgn <- normret_vs_normdlgn - koret_vs_kodlgn

1.8 Define a matrix of these values.

My matrix of data will now contain 1 column for each of the above 27 samples/comparisons.

pair_mtrx <- cbind(
  ## Individual samples
  dlgnwt, retwt, scnwt, dlgnhet, rethet, dlgnmut, retmut, scnmut,
  ## Baseline comparisons
  wt_dlgnret, wt_scnret, wt_dlgnscn,
  ## Baseline subtractions
  normdlgn, normret, kodlgn, koret, koscn,
  ## het_vs_het, of which there is only 1 because we do not have hetscn
  normdlgn_vs_normret, normret_vs_normdlgn,
  ## ko_vs_ko, of which we have 3
  koret_vs_kodlgn, kodlgn_vs_koret,
  koret_vs_koscn, koscn_vs_koret,
  kodlgn_vs_koscn, koscn_vs_kodlgn,
  ## ko_vs_het, 3 including one getting around missing hetscn
  koret_vs_normret, kodlgn_vs_normdlgn,
  ## ratio of ratios
  normko_retdlgn)

  ## Baseline subtractions
  normdlgn, normret, kodlgn, koret, koscn,
  ## het_vs_het, of which there is only 1 because we do not have hetscn
  normdlgn_vs_normret, normret_vs_normdlgn,
  ## ko_vs_ko, of which we have 3
  koret_vs_kodlgn, kodlgn_vs_koret,
  koret_vs_koscn, koscn_vs_koret,
  kodlgn_vs_koscn, koscn_vs_kodlgn,
  ## ko_vs_het, 3 including one getting around missing hetscn
  koret_vs_normret, kodlgn_vs_normdlgn,
  ## ratio of ratios
  normko_retdlgn)
  )
## Error: <text>:20:11: unexpected ','
## 19:   ## Baseline subtractions
## 20:   normdlgn,
##               ^

1.9 Cutoffs

I am not sure if we will use these indexes, but I am writing these out as subsets of genes to look at. These indexes are stating that, given a cutoff (0), we want to look at only the genes which have higher x / baseline values than the cutoff.

## Queries about gene subsets.
## These are all in the context of translatomes.
cutoff <- 0
ret_kept_idx <- normret > cutoff & koret > cutoff
scn_kept_idx <- koscn > cutoff
dlgn_kept_idx <- normdlgn > cutoff & kodlgn > cutoff
ret_dlgn_kept_idx <- ret_kept_idx & dlgn_kept_idx
ret_scn_kept_idx <- ret_kept_idx & scn_kept_idx
dlgn_scn_kept_idx <- dlgn_kept_idx & scn_kept_idx

##normdlgn_vs_normret[!ret_dlgn_kept_idx] <- NA
##normret_vs_normdlgn[!ret_dlgn_kept_idx] <- NA
##koret_vs_kodlgn[!ret_dlgn_kept_idx] <- NA
##kodlgn_vs_koret[!ret_dlgn_kept_idx] <- NA
##koret_vs_koscn[!ret_scn_kept_idx] <- NA
##koscn_vs_koret[!ret_scn_kept_idx] <- NA
##kodlgn_vs_koscn[!dlgn_scn_kept_idx] <- NA
##koscn_vs_kodlgn[!dlgn_scn_kept_idx] <- NA
##koret_vs_normret[!ret_kept_idx] <- NA
##kodlgn_vs_normdlgn[!dlgn_kept_idx] <- NA
##normko_retdlgn <- normko_retdlgn[!ret_dlgn_kept_idx] <- NA

1.10 Add the matrix to the differential expression

I will use my function combine_de_tables() to add this information to my existing annotation data along with the results from the statistically valid comparison of the three tissue types.

mm_tables <- sm(combine_de_tables(
  mm_de_sa, extra_annot=pair_mtrx,
  excel=glue::glue("excel/{rundate}mm_salmon_tables-v{ver}.xlsx")))
## Error in combine_de_tables(mm_de_sa, extra_annot = pair_mtrx, excel = glue::glue("excel/{rundate}mm_salmon_tables-v{ver}.xlsx")): object 'pair_mtrx' not found

2 Plots of interesting comparisons

2.1 Retina, het vs. wt.

## Put retina baseline on y axis as black, retina het on x axis as black.
## Then recolor a subset of these as red, the reds are when normret > 0
library(ggplot2)

plotted <- as.data.frame(pair_mtrx[, c("rethet", "retwt")])
## Error in as.data.frame(pair_mtrx[, c("rethet", "retwt")]): object 'pair_mtrx' not found
red_idx <- normret > 0
plotted[, "color"] <- ifelse(red_idx, "red", "black")
## Error in plotted[, "color"] <- ifelse(red_idx, "red", "black"): object 'plotted' not found
plotted[["label"]] <- rownames(plotted)
## Error in rownames(plotted): object 'plotted' not found
ret_hetwt <- ggplot(
  plotted,
  aes_string(x="rethet", y="retwt", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "rethet", y = "retwt", label = "label", : object 'plotted' not found
ret_hetwt
## Error in eval(expr, envir, enclos): object 'ret_hetwt' not found
ret_hetwt_clicky <- ggplotly_url(
  ret_hetwt, "ret_hetwt.html", title="Retina expression, het vs. wt.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(ret_hetwt, "ret_hetwt.html", title = "Retina expression, het vs. wt.", : object 'ret_hetwt' not found

2.2 Retina, mut vs wt.

plotted <- as.data.frame(pair_mtrx[, c("retmut", "retwt")])
## Error in as.data.frame(pair_mtrx[, c("retmut", "retwt")]): object 'pair_mtrx' not found
plotted[["label"]] <- rownames(plotted)
## Error in rownames(plotted): object 'plotted' not found
plotted[["color"]] <- "black"
## Error in plotted[["color"]] <- "black": object 'plotted' not found
ret_mutwt <- ggplot(
  plotted,
  aes_string(x="retmut", y="retwt", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "retmut", y = "retwt", label = "label", : object 'plotted' not found
ret_mutwt
## Error in eval(expr, envir, enclos): object 'ret_mutwt' not found
ret_mutwt_clicky <- ggplotly_url(
  ret_mutwt, "ret_mutwt.html", title="Retina expression, mutant vs. wt.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(ret_mutwt, "ret_mutwt.html", title = "Retina expression, mutant vs. wt.", : object 'ret_mutwt' not found

2.3 dlgn, het vs. wt.

plotted <- as.data.frame(pair_mtrx[, c("dlgnhet", "dlgnwt")])
## Error in as.data.frame(pair_mtrx[, c("dlgnhet", "dlgnwt")]): object 'pair_mtrx' not found
plotted[["label"]] <- rownames(plotted)
## Error in rownames(plotted): object 'plotted' not found
plotted[["color"]] <- "black"
## Error in plotted[["color"]] <- "black": object 'plotted' not found
dlgn_hetwt <- ggplot(
  plotted,
  aes_string(x="dlgnhet", y="dlgnwt", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "dlgnhet", y = "dlgnwt", label = "label", : object 'plotted' not found
dlgn_hetwt
## Error in eval(expr, envir, enclos): object 'dlgn_hetwt' not found
dlgn_hetwt_clicky <- ggplotly_url(
  dlgn_hetwt, "dlgn_hetwt.html", title="dlgn expression, het vs. wt.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(dlgn_hetwt, "dlgn_hetwt.html", title = "dlgn expression, het vs. wt.", : object 'dlgn_hetwt' not found

2.4 dlgn, mut vs. wt.

plotted <- as.data.frame(pair_mtrx[, c("dlgnmut", "dlgnwt")])
## Error in as.data.frame(pair_mtrx[, c("dlgnmut", "dlgnwt")]): object 'pair_mtrx' not found
plotted[["label"]] <- rownames(plotted)
## Error in rownames(plotted): object 'plotted' not found
plotted[["color"]] <- "black"
## Error in plotted[["color"]] <- "black": object 'plotted' not found
dlgn_mutwt <- ggplot(
  plotted,
  aes_string(x="dlgnmut", y="dlgnwt", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "dlgnmut", y = "dlgnwt", label = "label", : object 'plotted' not found
dlgn_mutwt
## Error in eval(expr, envir, enclos): object 'dlgn_mutwt' not found
dlgn_mutwt_clicky <- ggplotly_url(
  dlgn_mutwt, "dlgn_mutwt.html", title="dlgn expression, mut vs. wt.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(dlgn_mutwt, "dlgn_mutwt.html", title = "dlgn expression, mut vs. wt.", : object 'dlgn_mutwt' not found

2.5 scn, mut vs. wt.

plotted <- as.data.frame(pair_mtrx[, c("scnmut", "scnwt")])
## Error in as.data.frame(pair_mtrx[, c("scnmut", "scnwt")]): object 'pair_mtrx' not found
plotted[["label"]] <- rownames(plotted)
## Error in rownames(plotted): object 'plotted' not found
plotted[["color"]] <- "black"
## Error in plotted[["color"]] <- "black": object 'plotted' not found
scn_mutwt <- ggplot(
  plotted,
  aes_string(x="scnmut", y="scnwt", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "scnmut", y = "scnwt", label = "label", : object 'plotted' not found
scn_mutwt
## Error in eval(expr, envir, enclos): object 'scn_mutwt' not found
scn_mutwt_clicky <- ggplotly_url(
  scn_mutwt, "scn_mutwt.html", title="scn expression, mut vs. wt.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(scn_mutwt, "scn_mutwt.html", title = "scn expression, mut vs. wt.", : object 'scn_mutwt' not found

2.6 Axon translatome specific

##  x-axis: normdlgn_vs_normret or normret_vs_normdlgn,
##              ^^^^
##  y-axis: dlgnwt-retwt (baseline dlgn - baseline retina)
plotted <- as.data.frame(pair_mtrx[, c("normdlgn_vs_normret", "wt_dlgnret")])
## Error in as.data.frame(pair_mtrx[, c("normdlgn_vs_normret", "wt_dlgnret")]): object 'pair_mtrx' not found
red_idx <- normret > 0
## Note that this order is opposite of above.
plotted[, "color"] <- ifelse(red_idx, "black", "red")
## Error in plotted[, "color"] <- ifelse(red_idx, "black", "red"): object 'plotted' not found
plotted[["label"]] <- rownames(plotted)
## Error in rownames(plotted): object 'plotted' not found
axon_trans_ret_target <- ggplot(
  plotted,
  aes_string(x="normdlgn_vs_normret", y="wt_dlgnret", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "normdlgn_vs_normret", y = "wt_dlgnret", : object 'plotted' not found
axon_trans_ret_target
## Error in eval(expr, envir, enclos): object 'axon_trans_ret_target' not found
axon_trans_ret_target_clicky <- ggplotly_url(
  axon_trans_ret_target, "axon_trans_ret_target.html", title="Axon translatome, retina target.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(axon_trans_ret_target, "axon_trans_ret_target.html", : object 'axon_trans_ret_target' not found

2.7 DLGN translatome wrt. Retina translatome

plotted <- as.data.frame(pair_mtrx[, c("normret", "normdlgn")])
## Error in as.data.frame(pair_mtrx[, c("normret", "normdlgn")]): object 'pair_mtrx' not found
plotted[["label"]] <- rownames(plotted)
## Error in rownames(plotted): object 'plotted' not found
plotted[["color"]] <- "black"
## Error in plotted[["color"]] <- "black": object 'plotted' not found
normret_normdlgn <- ggplot(
  plotted,
  aes_string(x="normret", y="normdlgn", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "normret", y = "normdlgn", label = "label", : object 'plotted' not found
normret_normdlgn
## Error in eval(expr, envir, enclos): object 'normret_normdlgn' not found
normret_normdlgn_clicky <- ggplotly_url(
  normret_normdlgn, "normret_normdlgn.html", title="Normal retina translatome vs normal dlgn translatome.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(normret_normdlgn, "normret_normdlgn.html", title = "Normal retina translatome vs normal dlgn translatome.", : object 'normret_normdlgn' not found

2.8 koret kodlgn

plotted <- as.data.frame(pair_mtrx[, c("koret", "kodlgn")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
koret_kodlgn <- ggplot(
  plotted,
  aes_string(x="koret", y="kodlgn", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
koret_kodlgn
koret_kodlgn_clicky <- ggplotly_url(
  koret_kodlgn, "koret_kodlgn.html", title="KO retina translatome vs KO dlgn translatome.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
``

## KO Retina vs KO SCN
## Error: attempt to use zero-length variable name
plotted <- as.data.frame(pair_mtrx[, c("koret", "koscn")])
## Error in as.data.frame(pair_mtrx[, c("koret", "koscn")]): object 'pair_mtrx' not found
plotted[["label"]] <- rownames(plotted)
## Error in rownames(plotted): object 'plotted' not found
plotted[["color"]] <- "black"
## Error in plotted[["color"]] <- "black": object 'plotted' not found
koret_koscn <- ggplot(
  plotted,
  aes_string(x="koret", y="koscn", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "koret", y = "koscn", label = "label", : object 'plotted' not found
koret_koscn
## Error in eval(expr, envir, enclos): object 'koret_koscn' not found
koret_koscn_clicky <- ggplotly_url(
  koret_koscn, "koret_koscn.html", title="KO retina translatome vs KO scn translatome.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(koret_koscn, "koret_koscn.html", title = "KO retina translatome vs KO scn translatome.", : object 'koret_koscn' not found

2.9 Norm dlgn ko dlgn

plotted <- as.data.frame(pair_mtrx[, c("normdlgn", "kodlgn")])
## Error in as.data.frame(pair_mtrx[, c("normdlgn", "kodlgn")]): object 'pair_mtrx' not found
plotted[["label"]] <- rownames(plotted)
## Error in rownames(plotted): object 'plotted' not found
plotted[["color"]] <- "black"
## Error in plotted[["color"]] <- "black": object 'plotted' not found
normdlgn_kodlgn <- ggplot(
  plotted,
  aes_string(x="normdlgn", y="kodlgn", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "normdlgn", y = "kodlgn", label = "label", : object 'plotted' not found
normdlgn_kodlgn
## Error in eval(expr, envir, enclos): object 'normdlgn_kodlgn' not found
normdlgn_kodlgn_clicky <- ggplotly_url(
  normdlgn_kodlgn, "normdlgn_kodlgn.html", title="Normal dlgn translatome vs KO dlgn translatome.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(normdlgn_kodlgn, "normdlgn_kodlgn.html", title = "Normal dlgn translatome vs KO dlgn translatome.", : object 'normdlgn_kodlgn' not found

2.10 Norm ret vs ko ret

plotted <- as.data.frame(pair_mtrx[, c("normret", "koret")])
## Error in as.data.frame(pair_mtrx[, c("normret", "koret")]): object 'pair_mtrx' not found
plotted[["label"]] <- rownames(plotted)
## Error in rownames(plotted): object 'plotted' not found
plotted[["color"]] <- "black"
## Error in plotted[["color"]] <- "black": object 'plotted' not found
normret_koret <- ggplot(
  plotted,
  aes_string(x="normret", y="koret", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "normret", y = "koret", label = "label", : object 'plotted' not found
normret_koret
## Error in eval(expr, envir, enclos): object 'normret_koret' not found
normret_koret_clicky <- ggplotly_url(
  normret_koret, "normret_koret.html", title="Normal retina translatome vs KO retina translatome.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(normret_koret, "normret_koret.html", title = "Normal retina translatome vs KO retina translatome.", : object 'normret_koret' not found

2.11 ror

plotted <- as.data.frame(pair_mtrx[, c("normret_vs_normdlgn", "koret_vs_kodlgn")])
## Error in as.data.frame(pair_mtrx[, c("normret_vs_normdlgn", "koret_vs_kodlgn")]): object 'pair_mtrx' not found
plotted[["label"]] <- rownames(plotted)
## Error in rownames(plotted): object 'plotted' not found
plotted[["color"]] <- "black"
## Error in plotted[["color"]] <- "black": object 'plotted' not found
normal_ko_axon_translatome <- ggplot(
  plotted,
  aes_string(x="normret_vs_normdlgn", y="koret_vs_kodlgn", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "normret_vs_normdlgn", y = "koret_vs_kodlgn", : object 'plotted' not found
normal_ko_axon_translatome
## Error in eval(expr, envir, enclos): object 'normal_ko_axon_translatome' not found
normal_ko_axon_translatome_clicky <- ggplotly_url(
  normal_ko_axon_translatome, "normal_ko_axon_translatome.html",
  title="Normal retina ko axon translatome.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(normal_ko_axon_translatome, "normal_ko_axon_translatome.html", : object 'normal_ko_axon_translatome' not found

3 Translatome level comparisons

3.1 Make a generic plotter for this stuff.

translatome_plotter <- function(pair_mtrx, x_axis="koret", y_axis="koscn", lfc=NULL, fc=NULL, linewidth=1.5,
                                up_color="red", down_color="#098534", line_color="#fcba03", alpha=0.5,
                                x_limit=c(-7, 7), y_limit=c(-7, 7)) {
  if (is.null(fc) & is.null(lfc)) {
    message("No fc/lfc was provided, defaulting to 10 fold.")
    lfc <- log2(10)
  } else if (is.null(lfc)) {
    lfc <- log2(fc)
  }
  plotted <- as.data.frame(pair_mtrx[, c(x_axis, y_axis)])
  na_idx <- is.na(plotted)
  plotted[na_idx] <- 0
  up_idx <- plotted[, y_axis] - plotted[, x_axis] >= lfc
  down_idx <- plotted[, y_axis] - plotted[, x_axis] <= (lfc * -1)
  up_genes <- rownames(plotted)[up_idx]
  down_genes <- rownames(plotted)[down_idx]
  ## Note that this order is opposite of above.
  plotted[["color"]] <- "black"
  plotted[up_idx, "color"] <- up_color
  plotted[down_idx, "color"] <- down_color
  plotted[["color"]] <- as.factor(plotted[["color"]])
  levels(plotted[["color"]]) <- c("black", up_color, down_color)
  plt <- ggplot2::ggplot(plotted, aes_string(x=x_axis,
                                             y=y_axis,
                                             color="color")) +
    geom_abline(size=1.1, slope=1, intercept=lfc, color="orange") +
    geom_abline(size=1.1, slope=1, intercept=(-1 * lfc), color="orange") +
    scale_x_continuous(limits=c(x_limit)) +
    scale_y_continuous(limits=c(y_limit)) +
    geom_point(alpha=alpha) +
    scale_color_manual(values=c(down_color, "black", up_color))
  retlist <- list(
    "mtrx" = plotted,
    "ups" = up_genes,
    "downs" = down_genes,
    "plot" = plt)
  return(retlist)
}
  1. X-axis: Always retina.
  2. Y-axis: Always a target tissue.

First plot: KO scn translatome on y axis vs. KO retina translatome on x axis.

3.2 Scn knockout translatome vs retina knockout translatome.

scnko_wrt_retko_translatome <- translatome_plotter(pair_mtrx)
## No fc/lfc was provided, defaulting to 10 fold.
## Error in as.data.frame(pair_mtrx[, c(x_axis, y_axis)]): object 'pair_mtrx' not found
scnko_wrt_retko_translatome$plot
## Error in eval(expr, envir, enclos): object 'scnko_wrt_retko_translatome' not found
scnko_wrt_retko_up_go <- simple_gprofiler(sig_genes=scnko_wrt_retko_translatome$ups,
                                          species="mmusculus")
## Error in simple_gprofiler(sig_genes = scnko_wrt_retko_translatome$ups, : object 'scnko_wrt_retko_translatome' not found
scnko_wrt_retko_down_go <- simple_gprofiler(sig_genes=scnko_wrt_retko_translatome$downs,
                                            species="mmusculus")
## Error in simple_gprofiler(sig_genes = scnko_wrt_retko_translatome$downs, : object 'scnko_wrt_retko_translatome' not found
scnko_wrt_retko_down_go$go
## Error in eval(expr, envir, enclos): object 'scnko_wrt_retko_down_go' not found
scnko_wrt_retko_down_go$kegg
## Error in eval(expr, envir, enclos): object 'scnko_wrt_retko_down_go' not found
scnko_wrt_retko_down_go$corum
## Error in eval(expr, envir, enclos): object 'scnko_wrt_retko_down_go' not found

3.3 dlgn normal translatome vs retina normal translatome.

dlgnnorm_wrt_retnorm_translatome <- translatome_plotter(pair_mtrx,
                                                        x_axis="normret", y_axis="normdlgn")
## No fc/lfc was provided, defaulting to 10 fold.
## Error in as.data.frame(pair_mtrx[, c(x_axis, y_axis)]): object 'pair_mtrx' not found
dlgnnorm_wrt_retnorm_translatome$plot
## Error in eval(expr, envir, enclos): object 'dlgnnorm_wrt_retnorm_translatome' not found
dlgnnorm_wrt_retnorm_up_go <- simple_gprofiler(sig_genes=dlgnnorm_wrt_retnorm_translatome$ups,
                                               species="mmusculus")
## Error in simple_gprofiler(sig_genes = dlgnnorm_wrt_retnorm_translatome$ups, : object 'dlgnnorm_wrt_retnorm_translatome' not found
dlgnnorm_wrt_retnorm_down_go <- simple_gprofiler(sig_genes=dlgnnorm_wrt_retnorm_translatome$downs,
                                                 species="mmusculus")
## Error in simple_gprofiler(sig_genes = dlgnnorm_wrt_retnorm_translatome$downs, : object 'dlgnnorm_wrt_retnorm_translatome' not found
dlgnnorm_wrt_retnorm_down_go$go
## Error in eval(expr, envir, enclos): object 'dlgnnorm_wrt_retnorm_down_go' not found
dlgnnorm_wrt_retnorm_down_go$pvalue_plots$bpp_plot_over
## Error in eval(expr, envir, enclos): object 'dlgnnorm_wrt_retnorm_down_go' not found
dlgnnorm_wrt_retnorm_down_go$pvalue_plots$ccp_plot_over
## Error in eval(expr, envir, enclos): object 'dlgnnorm_wrt_retnorm_down_go' not found

3.4 dlgn knockout translatome vs retina knockout translatome.

dlgnko_wrt_retko_translatome <- translatome_plotter(pair_mtrx,
                                                    x_axis="koret", y_axis="kodlgn")
## No fc/lfc was provided, defaulting to 10 fold.
## Error in as.data.frame(pair_mtrx[, c(x_axis, y_axis)]): object 'pair_mtrx' not found
dlgnko_wrt_retko_translatome$plot
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_retko_translatome' not found
dlgnko_wrt_retko_up_go <- simple_gprofiler(sig_genes=dlgnko_wrt_retko_translatome$ups,
                                           species="mmusculus")
## Error in simple_gprofiler(sig_genes = dlgnko_wrt_retko_translatome$ups, : object 'dlgnko_wrt_retko_translatome' not found
dlgnko_wrt_retko_up_go$go
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_retko_up_go' not found
dlgnko_wrt_retko_up_go$kegg
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_retko_up_go' not found
dlgnko_wrt_retko_up_go$reac
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_retko_up_go' not found
dlgnko_wrt_retko_down_go <- simple_gprofiler(sig_genes=dlgnko_wrt_retko_translatome$downs,
                                             species="mmusculus")
## Error in simple_gprofiler(sig_genes = dlgnko_wrt_retko_translatome$downs, : object 'dlgnko_wrt_retko_translatome' not found
dlgnko_wrt_retko_down_go$go
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_retko_down_go' not found
dlgnko_wrt_retko_down_go$kegg
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_retko_down_go' not found
dlgnko_wrt_retko_down_go$reac
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_retko_down_go' not found

3.5 Normal dlgn translatome vs knockout dlgn translatome.

dlgnnorm_wrt_dlgnko_translatome <- translatome_plotter(pair_mtrx,
                                                       x_axis="normdlgn",
                                                       y_axis="kodlgn")
## No fc/lfc was provided, defaulting to 10 fold.
## Error in as.data.frame(pair_mtrx[, c(x_axis, y_axis)]): object 'pair_mtrx' not found
dlgnnorm_wrt_dlgnko_translatome$plot
## Error in eval(expr, envir, enclos): object 'dlgnnorm_wrt_dlgnko_translatome' not found
dlgnnorm_wrt_dlgnko_up_go <- simple_gprofiler(sig_genes=dlgnnorm_wrt_dlgnko_translatome$ups,
                                              species="mmusculus")
## Error in simple_gprofiler(sig_genes = dlgnnorm_wrt_dlgnko_translatome$ups, : object 'dlgnnorm_wrt_dlgnko_translatome' not found
dlgnnorm_wrt_dlgnko_down_go <- simple_gprofiler(sig_genes=dlgnnorm_wrt_dlgnko_translatome$downs,
                                                species="mmusculus")
## Error in simple_gprofiler(sig_genes = dlgnnorm_wrt_dlgnko_translatome$downs, : object 'dlgnnorm_wrt_dlgnko_translatome' not found
dlgnnorm_wrt_dlgnko_down_go$go
## Error in eval(expr, envir, enclos): object 'dlgnnorm_wrt_dlgnko_down_go' not found

3.6 dlgn knockout translatome vs. scn knockout translatome.

dlgnko_wrt_scnko_translatome <- translatome_plotter(pair_mtrx,
                                                    x_axis="kodlgn",
                                                    y_axis="koscn")
## No fc/lfc was provided, defaulting to 10 fold.
## Error in as.data.frame(pair_mtrx[, c(x_axis, y_axis)]): object 'pair_mtrx' not found
dlgnko_wrt_scnko_translatome$plot
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_scnko_translatome' not found
dlgnko_wrt_scnko_up_go <- simple_gprofiler(sig_genes=dlgnko_wrt_scnko_translatome$ups,
                                           species="mmusculus")
## Error in simple_gprofiler(sig_genes = dlgnko_wrt_scnko_translatome$ups, : object 'dlgnko_wrt_scnko_translatome' not found
dlgnko_wrt_scnko_up_go$go
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_scnko_up_go' not found
dlgnko_wrt_scnko_up_go$pvalue_plots$bpp_plot_over
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_scnko_up_go' not found
dlgnko_wrt_scnko_down_go <- simple_gprofiler(sig_genes=dlgnko_wrt_scnko_translatome$downs,
                                             species="mmusculus")
## Error in simple_gprofiler(sig_genes = dlgnko_wrt_scnko_translatome$downs, : object 'dlgnko_wrt_scnko_translatome' not found
dlgnko_wrt_scnko_down_go$go
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_scnko_down_go' not found

4 Some pictures

As I understand it, there is some interest in an ontology search using the ratio of ratios.

ror <- normko_retdlgn
up_idx <- ror >= 1
down_idx <- ror <= -1
ror_up <- ror[up_idx]
length(ror_up)
## [1] 1877
ror_down <- ror[down_idx]
length(ror_down)
## [1] 476
ror_gprofiler_up <- simple_gprofiler(
  sig_genes=ror_up, species="mmusculus",
  excel=glue::glue("excel/{rundate}mm_ror_gpfoiler_up-v{ver}.xlsx"))
## Performing gProfiler GO search of 1877 genes against mmusculus.
## GO search found 271 hits.
## Performing gProfiler KEGG search of 1877 genes against mmusculus.
## KEGG search found 8 hits.
## Performing gProfiler REAC search of 1877 genes against mmusculus.
## REAC search found 11 hits.
## Performing gProfiler MI search of 1877 genes against mmusculus.
## MI search found 0 hits.
## Performing gProfiler TF search of 1877 genes against mmusculus.
## TF search found 376 hits.
## Performing gProfiler CORUM search of 1877 genes against mmusculus.
## CORUM search found 1 hits.
## Performing gProfiler HP search of 1877 genes against mmusculus.
## HP search found 5 hits.
## Writing data to: excel/20201008mm_ror_gpfoiler_up-v20200114.xlsx.
## Finished writing data.
ror_gprofiler_up$pvalue_plots$mfp_plot_over

ror_gprofiler_up$pvalue_plots$bpp_plot_over

ror_gprofiler_up$pvalue_plots$ccp_plot_over

ror_gprofiler_up$pvalue_plots$tf_plot_over
## Warning: Removed 1 rows containing missing values (geom_col).

ror_gprofiler_up$pvalue_plots$hp_plot_over

ror_gprofiler_down <- simple_gprofiler(
  sig_genes=ror_down, species="mmusculus",
  excel=glue::glue("excel/{rundate}mm_ror_gpfoiler_down-v{ver}.xlsx"))
## Performing gProfiler GO search of 476 genes against mmusculus.
## GO search found 82 hits.
## Performing gProfiler KEGG search of 476 genes against mmusculus.
## KEGG search found 7 hits.
## Performing gProfiler REAC search of 476 genes against mmusculus.
## REAC search found 2 hits.
## Performing gProfiler MI search of 476 genes against mmusculus.
## MI search found 0 hits.
## Performing gProfiler TF search of 476 genes against mmusculus.
## TF search found 6 hits.
## Performing gProfiler CORUM search of 476 genes against mmusculus.
## CORUM search found 0 hits.
## Performing gProfiler HP search of 476 genes against mmusculus.
## HP search found 0 hits.
## Writing data to: excel/20201008mm_ror_gpfoiler_down-v20200114.xlsx.
## Finished writing data.
ror_gprofiler_down$pvalue_plots$mfp_plot_over

ror_gprofiler_down$pvalue_plots$bpp_plot_over

ror_gprofiler_down$pvalue_plots$reactome_plot_over

ror_gprofiler_down$pvalue_plots$ccp_plot_over

ror_gprofiler_down$pvalue_plots$tf_plot_over

pander::pander(sessionInfo())
message(paste0("This is hpgltools commit: ", get_git_commit()))
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
tmp <- sm(saveme(filename=this_save))
loadme(filename=this_save)
---
title: "M. musculus 3 cell types, 1 timepoint, 3 genotypes, and 1 replicate."
author: "atb abelew@gmail.com"
date: "`r Sys.Date()`"
output:
  html_document:
    code_download: true
    code_folding: show
    fig_caption: true
    fig_height: 7
    fig_width: 7
    highlight: tango
    keep_md: false
    mode: selfcontained
    number_sections: true
    self_contained: true
    theme: readable
    toc: true
    toc_float:
      collapsed: false
      smooth_scroll: false
  rmdformats::readthedown:
    code_download: true
    code_folding: show
    df_print: paged
    fig_caption: true
    fig_height: 7
    fig_width: 7
    highlight: tango
    width: 300
    keep_md: false
    mode: selfcontained
    toc_float: true
  BiocStyle::html_document:
    code_download: true
    code_folding: show
    fig_caption: true
    fig_height: 7
    fig_width: 7
    highlight: tango
    keep_md: false
    mode: selfcontained
    toc_float: true
---

<style type="text/css">
body, td {
  font-size: 16px;
}
code.r{
  font-size: 16px;
}
pre {
 font-size: 16px
}
</style>

```{r options, include=FALSE}
library("hpgltools")
tt <- devtools::load_all("/data/hpgltools")
knitr::opts_knit$set(width=120,
                     progress=TRUE,
                     verbose=TRUE,
                     echo=TRUE)
knitr::opts_chunk$set(error=TRUE,
                      dpi=96)
old_options <- options(digits=4,
                       stringsAsFactors=FALSE,
                       knitr.duplicate.label="allow")
ggplot2::theme_set(ggplot2::theme_bw(base_size=10))
rundate <- format(Sys.Date(), format="%Y%m%d")
previous_file <- "undefined.Rmd"
ver <- "20200114"

##tmp <- sm(loadme(filename=paste0(gsub(pattern="\\.Rmd", replace="", x=previous_file), "-v", ver, ".rda.xz")))
rmd_file <- "index.Rmd"
```

# M. musculus

This will be a very minimal analysis until we get some replicates.

## Annotations

I am using mm38_100.

```{r annotations}
## My load_biomart_annotations() function defaults to human, so that will be quick.
mm_annot <- load_biomart_annotations(species="mmusculus")
mm_annot <- mm_annot[["annotation"]]
mm_annot[["txid"]] <- paste0(mm_annot[["ensembl_transcript_id"]], ".", mm_annot[["version"]])
rownames(mm_annot) <- make.names(mm_annot[["ensembl_gene_id"]], unique=TRUE)

tx_gene_map <- mm_annot[, c("txid", "ensembl_gene_id")]
```

So, I now have a table of mouse annotations.

## Metadata

I am going to write a quick sample sheet in the current working directory called
'all_samples.xlsx' and put the names of the count tables in it.

## Create expressionsets

Here I combine the metadata, count data, and annotations.

It is worth noting that the gene IDs from htseq-count probably do not match the
annotations retrieved because they are likely exon-based rather than gene
based.  This is not really a problem, but don't forget it!

## Hisat2 expressionset

```{r hisat2}
hisat_annot <- mm_annot
rownames(hisat_annot) <- paste0("gene:", rownames(hisat_annot))
mm38_hisat <- create_expt("sample_sheets/all_samples.xlsx",
                          gene_info=hisat_annot)

plot_libsize(mm38_hisat)$plot
mm38_first <- normalize_expt(mm38_hisat, filter=TRUE, convert="cpm", norm="quant", transform="log2")
pp(file="pca_norm.png", image=plot_pca(mm38_first)$plot)

mm38_norm <- normalize_expt(mm38_hisat, filter=TRUE, convert="cpm", batch="svaseq")
mm38_norm <- normalize_expt(mm38_norm, transform="log2")
pp(file="pca_sva.png", image=plot_pca(mm38_norm)$plot)

new <- subset_expt(mm38_hisat, subset="batch=='b202009'")
new_norm <- normalize_expt(new, filter=TRUE, convert="cpm", norm="quant", transform="log2")
plot_pca(new_norm)$plot
```

```{r expt}
mm38_salmon <- sm(create_expt("sample_sheets/all_samples.xlsx", tx_gene_map=tx_gene_map,
                              gene_info=mm_annot, file_column="salmonfile"))

mmtx_annot <- mm_annot
rownames(mmtx_annot) <- mm_annot[["txid"]]
mm38_saltx <- sm(create_expt("sample_sheets/all_samples.xlsx",
                             gene_info=mmtx_annot, file_column="salmonfile"))
```

## Query expressionsets

In this block I will calculate all the diagnostic plots, but not show them.  I
will show them next with a little annotation.

I will leave the output for the first of each invocation and silence it for the second.

### Initial salmon plots

```{r query_salmon, fig.show="hide"}
mm38_plots_sa <- sm(graph_metrics(mm38_salmon))

mm38_norm_sa <- normalize_expt(mm38_salmon, norm="quant", convert="cpm",
                            transform="log2", filter=TRUE)

mm38n_plots_sa <- sm(graph_metrics(mm38_norm_sa))
```

```{r show_plots_salmon}
mm38_plots_sa$legend
mm38_plots_sa$libsize
mm38_plots_sa$nonzero
mm38n_plots_sa$density
mm38n_plots_sa$pc_plot
```

### Initial hisat2 plots

```{r query_hisat, fig.show="hide"}
mm38_plots_hi <- sm(graph_metrics(mm38_hisat))

mm38_norm_hi <- normalize_expt(mm38_hisat, norm="quant", convert="cpm",
                               transform="log2", filter=TRUE)

mm38n_plots_hi <- sm(graph_metrics(mm38_norm_hi))
```

```{r show_plots_hisat}
mm38_plots_hi$libsize
mm38_plots_hi$nonzero
mm38n_plots_hi$density
mm38n_plots_hi$pc_plot
```

## Do a simple DE

The only interesting DE I see in this is to compare the retinas to the dlgns.
I can treat them as replicates and compare.

These differential expression analyses are _EXPLICITLY_ _NOT_ what you care
about.  However, they are useful for two purposes:

1.  Seeing that the three tissue types are indeed different.
2.  Setting up the table of results with appropriate rows/columns of (rows)genes
    and (columns) annotations.  We will therefore add to these tables the
    results of the expression analyses that you actually do care about.

When we receive full replicate sets, this cheater method of encapsulating the
data will not longer be required.

### With salmon

```{r de_sa, fig.show="hide"}
mm_sa <- set_expt_conditions(mm38_salmon, fact="celltype")
mm_norm_sa <- sm(normalize_expt(mm_sa, convert="rpkm", transform="log2", column="cds_length"))
plot_pca(mm_norm_sa)$plot

mm_de_sa <- all_pairwise(mm_sa, model_batch=FALSE)
```

### With hisat2

```{r de_hi, fig.show="hide"}
## mm_hi <- set_expt_conditions(mm38_hisat, fact="celltype")
mm_hi <- mm38_hisat
mm_norm_hi <- sm(normalize_expt(mm_hi, convert="rpkm", transform="log2", filter=TRUE,
                                column="cds_length", batch="svaseq"))
plot_pca(mm_norm_hi)$plot

keepers <- list(
  "wt_dlgnret" = c("wt_dlgn", "wt_retina"),
  "wt_scnret" = c("wt_scn", "wt_retina"),
  "wt_dlgnscn" = c("wt_dlgn", "wt_scn"),
  "normret" = c("het_retina", "wt_retina"),
  "koret" = c("ko_retina", "wt_retina"),
  "normscn" = c("het_scn", "wt_scn"),
  "koscn" = c("ko_scn", "wt_scn"),
  "normdlgn" = c("het_dlgn", "wt_dlgn"),
  "kodlgn" = c("ko_dlgn", "wt_dlgn"),
  "normdlgn_vs_normret" = c("normdlgn", "normret"),
  "normscn_vs_normret" = c("normscn", "normret"),
  "kodlgn_vs_koret" = c("kodlgn", "koret"),
  "koscn_vs_koret" = c("koscn", "koret"),
  "koscn_vs_kodlgn" = c("koscn", "kodlgn"),
  "koret_vs_normret" = c("ko_retina", "het_retina"),
  "koscn_vs_normscn" = c("ko_scn", "het_scn"),
  "kodlgn_vs_normdlgn" = c("ko_dlgn", "het_dlgn"),
  "normko_retdlgn" = c("normdlgn_vs_normret", "kodlgn_vs_koret"),
  "normko_retscn" = c("normscn_vs_normret", "koscn_vs_koret"))
extras <- "normdlgn_vs_normret = (het_dlgn-wt_dlgn)-(het_retina-wt_retina),
           normscn_vs_normret = (het_scn-wt_scn)-(het_retina-wt_retina),
           kodlgn_vs_koret = (ko_dlgn-wt_dlgn)-(ko_retina-wt_retina),
           koscn_vs_koret = (ko_scn-wt_scn)-(ko_retina-wt_retina),
           koscn_vs_kodlgn = (ko_scn-wt_scn)-(ko_dlgn-wt_dlgn),
           normko_retdlgn = ((het_dlgn-wt_dlgn)-(het_retina-wt_retina)) - ((ko_dlgn-wt_dlgn)-(ko_retina-wt_retina)),
           normko_retscn = ((het_scn-wt_scn)-(het_retina-wt_retina)) - ((ko_scn-wt_scn)-(ko_retina-wt_retina))"
mm_filt <- normalize_expt(mm_hi, filter=TRUE)
mm_test <- limma_pairwise(mm_filt, model_batch="svaseq", extra_contrasts=extras)
mm_test <- deseq_pairwise(mm_filt, model_batch="svaseq", extra_contrasts=extras)
mm_test <- edger_pairwise(mm_filt, model_batch="svaseq", extra_contrasts=extras)
mm_test <- ebseq_pairwise(mm_filt, model_batch="svaseq", extra_contrasts=extras)
mm_de_hi <- sm(all_pairwise(mm_filt, model_batch="svaseq", parallel=FALSE, extra_contrasts=extras))
mm_de_tables <- combine_de_tables(mm_de_hi, excel="excel/testing_202010.xlsx")
```

## Set up for initial analysis

Until we get full replicates, I will do simple subtractions.

### Term definition

In an attempt to keep some clarity in the terms used, I want to define them
now.  There are three contexts in which we will consider the data:

1.  The individual sample type.  When considering individual samples, I will use
    three terms in this and only this context: wild-type (wt), het, and mut.

2.  The individual translatome.  These are defines as something / baseline.  I
    will exclusively call the wt samples 'baseline' when speaking in this
    context.  I will exclusively state 'normal' when referring to het / wt
    samples, and I will state 'ko' when referring to mut / wt samples in the
    translatome context.

3.  Translatome vs. translatome.  Whenever comparing translatomes, I will use
    the names as in #2 and always put the numerator first when writing the name
    of a comparison.

The most complex example of the above nomenclature is:

"normko_retdlgn is defined as normret_vs_normdlgn - koret_vs_kodlgn"

This states we are examining at the translatome context:
   (norm(retina translatome) - norm(dlgn translatome)) -
   (ko(retina translatome) - ko(dlgn translatome))

Which in turn is synonymous to the following at the sample context:
  ((rethet - retwt)  -  (dlgnhet - dlgnwt))  -
  ((retko - retwt)  -  (dlgnko - dlgnwt))

Now let us associate the various variable names with the appropriate samples:

```{r sample_names}
dlgnwt <- "iprgc_01"
retwt <- "iprgc_02"
scnwt <- "iprgc_03"

dlgnhet <- "iprgc_04"
rethet <- "iprgc_05"
scnhet <- NULL  ## Does not yet exist.

dlgnmut <- "iprgc_06"
retmut <- "iprgc_07"
scnmut <- "iprgc_08"
```

Give these variable names, now lets associate columns of the expression data
with them.  These are at the sample context, so the appropriate names are:
'wt', 'het', and 'mut'.  In each case I will prefix the genotype with the tissue
type: 'ret', 'dlgn', and 'scn'.  Thus 'retwt' refers to the sample used
to calculate the translatome retina baseline; in contrast 'dlgnmut' is the
sample which provides the dlgn knockout.

```{r sample_columns}
## Sample context
mm38_norm <- mm_norm_sa
dlgnwt <- exprs(mm38_norm)[, dlgnwt]
retwt <- exprs(mm38_norm)[, retwt]
scnwt <- exprs(mm38_norm)[, scnwt]
dlgnhet <- exprs(mm38_norm)[, dlgnhet]
rethet <- exprs(mm38_norm)[, rethet]
dlgnmut <- exprs(mm38_norm)[, dlgnmut]
retmut <- exprs(mm38_norm)[, retmut]
scnmut <- exprs(mm38_norm)[, scnmut]
```

Each of the above 8 variables provides 1 column of information. We have 3
baseline comparisons available to us.  In each of these we compare one wt
sample to another.

```{r baseline_comparisons}
## Baseline comparisons
wt_dlgnret <- dlgnwt - retwt
wt_scnret <- scnwt - retwt
wt_dlgnscn <- dlgnwt - scnwt
```

Simultaneously, we have 5 available translatomes.  This are provided by
comparing each het or mut to the associated wt.  These will therefore receive
names: 'norm' and 'ko' instead of 'het' and 'mut'.

```{r translatomes}
## Translatome context
normret <- rethet - retwt
koret <- retmut - retwt
koscn <- scnmut - scnwt
normdlgn <- dlgnhet - dlgnwt
kodlgn <- dlgnmut - dlgnwt
```

Given these translatomes, there are a few contrasts of likely interest.  These
are performed by comparing the relevant translatomes.

Will will split these into 4 separate categories:
het vs het, ko vs ko, ko vs het, and ratio vs ratio.

Finally, note that we are being explicitly redundant in these definitions.  I am
making variable names for both the a/b ratio and the b/a ratio.  Thus we have
some redundantly redundant (haha) flexibility when deciding on what we want to plot.

```{r norm_vs_norm}
## norm vs norm
normdlgn_vs_normret <- normdlgn - normret
normret_vs_normdlgn <- normret - normdlgn
```

```{r ko_vs_ko}
## ko vs ko
koret_vs_kodlgn <- koret - kodlgn
kodlgn_vs_koret <- kodlgn - koret

koret_vs_koscn <- koret - koscn
koscn_vs_koret <- koscn - koret

kodlgn_vs_koscn <- kodlgn - koscn
koscn_vs_kodlgn <- koscn - kodlgn
```

On the other hand, I am assuming we always want the normals as denominators and
kos as numerators.

```{r ko_vs_norm}
## ko vs norm
koret_vs_normret <- koret - normret

kodlgn_vs_normdlgn <- kodlgn - normdlgn
```

Finally, here is the ratio of ratios example I printed above:

I named it 'normko_retdlgn' in an attempt to make clear that it is actually:
 (normret/normdlgn)/(koret/kodlgn)

or stated differently: "norm divided by ko for ret divided by dlgn."

```{r ror}
## ratio of ratios
normko_retdlgn <- normret_vs_normdlgn - koret_vs_kodlgn
```

## Define a matrix of these values.

My matrix of data will now contain 1 column for each of the above 27
samples/comparisons.

```{r matrix_of_values}
pair_mtrx <- cbind(
  ## Individual samples
  dlgnwt, retwt, scnwt, dlgnhet, rethet, dlgnmut, retmut, scnmut,
  ## Baseline comparisons
  wt_dlgnret, wt_scnret, wt_dlgnscn,
  ## Baseline subtractions
  normdlgn, normret, kodlgn, koret, koscn,
  ## het_vs_het, of which there is only 1 because we do not have hetscn
  normdlgn_vs_normret, normret_vs_normdlgn,
  ## ko_vs_ko, of which we have 3
  koret_vs_kodlgn, kodlgn_vs_koret,
  koret_vs_koscn, koscn_vs_koret,
  kodlgn_vs_koscn, koscn_vs_kodlgn,
  ## ko_vs_het, 3 including one getting around missing hetscn
  koret_vs_normret, kodlgn_vs_normdlgn,
  ## ratio of ratios
  normko_retdlgn)

  ## Baseline subtractions
  normdlgn, normret, kodlgn, koret, koscn,
  ## het_vs_het, of which there is only 1 because we do not have hetscn
  normdlgn_vs_normret, normret_vs_normdlgn,
  ## ko_vs_ko, of which we have 3
  koret_vs_kodlgn, kodlgn_vs_koret,
  koret_vs_koscn, koscn_vs_koret,
  kodlgn_vs_koscn, koscn_vs_kodlgn,
  ## ko_vs_het, 3 including one getting around missing hetscn
  koret_vs_normret, kodlgn_vs_normdlgn,
  ## ratio of ratios
  normko_retdlgn)
  )
```

## Cutoffs

I am not sure if we will use these indexes, but I am writing these out as
subsets of genes to look at.  These indexes are stating that, given a cutoff
(0), we want to look at only the genes which have higher x / baseline values
than the cutoff.


```{r cutoffs}
## Queries about gene subsets.
## These are all in the context of translatomes.
cutoff <- 0
ret_kept_idx <- normret > cutoff & koret > cutoff
scn_kept_idx <- koscn > cutoff
dlgn_kept_idx <- normdlgn > cutoff & kodlgn > cutoff
ret_dlgn_kept_idx <- ret_kept_idx & dlgn_kept_idx
ret_scn_kept_idx <- ret_kept_idx & scn_kept_idx
dlgn_scn_kept_idx <- dlgn_kept_idx & scn_kept_idx

##normdlgn_vs_normret[!ret_dlgn_kept_idx] <- NA
##normret_vs_normdlgn[!ret_dlgn_kept_idx] <- NA
##koret_vs_kodlgn[!ret_dlgn_kept_idx] <- NA
##kodlgn_vs_koret[!ret_dlgn_kept_idx] <- NA
##koret_vs_koscn[!ret_scn_kept_idx] <- NA
##koscn_vs_koret[!ret_scn_kept_idx] <- NA
##kodlgn_vs_koscn[!dlgn_scn_kept_idx] <- NA
##koscn_vs_kodlgn[!dlgn_scn_kept_idx] <- NA
##koret_vs_normret[!ret_kept_idx] <- NA
##kodlgn_vs_normdlgn[!dlgn_kept_idx] <- NA
##normko_retdlgn <- normko_retdlgn[!ret_dlgn_kept_idx] <- NA
```

## Add the matrix to the differential expression

I will use my function combine_de_tables() to add this information to my
existing annotation data along with the results from the statistically valid
comparison of the three tissue types.

```{r add_matrix_de, fig.show="hide"}
mm_tables <- sm(combine_de_tables(
  mm_de_sa, extra_annot=pair_mtrx,
  excel=glue::glue("excel/{rundate}mm_salmon_tables-v{ver}.xlsx")))
```

# Plots of interesting comparisons

## Retina, het vs. wt.

```{r ret_hetwt_plot}
## Put retina baseline on y axis as black, retina het on x axis as black.
## Then recolor a subset of these as red, the reds are when normret > 0
library(ggplot2)

plotted <- as.data.frame(pair_mtrx[, c("rethet", "retwt")])
red_idx <- normret > 0
plotted[, "color"] <- ifelse(red_idx, "red", "black")
plotted[["label"]] <- rownames(plotted)
ret_hetwt <- ggplot(
  plotted,
  aes_string(x="rethet", y="retwt", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
ret_hetwt
ret_hetwt_clicky <- ggplotly_url(
  ret_hetwt, "ret_hetwt.html", title="Retina expression, het vs. wt.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
```

## Retina, mut vs wt.

```{r ret_hetmut_plot}
plotted <- as.data.frame(pair_mtrx[, c("retmut", "retwt")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
ret_mutwt <- ggplot(
  plotted,
  aes_string(x="retmut", y="retwt", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
ret_mutwt
ret_mutwt_clicky <- ggplotly_url(
  ret_mutwt, "ret_mutwt.html", title="Retina expression, mutant vs. wt.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
```

## dlgn, het vs. wt.

```{r dlgn_het_wt_plot}
plotted <- as.data.frame(pair_mtrx[, c("dlgnhet", "dlgnwt")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
dlgn_hetwt <- ggplot(
  plotted,
  aes_string(x="dlgnhet", y="dlgnwt", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
dlgn_hetwt
dlgn_hetwt_clicky <- ggplotly_url(
  dlgn_hetwt, "dlgn_hetwt.html", title="dlgn expression, het vs. wt.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
```

## dlgn, mut vs. wt.

```{r dlgn_mut_wt_plot}
plotted <- as.data.frame(pair_mtrx[, c("dlgnmut", "dlgnwt")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
dlgn_mutwt <- ggplot(
  plotted,
  aes_string(x="dlgnmut", y="dlgnwt", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
dlgn_mutwt
dlgn_mutwt_clicky <- ggplotly_url(
  dlgn_mutwt, "dlgn_mutwt.html", title="dlgn expression, mut vs. wt.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
```

## scn, mut vs. wt.

```{r scn_mut_wt_plot}
plotted <- as.data.frame(pair_mtrx[, c("scnmut", "scnwt")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
scn_mutwt <- ggplot(
  plotted,
  aes_string(x="scnmut", y="scnwt", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
scn_mutwt
scn_mutwt_clicky <- ggplotly_url(
  scn_mutwt, "scn_mutwt.html", title="scn expression, mut vs. wt.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
```

## Axon translatome specific

```{r axon_translatome_plot}
##  x-axis: normdlgn_vs_normret or normret_vs_normdlgn,
##              ^^^^
##  y-axis: dlgnwt-retwt (baseline dlgn - baseline retina)
plotted <- as.data.frame(pair_mtrx[, c("normdlgn_vs_normret", "wt_dlgnret")])
red_idx <- normret > 0
## Note that this order is opposite of above.
plotted[, "color"] <- ifelse(red_idx, "black", "red")
plotted[["label"]] <- rownames(plotted)
axon_trans_ret_target <- ggplot(
  plotted,
  aes_string(x="normdlgn_vs_normret", y="wt_dlgnret", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
axon_trans_ret_target
axon_trans_ret_target_clicky <- ggplotly_url(
  axon_trans_ret_target, "axon_trans_ret_target.html", title="Axon translatome, retina target.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
```

## DLGN translatome wrt. Retina translatome

```{r dlgn_translatome_plot}
plotted <- as.data.frame(pair_mtrx[, c("normret", "normdlgn")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
normret_normdlgn <- ggplot(
  plotted,
  aes_string(x="normret", y="normdlgn", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
normret_normdlgn
normret_normdlgn_clicky <- ggplotly_url(
  normret_normdlgn, "normret_normdlgn.html", title="Normal retina translatome vs normal dlgn translatome.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
```

## koret kodlgn

```{r koret_kodlgn_somethingsomethingplot}
plotted <- as.data.frame(pair_mtrx[, c("koret", "kodlgn")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
koret_kodlgn <- ggplot(
  plotted,
  aes_string(x="koret", y="kodlgn", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
koret_kodlgn
koret_kodlgn_clicky <- ggplotly_url(
  koret_kodlgn, "koret_kodlgn.html", title="KO retina translatome vs KO dlgn translatome.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
``

## KO Retina vs KO SCN

```{r koret_koscn_plot_rawr}
plotted <- as.data.frame(pair_mtrx[, c("koret", "koscn")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
koret_koscn <- ggplot(
  plotted,
  aes_string(x="koret", y="koscn", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
koret_koscn
koret_koscn_clicky <- ggplotly_url(
  koret_koscn, "koret_koscn.html", title="KO retina translatome vs KO scn translatome.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
```

## Norm dlgn ko dlgn

```{r normdlgn_kodlgn_plot}
plotted <- as.data.frame(pair_mtrx[, c("normdlgn", "kodlgn")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
normdlgn_kodlgn <- ggplot(
  plotted,
  aes_string(x="normdlgn", y="kodlgn", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
normdlgn_kodlgn
normdlgn_kodlgn_clicky <- ggplotly_url(
  normdlgn_kodlgn, "normdlgn_kodlgn.html", title="Normal dlgn translatome vs KO dlgn translatome.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
```

## Norm ret vs ko ret

```{r norm_ret_ko_ret_plot}
plotted <- as.data.frame(pair_mtrx[, c("normret", "koret")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
normret_koret <- ggplot(
  plotted,
  aes_string(x="normret", y="koret", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
normret_koret
normret_koret_clicky <- ggplotly_url(
  normret_koret, "normret_koret.html", title="Normal retina translatome vs KO retina translatome.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
```

## ror

```{r ror_plot}
plotted <- as.data.frame(pair_mtrx[, c("normret_vs_normdlgn", "koret_vs_kodlgn")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
normal_ko_axon_translatome <- ggplot(
  plotted,
  aes_string(x="normret_vs_normdlgn", y="koret_vs_kodlgn", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
normal_ko_axon_translatome
normal_ko_axon_translatome_clicky <- ggplotly_url(
  normal_ko_axon_translatome, "normal_ko_axon_translatome.html",
  title="Normal retina ko axon translatome.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
```

# Translatome level comparisons

## Make a generic plotter for this stuff.

```{r generic_plotter}
translatome_plotter <- function(pair_mtrx, x_axis="koret", y_axis="koscn", lfc=NULL, fc=NULL, linewidth=1.5,
                                up_color="red", down_color="#098534", line_color="#fcba03", alpha=0.5,
                                x_limit=c(-7, 7), y_limit=c(-7, 7)) {
  if (is.null(fc) & is.null(lfc)) {
    message("No fc/lfc was provided, defaulting to 10 fold.")
    lfc <- log2(10)
  } else if (is.null(lfc)) {
    lfc <- log2(fc)
  }
  plotted <- as.data.frame(pair_mtrx[, c(x_axis, y_axis)])
  na_idx <- is.na(plotted)
  plotted[na_idx] <- 0
  up_idx <- plotted[, y_axis] - plotted[, x_axis] >= lfc
  down_idx <- plotted[, y_axis] - plotted[, x_axis] <= (lfc * -1)
  up_genes <- rownames(plotted)[up_idx]
  down_genes <- rownames(plotted)[down_idx]
  ## Note that this order is opposite of above.
  plotted[["color"]] <- "black"
  plotted[up_idx, "color"] <- up_color
  plotted[down_idx, "color"] <- down_color
  plotted[["color"]] <- as.factor(plotted[["color"]])
  levels(plotted[["color"]]) <- c("black", up_color, down_color)
  plt <- ggplot2::ggplot(plotted, aes_string(x=x_axis,
                                             y=y_axis,
                                             color="color")) +
    geom_abline(size=1.1, slope=1, intercept=lfc, color="orange") +
    geom_abline(size=1.1, slope=1, intercept=(-1 * lfc), color="orange") +
    scale_x_continuous(limits=c(x_limit)) +
    scale_y_continuous(limits=c(y_limit)) +
    geom_point(alpha=alpha) +
    scale_color_manual(values=c(down_color, "black", up_color))
  retlist <- list(
    "mtrx" = plotted,
    "ups" = up_genes,
    "downs" = down_genes,
    "plot" = plt)
  return(retlist)
}
```

1. X-axis: Always retina.
2. Y-axis: Always a target tissue.

First plot: KO scn translatome on y axis vs. KO retina translatome on x axis.

## Scn knockout translatome vs retina knockout translatome.

```{r scn_ko_wrt_retina_ko_translatome_gprofiler}
scnko_wrt_retko_translatome <- translatome_plotter(pair_mtrx)
scnko_wrt_retko_translatome$plot
scnko_wrt_retko_up_go <- simple_gprofiler(sig_genes=scnko_wrt_retko_translatome$ups,
                                          species="mmusculus")
scnko_wrt_retko_down_go <- simple_gprofiler(sig_genes=scnko_wrt_retko_translatome$downs,
                                            species="mmusculus")
scnko_wrt_retko_down_go$go
scnko_wrt_retko_down_go$kegg
scnko_wrt_retko_down_go$corum
```

## dlgn normal translatome vs retina normal translatome.

```{r dlgn_norm_translatome_vs_ret_normal_translatome_gprofiler}
dlgnnorm_wrt_retnorm_translatome <- translatome_plotter(pair_mtrx,
                                                        x_axis="normret", y_axis="normdlgn")
dlgnnorm_wrt_retnorm_translatome$plot
dlgnnorm_wrt_retnorm_up_go <- simple_gprofiler(sig_genes=dlgnnorm_wrt_retnorm_translatome$ups,
                                               species="mmusculus")
dlgnnorm_wrt_retnorm_down_go <- simple_gprofiler(sig_genes=dlgnnorm_wrt_retnorm_translatome$downs,
                                                 species="mmusculus")
dlgnnorm_wrt_retnorm_down_go$go
dlgnnorm_wrt_retnorm_down_go$pvalue_plots$bpp_plot_over
dlgnnorm_wrt_retnorm_down_go$pvalue_plots$ccp_plot_over
```

## dlgn knockout translatome vs retina knockout translatome.

```{r dlgn_ko_translatome_vs_retina_ko_translatome_gprofiler}
dlgnko_wrt_retko_translatome <- translatome_plotter(pair_mtrx,
                                                    x_axis="koret", y_axis="kodlgn")
dlgnko_wrt_retko_translatome$plot
dlgnko_wrt_retko_up_go <- simple_gprofiler(sig_genes=dlgnko_wrt_retko_translatome$ups,
                                           species="mmusculus")
dlgnko_wrt_retko_up_go$go
dlgnko_wrt_retko_up_go$kegg
dlgnko_wrt_retko_up_go$reac
dlgnko_wrt_retko_down_go <- simple_gprofiler(sig_genes=dlgnko_wrt_retko_translatome$downs,
                                             species="mmusculus")
dlgnko_wrt_retko_down_go$go
dlgnko_wrt_retko_down_go$kegg
dlgnko_wrt_retko_down_go$reac
```

## Normal dlgn translatome vs knockout dlgn translatome.

```{r norm_dlgn_translatome_vs_ko_dlgn_translatome_gfprofiler}
dlgnnorm_wrt_dlgnko_translatome <- translatome_plotter(pair_mtrx,
                                                       x_axis="normdlgn",
                                                       y_axis="kodlgn")
dlgnnorm_wrt_dlgnko_translatome$plot
dlgnnorm_wrt_dlgnko_up_go <- simple_gprofiler(sig_genes=dlgnnorm_wrt_dlgnko_translatome$ups,
                                              species="mmusculus")
dlgnnorm_wrt_dlgnko_down_go <- simple_gprofiler(sig_genes=dlgnnorm_wrt_dlgnko_translatome$downs,
                                                species="mmusculus")
dlgnnorm_wrt_dlgnko_down_go$go
```

## dlgn knockout translatome vs. scn knockout translatome.

```{r dlgn_ko_translatome_vs_scn_ko_translatome_gprofiler}
dlgnko_wrt_scnko_translatome <- translatome_plotter(pair_mtrx,
                                                    x_axis="kodlgn",
                                                    y_axis="koscn")
dlgnko_wrt_scnko_translatome$plot
dlgnko_wrt_scnko_up_go <- simple_gprofiler(sig_genes=dlgnko_wrt_scnko_translatome$ups,
                                           species="mmusculus")
dlgnko_wrt_scnko_up_go$go
dlgnko_wrt_scnko_up_go$pvalue_plots$bpp_plot_over
dlgnko_wrt_scnko_down_go <- simple_gprofiler(sig_genes=dlgnko_wrt_scnko_translatome$downs,
                                             species="mmusculus")
dlgnko_wrt_scnko_down_go$go
```

# Some pictures

As I understand it, there is some interest in an ontology search using the ratio of ratios.

```{r other_contrasts}
ror <- normko_retdlgn
up_idx <- ror >= 1
down_idx <- ror <= -1
ror_up <- ror[up_idx]
length(ror_up)
ror_down <- ror[down_idx]
length(ror_down)

ror_gprofiler_up <- simple_gprofiler(
  sig_genes=ror_up, species="mmusculus",
  excel=glue::glue("excel/{rundate}mm_ror_gpfoiler_up-v{ver}.xlsx"))
ror_gprofiler_up$pvalue_plots$mfp_plot_over
ror_gprofiler_up$pvalue_plots$bpp_plot_over
ror_gprofiler_up$pvalue_plots$ccp_plot_over
ror_gprofiler_up$pvalue_plots$tf_plot_over
ror_gprofiler_up$pvalue_plots$hp_plot_over

ror_gprofiler_down <- simple_gprofiler(
  sig_genes=ror_down, species="mmusculus",
  excel=glue::glue("excel/{rundate}mm_ror_gpfoiler_down-v{ver}.xlsx"))
ror_gprofiler_down$pvalue_plots$mfp_plot_over
ror_gprofiler_down$pvalue_plots$bpp_plot_over
ror_gprofiler_down$pvalue_plots$reactome_plot_over
ror_gprofiler_down$pvalue_plots$ccp_plot_over
ror_gprofiler_down$pvalue_plots$tf_plot_over
```

```{r saveme, eval=FALSE}
pander::pander(sessionInfo())
message(paste0("This is hpgltools commit: ", get_git_commit()))
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
tmp <- sm(saveme(filename=this_save))
```


```{r loadme, eval=FALSE}
loadme(filename=this_save)
```
