This will be a very minimal analysis until we get some replicates.
I am using mm38_100.
## My load_biomart_annotations() function defaults to human, so that will be quick.
load_biomart_annotations(species="mmusculus") mm_annot <-
## The biomart annotations file already exists, loading from it.
mm_annot[["annotation"]]
mm_annot <-"txid"]] <- paste0(mm_annot[["ensembl_transcript_id"]], ".", mm_annot[["version"]])
mm_annot[[rownames(mm_annot) <- make.names(mm_annot[["ensembl_gene_id"]], unique=TRUE)
mm_annot[, c("txid", "ensembl_gene_id")] tx_gene_map <-
So, I now have a table of mouse annotations.
I am going to write a quick sample sheet in the current working directory called ‘all_samples.xlsx’ and put the names of the count tables in it.
Here I combine the metadata, count data, and annotations.
It is worth noting that the gene IDs from htseq-count probably do not match the annotations retrieved because they are likely exon-based rather than gene based. This is not really a problem, but don’t forget it!
mm_annot
hisat_annot <-rownames(hisat_annot) <- paste0("gene:", rownames(hisat_annot))
create_expt("sample_sheets/all_samples.xlsx",
mm38_hisat <-gene_info=hisat_annot)
## Reading the sample metadata.
## Dropped 1 rows from the sample metadata because they were blank.
## The sample definitions comprises: 23 rows(samples) and 24 columns(metadata fields).
## Reading count tables.
## Reading count files with read.table().
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mmusculus_iprgc_2019/preprocessing/iprgc_01/outputs/hisat2_mm38_95/r1_trimmed.count.xz contains 25788 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mmusculus_iprgc_2019/preprocessing/iprgc_02/outputs/hisat2_mm38_95/r1_trimmed.count.xz contains 25788 rows and merges to 25788 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mmusculus_iprgc_2019/preprocessing/iprgc_03/outputs/hisat2_mm38_95/r1_trimmed.count.xz contains 25788 rows and merges to 25788 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mmusculus_iprgc_2019/preprocessing/iprgc_04/outputs/hisat2_mm38_95/r1_trimmed.count.xz contains 25788 rows and merges to 25788 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mmusculus_iprgc_2019/preprocessing/iprgc_05/outputs/hisat2_mm38_95/r1_trimmed.count.xz contains 25788 rows and merges to 25788 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mmusculus_iprgc_2019/preprocessing/iprgc_06/outputs/hisat2_mm38_95/r1_trimmed.count.xz contains 25788 rows and merges to 25788 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mmusculus_iprgc_2019/preprocessing/iprgc_07/outputs/hisat2_mm38_95/r1_trimmed.count.xz contains 25788 rows and merges to 25788 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mmusculus_iprgc_2019/preprocessing/iprgc_08/outputs/hisat2_mm38_95/r1_trimmed.count.xz contains 25788 rows and merges to 25788 rows.
## preprocessing/202009/01_wt_retina/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/02_wt_scn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/03_wt_dlgn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/04_ko_retina/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/05_ko_scn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/06_ko_dlgn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/07_het1_retina/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/08_het2_retina/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/09_het3_retina/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/10_het1_scn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/11_het2_scn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/12_het3_scn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/13_het1_dlgn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/14_het2_dlgn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## preprocessing/202009/15_het3_dlgn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25788 rows.
## Finished reading count data.
## Matched 25371 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 25598 rows and 23 columns.
plot_libsize(mm38_hisat)$plot
normalize_expt(mm38_hisat, filter=TRUE, convert="cpm", norm="quant", transform="log2") mm38_first <-
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 11029 low-count genes (14569 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 192 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
pp(file="pca_norm.png", image=plot_pca(mm38_first)$plot)
## Writing the image to: pca_norm.png and calling dev.off().
normalize_expt(mm38_hisat, filter=TRUE, convert="cpm", batch="svaseq") mm38_norm <-
## This function will replace the expt$expressionset slot with:
## svaseq(cpm(cbcb(data)))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 11029 low-count genes (14569 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## Step 4: not transforming the data.
## Step 5: doing batch correction with svaseq.
## Using the current state of normalization.
## Passing the data to all_adjusters using the svaseq estimate type.
## batch_counts: Before batch/surrogate estimation, 306878 entries are x>1: 92%.
## batch_counts: Before batch/surrogate estimation, 3404 entries are x==0: 1%.
## batch_counts: Before batch/surrogate estimation, 24805 entries are 0<x<1: 7%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## There are 2286 (1%) elements which are < 0 after batch correction.
## Setting low elements to zero.
normalize_expt(mm38_norm, transform="log2") mm38_norm <-
## This function will replace the expt$expressionset slot with:
## log2(data)
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Filter is false, this should likely be set to something, good
## choices include cbcb, kofa, pofa (anything but FALSE). If you want this to
## stay FALSE, keep in mind that if other normalizations are performed, then the
## resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: not doing count filtering.
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: transforming the data with log2.
## transform_counts: Found 2286 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
pp(file="pca_sva.png", image=plot_pca(mm38_norm)$plot)
## Writing the image to: pca_sva.png and calling dev.off().
subset_expt(mm38_hisat, subset="batch=='b202009'") new <-
## Using a subset expression.
## There were 23, now there are 15 samples.
normalize_expt(new, filter=TRUE, convert="cpm", norm="quant", transform="log2") new_norm <-
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 11363 low-count genes (14235 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 551 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
plot_pca(new_norm)$plot
sm(create_expt("sample_sheets/all_samples.xlsx", tx_gene_map=tx_gene_map,
mm38_salmon <-gene_info=mm_annot, file_column="salmonfile"))
mm_annot
mmtx_annot <-rownames(mmtx_annot) <- mm_annot[["txid"]]
sm(create_expt("sample_sheets/all_samples.xlsx",
mm38_saltx <-gene_info=mmtx_annot, file_column="salmonfile"))
In this block I will calculate all the diagnostic plots, but not show them. I will show them next with a little annotation.
I will leave the output for the first of each invocation and silence it for the second.
sm(graph_metrics(mm38_salmon)) mm38_plots_sa <-
normalize_expt(mm38_salmon, norm="quant", convert="cpm",
mm38_norm_sa <-transform="log2", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 2794 low-count genes (3970 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 1105 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
sm(graph_metrics(mm38_norm_sa)) mm38n_plots_sa <-
$legend mm38_plots_sa
$libsize mm38_plots_sa
$nonzero mm38_plots_sa
$density mm38n_plots_sa
$pc_plot mm38n_plots_sa
sm(graph_metrics(mm38_hisat)) mm38_plots_hi <-
normalize_expt(mm38_hisat, norm="quant", convert="cpm",
mm38_norm_hi <-transform="log2", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 11029 low-count genes (14569 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 192 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
sm(graph_metrics(mm38_norm_hi)) mm38n_plots_hi <-
$libsize mm38_plots_hi
$nonzero mm38_plots_hi
$density mm38n_plots_hi
$pc_plot mm38n_plots_hi
The only interesting DE I see in this is to compare the retinas to the dlgns. I can treat them as replicates and compare.
These differential expression analyses are EXPLICITLY NOT what you care about. However, they are useful for two purposes:
When we receive full replicate sets, this cheater method of encapsulating the data will not longer be required.
set_expt_conditions(mm38_salmon, fact="celltype")
mm_sa <- sm(normalize_expt(mm_sa, convert="rpkm", transform="log2", column="cds_length"))
mm_norm_sa <-plot_pca(mm_norm_sa)$plot
all_pairwise(mm_sa, model_batch=FALSE) mm_de_sa <-
## Plotting a PCA before surrogate/batch inclusion.
## Not putting labels on the PC plot.
## Assuming no batch in model for testing pca.
## Not putting labels on the PC plot.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
## mm_hi <- set_expt_conditions(mm38_hisat, fact="celltype")
mm38_hisat
mm_hi <- sm(normalize_expt(mm_hi, convert="rpkm", transform="log2", filter=TRUE,
mm_norm_hi <-column="cds_length", batch="svaseq"))
## Error in density.default(x, adjust = adj) : 'x' contains missing values
## Error in density.default(x, adjust = adj) : 'x' contains missing values
plot_pca(mm_norm_hi)$plot
list(
keepers <-"wt_dlgnret" = c("wt_dlgn", "wt_retina"),
"wt_scnret" = c("wt_scn", "wt_retina"),
"wt_dlgnscn" = c("wt_dlgn", "wt_scn"),
"normret" = c("het_retina", "wt_retina"),
"koret" = c("ko_retina", "wt_retina"),
"normscn" = c("het_scn", "wt_scn"),
"koscn" = c("ko_scn", "wt_scn"),
"normdlgn" = c("het_dlgn", "wt_dlgn"),
"kodlgn" = c("ko_dlgn", "wt_dlgn"),
"normdlgn_vs_normret" = c("normdlgn", "normret"),
"normscn_vs_normret" = c("normscn", "normret"),
"kodlgn_vs_koret" = c("kodlgn", "koret"),
"koscn_vs_koret" = c("koscn", "koret"),
"koscn_vs_kodlgn" = c("koscn", "kodlgn"),
"koret_vs_normret" = c("ko_retina", "het_retina"),
"koscn_vs_normscn" = c("ko_scn", "het_scn"),
"kodlgn_vs_normdlgn" = c("ko_dlgn", "het_dlgn"),
"normko_retdlgn" = c("normdlgn_vs_normret", "kodlgn_vs_koret"),
"normko_retscn" = c("normscn_vs_normret", "koscn_vs_koret"))
"normdlgn_vs_normret = (het_dlgn-wt_dlgn)-(het_retina-wt_retina),
extras <- normscn_vs_normret = (het_scn-wt_scn)-(het_retina-wt_retina),
kodlgn_vs_koret = (ko_dlgn-wt_dlgn)-(ko_retina-wt_retina),
koscn_vs_koret = (ko_scn-wt_scn)-(ko_retina-wt_retina),
koscn_vs_kodlgn = (ko_scn-wt_scn)-(ko_dlgn-wt_dlgn),
normko_retdlgn = ((het_dlgn-wt_dlgn)-(het_retina-wt_retina)) - ((ko_dlgn-wt_dlgn)-(ko_retina-wt_retina)),
normko_retscn = ((het_scn-wt_scn)-(het_retina-wt_retina)) - ((ko_scn-wt_scn)-(ko_retina-wt_retina))"
normalize_expt(mm_hi, filter=TRUE) mm_filt <-
## This function will replace the expt$expressionset slot with:
## cbcb(data)
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 11029 low-count genes (14569 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
limma_pairwise(mm_filt, model_batch="svaseq", extra_contrasts=extras) mm_test <-
## Starting limma pairwise comparison.
## libsize was not specified, this parameter has profound effects on limma's result.
## Using the libsize from expt$best_libsize.
## Limma step 1/6: choosing model.
## Extracting surrogate estimates from svaseq and adding them to the model.
## batch_counts: Before batch/surrogate estimation, 329826 entries are x>1: 98%.
## batch_counts: Before batch/surrogate estimation, 3404 entries are x==0: 1%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## Choosing the non-intercept containing model.
## Limma step 2/6: running limma::voom(), switch with the argument 'which_voom'.
## Using normalize.method=quantile for voom.
## Limma step 3/6: running lmFit with method: ls.
## Limma step 4/6: making and fitting contrasts with no intercept. (~ 0 + factors)
## Limma step 5/6: Running eBayes with robust=FALSE and trend=FALSE.
## Limma step 6/6: Writing limma outputs.
## Limma step 6/6: 1/43: Creating table: het_retina_vs_het_dlgn. Adjust=BH
## Limma step 6/6: 2/43: Creating table: het_scn_vs_het_dlgn. Adjust=BH
## Limma step 6/6: 3/43: Creating table: ko_dlgn_vs_het_dlgn. Adjust=BH
## Limma step 6/6: 4/43: Creating table: ko_retina_vs_het_dlgn. Adjust=BH
## Limma step 6/6: 5/43: Creating table: ko_scn_vs_het_dlgn. Adjust=BH
## Limma step 6/6: 6/43: Creating table: wt_dlgn_vs_het_dlgn. Adjust=BH
## Limma step 6/6: 7/43: Creating table: wt_retina_vs_het_dlgn. Adjust=BH
## Limma step 6/6: 8/43: Creating table: wt_scn_vs_het_dlgn. Adjust=BH
## Limma step 6/6: 9/43: Creating table: het_scn_vs_het_retina. Adjust=BH
## Limma step 6/6: 10/43: Creating table: ko_dlgn_vs_het_retina. Adjust=BH
## Limma step 6/6: 11/43: Creating table: ko_retina_vs_het_retina. Adjust=BH
## Limma step 6/6: 12/43: Creating table: ko_scn_vs_het_retina. Adjust=BH
## Limma step 6/6: 13/43: Creating table: wt_dlgn_vs_het_retina. Adjust=BH
## Limma step 6/6: 14/43: Creating table: wt_retina_vs_het_retina. Adjust=BH
## Limma step 6/6: 15/43: Creating table: wt_scn_vs_het_retina. Adjust=BH
## Limma step 6/6: 16/43: Creating table: ko_dlgn_vs_het_scn. Adjust=BH
## Limma step 6/6: 17/43: Creating table: ko_retina_vs_het_scn. Adjust=BH
## Limma step 6/6: 18/43: Creating table: ko_scn_vs_het_scn. Adjust=BH
## Limma step 6/6: 19/43: Creating table: wt_dlgn_vs_het_scn. Adjust=BH
## Limma step 6/6: 20/43: Creating table: wt_retina_vs_het_scn. Adjust=BH
## Limma step 6/6: 21/43: Creating table: wt_scn_vs_het_scn. Adjust=BH
## Limma step 6/6: 22/43: Creating table: ko_retina_vs_ko_dlgn. Adjust=BH
## Limma step 6/6: 23/43: Creating table: ko_scn_vs_ko_dlgn. Adjust=BH
## Limma step 6/6: 24/43: Creating table: wt_dlgn_vs_ko_dlgn. Adjust=BH
## Limma step 6/6: 25/43: Creating table: wt_retina_vs_ko_dlgn. Adjust=BH
## Limma step 6/6: 26/43: Creating table: wt_scn_vs_ko_dlgn. Adjust=BH
## Limma step 6/6: 27/43: Creating table: ko_scn_vs_ko_retina. Adjust=BH
## Limma step 6/6: 28/43: Creating table: wt_dlgn_vs_ko_retina. Adjust=BH
## Limma step 6/6: 29/43: Creating table: wt_retina_vs_ko_retina. Adjust=BH
## Limma step 6/6: 30/43: Creating table: wt_scn_vs_ko_retina. Adjust=BH
## Limma step 6/6: 31/43: Creating table: wt_dlgn_vs_ko_scn. Adjust=BH
## Limma step 6/6: 32/43: Creating table: wt_retina_vs_ko_scn. Adjust=BH
## Limma step 6/6: 33/43: Creating table: wt_scn_vs_ko_scn. Adjust=BH
## Limma step 6/6: 34/43: Creating table: wt_retina_vs_wt_dlgn. Adjust=BH
## Limma step 6/6: 35/43: Creating table: wt_scn_vs_wt_dlgn. Adjust=BH
## Limma step 6/6: 36/43: Creating table: wt_scn_vs_wt_retina. Adjust=BH
## Limma step 6/6: 37/43: Creating table: normdlgn_vs_normret. Adjust=BH
## Limma step 6/6: 38/43: Creating table: normscn_vs_normret. Adjust=BH
## Limma step 6/6: 39/43: Creating table: kodlgn_vs_koret. Adjust=BH
## Limma step 6/6: 40/43: Creating table: koscn_vs_koret. Adjust=BH
## Limma step 6/6: 41/43: Creating table: koscn_vs_kodlgn. Adjust=BH
## Limma step 6/6: 42/43: Creating table: normko_retdlgn. Adjust=BH
## Limma step 6/6: 43/43: Creating table: normko_retscn. Adjust=BH
## Limma step 6/6: 1/9: Creating table: het_dlgn. Adjust=BH
## Limma step 6/6: 2/9: Creating table: het_retina. Adjust=BH
## Limma step 6/6: 3/9: Creating table: het_scn. Adjust=BH
## Limma step 6/6: 4/9: Creating table: ko_dlgn. Adjust=BH
## Limma step 6/6: 5/9: Creating table: ko_retina. Adjust=BH
## Limma step 6/6: 6/9: Creating table: ko_scn. Adjust=BH
## Limma step 6/6: 7/9: Creating table: wt_dlgn. Adjust=BH
## Limma step 6/6: 8/9: Creating table: wt_retina. Adjust=BH
## Limma step 6/6: 9/9: Creating table: wt_scn. Adjust=BH
deseq_pairwise(mm_filt, model_batch="svaseq", extra_contrasts=extras) mm_test <-
## Hey you, use deseq2 pairwise.
## Starting DESeq2 pairwise comparisons.
## The data should be suitable for EdgeR/DESeq/EBSeq. If they freak out, check the state of the count table and ensure that it is in integer counts.
## Extracting surrogate estimates from svaseq and adding them to the model.
## batch_counts: Before batch/surrogate estimation, 329826 entries are x>1: 98%.
## batch_counts: Before batch/surrogate estimation, 3404 entries are x==0: 1%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## Choosing the non-intercept containing model.
## DESeq2 step 1/5: Including a matrix of batch estimates in the deseq model.
## converting counts to integer mode
## DESeq2 step 2/5: Estimate size factors.
## DESeq2 step 3/5: Estimate dispersions.
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## Using a parametric fitting seems to have worked.
## DESeq2 step 4/5: nbinomWaldTest.
## The contrast normdlgn is not in the results.
## If this is not an extra contrast, then this is an error.
## The contrast normscn is not in the results.
## If this is not an extra contrast, then this is an error.
## The contrast kodlgn is not in the results.
## If this is not an extra contrast, then this is an error.
## The contrast koscn is not in the results.
## If this is not an extra contrast, then this is an error.
## The contrast koscn is not in the results.
## If this is not an extra contrast, then this is an error.
## The contrast normko_retdlgn is not in the results.
## If this is not an extra contrast, then this is an error.
## The contrast normko_retscn is not in the results.
## If this is not an extra contrast, then this is an error.
edger_pairwise(mm_filt, model_batch="svaseq", extra_contrasts=extras) mm_test <-
## Starting edgeR pairwise comparisons.
## The data should be suitable for EdgeR/DESeq/EBSeq. If they freak out, check the state of the count table and ensure that it is in integer counts.
## Extracting surrogate estimates from svaseq and adding them to the model.
## batch_counts: Before batch/surrogate estimation, 329826 entries are x>1: 98%.
## batch_counts: Before batch/surrogate estimation, 3404 entries are x==0: 1%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## Choosing the non-intercept containing model.
## EdgeR step 1/9: Importing and normalizing data.
## EdgeR step 2/9: Estimating the common dispersion.
## EdgeR step 3/9: Estimating dispersion across genes.
## EdgeR step 4/9: Estimating GLM Common dispersion.
## EdgeR step 5/9: Estimating GLM Trended dispersion.
## EdgeR step 6/9: Estimating GLM Tagged dispersion.
## EdgeR step 7/9: Running glmFit, switch to glmQLFit by changing the argument 'edger_test'.
## EdgeR step 8/9: Making pairwise contrasts.
ebseq_pairwise(mm_filt, model_batch="svaseq", extra_contrasts=extras) mm_test <-
## The data should be suitable for EdgeR/DESeq/EBSeq. If they freak out, check the state of the count table and ensure that it is in integer counts.
## Starting EBSeq pairwise subset.
## Choosing the non-intercept containing model.
## Starting EBTest of het_dlgn vs. het_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_dlgn vs. het_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_dlgn vs. ko_dlgn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_dlgn vs. ko_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_dlgn vs. ko_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_dlgn vs. wt_dlgn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_dlgn vs. wt_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_dlgn vs. wt_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_retina vs. het_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_retina vs. ko_dlgn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_retina vs. ko_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_retina vs. ko_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_retina vs. wt_dlgn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_retina vs. wt_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_retina vs. wt_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_scn vs. ko_dlgn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_scn vs. ko_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_scn vs. ko_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_scn vs. wt_dlgn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_scn vs. wt_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of het_scn vs. wt_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_dlgn vs. ko_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_dlgn vs. ko_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_dlgn vs. wt_dlgn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_dlgn vs. wt_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_dlgn vs. wt_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_retina vs. ko_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_retina vs. wt_dlgn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_retina vs. wt_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_retina vs. wt_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_scn vs. wt_dlgn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_scn vs. wt_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of ko_scn vs. wt_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of wt_dlgn vs. wt_retina.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of wt_dlgn vs. wt_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of wt_retina vs. wt_scn.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
sm(all_pairwise(mm_filt, model_batch="svaseq", parallel=FALSE, extra_contrasts=extras)) mm_de_hi <-
## Error in dimnames(x) <- dn: length of 'dimnames' [2] not equal to array extent
combine_de_tables(mm_de_hi, excel="excel/testing_202010.xlsx") mm_de_tables <-
## Error in combine_de_tables(mm_de_hi, excel = "excel/testing_202010.xlsx"): object 'mm_de_hi' not found
Until we get full replicates, I will do simple subtractions.
In an attempt to keep some clarity in the terms used, I want to define them now. There are three contexts in which we will consider the data:
The individual sample type. When considering individual samples, I will use three terms in this and only this context: wild-type (wt), het, and mut.
The individual translatome. These are defines as something / baseline. I will exclusively call the wt samples ‘baseline’ when speaking in this context. I will exclusively state ‘normal’ when referring to het / wt samples, and I will state ‘ko’ when referring to mut / wt samples in the translatome context.
Translatome vs. translatome. Whenever comparing translatomes, I will use the names as in #2 and always put the numerator first when writing the name of a comparison.
The most complex example of the above nomenclature is:
“normko_retdlgn is defined as normret_vs_normdlgn - koret_vs_kodlgn”
This states we are examining at the translatome context: (norm(retina translatome) - norm(dlgn translatome)) - (ko(retina translatome) - ko(dlgn translatome))
Which in turn is synonymous to the following at the sample context: ((rethet - retwt) - (dlgnhet - dlgnwt)) - ((retko - retwt) - (dlgnko - dlgnwt))
Now let us associate the various variable names with the appropriate samples:
"iprgc_01"
dlgnwt <- "iprgc_02"
retwt <- "iprgc_03"
scnwt <-
"iprgc_04"
dlgnhet <- "iprgc_05"
rethet <- NULL ## Does not yet exist.
scnhet <-
"iprgc_06"
dlgnmut <- "iprgc_07"
retmut <- "iprgc_08" scnmut <-
Give these variable names, now lets associate columns of the expression data with them. These are at the sample context, so the appropriate names are: ‘wt’, ‘het’, and ‘mut’. In each case I will prefix the genotype with the tissue type: ‘ret’, ‘dlgn’, and ‘scn’. Thus ‘retwt’ refers to the sample used to calculate the translatome retina baseline; in contrast ‘dlgnmut’ is the sample which provides the dlgn knockout.
## Sample context
mm_norm_sa
mm38_norm <- exprs(mm38_norm)[, dlgnwt]
dlgnwt <- exprs(mm38_norm)[, retwt]
retwt <- exprs(mm38_norm)[, scnwt]
scnwt <- exprs(mm38_norm)[, dlgnhet]
dlgnhet <- exprs(mm38_norm)[, rethet]
rethet <- exprs(mm38_norm)[, dlgnmut]
dlgnmut <- exprs(mm38_norm)[, retmut]
retmut <- exprs(mm38_norm)[, scnmut] scnmut <-
Each of the above 8 variables provides 1 column of information. We have 3 baseline comparisons available to us. In each of these we compare one wt sample to another.
## Baseline comparisons
dlgnwt - retwt
wt_dlgnret <- scnwt - retwt
wt_scnret <- dlgnwt - scnwt wt_dlgnscn <-
Simultaneously, we have 5 available translatomes. This are provided by comparing each het or mut to the associated wt. These will therefore receive names: ‘norm’ and ‘ko’ instead of ‘het’ and ‘mut’.
## Translatome context
rethet - retwt
normret <- retmut - retwt
koret <- scnmut - scnwt
koscn <- dlgnhet - dlgnwt
normdlgn <- dlgnmut - dlgnwt kodlgn <-
Given these translatomes, there are a few contrasts of likely interest. These are performed by comparing the relevant translatomes.
Will will split these into 4 separate categories: het vs het, ko vs ko, ko vs het, and ratio vs ratio.
Finally, note that we are being explicitly redundant in these definitions. I am making variable names for both the a/b ratio and the b/a ratio. Thus we have some redundantly redundant (haha) flexibility when deciding on what we want to plot.
## norm vs norm
normdlgn - normret
normdlgn_vs_normret <- normret - normdlgn normret_vs_normdlgn <-
## ko vs ko
koret - kodlgn
koret_vs_kodlgn <- kodlgn - koret
kodlgn_vs_koret <-
koret - koscn
koret_vs_koscn <- koscn - koret
koscn_vs_koret <-
kodlgn - koscn
kodlgn_vs_koscn <- koscn - kodlgn koscn_vs_kodlgn <-
On the other hand, I am assuming we always want the normals as denominators and kos as numerators.
## ko vs norm
koret - normret
koret_vs_normret <-
kodlgn - normdlgn kodlgn_vs_normdlgn <-
Finally, here is the ratio of ratios example I printed above:
I named it ‘normko_retdlgn’ in an attempt to make clear that it is actually: (normret/normdlgn)/(koret/kodlgn)
or stated differently: “norm divided by ko for ret divided by dlgn.”
## ratio of ratios
normret_vs_normdlgn - koret_vs_kodlgn normko_retdlgn <-
My matrix of data will now contain 1 column for each of the above 27 samples/comparisons.
cbind(
pair_mtrx <-## Individual samples
dlgnwt, retwt, scnwt, dlgnhet, rethet, dlgnmut, retmut, scnmut,## Baseline comparisons
wt_dlgnret, wt_scnret, wt_dlgnscn,## Baseline subtractions
normdlgn, normret, kodlgn, koret, koscn,## het_vs_het, of which there is only 1 because we do not have hetscn
normdlgn_vs_normret, normret_vs_normdlgn,## ko_vs_ko, of which we have 3
koret_vs_kodlgn, kodlgn_vs_koret,
koret_vs_koscn, koscn_vs_koret,
kodlgn_vs_koscn, koscn_vs_kodlgn,## ko_vs_het, 3 including one getting around missing hetscn
koret_vs_normret, kodlgn_vs_normdlgn,## ratio of ratios
normko_retdlgn)
## Baseline subtractions
normdlgn, normret, kodlgn, koret, koscn,## het_vs_het, of which there is only 1 because we do not have hetscn
normdlgn_vs_normret, normret_vs_normdlgn,## ko_vs_ko, of which we have 3
koret_vs_kodlgn, kodlgn_vs_koret,
koret_vs_koscn, koscn_vs_koret,
kodlgn_vs_koscn, koscn_vs_kodlgn,## ko_vs_het, 3 including one getting around missing hetscn
koret_vs_normret, kodlgn_vs_normdlgn,## ratio of ratios
)
normko_retdlgn)
## Error: <text>:20:11: unexpected ','
## 19: ## Baseline subtractions
## 20: normdlgn,
## ^
I am not sure if we will use these indexes, but I am writing these out as subsets of genes to look at. These indexes are stating that, given a cutoff (0), we want to look at only the genes which have higher x / baseline values than the cutoff.
## Queries about gene subsets.
## These are all in the context of translatomes.
0
cutoff <- normret > cutoff & koret > cutoff
ret_kept_idx <- koscn > cutoff
scn_kept_idx <- normdlgn > cutoff & kodlgn > cutoff
dlgn_kept_idx <- ret_kept_idx & dlgn_kept_idx
ret_dlgn_kept_idx <- ret_kept_idx & scn_kept_idx
ret_scn_kept_idx <- dlgn_kept_idx & scn_kept_idx
dlgn_scn_kept_idx <-
##normdlgn_vs_normret[!ret_dlgn_kept_idx] <- NA
##normret_vs_normdlgn[!ret_dlgn_kept_idx] <- NA
##koret_vs_kodlgn[!ret_dlgn_kept_idx] <- NA
##kodlgn_vs_koret[!ret_dlgn_kept_idx] <- NA
##koret_vs_koscn[!ret_scn_kept_idx] <- NA
##koscn_vs_koret[!ret_scn_kept_idx] <- NA
##kodlgn_vs_koscn[!dlgn_scn_kept_idx] <- NA
##koscn_vs_kodlgn[!dlgn_scn_kept_idx] <- NA
##koret_vs_normret[!ret_kept_idx] <- NA
##kodlgn_vs_normdlgn[!dlgn_kept_idx] <- NA
##normko_retdlgn <- normko_retdlgn[!ret_dlgn_kept_idx] <- NA
I will use my function combine_de_tables() to add this information to my existing annotation data along with the results from the statistically valid comparison of the three tissue types.
sm(combine_de_tables(
mm_tables <-extra_annot=pair_mtrx,
mm_de_sa, excel=glue::glue("excel/{rundate}mm_salmon_tables-v{ver}.xlsx")))
## Error in combine_de_tables(mm_de_sa, extra_annot = pair_mtrx, excel = glue::glue("excel/{rundate}mm_salmon_tables-v{ver}.xlsx")): object 'pair_mtrx' not found
## Put retina baseline on y axis as black, retina het on x axis as black.
## Then recolor a subset of these as red, the reds are when normret > 0
library(ggplot2)
as.data.frame(pair_mtrx[, c("rethet", "retwt")]) plotted <-
## Error in as.data.frame(pair_mtrx[, c("rethet", "retwt")]): object 'pair_mtrx' not found
normret > 0
red_idx <-"color"] <- ifelse(red_idx, "red", "black") plotted[,
## Error in plotted[, "color"] <- ifelse(red_idx, "red", "black"): object 'plotted' not found
"label"]] <- rownames(plotted) plotted[[
## Error in rownames(plotted): object 'plotted' not found
ggplot(
ret_hetwt <-
plotted,aes_string(x="rethet", y="retwt", label="label", color="color")) +
geom_point(alpha=0.5) +
scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "rethet", y = "retwt", label = "label", : object 'plotted' not found
ret_hetwt
## Error in eval(expr, envir, enclos): object 'ret_hetwt' not found
ggplotly_url(
ret_hetwt_clicky <-"ret_hetwt.html", title="Retina expression, het vs. wt.",
ret_hetwt, url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(ret_hetwt, "ret_hetwt.html", title = "Retina expression, het vs. wt.", : object 'ret_hetwt' not found
as.data.frame(pair_mtrx[, c("retmut", "retwt")]) plotted <-
## Error in as.data.frame(pair_mtrx[, c("retmut", "retwt")]): object 'pair_mtrx' not found
"label"]] <- rownames(plotted) plotted[[
## Error in rownames(plotted): object 'plotted' not found
"color"]] <- "black" plotted[[
## Error in plotted[["color"]] <- "black": object 'plotted' not found
ggplot(
ret_mutwt <-
plotted,aes_string(x="retmut", y="retwt", label="label", color="color")) +
geom_point(alpha=0.5) +
scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "retmut", y = "retwt", label = "label", : object 'plotted' not found
ret_mutwt
## Error in eval(expr, envir, enclos): object 'ret_mutwt' not found
ggplotly_url(
ret_mutwt_clicky <-"ret_mutwt.html", title="Retina expression, mutant vs. wt.",
ret_mutwt, url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(ret_mutwt, "ret_mutwt.html", title = "Retina expression, mutant vs. wt.", : object 'ret_mutwt' not found
as.data.frame(pair_mtrx[, c("dlgnhet", "dlgnwt")]) plotted <-
## Error in as.data.frame(pair_mtrx[, c("dlgnhet", "dlgnwt")]): object 'pair_mtrx' not found
"label"]] <- rownames(plotted) plotted[[
## Error in rownames(plotted): object 'plotted' not found
"color"]] <- "black" plotted[[
## Error in plotted[["color"]] <- "black": object 'plotted' not found
ggplot(
dlgn_hetwt <-
plotted,aes_string(x="dlgnhet", y="dlgnwt", label="label", color="color")) +
geom_point(alpha=0.5) +
scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "dlgnhet", y = "dlgnwt", label = "label", : object 'plotted' not found
dlgn_hetwt
## Error in eval(expr, envir, enclos): object 'dlgn_hetwt' not found
ggplotly_url(
dlgn_hetwt_clicky <-"dlgn_hetwt.html", title="dlgn expression, het vs. wt.",
dlgn_hetwt, url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(dlgn_hetwt, "dlgn_hetwt.html", title = "dlgn expression, het vs. wt.", : object 'dlgn_hetwt' not found
as.data.frame(pair_mtrx[, c("dlgnmut", "dlgnwt")]) plotted <-
## Error in as.data.frame(pair_mtrx[, c("dlgnmut", "dlgnwt")]): object 'pair_mtrx' not found
"label"]] <- rownames(plotted) plotted[[
## Error in rownames(plotted): object 'plotted' not found
"color"]] <- "black" plotted[[
## Error in plotted[["color"]] <- "black": object 'plotted' not found
ggplot(
dlgn_mutwt <-
plotted,aes_string(x="dlgnmut", y="dlgnwt", label="label", color="color")) +
geom_point(alpha=0.5) +
scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "dlgnmut", y = "dlgnwt", label = "label", : object 'plotted' not found
dlgn_mutwt
## Error in eval(expr, envir, enclos): object 'dlgn_mutwt' not found
ggplotly_url(
dlgn_mutwt_clicky <-"dlgn_mutwt.html", title="dlgn expression, mut vs. wt.",
dlgn_mutwt, url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(dlgn_mutwt, "dlgn_mutwt.html", title = "dlgn expression, mut vs. wt.", : object 'dlgn_mutwt' not found
as.data.frame(pair_mtrx[, c("scnmut", "scnwt")]) plotted <-
## Error in as.data.frame(pair_mtrx[, c("scnmut", "scnwt")]): object 'pair_mtrx' not found
"label"]] <- rownames(plotted) plotted[[
## Error in rownames(plotted): object 'plotted' not found
"color"]] <- "black" plotted[[
## Error in plotted[["color"]] <- "black": object 'plotted' not found
ggplot(
scn_mutwt <-
plotted,aes_string(x="scnmut", y="scnwt", label="label", color="color")) +
geom_point(alpha=0.5) +
scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "scnmut", y = "scnwt", label = "label", : object 'plotted' not found
scn_mutwt
## Error in eval(expr, envir, enclos): object 'scn_mutwt' not found
ggplotly_url(
scn_mutwt_clicky <-"scn_mutwt.html", title="scn expression, mut vs. wt.",
scn_mutwt, url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(scn_mutwt, "scn_mutwt.html", title = "scn expression, mut vs. wt.", : object 'scn_mutwt' not found
## x-axis: normdlgn_vs_normret or normret_vs_normdlgn,
## ^^^^
## y-axis: dlgnwt-retwt (baseline dlgn - baseline retina)
as.data.frame(pair_mtrx[, c("normdlgn_vs_normret", "wt_dlgnret")]) plotted <-
## Error in as.data.frame(pair_mtrx[, c("normdlgn_vs_normret", "wt_dlgnret")]): object 'pair_mtrx' not found
normret > 0
red_idx <-## Note that this order is opposite of above.
"color"] <- ifelse(red_idx, "black", "red") plotted[,
## Error in plotted[, "color"] <- ifelse(red_idx, "black", "red"): object 'plotted' not found
"label"]] <- rownames(plotted) plotted[[
## Error in rownames(plotted): object 'plotted' not found
ggplot(
axon_trans_ret_target <-
plotted,aes_string(x="normdlgn_vs_normret", y="wt_dlgnret", label="label", color="color")) +
geom_point(alpha=0.5) +
scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "normdlgn_vs_normret", y = "wt_dlgnret", : object 'plotted' not found
axon_trans_ret_target
## Error in eval(expr, envir, enclos): object 'axon_trans_ret_target' not found
ggplotly_url(
axon_trans_ret_target_clicky <-"axon_trans_ret_target.html", title="Axon translatome, retina target.",
axon_trans_ret_target, url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(axon_trans_ret_target, "axon_trans_ret_target.html", : object 'axon_trans_ret_target' not found
as.data.frame(pair_mtrx[, c("normret", "normdlgn")]) plotted <-
## Error in as.data.frame(pair_mtrx[, c("normret", "normdlgn")]): object 'pair_mtrx' not found
"label"]] <- rownames(plotted) plotted[[
## Error in rownames(plotted): object 'plotted' not found
"color"]] <- "black" plotted[[
## Error in plotted[["color"]] <- "black": object 'plotted' not found
ggplot(
normret_normdlgn <-
plotted,aes_string(x="normret", y="normdlgn", label="label", color="color")) +
geom_point(alpha=0.5) +
scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "normret", y = "normdlgn", label = "label", : object 'plotted' not found
normret_normdlgn
## Error in eval(expr, envir, enclos): object 'normret_normdlgn' not found
ggplotly_url(
normret_normdlgn_clicky <-"normret_normdlgn.html", title="Normal retina translatome vs normal dlgn translatome.",
normret_normdlgn, url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(normret_normdlgn, "normret_normdlgn.html", title = "Normal retina translatome vs normal dlgn translatome.", : object 'normret_normdlgn' not found
as.data.frame(pair_mtrx[, c("koret", "kodlgn")])
plotted <-"label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
plotted[[ ggplot(
koret_kodlgn <-
plotted,aes_string(x="koret", y="kodlgn", label="label", color="color")) +
geom_point(alpha=0.5) +
scale_color_manual(values=c("black", "red"))
koret_kodlgn ggplotly_url(
koret_kodlgn_clicky <-"koret_kodlgn.html", title="KO retina translatome vs KO dlgn translatome.",
koret_kodlgn, url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
``
## KO Retina vs KO SCN
## Error: attempt to use zero-length variable name
as.data.frame(pair_mtrx[, c("koret", "koscn")]) plotted <-
## Error in as.data.frame(pair_mtrx[, c("koret", "koscn")]): object 'pair_mtrx' not found
"label"]] <- rownames(plotted) plotted[[
## Error in rownames(plotted): object 'plotted' not found
"color"]] <- "black" plotted[[
## Error in plotted[["color"]] <- "black": object 'plotted' not found
ggplot(
koret_koscn <-
plotted,aes_string(x="koret", y="koscn", label="label", color="color")) +
geom_point(alpha=0.5) +
scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "koret", y = "koscn", label = "label", : object 'plotted' not found
koret_koscn
## Error in eval(expr, envir, enclos): object 'koret_koscn' not found
ggplotly_url(
koret_koscn_clicky <-"koret_koscn.html", title="KO retina translatome vs KO scn translatome.",
koret_koscn, url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(koret_koscn, "koret_koscn.html", title = "KO retina translatome vs KO scn translatome.", : object 'koret_koscn' not found
as.data.frame(pair_mtrx[, c("normdlgn", "kodlgn")]) plotted <-
## Error in as.data.frame(pair_mtrx[, c("normdlgn", "kodlgn")]): object 'pair_mtrx' not found
"label"]] <- rownames(plotted) plotted[[
## Error in rownames(plotted): object 'plotted' not found
"color"]] <- "black" plotted[[
## Error in plotted[["color"]] <- "black": object 'plotted' not found
ggplot(
normdlgn_kodlgn <-
plotted,aes_string(x="normdlgn", y="kodlgn", label="label", color="color")) +
geom_point(alpha=0.5) +
scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "normdlgn", y = "kodlgn", label = "label", : object 'plotted' not found
normdlgn_kodlgn
## Error in eval(expr, envir, enclos): object 'normdlgn_kodlgn' not found
ggplotly_url(
normdlgn_kodlgn_clicky <-"normdlgn_kodlgn.html", title="Normal dlgn translatome vs KO dlgn translatome.",
normdlgn_kodlgn, url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(normdlgn_kodlgn, "normdlgn_kodlgn.html", title = "Normal dlgn translatome vs KO dlgn translatome.", : object 'normdlgn_kodlgn' not found
as.data.frame(pair_mtrx[, c("normret", "koret")]) plotted <-
## Error in as.data.frame(pair_mtrx[, c("normret", "koret")]): object 'pair_mtrx' not found
"label"]] <- rownames(plotted) plotted[[
## Error in rownames(plotted): object 'plotted' not found
"color"]] <- "black" plotted[[
## Error in plotted[["color"]] <- "black": object 'plotted' not found
ggplot(
normret_koret <-
plotted,aes_string(x="normret", y="koret", label="label", color="color")) +
geom_point(alpha=0.5) +
scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "normret", y = "koret", label = "label", : object 'plotted' not found
normret_koret
## Error in eval(expr, envir, enclos): object 'normret_koret' not found
ggplotly_url(
normret_koret_clicky <-"normret_koret.html", title="Normal retina translatome vs KO retina translatome.",
normret_koret, url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(normret_koret, "normret_koret.html", title = "Normal retina translatome vs KO retina translatome.", : object 'normret_koret' not found
as.data.frame(pair_mtrx[, c("normret_vs_normdlgn", "koret_vs_kodlgn")]) plotted <-
## Error in as.data.frame(pair_mtrx[, c("normret_vs_normdlgn", "koret_vs_kodlgn")]): object 'pair_mtrx' not found
"label"]] <- rownames(plotted) plotted[[
## Error in rownames(plotted): object 'plotted' not found
"color"]] <- "black" plotted[[
## Error in plotted[["color"]] <- "black": object 'plotted' not found
ggplot(
normal_ko_axon_translatome <-
plotted,aes_string(x="normret_vs_normdlgn", y="koret_vs_kodlgn", label="label", color="color")) +
geom_point(alpha=0.5) +
scale_color_manual(values=c("black", "red"))
## Error in ggplot(plotted, aes_string(x = "normret_vs_normdlgn", y = "koret_vs_kodlgn", : object 'plotted' not found
normal_ko_axon_translatome
## Error in eval(expr, envir, enclos): object 'normal_ko_axon_translatome' not found
ggplotly_url(
normal_ko_axon_translatome_clicky <-"normal_ko_axon_translatome.html",
normal_ko_axon_translatome, title="Normal retina ko axon translatome.",
url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Error in ggplotly_url(normal_ko_axon_translatome, "normal_ko_axon_translatome.html", : object 'normal_ko_axon_translatome' not found
function(pair_mtrx, x_axis="koret", y_axis="koscn", lfc=NULL, fc=NULL, linewidth=1.5,
translatome_plotter <-up_color="red", down_color="#098534", line_color="#fcba03", alpha=0.5,
x_limit=c(-7, 7), y_limit=c(-7, 7)) {
if (is.null(fc) & is.null(lfc)) {
message("No fc/lfc was provided, defaulting to 10 fold.")
log2(10)
lfc <-else if (is.null(lfc)) {
} log2(fc)
lfc <-
} as.data.frame(pair_mtrx[, c(x_axis, y_axis)])
plotted <- is.na(plotted)
na_idx <- 0
plotted[na_idx] <- plotted[, y_axis] - plotted[, x_axis] >= lfc
up_idx <- plotted[, y_axis] - plotted[, x_axis] <= (lfc * -1)
down_idx <- rownames(plotted)[up_idx]
up_genes <- rownames(plotted)[down_idx]
down_genes <-## Note that this order is opposite of above.
"color"]] <- "black"
plotted[["color"] <- up_color
plotted[up_idx, "color"] <- down_color
plotted[down_idx, "color"]] <- as.factor(plotted[["color"]])
plotted[[levels(plotted[["color"]]) <- c("black", up_color, down_color)
ggplot2::ggplot(plotted, aes_string(x=x_axis,
plt <-y=y_axis,
color="color")) +
geom_abline(size=1.1, slope=1, intercept=lfc, color="orange") +
geom_abline(size=1.1, slope=1, intercept=(-1 * lfc), color="orange") +
scale_x_continuous(limits=c(x_limit)) +
scale_y_continuous(limits=c(y_limit)) +
geom_point(alpha=alpha) +
scale_color_manual(values=c(down_color, "black", up_color))
list(
retlist <-"mtrx" = plotted,
"ups" = up_genes,
"downs" = down_genes,
"plot" = plt)
return(retlist)
}
First plot: KO scn translatome on y axis vs. KO retina translatome on x axis.
translatome_plotter(pair_mtrx) scnko_wrt_retko_translatome <-
## No fc/lfc was provided, defaulting to 10 fold.
## Error in as.data.frame(pair_mtrx[, c(x_axis, y_axis)]): object 'pair_mtrx' not found
$plot scnko_wrt_retko_translatome
## Error in eval(expr, envir, enclos): object 'scnko_wrt_retko_translatome' not found
simple_gprofiler(sig_genes=scnko_wrt_retko_translatome$ups,
scnko_wrt_retko_up_go <-species="mmusculus")
## Error in simple_gprofiler(sig_genes = scnko_wrt_retko_translatome$ups, : object 'scnko_wrt_retko_translatome' not found
simple_gprofiler(sig_genes=scnko_wrt_retko_translatome$downs,
scnko_wrt_retko_down_go <-species="mmusculus")
## Error in simple_gprofiler(sig_genes = scnko_wrt_retko_translatome$downs, : object 'scnko_wrt_retko_translatome' not found
$go scnko_wrt_retko_down_go
## Error in eval(expr, envir, enclos): object 'scnko_wrt_retko_down_go' not found
$kegg scnko_wrt_retko_down_go
## Error in eval(expr, envir, enclos): object 'scnko_wrt_retko_down_go' not found
$corum scnko_wrt_retko_down_go
## Error in eval(expr, envir, enclos): object 'scnko_wrt_retko_down_go' not found
translatome_plotter(pair_mtrx,
dlgnnorm_wrt_retnorm_translatome <-x_axis="normret", y_axis="normdlgn")
## No fc/lfc was provided, defaulting to 10 fold.
## Error in as.data.frame(pair_mtrx[, c(x_axis, y_axis)]): object 'pair_mtrx' not found
$plot dlgnnorm_wrt_retnorm_translatome
## Error in eval(expr, envir, enclos): object 'dlgnnorm_wrt_retnorm_translatome' not found
simple_gprofiler(sig_genes=dlgnnorm_wrt_retnorm_translatome$ups,
dlgnnorm_wrt_retnorm_up_go <-species="mmusculus")
## Error in simple_gprofiler(sig_genes = dlgnnorm_wrt_retnorm_translatome$ups, : object 'dlgnnorm_wrt_retnorm_translatome' not found
simple_gprofiler(sig_genes=dlgnnorm_wrt_retnorm_translatome$downs,
dlgnnorm_wrt_retnorm_down_go <-species="mmusculus")
## Error in simple_gprofiler(sig_genes = dlgnnorm_wrt_retnorm_translatome$downs, : object 'dlgnnorm_wrt_retnorm_translatome' not found
$go dlgnnorm_wrt_retnorm_down_go
## Error in eval(expr, envir, enclos): object 'dlgnnorm_wrt_retnorm_down_go' not found
$pvalue_plots$bpp_plot_over dlgnnorm_wrt_retnorm_down_go
## Error in eval(expr, envir, enclos): object 'dlgnnorm_wrt_retnorm_down_go' not found
$pvalue_plots$ccp_plot_over dlgnnorm_wrt_retnorm_down_go
## Error in eval(expr, envir, enclos): object 'dlgnnorm_wrt_retnorm_down_go' not found
translatome_plotter(pair_mtrx,
dlgnko_wrt_retko_translatome <-x_axis="koret", y_axis="kodlgn")
## No fc/lfc was provided, defaulting to 10 fold.
## Error in as.data.frame(pair_mtrx[, c(x_axis, y_axis)]): object 'pair_mtrx' not found
$plot dlgnko_wrt_retko_translatome
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_retko_translatome' not found
simple_gprofiler(sig_genes=dlgnko_wrt_retko_translatome$ups,
dlgnko_wrt_retko_up_go <-species="mmusculus")
## Error in simple_gprofiler(sig_genes = dlgnko_wrt_retko_translatome$ups, : object 'dlgnko_wrt_retko_translatome' not found
$go dlgnko_wrt_retko_up_go
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_retko_up_go' not found
$kegg dlgnko_wrt_retko_up_go
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_retko_up_go' not found
$reac dlgnko_wrt_retko_up_go
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_retko_up_go' not found
simple_gprofiler(sig_genes=dlgnko_wrt_retko_translatome$downs,
dlgnko_wrt_retko_down_go <-species="mmusculus")
## Error in simple_gprofiler(sig_genes = dlgnko_wrt_retko_translatome$downs, : object 'dlgnko_wrt_retko_translatome' not found
$go dlgnko_wrt_retko_down_go
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_retko_down_go' not found
$kegg dlgnko_wrt_retko_down_go
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_retko_down_go' not found
$reac dlgnko_wrt_retko_down_go
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_retko_down_go' not found
translatome_plotter(pair_mtrx,
dlgnnorm_wrt_dlgnko_translatome <-x_axis="normdlgn",
y_axis="kodlgn")
## No fc/lfc was provided, defaulting to 10 fold.
## Error in as.data.frame(pair_mtrx[, c(x_axis, y_axis)]): object 'pair_mtrx' not found
$plot dlgnnorm_wrt_dlgnko_translatome
## Error in eval(expr, envir, enclos): object 'dlgnnorm_wrt_dlgnko_translatome' not found
simple_gprofiler(sig_genes=dlgnnorm_wrt_dlgnko_translatome$ups,
dlgnnorm_wrt_dlgnko_up_go <-species="mmusculus")
## Error in simple_gprofiler(sig_genes = dlgnnorm_wrt_dlgnko_translatome$ups, : object 'dlgnnorm_wrt_dlgnko_translatome' not found
simple_gprofiler(sig_genes=dlgnnorm_wrt_dlgnko_translatome$downs,
dlgnnorm_wrt_dlgnko_down_go <-species="mmusculus")
## Error in simple_gprofiler(sig_genes = dlgnnorm_wrt_dlgnko_translatome$downs, : object 'dlgnnorm_wrt_dlgnko_translatome' not found
$go dlgnnorm_wrt_dlgnko_down_go
## Error in eval(expr, envir, enclos): object 'dlgnnorm_wrt_dlgnko_down_go' not found
translatome_plotter(pair_mtrx,
dlgnko_wrt_scnko_translatome <-x_axis="kodlgn",
y_axis="koscn")
## No fc/lfc was provided, defaulting to 10 fold.
## Error in as.data.frame(pair_mtrx[, c(x_axis, y_axis)]): object 'pair_mtrx' not found
$plot dlgnko_wrt_scnko_translatome
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_scnko_translatome' not found
simple_gprofiler(sig_genes=dlgnko_wrt_scnko_translatome$ups,
dlgnko_wrt_scnko_up_go <-species="mmusculus")
## Error in simple_gprofiler(sig_genes = dlgnko_wrt_scnko_translatome$ups, : object 'dlgnko_wrt_scnko_translatome' not found
$go dlgnko_wrt_scnko_up_go
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_scnko_up_go' not found
$pvalue_plots$bpp_plot_over dlgnko_wrt_scnko_up_go
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_scnko_up_go' not found
simple_gprofiler(sig_genes=dlgnko_wrt_scnko_translatome$downs,
dlgnko_wrt_scnko_down_go <-species="mmusculus")
## Error in simple_gprofiler(sig_genes = dlgnko_wrt_scnko_translatome$downs, : object 'dlgnko_wrt_scnko_translatome' not found
$go dlgnko_wrt_scnko_down_go
## Error in eval(expr, envir, enclos): object 'dlgnko_wrt_scnko_down_go' not found
As I understand it, there is some interest in an ontology search using the ratio of ratios.
normko_retdlgn
ror <- ror >= 1
up_idx <- ror <= -1
down_idx <- ror[up_idx]
ror_up <-length(ror_up)
## [1] 1877
ror[down_idx]
ror_down <-length(ror_down)
## [1] 476
simple_gprofiler(
ror_gprofiler_up <-sig_genes=ror_up, species="mmusculus",
excel=glue::glue("excel/{rundate}mm_ror_gpfoiler_up-v{ver}.xlsx"))
## Performing gProfiler GO search of 1877 genes against mmusculus.
## GO search found 271 hits.
## Performing gProfiler KEGG search of 1877 genes against mmusculus.
## KEGG search found 8 hits.
## Performing gProfiler REAC search of 1877 genes against mmusculus.
## REAC search found 11 hits.
## Performing gProfiler MI search of 1877 genes against mmusculus.
## MI search found 0 hits.
## Performing gProfiler TF search of 1877 genes against mmusculus.
## TF search found 376 hits.
## Performing gProfiler CORUM search of 1877 genes against mmusculus.
## CORUM search found 1 hits.
## Performing gProfiler HP search of 1877 genes against mmusculus.
## HP search found 5 hits.
## Writing data to: excel/20201008mm_ror_gpfoiler_up-v20200114.xlsx.
## Finished writing data.
$pvalue_plots$mfp_plot_over ror_gprofiler_up
$pvalue_plots$bpp_plot_over ror_gprofiler_up
$pvalue_plots$ccp_plot_over ror_gprofiler_up
$pvalue_plots$tf_plot_over ror_gprofiler_up
## Warning: Removed 1 rows containing missing values (geom_col).
$pvalue_plots$hp_plot_over ror_gprofiler_up
simple_gprofiler(
ror_gprofiler_down <-sig_genes=ror_down, species="mmusculus",
excel=glue::glue("excel/{rundate}mm_ror_gpfoiler_down-v{ver}.xlsx"))
## Performing gProfiler GO search of 476 genes against mmusculus.
## GO search found 82 hits.
## Performing gProfiler KEGG search of 476 genes against mmusculus.
## KEGG search found 7 hits.
## Performing gProfiler REAC search of 476 genes against mmusculus.
## REAC search found 2 hits.
## Performing gProfiler MI search of 476 genes against mmusculus.
## MI search found 0 hits.
## Performing gProfiler TF search of 476 genes against mmusculus.
## TF search found 6 hits.
## Performing gProfiler CORUM search of 476 genes against mmusculus.
## CORUM search found 0 hits.
## Performing gProfiler HP search of 476 genes against mmusculus.
## HP search found 0 hits.
## Writing data to: excel/20201008mm_ror_gpfoiler_down-v20200114.xlsx.
## Finished writing data.
$pvalue_plots$mfp_plot_over ror_gprofiler_down
$pvalue_plots$bpp_plot_over ror_gprofiler_down
$pvalue_plots$reactome_plot_over ror_gprofiler_down
$pvalue_plots$ccp_plot_over ror_gprofiler_down
$pvalue_plots$tf_plot_over ror_gprofiler_down
::pander(sessionInfo())
pandermessage(paste0("This is hpgltools commit: ", get_git_commit()))
paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
this_save <-message(paste0("Saving to ", this_save))
sm(saveme(filename=this_save)) tmp <-
loadme(filename=this_save)