This will be a very minimal analysis until we get some replicates.
I am using mm38_100.
## My load_biomart_annotations() function defaults to human, so that will be quick.
## Load the mouse ensembl annotations and build a transcript-to-gene map
## for tximport/salmon. (Reconstructed: the assignment targets were
## displaced by extraction.)
mm_annot <- load_biomart_annotations(species="mmusculus")
## The biomart annotations file already exists, loading from it.
mm_annot <- mm_annot[["annotation"]]
## txid matches salmon's versioned transcript IDs: ENSMUST...<dot>version.
mm_annot[["txid"]] <- paste0(mm_annot[["ensembl_transcript_id"]], ".", mm_annot[["version"]])
rownames(mm_annot) <- make.names(mm_annot[["ensembl_gene_id"]], unique=TRUE)
tx_gene_map <- mm_annot[, c("txid", "ensembl_gene_id")]
So, I now have a table of mouse annotations.
I am going to write a quick sample sheet in the current working directory called 'all_samples.xlsx' and put the names of the count tables in it.
Here I combine the metadata, count data, and annotations.
It is worth noting that the gene IDs from htseq-count probably do not match the annotations retrieved because they are likely exon-based rather than gene-based. This is not really a problem, but don't forget it!
## Copy the annotations and prefix rownames with "gene:" so they match
## the IDs emitted by htseq-count, then build the hisat2 expressionset.
hisat_annot <- mm_annot
rownames(hisat_annot) <- paste0("gene:", rownames(hisat_annot))
mm38_hisat <- create_expt("sample_sheets/all_samples.xlsx",
                          gene_info=hisat_annot)
## Reading the sample metadata.
## Dropped 1 rows from the sample metadata because they were blank.
## The sample definitions comprises: 23 rows(samples) and 24 columns(metadata fields).
## Reading count tables.
## Reading count files with read.table().
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mmusculus_iprgc_2019/preprocessing/iprgc_01/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows.
## preprocessing/iprgc_02/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/iprgc_03/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/iprgc_04/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/iprgc_05/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/iprgc_06/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/iprgc_07/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/iprgc_08/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/202009/01_wt_retina/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/202009/02_wt_scn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/202009/03_wt_dlgn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/202009/04_ko_retina/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/202009/05_ko_scn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/202009/06_ko_dlgn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/202009/07_het1_retina/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/202009/08_het2_retina/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/202009/09_het3_retina/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/202009/10_het1_scn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/202009/11_het2_scn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/202009/12_het3_scn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/202009/13_het1_dlgn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/202009/14_het2_dlgn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## preprocessing/202009/15_het3_dlgn/outputs/hisat2_mm38_100/r1_ca.count_mm38_100_sno_gene_ID.count.xz contains 25765 rows and merges to 25765 rows.
## Finished reading count data.
## Matched 25419 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 25760 rows and 23 columns.
## Show raw library sizes, then make a fully normalized copy for PCA:
## low-count filter, quantile normalize, cpm convert, log2 transform.
plot_libsize(mm38_hisat)$plot
mm38_first <- normalize_expt(mm38_hisat, filter=TRUE, convert="cpm", norm="quant", transform="log2")
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 11159 low-count genes (14601 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 20 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
## Save the pre-batch-correction PCA, then redo normalization with
## svaseq batch estimation (no quantile norm this time).
pp(file="pre_pca.png", image=plot_pca(mm38_first)$plot)
## Writing the image to: pre_pca.png and calling dev.off().
mm38_norm <- normalize_expt(mm38_hisat, filter=TRUE, convert="cpm", batch="svaseq")
## This function will replace the expt$expressionset slot with:
## svaseq(cpm(cbcb(data)))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 11159 low-count genes (14601 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## Step 4: not transforming the data.
## Step 5: doing batch correction with svaseq.
## Using the current state of normalization.
## Passing the data to all_adjusters using the svaseq estimate type.
## batch_counts: Before batch/surrogate estimation, 309316 entries are x>1: 92%.
## batch_counts: Before batch/surrogate estimation, 2857 entries are x==0: 1%.
## batch_counts: Before batch/surrogate estimation, 23650 entries are 0<x<1: 7%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## There are 2108 (1%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## log2 transform the svaseq-corrected counts for plotting.
mm38_norm <- normalize_expt(mm38_norm, transform="log2")
## This function will replace the expt$expressionset slot with:
## log2(data)
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Filter is false, this should likely be set to something, good
## choices include cbcb, kofa, pofa (anything but FALSE). If you want this to
## stay FALSE, keep in mind that if other normalizations are performed, then the
## resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: not doing count filtering.
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: transforming the data with log2.
## transform_counts: Found 2108 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
## Save the post-batch-correction PCA, then create the salmon-based
## expressionset using the transcript-to-gene map built above.
pp(file="post_pca.png", image=plot_pca(mm38_norm)$plot)
## Writing the image to: post_pca.png and calling dev.off().
mm38_salmon <- create_expt("sample_sheets/all_samples.xlsx", tx_gene_map=tx_gene_map,
                           gene_info=mm_annot, file_column="salmonfile")
## Reading the sample metadata.
## Dropped 1 rows from the sample metadata because they were blank.
## The sample definitions comprises: 23 rows(samples) and 24 columns(metadata fields).
## Reading count tables.
## Using the transcript to gene mapping.
## Reading salmon data with tximport.
## Finished reading count data.
## Matched 6431 annotations and counts.
## Bringing together the count matrix and gene information.
## The mapped IDs are not the rownames of your gene information, changing them now.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 6431 rows and 23 columns.
## Transcript-level variant: key the annotations by versioned transcript
## ID and read the salmon quantifications without gene aggregation.
mmtx_annot <- mm_annot
rownames(mmtx_annot) <- mm_annot[["txid"]]
mm38_saltx <- sm(create_expt("sample_sheets/all_samples.xlsx",
                             gene_info=mmtx_annot, file_column="salmonfile"))
In this block I will calculate all the diagnostic plots, but not show them. I will show them next with a little annotation.
I will leave the output for the first of each invocation and silence it for the second.
## Diagnostic plots for the raw salmon data, then a normalized copy.
mm38_plots_sa <- sm(graph_metrics(mm38_salmon))
mm38_norm_sa <- normalize_expt(mm38_salmon, norm="quant", convert="cpm",
                               transform="log2", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 2223 low-count genes (4208 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 11131 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
## Diagnostic plots for the normalized salmon data.
mm38n_plots_sa <- graph_metrics(mm38_norm_sa)
## Graphing number of non-zero genes with respect to CPM by library.
## Graphing library sizes.
## Graphing a boxplot.
## Graphing a correlation heatmap.
## Graphing a standard median correlation.
## Performing correlation.
## Graphing a distance heatmap.
## Graphing a standard median distance.
## Performing distance.
## Graphing a PCA plot.
## Graphing a T-SNE plot.
## Plotting a density plot.
## Plotting a CV plot.
## Naively calculating coefficient of variation/dispersion with respect to condition.
## Finished calculating dispersion estimates.
## Plotting the representation of the top-n genes.
## Plotting the expression of the top-n PC loaded genes.
## Printing a color to condition legend.
## Display a selection of the salmon diagnostic plots.
mm38_plots_sa$legend
mm38_plots_sa$libsize
mm38_plots_sa$nonzero
mm38n_plots_sa$density
mm38n_plots_sa$pc_plot
## Same diagnostics for the hisat2 data: raw plots plus a normalized copy.
mm38_plots_hi <- sm(graph_metrics(mm38_hisat))
mm38_norm_hi <- normalize_expt(mm38_hisat, norm="quant", convert="cpm",
                               transform="log2", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 11159 low-count genes (14601 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 20 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
## Plots of the normalized hisat2 data, then a filtered-only copy that
## will be quantile/svaseq-normalized in the following step.
mm38n_plots_hi <- sm(graph_metrics(mm38_norm_hi))
mm38_normbatch_hi <- normalize_expt(mm38_hisat, filter=TRUE)
## This function will replace the expt$expressionset slot with:
## cbcb(data)
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 11159 low-count genes (14601 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
## Quantile normalize and svaseq-correct the filtered hisat2 counts.
mm38_normbatch_hi <- normalize_expt(mm38_normbatch_hi, norm="quant", batch="svaseq")
## This function will replace the expt$expressionset slot with:
## svaseq(quant(data))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Filter is false, this should likely be set to something, good
## choices include cbcb, kofa, pofa (anything but FALSE). If you want this to
## stay FALSE, keep in mind that if other normalizations are performed, then the
## resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Warning in normalize_expt(mm38_normbatch_hi, norm = "quant", batch = "svaseq"):
## Quantile normalization and sva do not always play well together.
## Step 1: not doing count filtering.
## Step 2: normalizing the data with quant.
## Step 3: not converting the data.
## Step 4: not transforming the data.
## Step 5: doing batch correction with svaseq.
## Using the current state of normalization.
## Passing the data to all_adjusters using the svaseq estimate type.
## batch_counts: Before batch/surrogate estimation, 332778 entries are x>1: 99%.
## batch_counts: Before batch/surrogate estimation, 20 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 3025 entries are 0<x<1: 1%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## There are 517 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## log2 transform the corrected counts for visualization.
mm38_normbatch_hi <- normalize_expt(mm38_normbatch_hi, transform="log2")
## This function will replace the expt$expressionset slot with:
## log2(data)
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Filter is false, this should likely be set to something, good
## choices include cbcb, kofa, pofa (anything but FALSE). If you want this to
## stay FALSE, keep in mind that if other normalizations are performed, then the
## resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: not doing count filtering.
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: transforming the data with log2.
## transform_counts: Found 517 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
## Display a selection of the hisat2 diagnostic plots.
mm38_plots_hi$libsize
mm38_plots_hi$nonzero
mm38n_plots_hi$density
mm38n_plots_hi$pc_plot
The only interesting DE I see in this is to compare the retinas to the dlgns. I can treat them as replicates and compare.
These differential expression analyses are EXPLICITLY NOT what you care about. However, they are useful for two purposes:
When we receive full replicate sets, this cheater method of encapsulating the data will no longer be required.
## Recondition the salmon expressionset on cell type, check the PCA,
## and run an exploratory all-vs-all DE with no batch in the model.
mm_sa <- set_expt_conditions(mm38_salmon, fact="celltype")
mm_norm_sa <- sm(normalize_expt(mm_sa, convert="rpkm", transform="log2", column="cds_length"))
plot_pca(mm_norm_sa)$plot
mm_de_sa <- all_pairwise(mm_sa, model_batch=FALSE)
## Contrasts of primary interest: each entry is c(numerator, denominator)
## for combine_de_tables(). Tissue comparisons within genotype, then
## genotype comparisons within tissue.
normal_keepers <- list(
  "wt_dlgnret" = c("wt_dlgn", "wt_retina"),
  "wt_scnret" = c("wt_scn", "wt_retina"),
  "wt_dlgnscn" = c("wt_dlgn", "wt_scn"),
  "het_dlgnret" = c("het_dlgn", "het_retina"),
  "het_scnret" = c("het_scn", "het_retina"),
  "het_dlgnscn" = c("het_dlgn", "het_scn"),
  "ko_dlgnret" = c("ko_dlgn", "ko_retina"),
  "ko_scnret" = c("ko_scn", "ko_retina"),
  "ko_dlgnscn" = c("ko_dlgn", "ko_scn"),
  "normret" = c("het_retina", "wt_retina"),
  "koret" = c("ko_retina", "wt_retina"),
  "normscn" = c("het_scn", "wt_scn"),
  "koscn" = c("ko_scn", "wt_scn"),
  "normdlgn" = c("het_dlgn", "wt_dlgn"),
  "kodlgn" = c("ko_dlgn", "wt_dlgn")
)
## Superset of normal_keepers, adding the contrasts-of-contrasts defined
## in extra_contrasts below (names must match those contrast names).
all_keepers <- list(
  "wt_dlgnret" = c("wt_dlgn", "wt_retina"),
  "wt_scnret" = c("wt_scn", "wt_retina"),
  "wt_dlgnscn" = c("wt_dlgn", "wt_scn"),
  "het_dlgnret" = c("het_dlgn", "het_retina"),
  "het_scnret" = c("het_scn", "het_retina"),
  "het_dlgnscn" = c("het_dlgn", "het_scn"),
  "ko_dlgnret" = c("ko_dlgn", "ko_retina"),
  "ko_scnret" = c("ko_scn", "ko_retina"),
  "ko_dlgnscn" = c("ko_dlgn", "ko_scn"),
  "normret" = c("het_retina", "wt_retina"),
  "koret" = c("ko_retina", "wt_retina"),
  "normscn" = c("het_scn", "wt_scn"),
  "koscn" = c("ko_scn", "wt_scn"),
  "normdlgn" = c("het_dlgn", "wt_dlgn"),
  "kodlgn" = c("ko_dlgn", "wt_dlgn"),
  "normdlgn_vs_normret" = c("normdlgn", "normret"),
  "normscn_vs_normret" = c("normscn", "normret"),
  "kodlgn_vs_koret" = c("kodlgn", "koret"),
  "koscn_vs_koret" = c("koscn", "koret"),
  "koscn_vs_kodlgn" = c("koscn", "kodlgn"),
  "koret_vs_normret" = c("ko_retina", "het_retina"),
  "koscn_vs_normscn" = c("ko_scn", "het_scn"),
  "kodlgn_vs_normdlgn" = c("ko_dlgn", "het_dlgn"),
  "normko_retdlgn" = c("normdlgn_vs_normret", "kodlgn_vs_koret"),
  "normko_retscn" = c("normscn_vs_normret", "koscn_vs_koret"))
## Extra limma/edgeR contrasts-of-contrasts: each compares a
## genotype-difference across tissues (interaction-style contrasts).
extra_contrasts <- "normdlgn_vs_normret = (het_dlgn-wt_dlgn)-(het_retina-wt_retina),
  normscn_vs_normret = (het_scn-wt_scn)-(het_retina-wt_retina),
  kodlgn_vs_koret = (ko_dlgn-wt_dlgn)-(ko_retina-wt_retina),
  koscn_vs_koret = (ko_scn-wt_scn)-(ko_retina-wt_retina),
  koscn_vs_kodlgn = (ko_scn-wt_scn)-(ko_dlgn-wt_dlgn),
  normko_vs_retdlgn = ((het_dlgn-wt_dlgn)-(het_retina-wt_retina)) - ((ko_dlgn-wt_dlgn)-(ko_retina-wt_retina)),
  normko_vs_retscn = ((het_scn-wt_scn)-(het_retina-wt_retina)) - ((ko_scn-wt_scn)-(ko_retina-wt_retina))"
## Low-count-filtered hisat2 counts; the DE tools do their own normalization.
mm_filt <- normalize_expt(mm38_hisat, filter=TRUE)
## This function will replace the expt$expressionset slot with:
## cbcb(data)
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 11159 low-count genes (14601 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
## Run limma (with the extra interaction contrasts) and DESeq2 pairwise
## comparisons, both with svaseq surrogate estimates in the model.
mm_limma <- sm(limma_pairwise(mm_filt, model_batch="svaseq", extra_contrasts=extra_contrasts))
mm_deseq <- deseq_pairwise(mm_filt, model_batch="svaseq")
## Hey you, use deseq2 pairwise.
## Starting DESeq2 pairwise comparisons.
## The data should be suitable for EdgeR/DESeq/EBSeq. If they freak out, check the state of the count table and ensure that it is in integer counts.
## Extracting surrogate estimates from svaseq and adding them to the model.
## batch_counts: Before batch/surrogate estimation, 331257 entries are x>1: 99%.
## batch_counts: Before batch/surrogate estimation, 2857 entries are x==0: 1%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## Choosing the non-intercept containing model.
## DESeq2 step 1/5: Including a matrix of batch estimates in the deseq model.
## converting counts to integer mode
## DESeq2 step 2/5: Estimate size factors.
## DESeq2 step 3/5: Estimate dispersions.
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## Using a parametric fitting seems to have worked.
## DESeq2 step 4/5: nbinomWaldTest.
## edgeR pairwise with the same model; basic/ebseq are kept for reference
## but disabled. Finally write the raw data/metrics workbook.
mm_edger <- sm(edger_pairwise(mm_filt, model_batch="svaseq", extra_contrasts=extra_contrasts))
## mm_basic <- sm(basic_pairwise(mm_filt, model_batch="svaseq", extra_contrasts=extra_contrasts))
## mm_ebseq <- ebseq_pairwise(mm_filt, model_batch="svaseq", extra_contrasts=extra_contrasts)
mm_write <- write_expt(mm_filt, excel="excel/raw_data_and_metrics_20201030.xlsx")
## Writing the first sheet, containing a legend and some summary data.
## Writing the raw reads.
## Graphing the raw reads.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning in doTryCatch(return(expr), name, parentenv, handler): display list
## redraw incomplete
## Warning in doTryCatch(return(expr), name, parentenv, handler): display list
## redraw incomplete
## Attempting mixed linear model with: ~ condition + batch
## Fitting the expressionset to the model, this is slow.
## Loading required package: Matrix
##
## Total:82 s
## Placing factor: condition at the beginning of the model.
## Writing the normalized reads.
## Graphing the normalized reads.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning in doTryCatch(return(expr), name, parentenv, handler): display list
## redraw incomplete
## Warning in doTryCatch(return(expr), name, parentenv, handler): display list
## redraw incomplete
## Attempting mixed linear model with: ~ condition + batch
## Fitting the expressionset to the model, this is slow.
##
## Total:84 s
## Placing factor: condition at the beginning of the model.
## Writing the median reads by factor.
## Full all-vs-all DE across every method (minus ebseq), svaseq in the model.
mm_de_normal <- all_pairwise(mm_filt, model_batch="svaseq", parallel=FALSE, do_ebseq=FALSE)
## batch_counts: Before batch/surrogate estimation, 331257 entries are x>1: 99%.
## batch_counts: Before batch/surrogate estimation, 2857 entries are x==0: 1%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## Plotting a PCA before surrogate/batch inclusion.
## Not putting labels on the PC plot.
## Using svaseq to visualize before/after batch inclusion.
## Performing a test normalization with: raw
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 0 low-count genes (14601 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 2857 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with svaseq.
## Using the current state of normalization.
## Passing the data to all_adjusters using the svaseq estimate type.
## batch_counts: Before batch/surrogate estimation, 309316 entries are x>1: 92%.
## batch_counts: Before batch/surrogate estimation, 2857 entries are x==0: 1%.
## batch_counts: Before batch/surrogate estimation, 23650 entries are 0<x<1: 7%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## There are 2108 (1%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Not putting labels on the PC plot.
## Starting basic_pairwise().
## Starting basic pairwise comparison.
## Basic step 0/3: Normalizing data.
## Basic step 0/3: Converting data.
## Basic step 0/3: Transforming data.
## Basic step 1/3: Creating mean and variance tables.
## Basic step 2/3: Performing 45 comparisons.
## Basic step 3/3: Creating faux DE Tables.
## Basic: Returning tables.
## Starting deseq_pairwise().
## Starting DESeq2 pairwise comparisons.
## The data should be suitable for EdgeR/DESeq/EBSeq. If they freak out, check the state of the count table and ensure that it is in integer counts.
## Including batch estimates from sva/ruv/pca in the model.
## Choosing the non-intercept containing model.
## DESeq2 step 1/5: Including a matrix of batch estimates in the deseq model.
## converting counts to integer mode
## DESeq2 step 2/5: Estimate size factors.
## DESeq2 step 3/5: Estimate dispersions.
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## Using a parametric fitting seems to have worked.
## DESeq2 step 4/5: nbinomWaldTest.
## Starting edger_pairwise().
## Starting edgeR pairwise comparisons.
## The data should be suitable for EdgeR/DESeq/EBSeq. If they freak out, check the state of the count table and ensure that it is in integer counts.
## Including batch estimates from sva/ruv/pca in the model.
## Choosing the non-intercept containing model.
## EdgeR step 1/9: Importing and normalizing data.
## EdgeR step 2/9: Estimating the common dispersion.
## EdgeR step 3/9: Estimating dispersion across genes.
## EdgeR step 4/9: Estimating GLM Common dispersion.
## EdgeR step 5/9: Estimating GLM Trended dispersion.
## EdgeR step 6/9: Estimating GLM Tagged dispersion.
## EdgeR step 7/9: Running glmFit, switch to glmQLFit by changing the argument 'edger_test'.
## EdgeR step 8/9: Making pairwise contrasts.
## Starting limma_pairwise().
## Starting limma pairwise comparison.
## libsize was not specified, this parameter has profound effects on limma's result.
## Using the libsize from expt$best_libsize.
## Limma step 1/6: choosing model.
## Including batch estimates from sva/ruv/pca in the model.
## Choosing the non-intercept containing model.
## Limma step 2/6: running limma::voom(), switch with the argument 'which_voom'.
## Using normalize.method=quantile for voom.
## Limma step 3/6: running lmFit with method: ls.
## Limma step 4/6: making and fitting contrasts with no intercept. (~ 0 + factors)
## Limma step 5/6: Running eBayes with robust=FALSE and trend=FALSE.
## Limma step 6/6: Writing limma outputs.
## Limma step 6/6: 1/36: Creating table: het_retina_vs_het_dlgn. Adjust=BH
## Limma step 6/6: 2/36: Creating table: het_scn_vs_het_dlgn. Adjust=BH
## Limma step 6/6: 3/36: Creating table: ko_dlgn_vs_het_dlgn. Adjust=BH
## Limma step 6/6: 4/36: Creating table: ko_retina_vs_het_dlgn. Adjust=BH
## Limma step 6/6: 5/36: Creating table: ko_scn_vs_het_dlgn. Adjust=BH
## Limma step 6/6: 6/36: Creating table: wt_dlgn_vs_het_dlgn. Adjust=BH
## Limma step 6/6: 7/36: Creating table: wt_retina_vs_het_dlgn. Adjust=BH
## Limma step 6/6: 8/36: Creating table: wt_scn_vs_het_dlgn. Adjust=BH
## Limma step 6/6: 9/36: Creating table: het_scn_vs_het_retina. Adjust=BH
## Limma step 6/6: 10/36: Creating table: ko_dlgn_vs_het_retina. Adjust=BH
## Limma step 6/6: 11/36: Creating table: ko_retina_vs_het_retina. Adjust=BH
## Limma step 6/6: 12/36: Creating table: ko_scn_vs_het_retina. Adjust=BH
## Limma step 6/6: 13/36: Creating table: wt_dlgn_vs_het_retina. Adjust=BH
## Limma step 6/6: 14/36: Creating table: wt_retina_vs_het_retina. Adjust=BH
## Limma step 6/6: 15/36: Creating table: wt_scn_vs_het_retina. Adjust=BH
## Limma step 6/6: 16/36: Creating table: ko_dlgn_vs_het_scn. Adjust=BH
## Limma step 6/6: 17/36: Creating table: ko_retina_vs_het_scn. Adjust=BH
## Limma step 6/6: 18/36: Creating table: ko_scn_vs_het_scn. Adjust=BH
## Limma step 6/6: 19/36: Creating table: wt_dlgn_vs_het_scn. Adjust=BH
## Limma step 6/6: 20/36: Creating table: wt_retina_vs_het_scn. Adjust=BH
## Limma step 6/6: 21/36: Creating table: wt_scn_vs_het_scn. Adjust=BH
## Limma step 6/6: 22/36: Creating table: ko_retina_vs_ko_dlgn. Adjust=BH
## Limma step 6/6: 23/36: Creating table: ko_scn_vs_ko_dlgn. Adjust=BH
## Limma step 6/6: 24/36: Creating table: wt_dlgn_vs_ko_dlgn. Adjust=BH
## Limma step 6/6: 25/36: Creating table: wt_retina_vs_ko_dlgn. Adjust=BH
## Limma step 6/6: 26/36: Creating table: wt_scn_vs_ko_dlgn. Adjust=BH
## Limma step 6/6: 27/36: Creating table: ko_scn_vs_ko_retina. Adjust=BH
## Limma step 6/6: 28/36: Creating table: wt_dlgn_vs_ko_retina. Adjust=BH
## Limma step 6/6: 29/36: Creating table: wt_retina_vs_ko_retina. Adjust=BH
## Limma step 6/6: 30/36: Creating table: wt_scn_vs_ko_retina. Adjust=BH
## Limma step 6/6: 31/36: Creating table: wt_dlgn_vs_ko_scn. Adjust=BH
## Limma step 6/6: 32/36: Creating table: wt_retina_vs_ko_scn. Adjust=BH
## Limma step 6/6: 33/36: Creating table: wt_scn_vs_ko_scn. Adjust=BH
## Limma step 6/6: 34/36: Creating table: wt_retina_vs_wt_dlgn. Adjust=BH
## Limma step 6/6: 35/36: Creating table: wt_scn_vs_wt_dlgn. Adjust=BH
## Limma step 6/6: 36/36: Creating table: wt_scn_vs_wt_retina. Adjust=BH
## Limma step 6/6: 1/9: Creating table: het_dlgn. Adjust=BH
## Limma step 6/6: 2/9: Creating table: het_retina. Adjust=BH
## Limma step 6/6: 3/9: Creating table: het_scn. Adjust=BH
## Limma step 6/6: 4/9: Creating table: ko_dlgn. Adjust=BH
## Limma step 6/6: 5/9: Creating table: ko_retina. Adjust=BH
## Limma step 6/6: 6/9: Creating table: ko_scn. Adjust=BH
## Limma step 6/6: 7/9: Creating table: wt_dlgn. Adjust=BH
## Limma step 6/6: 8/9: Creating table: wt_retina. Adjust=BH
## Limma step 6/6: 9/9: Creating table: wt_scn. Adjust=BH
## Comparing analyses.
## Combine the pairwise DE results (limma/edgeR/DESeq2/basic) into one xlsx,
## keeping only the contrasts named in normal_keepers.
mm_de_normal_tables <- combine_de_tables(
  mm_de_normal,
  excel="excel/only_pairwise_de_20201030.xlsx",
  keepers=normal_keepers)
## Deleting the file excel/only_pairwise_de_20201030.xlsx before writing the tables.
## Writing a legend of columns.
## Printing a pca plot before/after surrogates/batch estimation.
## Working on 1/15: wt_dlgnret which is: wt_dlgn/wt_retina.
## Found inverse table with wt_retina_vs_wt_dlgn
## The ebseq table is null.
## Working on 2/15: wt_scnret which is: wt_scn/wt_retina.
## Found table with wt_scn_vs_wt_retina
## The ebseq table is null.
## Working on 3/15: wt_dlgnscn which is: wt_dlgn/wt_scn.
## Found inverse table with wt_scn_vs_wt_dlgn
## The ebseq table is null.
## Working on 4/15: het_dlgnret which is: het_dlgn/het_retina.
## Found inverse table with het_retina_vs_het_dlgn
## The ebseq table is null.
## Working on 5/15: het_scnret which is: het_scn/het_retina.
## Found table with het_scn_vs_het_retina
## The ebseq table is null.
## Working on 6/15: het_dlgnscn which is: het_dlgn/het_scn.
## Found inverse table with het_scn_vs_het_dlgn
## The ebseq table is null.
## Working on 7/15: ko_dlgnret which is: ko_dlgn/ko_retina.
## Found inverse table with ko_retina_vs_ko_dlgn
## The ebseq table is null.
## Working on 8/15: ko_scnret which is: ko_scn/ko_retina.
## Found table with ko_scn_vs_ko_retina
## The ebseq table is null.
## Working on 9/15: ko_dlgnscn which is: ko_dlgn/ko_scn.
## Found inverse table with ko_scn_vs_ko_dlgn
## The ebseq table is null.
## Working on 10/15: normret which is: het_retina/wt_retina.
## Found inverse table with wt_retina_vs_het_retina
## The ebseq table is null.
## Working on 11/15: koret which is: ko_retina/wt_retina.
## Found inverse table with wt_retina_vs_ko_retina
## The ebseq table is null.
## Working on 12/15: normscn which is: het_scn/wt_scn.
## Found inverse table with wt_scn_vs_het_scn
## The ebseq table is null.
## Working on 13/15: koscn which is: ko_scn/wt_scn.
## Found inverse table with wt_scn_vs_ko_scn
## The ebseq table is null.
## Working on 14/15: normdlgn which is: het_dlgn/wt_dlgn.
## Found inverse table with wt_dlgn_vs_het_dlgn
## The ebseq table is null.
## Working on 15/15: kodlgn which is: ko_dlgn/wt_dlgn.
## Found inverse table with wt_dlgn_vs_ko_dlgn
## The ebseq table is null.
## Adding venn plots for wt_dlgnret.
## Limma expression coefficients for wt_dlgnret; R^2: 0.748; equation: y = 0.834x + 0.769
## Deseq expression coefficients for wt_dlgnret; R^2: 0.75; equation: y = 0.833x + 1.47
## Edger expression coefficients for wt_dlgnret; R^2: 0.751; equation: y = 0.835x + 1.85
## Adding venn plots for wt_scnret.
## Limma expression coefficients for wt_scnret; R^2: 0.709; equation: y = 0.802x + 0.844
## Deseq expression coefficients for wt_scnret; R^2: 0.719; equation: y = 0.831x + 1.44
## Edger expression coefficients for wt_scnret; R^2: 0.719; equation: y = 0.831x + 1.86
## Adding venn plots for wt_dlgnscn.
## Limma expression coefficients for wt_dlgnscn; R^2: 0.897; equation: y = 0.905x + 0.445
## Deseq expression coefficients for wt_dlgnscn; R^2: 0.898; equation: y = 0.938x + 0.539
## Edger expression coefficients for wt_dlgnscn; R^2: 0.898; equation: y = 0.939x + 0.573
## Adding venn plots for het_dlgnret.
## Limma expression coefficients for het_dlgnret; R^2: 0.755; equation: y = 0.844x + 0.74
## Deseq expression coefficients for het_dlgnret; R^2: 0.752; equation: y = 0.851x + 1.34
## Edger expression coefficients for het_dlgnret; R^2: 0.752; equation: y = 0.852x + 1.42
## Adding venn plots for het_scnret.
## Limma expression coefficients for het_scnret; R^2: 0.73; equation: y = 0.803x + 0.859
## Deseq expression coefficients for het_scnret; R^2: 0.737; equation: y = 0.814x + 1.65
## Edger expression coefficients for het_scnret; R^2: 0.738; equation: y = 0.816x + 1.77
## Adding venn plots for het_dlgnscn.
## Limma expression coefficients for het_dlgnscn; R^2: 0.914; equation: y = 0.92x + 0.37
## Deseq expression coefficients for het_dlgnscn; R^2: 0.91; equation: y = 0.941x + 0.57
## Edger expression coefficients for het_dlgnscn; R^2: 0.91; equation: y = 0.942x + 0.552
## Adding venn plots for ko_dlgnret.
## Limma expression coefficients for ko_dlgnret; R^2: 0.726; equation: y = 0.832x + 0.794
## Deseq expression coefficients for ko_dlgnret; R^2: 0.707; equation: y = 0.816x + 1.64
## Edger expression coefficients for ko_dlgnret; R^2: 0.708; equation: y = 0.82x + 1.9
## Adding venn plots for ko_scnret.
## Limma expression coefficients for ko_scnret; R^2: 0.748; equation: y = 0.832x + 0.729
## Deseq expression coefficients for ko_scnret; R^2: 0.729; equation: y = 0.833x + 1.48
## Edger expression coefficients for ko_scnret; R^2: 0.729; equation: y = 0.835x + 1.73
## Adding venn plots for ko_dlgnscn.
## Limma expression coefficients for ko_dlgnscn; R^2: 0.925; equation: y = 0.956x + 0.21
## Deseq expression coefficients for ko_dlgnscn; R^2: 0.914; equation: y = 0.965x + 0.342
## Edger expression coefficients for ko_dlgnscn; R^2: 0.915; equation: y = 0.964x + 0.355
## Adding venn plots for normret.
## Limma expression coefficients for normret; R^2: 0.974; equation: y = 0.997x + 0.0231
## Deseq expression coefficients for normret; R^2: 0.975; equation: y = 0.971x + 0.304
## Edger expression coefficients for normret; R^2: 0.975; equation: y = 0.971x + 0.354
## Adding venn plots for koret.
## Limma expression coefficients for koret; R^2: 0.952; equation: y = 0.98x + 0.0965
## Deseq expression coefficients for koret; R^2: 0.954; equation: y = 0.941x + 0.573
## Edger expression coefficients for koret; R^2: 0.954; equation: y = 0.941x + 0.69
## Adding venn plots for normscn.
## Limma expression coefficients for normscn; R^2: 0.975; equation: y = 1x - 0.0313
## Deseq expression coefficients for normscn; R^2: 0.974; equation: y = 1x - 0.05
## Edger expression coefficients for normscn; R^2: 0.974; equation: y = 1x - 0.0377
## Adding venn plots for koscn.
## Limma expression coefficients for koscn; R^2: 0.966; equation: y = 0.978x + 0.0697
## Deseq expression coefficients for koscn; R^2: 0.967; equation: y = 0.956x + 0.362
## Edger expression coefficients for koscn; R^2: 0.967; equation: y = 0.957x + 0.419
## Adding venn plots for normdlgn.
## Limma expression coefficients for normdlgn; R^2: 0.966; equation: y = 0.997x + 0.0135
## Deseq expression coefficients for normdlgn; R^2: 0.967; equation: y = 0.988x + 0.152
## Edger expression coefficients for normdlgn; R^2: 0.967; equation: y = 0.988x + 0.123
## Adding venn plots for kodlgn.
## Limma expression coefficients for kodlgn; R^2: 0.97; equation: y = 1x - 0.0142
## Deseq expression coefficients for kodlgn; R^2: 0.972; equation: y = 0.968x + 0.329
## Edger expression coefficients for kodlgn; R^2: 0.972; equation: y = 0.968x + 0.313
## Writing summary information, compare_plot is: TRUE.
## Performing save of excel/only_pairwise_de_20201030.xlsx.
## Pull the significant genes (default logFC/adj.p thresholds) from the
## combined tables and write them to their own workbook.
mm_de_normal_sig <- extract_significant_genes(
  mm_de_normal_tables,
  excel="excel/only_pairwise_sig_20201030.xlsx")
## Writing a legend of columns.
## Did not find the ebseq_logfc, skipping ebseq.
## Printing significant genes to the file: excel/only_pairwise_sig_20201030.xlsx
## 1/15: Creating significant table up_limma_wt_dlgnret
## 2/15: Creating significant table up_limma_wt_scnret
## 3/15: Creating significant table up_limma_wt_dlgnscn
## 4/15: Creating significant table up_limma_het_dlgnret
## 5/15: Creating significant table up_limma_het_scnret
## 6/15: Creating significant table up_limma_het_dlgnscn
## 7/15: Creating significant table up_limma_ko_dlgnret
## 8/15: Creating significant table up_limma_ko_scnret
## 9/15: Creating significant table up_limma_ko_dlgnscn
## The up table normret is empty.
## The down table normret is empty.
## The up table koret is empty.
## The down table koret is empty.
## The up table normscn is empty.
## The down table normscn is empty.
## The up table koscn is empty.
## The down table koscn is empty.
## The up table normdlgn is empty.
## The up table kodlgn is empty.
## Printing significant genes to the file: excel/only_pairwise_sig_20201030.xlsx
## 1/15: Creating significant table up_edger_wt_dlgnret
## 2/15: Creating significant table up_edger_wt_scnret
## 3/15: Creating significant table up_edger_wt_dlgnscn
## 4/15: Creating significant table up_edger_het_dlgnret
## 5/15: Creating significant table up_edger_het_scnret
## 6/15: Creating significant table up_edger_het_dlgnscn
## 7/15: Creating significant table up_edger_ko_dlgnret
## 8/15: Creating significant table up_edger_ko_scnret
## 9/15: Creating significant table up_edger_ko_dlgnscn
## 10/15: Creating significant table up_edger_normret
## The down table normret is empty.
## 11/15: Creating significant table up_edger_koret
## The up table normscn is empty.
## The down table normscn is empty.
## 13/15: Creating significant table up_edger_koscn
## The down table koscn is empty.
## The up table normdlgn is empty.
## 15/15: Creating significant table up_edger_kodlgn
## Printing significant genes to the file: excel/only_pairwise_sig_20201030.xlsx
## 1/15: Creating significant table up_deseq_wt_dlgnret
## 2/15: Creating significant table up_deseq_wt_scnret
## 3/15: Creating significant table up_deseq_wt_dlgnscn
## 4/15: Creating significant table up_deseq_het_dlgnret
## 5/15: Creating significant table up_deseq_het_scnret
## 6/15: Creating significant table up_deseq_het_dlgnscn
## 7/15: Creating significant table up_deseq_ko_dlgnret
## 8/15: Creating significant table up_deseq_ko_scnret
## 9/15: Creating significant table up_deseq_ko_dlgnscn
## 10/15: Creating significant table up_deseq_normret
## The down table normret is empty.
## 11/15: Creating significant table up_deseq_koret
## The up table normscn is empty.
## The down table normscn is empty.
## 13/15: Creating significant table up_deseq_koscn
## The down table koscn is empty.
## 14/15: Creating significant table up_deseq_normdlgn
## 15/15: Creating significant table up_deseq_kodlgn
## Printing significant genes to the file: excel/only_pairwise_sig_20201030.xlsx
## The up table wt_dlgnret is empty.
## The down table wt_dlgnret is empty.
## The up table wt_scnret is empty.
## The down table wt_scnret is empty.
## The up table wt_dlgnscn is empty.
## The down table wt_dlgnscn is empty.
## 4/15: Creating significant table up_basic_het_dlgnret
## 5/15: Creating significant table up_basic_het_scnret
## 6/15: Creating significant table up_basic_het_dlgnscn
## The up table ko_dlgnret is empty.
## The down table ko_dlgnret is empty.
## The up table ko_scnret is empty.
## The down table ko_scnret is empty.
## The up table ko_dlgnscn is empty.
## The down table ko_dlgnscn is empty.
## The up table normret is empty.
## The down table normret is empty.
## The up table koret is empty.
## The down table koret is empty.
## The up table normscn is empty.
## The down table normscn is empty.
## The up table koscn is empty.
## The down table koscn is empty.
## The up table normdlgn is empty.
## The down table normdlgn is empty.
## The up table kodlgn is empty.
## The down table kodlgn is empty.
## Adding significance bar plots.
## Extract the DESeq2 volcano plots for the three het tissue contrasts,
## attach the gene descriptions as hover labels, and export interactive
## (plotly) html versions via ggplt().
vol1 <- mm_de_normal_tables[["plots"]][["het_dlgnret"]][["deseq_vol_plots"]][["plot"]]
annot1 <- mm_de_normal_tables[["data"]][["het_dlgnret"]][["description"]]
vol1[["data"]][["label"]] <- annot1
vol1_ggplotly <- ggplt(gg=vol1, filename="plots/volcano_dlgn_vs_retina.html")
vol2 <- mm_de_normal_tables[["plots"]][["het_scnret"]][["deseq_vol_plots"]][["plot"]]
annot2 <- mm_de_normal_tables[["data"]][["het_scnret"]][["description"]]
vol2[["data"]][["label"]] <- annot2
## NOTE(review): vol2/vol3 write to the working directory while vol1 writes
## into plots/ -- confirm whether that inconsistency is intended.
vol2_ggplotly <- ggplt(gg=vol2, filename="volcano_scn_vs_retina.html")
vol3 <- mm_de_normal_tables[["plots"]][["het_dlgnscn"]][["deseq_vol_plots"]][["plot"]]
annot3 <- mm_de_normal_tables[["data"]][["het_dlgnscn"]][["description"]]
vol3[["data"]][["label"]] <- annot3
vol3_ggplotly <- ggplt(gg=vol3, filename="volcano_dlgn_vs_scn.html")
sm(all_pairwise(mm_filt, model_batch="svaseq", parallel=FALSE, extra_contrasts=extras)) mm_de_hi <-
## Error in basic_pairwise(...) : object 'extras' not found
## Error in make_pairwise_contrasts(model_data, conditions, extra_contrasts = extra_contrasts, :
## object 'extras' not found
## Error in make_pairwise_contrasts(model_data, conditions, extra_contrasts = extra_contrasts, :
## object 'extras' not found
## Error in make_pairwise_contrasts(model = chosen_model, conditions = conditions, :
## object 'extras' not found
## Error in basic_pairwise(...) : object 'extras' not found
## Error in make_pairwise_contrasts(model_data, conditions, extra_contrasts = extra_contrasts, :
## object 'extras' not found
## Error in make_pairwise_contrasts(model_data, conditions, extra_contrasts = extra_contrasts, :
## object 'extras' not found
## Error in make_pairwise_contrasts(model = chosen_model, conditions = conditions, :
## object 'extras' not found
## Error in correlate_de_tables(results, annot_df = annot_df, extra_contrasts = extra_contrasts): object 'extras' not found
sm(combine_de_tables(mm_de_hi, excel="excel/testing_20201030.xlsx", keepers=keepers)) mm_de_tables <-
## Error in combine_de_tables(mm_de_hi, excel = "excel/testing_20201030.xlsx", : object 'mm_de_hi' not found
sm(extract_significant_genes(mm_de_tables, excel="excel/testing_sig_202010.xlsx")) mm_de_sig <-
## Error in extract_significant_genes(mm_de_tables, excel = "excel/testing_sig_202010.xlsx"): object 'mm_de_tables' not found
## I changed the sample sheet so that the column is renamed to 'location'
## alt_model <- "~ location + location:genotype"
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:Matrix':
##
## expand
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: DelayedArray
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:hpgltools':
##
## anyMissing, rowMedians
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following objects are masked from 'package:base':
##
## aperm, apply, rowsum
## Build a DESeq2 dataset by hand with an interaction model.
## NOTE(review): the comment above says the column was renamed to
## 'location', yet this model string references 'celltype' -- confirm
## which column name is current (the design below uses 'location').
alt_model <- "~ celltype + celltype:genotype"
metadata <- pData(mm_filt)
## Explicitly set factor level order so 'retina' and 'wt' are the
## reference levels for the interaction coefficients.
metadata[["location"]] <- factor(metadata[["location"]], levels=c("retina", "scn", "dlgn"))
metadata[["genotype"]] <- factor(metadata[["genotype"]], levels=c("wt", "ko", "het"))
dds <- DESeqDataSetFromMatrix(countData=exprs(mm_filt),
                              colData=metadata,
                              design=~0+genotype+location+genotype:location)
## converting counts to integer mode
DESeq(dds) dds_run <-
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
resultsNames(dds_run)
## [1] "genotypewt" "genotypeko"
## [3] "genotypehet" "locationscn"
## [5] "locationdlgn" "genotypeko.locationscn"
## [7] "genotypehet.locationscn" "genotypeko.locationdlgn"
## [9] "genotypehet.locationdlgn"
## Keep the edgeR result tables around for comparison against the
## hand-rolled DESeq2 interaction contrasts below.
edger_tables <- mm_edger$all_tables
## Compare the DESeq2 interaction contrast (ko:scn vs ko:dlgn) against the
## corresponding edgeR pairwise table by correlating the log fold changes.
dds_result <- results(dds_run, contrast=list("genotypeko.locationscn",
                                             "genotypeko.locationdlgn"))
deseq_table <- as.data.frame(dds_result)
expected_edger <- "koscn_vs_kodlgn"
edger_table <- edger_tables[[expected_edger]]
merged <- merge(deseq_table, edger_table, by="row.names")
cor.test(merged[["log2FoldChange"]], merged[["logFC"]])
##
## Pearson's product-moment correlation
##
## data: merged[["log2FoldChange"]] and merged[["logFC"]]
## t = 232, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8838 0.8907
## sample estimates:
## cor
## 0.8873
## het:scn interaction coefficient vs. the edgeR het scn/retina contrast.
dds_result <- results(dds_run, contrast=list("genotypehet.locationscn"))
deseq_table <- as.data.frame(dds_result)
expected_edger <- "normscn_vs_normret"
edger_table <- edger_tables[[expected_edger]]
merged <- merge(deseq_table, edger_table, by="row.names")
cor.test(merged[["log2FoldChange"]], merged[["logFC"]])
##
## Pearson's product-moment correlation
##
## data: merged[["log2FoldChange"]] and merged[["logFC"]]
## t = 229, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8810 0.8881
## sample estimates:
## cor
## 0.8846
## ko:scn interaction coefficient vs. the edgeR ko scn/retina contrast.
dds_result <- results(dds_run, contrast=list("genotypeko.locationscn"))
deseq_table <- as.data.frame(dds_result)
expected_edger <- "koscn_vs_koret"
edger_table <- edger_tables[[expected_edger]]
merged <- merge(deseq_table, edger_table, by="row.names")
cor.test(merged[["log2FoldChange"]], merged[["logFC"]])
##
## Pearson's product-moment correlation
##
## data: merged[["log2FoldChange"]] and merged[["logFC"]]
## t = 222, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8745 0.8819
## sample estimates:
## cor
## 0.8783
## ko:dlgn interaction coefficient vs. the edgeR ko dlgn/retina contrast.
dds_result <- results(dds_run, contrast=list("genotypeko.locationdlgn"))
deseq_table <- as.data.frame(dds_result)
expected_edger <- "kodlgn_vs_koret"
edger_table <- edger_tables[[expected_edger]]
merged <- merge(deseq_table, edger_table, by="row.names")
cor.test(merged[["log2FoldChange"]], merged[["logFC"]])
##
## Pearson's product-moment correlation
##
## data: merged[["log2FoldChange"]] and merged[["logFC"]]
## t = 154, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7797 0.7921
## sample estimates:
## cor
## 0.7859
## het:dlgn interaction coefficient vs. the edgeR het dlgn/retina contrast.
dds_result <- results(dds_run, contrast=list("genotypehet.locationdlgn"))
deseq_table <- as.data.frame(dds_result)
expected_edger <- "normdlgn_vs_normret"
edger_table <- edger_tables[[expected_edger]]
merged <- merge(deseq_table, edger_table, by="row.names")
cor.test(merged[["log2FoldChange"]], merged[["logFC"]])
##
## Pearson's product-moment correlation
##
## data: merged[["log2FoldChange"]] and merged[["logFC"]]
## t = 125, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7112 0.7269
## sample estimates:
## cor
## 0.7191
## NOTE(review): this stanza repeats the het:scn comparison performed
## above (same contrast, same edgeR table, identical output) -- confirm
## whether a different contrast was intended here.
dds_result <- results(dds_run, contrast=list("genotypehet.locationscn"))
deseq_table <- as.data.frame(dds_result)
expected_edger <- "normscn_vs_normret"
edger_table <- edger_tables[[expected_edger]]
merged <- merge(deseq_table, edger_table, by="row.names")
cor.test(merged[["log2FoldChange"]], merged[["logFC"]])
##
## Pearson's product-moment correlation
##
## data: merged[["log2FoldChange"]] and merged[["logFC"]]
## t = 229, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8810 0.8881
## sample estimates:
## cor
## 0.8846
For the normal contrasts, we will not be using an interaction model; instead we use the 'condition' factor in the metadata, so we can just use deseq_pairwise() and edger_pairwise().
I am doing this primarily to get a sense of how similar normal things are vs. my putatively matching set of interaction/ror contrasts.
## Sanity check: for each standard (non-interaction) contrast, correlate
## the DESeq2 logFC column against the edgeR logFC column.
## (seq_along() replaces 1:length(), which misbehaves on empty lists.)
deseq_tables <- mm_deseq$all_tables
for (t in seq_along(deseq_tables)) {
  tname <- names(deseq_tables)[t]
  message("Comparing ", tname, ".")
  e <- edger_tables[tname]
  d <- deseq_tables[tname]
  ## Columns 2 and 8 of the merged frame are the two logFC columns.
  ## NOTE(review): positional column selection is fragile -- confirm the
  ## column order if either upstream table format changes.
  ed <- merge(e, d, by="row.names")[, c(2, 8)]
  print(cor.test(ed[[1]], ed[[2]]))
}
## Comparing het_retina_vs_het_dlgn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 2528, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9988 0.9989
## sample estimates:
## cor
## 0.9989
## Comparing het_scn_vs_het_dlgn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 1726, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9975 0.9976
## sample estimates:
## cor
## 0.9976
## Comparing ko_dlgn_vs_het_dlgn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 941, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9916 0.9921
## sample estimates:
## cor
## 0.9919
## Comparing ko_retina_vs_het_dlgn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 1605, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9971 0.9973
## sample estimates:
## cor
## 0.9972
## Comparing ko_scn_vs_het_dlgn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 1028, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9929 0.9934
## sample estimates:
## cor
## 0.9932
## Comparing wt_dlgn_vs_het_dlgn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 1259, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9953 0.9956
## sample estimates:
## cor
## 0.9954
## Comparing wt_retina_vs_het_dlgn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 2122, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9983 0.9984
## sample estimates:
## cor
## 0.9984
## Comparing wt_scn_vs_het_dlgn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 1781, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9976 0.9978
## sample estimates:
## cor
## 0.9977
## Comparing het_scn_vs_het_retina.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 3008, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9992 0.9992
## sample estimates:
## cor
## 0.9992
## Comparing ko_dlgn_vs_het_retina.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 3076, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9992 0.9993
## sample estimates:
## cor
## 0.9992
## Comparing ko_retina_vs_het_retina.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 504, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9716 0.9733
## sample estimates:
## cor
## 0.9725
## Comparing ko_scn_vs_het_retina.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 1907, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9979 0.9981
## sample estimates:
## cor
## 0.998
## Comparing wt_dlgn_vs_het_retina.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 2585, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9989 0.9989
## sample estimates:
## cor
## 0.9989
## Comparing wt_retina_vs_het_retina.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 852, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9898 0.9904
## sample estimates:
## cor
## 0.9901
## Comparing wt_scn_vs_het_retina.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 3486, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9994 0.9994
## sample estimates:
## cor
## 0.9994
## Comparing ko_dlgn_vs_het_scn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 1968, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9981 0.9982
## sample estimates:
## cor
## 0.9981
## Comparing ko_retina_vs_het_scn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 1655, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9973 0.9974
## sample estimates:
## cor
## 0.9973
## Comparing ko_scn_vs_het_scn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 719, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9857 0.9866
## sample estimates:
## cor
## 0.9862
## Comparing wt_dlgn_vs_het_scn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 2197, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9984 0.9985
## sample estimates:
## cor
## 0.9985
## Comparing wt_retina_vs_het_scn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 2229, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9985 0.9986
## sample estimates:
## cor
## 0.9985
## Comparing wt_scn_vs_het_scn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 978, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9922 0.9927
## sample estimates:
## cor
## 0.9925
## Comparing ko_retina_vs_ko_dlgn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 1636, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9972 0.9974
## sample estimates:
## cor
## 0.9973
## Comparing ko_scn_vs_ko_dlgn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 1517, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9967 0.9969
## sample estimates:
## cor
## 0.9968
## Comparing wt_dlgn_vs_ko_dlgn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 1332, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9958 0.9960
## sample estimates:
## cor
## 0.9959
## Comparing wt_retina_vs_ko_dlgn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 2379, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9987 0.9988
## sample estimates:
## cor
## 0.9987
## Comparing wt_scn_vs_ko_dlgn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 2271, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9985 0.9986
## sample estimates:
## cor
## 0.9986
## Comparing ko_scn_vs_ko_retina.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 1380, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9961 0.9963
## sample estimates:
## cor
## 0.9962
## Comparing wt_dlgn_vs_ko_retina.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 1429, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9963 0.9966
## sample estimates:
## cor
## 0.9964
## Comparing wt_retina_vs_ko_retina.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 560, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9768 0.9782
## sample estimates:
## cor
## 0.9775
## Comparing wt_scn_vs_ko_retina.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 1603, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9971 0.9973
## sample estimates:
## cor
## 0.9972
## Comparing wt_dlgn_vs_ko_scn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 1137, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9942 0.9946
## sample estimates:
## cor
## 0.9944
## Comparing wt_retina_vs_ko_scn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 1702, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9974 0.9976
## sample estimates:
## cor
## 0.9975
## Comparing wt_scn_vs_ko_scn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 704, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9851 0.9860
## sample estimates:
## cor
## 0.9856
## Comparing wt_retina_vs_wt_dlgn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 2060, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9982 0.9983
## sample estimates:
## cor
## 0.9983
## Comparing wt_scn_vs_wt_dlgn.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 2707, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.999 0.999
## sample estimates:
## cor
## 0.999
## Comparing wt_scn_vs_wt_retina.
##
## Pearson's product-moment correlation
##
## data: ed[[1]] and ed[[2]]
## t = 2783, df = 14599, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9990 0.9991
## sample estimates:
## cor
## 0.9991
## PCA of the raw hisat2 counts; ask which metadata factors track each PC.
pc_raw <- pca_information(mm38_hisat, expt_factors=c("location", "genotype", "time", "condition"), num_factors=4)
## More shallow curves in these plots suggest more genes in this principle component.
pc_raw$pca_cor
## PC1 PC2 PC3 PC4
## location 0.12983 0.14484 -0.32668 -0.03445
## genotype 0.07776 -0.14606 0.02969 0.48423
## time -0.61235 -0.30192 -0.31319 -0.61949
## condition 0.11226 -0.09307 -0.07115 0.44364
pc_raw$anova_p
## PC1 PC2 PC3 PC4
## location 0.554898 0.5096 0.1281 0.876002
## genotype 0.724337 0.5061 0.8930 0.019211
## time 0.001897 0.1615 0.1456 0.001619
## condition 0.610060 0.6728 0.7470 0.033967
## Same PCA/factor association, now on the normalized data.
pc_norm <- pca_information(mm38_norm_hi, expt_factors=c("location", "genotype", "time", "condition"), num_factors=4)
## More shallow curves in these plots suggest more genes in this principle component.
pc_norm$pca_cor
## PC1 PC2 PC3 PC4
## location 0.03141 -0.2346 0.95040 -0.0168
## genotype -0.03435 -0.1992 -0.01796 0.3055
## time 0.11061 0.8753 0.27578 -0.3471
## condition -0.02269 -0.2579 0.27116 0.2814
pc_norm$anova_p
## PC1 PC2 PC3 PC4
## location 0.8869 2.813e-01 4.023e-12 0.9394
## genotype 0.8763 3.622e-01 9.352e-01 0.1563
## time 0.6154 4.583e-08 2.028e-01 0.1047
## condition 0.9181 2.349e-01 2.107e-01 0.1934
## And once more on the normalized + batch-adjusted data, keeping the PCA plots.
pc_nb <- pca_information(mm38_normbatch_hi,
                         expt_factors=c("location", "genotype", "time", "condition"),
                         num_factors=4, plot_pcas=TRUE)
## More shallow curves in these plots suggest more genes in this principle component.
pc_nb$pca_cor
## PC1 PC2 PC3 PC4
## location -0.04291 0.97563 -0.06657 -0.1112
## genotype -0.13119 0.04489 0.30661 -0.1353
## time 0.17331 0.07804 -0.22464 -0.1243
## condition -0.13603 0.33774 0.26735 -0.1605
pc_nb$anova_p
## PC1 PC2 PC3 PC4
## location 0.8458 2.583e-15 0.7628 0.6135
## genotype 0.5507 8.388e-01 0.1547 0.5383
## time 0.4290 7.234e-01 0.3028 0.5721
## condition 0.5360 1.150e-01 0.2175 0.4643
## It looks to me like PCs 2 and 3 are the most likely to be of interest with respect to
## genotype and location.
## Default PC1/PC2 view, then the PC2/PC3 view singled out above.
default_pca <- plot_pca(mm38_normbatch_hi)
default_pca$plot
## Here is what it looks like if we focus on PCs 2 and 3 instead:
pc_nb$pca_plots$PC2_PC3
all_three <- plot_3d_pca(default_pca, file="for_najib.html")
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## I am not sure why, but it changed my colors!
all_three$plot
We can make estimates about how many genes are related to the various factors with variancePartition.
## variancePartition estimate on the normalized (pre-batch) data.
mm38_varpart <- simple_varpart(mm38_norm_hi)
## Attempting mixed linear model with: ~ condition + batch
## Fitting the expressionset to the model, this is slow.
##
## Total:89 s
## Placing factor: condition at the beginning of the model.
pp(file="pre_varpart.png", image=mm38_varpart$partition_plot)
## Writing the image to: pre_varpart.png and calling dev.off().
## variancePartition estimate after batch adjustment, for comparison.
mm38_nb_varpart <- simple_varpart(mm38_normbatch_hi)
## Attempting mixed linear model with: ~ condition + batch
## Fitting the expressionset to the model, this is slow.
##
## Total:89 s
## Placing factor: condition at the beginning of the model.
pp(file="post_varpart.png", image=mm38_nb_varpart$partition_plot)
## Writing the image to: post_varpart.png and calling dev.off().
This suggests there is some useful variance in the data which corresponds to genotype/location.
## mm_de_normal_sig
## DESeq2-significant ups for het dlgn vs. retina; strip the "gene:" prefix
## added for the htseq IDs so the names match what gProfiler expects.
up_dlgn_ret <- mm_de_normal_sig[["deseq"]][["ups"]][["het_dlgnret"]]
rownames(up_dlgn_ret) <- gsub(x=rownames(up_dlgn_ret), pattern="gene:", replacement="")
up_dlgn_ret_gp <- simple_gprofiler(up_dlgn_ret, species="mmusculus", excel="excel/up_dlgn_ret_gp.xlsx")
## Performing gProfiler GO search of 2683 genes against mmusculus.
## GO search found 956 hits.
## Performing gProfiler KEGG search of 2683 genes against mmusculus.
## KEGG search found 55 hits.
## Performing gProfiler REAC search of 2683 genes against mmusculus.
## REAC search found 67 hits.
## Performing gProfiler MI search of 2683 genes against mmusculus.
## MI search found 0 hits.
## Performing gProfiler TF search of 2683 genes against mmusculus.
## TF search found 609 hits.
## Performing gProfiler CORUM search of 2683 genes against mmusculus.
## CORUM search found 4 hits.
## Performing gProfiler HP search of 2683 genes against mmusculus.
## HP search found 63 hits.
## Writing data to: excel/up_dlgn_ret_gp.xlsx.
## Finished writing data.
## Same treatment for the corresponding downs.
down_dlgn_ret <- mm_de_normal_sig[["deseq"]][["downs"]][["het_dlgnret"]]
rownames(down_dlgn_ret) <- gsub(x=rownames(down_dlgn_ret), pattern="gene:", replacement="")
down_dlgn_ret_gp <- simple_gprofiler(down_dlgn_ret, species="mmusculus", excel="excel/down_dlgn_ret_gp.xlsx")
## Performing gProfiler GO search of 2202 genes against mmusculus.
## GO search found 686 hits.
## Performing gProfiler KEGG search of 2202 genes against mmusculus.
## KEGG search found 19 hits.
## Performing gProfiler REAC search of 2202 genes against mmusculus.
## REAC search found 55 hits.
## Performing gProfiler MI search of 2202 genes against mmusculus.
## MI search found 0 hits.
## Performing gProfiler TF search of 2202 genes against mmusculus.
## TF search found 609 hits.
## Performing gProfiler CORUM search of 2202 genes against mmusculus.
## CORUM search found 3 hits.
## Performing gProfiler HP search of 2202 genes against mmusculus.
## HP search found 140 hits.
## Writing data to: excel/down_dlgn_ret_gp.xlsx.
## Finished writing data.
## mm_de_normal_sig
## het scn vs. retina, ups.
up_scn_ret <- mm_de_normal_sig[["deseq"]][["ups"]][["het_scnret"]]
rownames(up_scn_ret) <- gsub(x=rownames(up_scn_ret), pattern="gene:", replacement="")
up_scn_ret_gp <- simple_gprofiler(up_scn_ret, species="mmusculus", excel="excel/up_scn_ret_gp.xlsx")
## Performing gProfiler GO search of 2422 genes against mmusculus.
## GO search found 728 hits.
## Performing gProfiler KEGG search of 2422 genes against mmusculus.
## KEGG search found 33 hits.
## Performing gProfiler REAC search of 2422 genes against mmusculus.
## REAC search found 44 hits.
## Performing gProfiler MI search of 2422 genes against mmusculus.
## MI search found 0 hits.
## Performing gProfiler TF search of 2422 genes against mmusculus.
## TF search found 577 hits.
## Performing gProfiler CORUM search of 2422 genes against mmusculus.
## CORUM search found 0 hits.
## Performing gProfiler HP search of 2422 genes against mmusculus.
## HP search found 42 hits.
## Writing data to: excel/up_scn_ret_gp.xlsx.
## Finished writing data.
## het scn vs. retina, downs.
down_scn_ret <- mm_de_normal_sig[["deseq"]][["downs"]][["het_scnret"]]
rownames(down_scn_ret) <- gsub(x=rownames(down_scn_ret), pattern="gene:", replacement="")
down_scn_ret_gp <- simple_gprofiler(down_scn_ret, species="mmusculus", excel="excel/down_scn_ret_gp.xlsx")
## Performing gProfiler GO search of 2358 genes against mmusculus.
## GO search found 690 hits.
## Performing gProfiler KEGG search of 2358 genes against mmusculus.
## KEGG search found 24 hits.
## Performing gProfiler REAC search of 2358 genes against mmusculus.
## REAC search found 49 hits.
## Performing gProfiler MI search of 2358 genes against mmusculus.
## MI search found 0 hits.
## Performing gProfiler TF search of 2358 genes against mmusculus.
## TF search found 713 hits.
## Performing gProfiler CORUM search of 2358 genes against mmusculus.
## CORUM search found 1 hits.
## Performing gProfiler HP search of 2358 genes against mmusculus.
## HP search found 92 hits.
## Writing data to: excel/down_scn_ret_gp.xlsx.
## Finished writing data.
## mm_de_normal_sig
## het dlgn vs. scn, ups.
## NOTE(review): the variables are still named *_scn_ret here (copy-paste
## reuse); the data and output file are the dlgn/scn contrast.
up_scn_ret <- mm_de_normal_sig[["deseq"]][["ups"]][["het_dlgnscn"]]
rownames(up_scn_ret) <- gsub(x=rownames(up_scn_ret), pattern="gene:", replacement="")
up_scn_ret_gp <- simple_gprofiler(up_scn_ret, species="mmusculus", excel="excel/up_dlgn_scn_gp.xlsx")
## Performing gProfiler GO search of 1202 genes against mmusculus.
## GO search found 612 hits.
## Performing gProfiler KEGG search of 1202 genes against mmusculus.
## KEGG search found 47 hits.
## Performing gProfiler REAC search of 1202 genes against mmusculus.
## REAC search found 30 hits.
## Performing gProfiler MI search of 1202 genes against mmusculus.
## MI search found 0 hits.
## Performing gProfiler TF search of 1202 genes against mmusculus.
## TF search found 454 hits.
## Performing gProfiler CORUM search of 1202 genes against mmusculus.
## CORUM search found 2 hits.
## Performing gProfiler HP search of 1202 genes against mmusculus.
## HP search found 28 hits.
## Writing data to: excel/up_dlgn_scn_gp.xlsx.
## Finished writing data.
## het dlgn vs. scn, downs (variable names again reused from the scn/ret pair).
down_scn_ret <- mm_de_normal_sig[["deseq"]][["downs"]][["het_dlgnscn"]]
rownames(down_scn_ret) <- gsub(x=rownames(down_scn_ret), pattern="gene:", replacement="")
down_scn_ret_gp <- simple_gprofiler(down_scn_ret, species="mmusculus", excel="excel/down_dlgn_scn_gp.xlsx")
## Performing gProfiler GO search of 996 genes against mmusculus.
## GO search found 196 hits.
## Performing gProfiler KEGG search of 996 genes against mmusculus.
## KEGG search found 9 hits.
## Performing gProfiler REAC search of 996 genes against mmusculus.
## REAC search found 12 hits.
## Performing gProfiler MI search of 996 genes against mmusculus.
## MI search found 0 hits.
## Performing gProfiler TF search of 996 genes against mmusculus.
## TF search found 311 hits.
## Performing gProfiler CORUM search of 996 genes against mmusculus.
## CORUM search found 3 hits.
## Performing gProfiler HP search of 996 genes against mmusculus.
## HP search found 25 hits.
## Writing data to: excel/down_dlgn_scn_gp.xlsx.
## Finished writing data.
## The columns of likely interest:
## [199] "5_utr_start"
## [200] "5_utr_end"
## [201] "3_utr_start"
## [202] "3_utr_end"
## Re-query biomart, this time requesting the UTR coordinate columns.
length_requests <- c("ensembl_transcript_id", "cds_length", "chromosome_name",
                     "strand", "start_position", "end_position",
                     "5_utr_start", "5_utr_end", "3_utr_start", "3_utr_end")
mm_annot_v2 <- load_biomart_annotations(species="mmusculus", do_save=FALSE, overwrite=TRUE,
                                        length_requests=length_requests)
## Got a bad mart type when trying host Nov2019.archive.ensembl.org mart ENSEMBL_MART_ENSEMBL.
## Got a bad mart type when trying host Oct2019.archive.ensembl.org mart ENSEMBL_MART_ENSEMBL.
## Using mart: ENSEMBL_MART_ENSEMBL from host: Sep2019.archive.ensembl.org.
## Successfully connected to the mmusculus_gene_ensembl database.
## Finished downloading ensembl gene annotations.
## Finished downloading ensembl structure annotations.
## Dropping haplotype chromosome annotations, set drop_haplotypes=FALSE if this is bad.
## So, it turns out that there really are a _lot_ of utr differences depending on transcript
## Here are the few transcripts, it gets less encouraging after these.
head(mm_annot_v2[["annotation"]])
## ensembl_transcript_id ensembl_gene_id version
## ENSMUST00000000001 ENSMUST00000000001 ENSMUSG00000000001 4
## ENSMUST00000000001.1 ENSMUST00000000001 ENSMUSG00000000001 4
## ENSMUST00000000001.2 ENSMUST00000000001 ENSMUSG00000000001 4
## ENSMUST00000000001.3 ENSMUST00000000001 ENSMUSG00000000001 4
## ENSMUST00000000003 ENSMUST00000000003 ENSMUSG00000000003 15
## ENSMUST00000000003.1 ENSMUST00000000003 ENSMUSG00000000003 15
## transcript_version hgnc_symbol
## ENSMUST00000000001 4 <NA>
## ENSMUST00000000001.1 4 <NA>
## ENSMUST00000000001.2 4 <NA>
## ENSMUST00000000001.3 4 <NA>
## ENSMUST00000000003 13 <NA>
## ENSMUST00000000003.1 13 <NA>
## description
## ENSMUST00000000001 guanine nucleotide binding protein (G protein), alpha inhibiting 3 [Source:MGI Symbol;Acc:MGI:95773]
## ENSMUST00000000001.1 guanine nucleotide binding protein (G protein), alpha inhibiting 3 [Source:MGI Symbol;Acc:MGI:95773]
## ENSMUST00000000001.2 guanine nucleotide binding protein (G protein), alpha inhibiting 3 [Source:MGI Symbol;Acc:MGI:95773]
## ENSMUST00000000001.3 guanine nucleotide binding protein (G protein), alpha inhibiting 3 [Source:MGI Symbol;Acc:MGI:95773]
## ENSMUST00000000003 probasin [Source:MGI Symbol;Acc:MGI:1860484]
## ENSMUST00000000003.1 probasin [Source:MGI Symbol;Acc:MGI:1860484]
## gene_biotype cds_length chromosome_name strand
## ENSMUST00000000001 protein_coding 1065 3 -
## ENSMUST00000000001.1 protein_coding 1065 3 -
## ENSMUST00000000001.2 protein_coding 1065 3 -
## ENSMUST00000000001.3 protein_coding 1065 3 -
## ENSMUST00000000003 protein_coding 525 X -
## ENSMUST00000000003.1 protein_coding 525 X -
## start_position end_position 5_utr_start 5_utr_end
## ENSMUST00000000001 108107280 108146146 NA NA
## ENSMUST00000000001.1 108107280 108146146 108146006 108146146
## ENSMUST00000000001.2 108107280 108146146 NA NA
## ENSMUST00000000001.3 108107280 108146146 NA NA
## ENSMUST00000000003 77837901 77853623 NA NA
## ENSMUST00000000003.1 77837901 77853623 NA NA
## 3_utr_start 3_utr_end
## ENSMUST00000000001 NA NA
## ENSMUST00000000001.1 NA NA
## ENSMUST00000000001.2 108107280 108109316
## ENSMUST00000000001.3 108109403 108109421
## ENSMUST00000000003 77837901 77838114
## ENSMUST00000000003.1 77841860 77841882
## Maybe I can consider the first transcript for each gene which has the information for the 3' UTR?
## Keep only logFC/adjp and add a column to receive the 3' UTR lengths.
test_up_lfc <- up_dlgn_ret[, c("deseq_logfc", "deseq_adjp")]
test_up_lfc[["threep_length"]] <- 0
test_down_lfc <- down_dlgn_ret[, c("deseq_logfc", "deseq_adjp")]
test_down_lfc[["threep_length"]] <- 0
mm <- mm_annot_v2[["annotation"]]

## Return the 3' UTR length of the first transcript of gene g which has both
## UTR coordinates, or 0 if no transcript (or no annotation row) provides them.
## NOTE(review): the original inner loop used `next` instead of returning, so
## it effectively recorded the *last* annotated transcript; the stated intent
## (comment above) is the first, which this helper implements.
first_threep_length <- function(annot, g) {
  testers <- annot[annot[["ensembl_gene_id"]] == g, ]
  if (nrow(testers) == 0) {
    return(0)
  }
  for (tx in seq_len(nrow(testers))) {
    row <- testers[tx, ]
    if (!is.na(row[["3_utr_start"]]) && !is.na(row[["3_utr_end"]])) {
      return(abs(row[["3_utr_start"]] - row[["3_utr_end"]]))
    }
  }
  return(0)
}

## Fill in the 3' UTR lengths for the up and down gene sets.
for (g in rownames(test_up_lfc)) {
  utr_len <- first_threep_length(mm, g)
  if (utr_len > 0) {
    test_up_lfc[g, "threep_length"] <- utr_len
  }
}
for (g in rownames(test_down_lfc)) {
  utr_len <- first_threep_length(mm, g)
  if (utr_len > 0) {
    test_down_lfc[g, "threep_length"] <- utr_len
  }
}

## Density of 3' UTR length, ups vs. downs, on a log10 x axis.
test_up_lfc[["direction"]] <- "up"
test_down_lfc[["direction"]] <- "down"
test_both <- rbind(test_up_lfc[, c("threep_length", "direction")],
                   test_down_lfc[, c("threep_length", "direction")])
multi <- ggplot2::ggplot(test_both,
                         aes(x=threep_length, fill=direction)) +
  ggplot2::geom_density(alpha=0.5) +
  ggplot2::scale_x_log10()
pp(file="nasty_histogram.png", image=multi)
## Writing the image to: nasty_histogram.png and calling dev.off().
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 147 rows containing non-finite values (stat_density).
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 147 rows containing non-finite values (stat_density).
Plot a histogram of 3' UTR length for the up genes vs. the down genes.
Until we get full replicates, I will do simple subtractions.
In an attempt to keep some clarity in the terms used, I want to define them now. There are three contexts in which we will consider the data:
The individual sample type. When considering individual samples, I will use three terms in this and only this context: wild-type (wt), het, and mut.
The individual translatome. These are defined as something / baseline. I will exclusively call the wt samples ‘baseline’ when speaking in this context. I will exclusively state ‘normal’ when referring to het / wt samples, and I will state ‘ko’ when referring to mut / wt samples in the translatome context.
Translatome vs. translatome. Whenever comparing translatomes, I will use the names as in #2 and always put the numerator first when writing the name of a comparison.
The most complex example of the above nomenclature is:
“normko_retdlgn is defined as normret_vs_normdlgn - koret_vs_kodlgn”
This states that, in the translatome context, we are examining: (norm(retina translatome) - norm(dlgn translatome)) - (ko(retina translatome) - ko(dlgn translatome))
Which in turn is synonymous to the following at the sample context: ((rethet - retwt) - (dlgnhet - dlgnwt)) - ((retko - retwt) - (dlgnko - dlgnwt))
Now let us associate the various variable names with the appropriate samples:
## Map each tissue/genotype to its sample ID.
dlgnwt <- "iprgc_01"
retwt <- "iprgc_02"
scnwt <- "iprgc_03"

dlgnhet <- "iprgc_04"
rethet <- "iprgc_05"
scnhet <- NULL ## Does not yet exist.

dlgnmut <- "iprgc_06"
retmut <- "iprgc_07"
scnmut <- "iprgc_08"
Given these variable names, now let's associate columns of the expression data with them. These are at the sample context, so the appropriate names are: ‘wt’, ‘het’, and ‘mut’. In each case I will prefix the genotype with the tissue type: ‘ret’, ‘dlgn’, and ‘scn’. Thus ‘retwt’ refers to the sample used to calculate the translatome retina baseline; in contrast ‘dlgnmut’ is the sample which provides the dlgn knockout.
## Sample context
## Replace each sample-ID string with its expression column.
mm38_norm <- mm_norm_sa
## Error in eval(expr, envir, enclos): object 'mm_norm_sa' not found
dlgnwt <- exprs(mm38_norm)[, dlgnwt]
retwt <- exprs(mm38_norm)[, retwt]
scnwt <- exprs(mm38_norm)[, scnwt]
dlgnhet <- exprs(mm38_norm)[, dlgnhet]
rethet <- exprs(mm38_norm)[, rethet]
dlgnmut <- exprs(mm38_norm)[, dlgnmut]
retmut <- exprs(mm38_norm)[, retmut]
scnmut <- exprs(mm38_norm)[, scnmut]
Each of the above 8 variables provides 1 column of information. We have 3 baseline comparisons available to us. In each of these we compare one wt sample to another.
## Baseline comparisons
wt_dlgnret <- dlgnwt - retwt
wt_scnret <- scnwt - retwt
wt_dlgnscn <- dlgnwt - scnwt
Simultaneously, we have 5 available translatomes. These are provided by comparing each het or mut to the associated wt. These will therefore receive names: ‘norm’ and ‘ko’ instead of ‘het’ and ‘mut’.
## Translatome context
normret <- rethet - retwt
koret <- retmut - retwt
koscn <- scnmut - scnwt
normdlgn <- dlgnhet - dlgnwt
kodlgn <- dlgnmut - dlgnwt
Given these translatomes, there are a few contrasts of likely interest. These are performed by comparing the relevant translatomes.
We will split these into 4 separate categories: het vs het, ko vs ko, ko vs het, and ratio vs ratio.
Finally, note that we are being explicitly redundant in these definitions. I am making variable names for both the a/b ratio and the b/a ratio. Thus we have some redundantly redundant (haha) flexibility when deciding on what we want to plot.
## norm vs norm
normdlgn_vs_normret <- normdlgn - normret
normret_vs_normdlgn <- normret - normdlgn
## ko vs ko
koret_vs_kodlgn <- koret - kodlgn
kodlgn_vs_koret <- kodlgn - koret

koret_vs_koscn <- koret - koscn
koscn_vs_koret <- koscn - koret

kodlgn_vs_koscn <- kodlgn - koscn
koscn_vs_kodlgn <- koscn - kodlgn
On the other hand, I am assuming we always want the normals as denominators and kos as numerators.
## ko vs norm
koret_vs_normret <- koret - normret

kodlgn_vs_normdlgn <- kodlgn - normdlgn
Finally, here is the ratio of ratios example I printed above:
I named it ‘normko_retdlgn’ in an attempt to make clear that it is actually: (normret/normdlgn)/(koret/kodlgn)
or stated differently: “norm divided by ko for ret divided by dlgn.”
## ratio of ratios
normko_retdlgn <- normret_vs_normdlgn - koret_vs_kodlgn
My matrix of data will now contain 1 column for each of the above 27 samples/comparisons.
## One column for each of the 27 samples/comparisons defined above.
pair_mtrx <- cbind(
  ## Individual samples
  dlgnwt, retwt, scnwt, dlgnhet, rethet, dlgnmut, retmut, scnmut,
  ## Baseline comparisons
  wt_dlgnret, wt_scnret, wt_dlgnscn,
  ## Baseline subtractions
  normdlgn, normret, kodlgn, koret, koscn,
  ## het_vs_het, of which there is only 1 because we do not have hetscn
  normdlgn_vs_normret, normret_vs_normdlgn,
  ## ko_vs_ko, of which we have 3
  koret_vs_kodlgn, kodlgn_vs_koret,
  koret_vs_koscn, koscn_vs_koret,
  kodlgn_vs_koscn, koscn_vs_kodlgn,
  ## ko_vs_het, 3 including one getting around missing hetscn
  koret_vs_normret, kodlgn_vs_normdlgn,
  ## ratio of ratios
  normko_retdlgn)
I am not sure if we will use these indexes, but I am writing these out as subsets of genes to look at. These indexes are stating that, given a cutoff (0), we want to look at only the genes which have higher x / baseline values than the cutoff.
## Queries about gene subsets.
## These are all in the context of translatomes.
cutoff <- 0
ret_kept_idx <- normret > cutoff & koret > cutoff
scn_kept_idx <- koscn > cutoff
dlgn_kept_idx <- normdlgn > cutoff & kodlgn > cutoff
ret_dlgn_kept_idx <- ret_kept_idx & dlgn_kept_idx
ret_scn_kept_idx <- ret_kept_idx & scn_kept_idx
dlgn_scn_kept_idx <- dlgn_kept_idx & scn_kept_idx

##normdlgn_vs_normret[!ret_dlgn_kept_idx] <- NA
##normret_vs_normdlgn[!ret_dlgn_kept_idx] <- NA
##koret_vs_kodlgn[!ret_dlgn_kept_idx] <- NA
##kodlgn_vs_koret[!ret_dlgn_kept_idx] <- NA
##koret_vs_koscn[!ret_scn_kept_idx] <- NA
##koscn_vs_koret[!ret_scn_kept_idx] <- NA
##kodlgn_vs_koscn[!dlgn_scn_kept_idx] <- NA
##koscn_vs_kodlgn[!dlgn_scn_kept_idx] <- NA
##koret_vs_normret[!ret_kept_idx] <- NA
##kodlgn_vs_normdlgn[!dlgn_kept_idx] <- NA
##normko_retdlgn <- normko_retdlgn[!ret_dlgn_kept_idx] <- NA
I will use my function combine_de_tables() to add this information to my existing annotation data along with the results from the statistically valid comparison of the three tissue types.
## Fold the pairwise matrix into the DE tables and write the workbook.
mm_tables <- sm(combine_de_tables(
  mm_de_sa, extra_annot=pair_mtrx,
  excel=glue::glue("excel/{rundate}mm_salmon_tables-v{ver}.xlsx")))
## Put retina baseline on y axis as black, retina het on x axis as black.
## Then recolor a subset of these as red, the reds are when normret > 0
library(ggplot2)
plotted <- as.data.frame(pair_mtrx[, c("rethet", "retwt")])
red_idx <- normret > 0
plotted[, "color"] <- ifelse(red_idx, "red", "black")
plotted[["label"]] <- rownames(plotted)
ret_hetwt <- ggplot(
  plotted,
  aes_string(x="rethet", y="retwt", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
ret_hetwt
ret_hetwt_clicky <- ggplotly_url(
  ret_hetwt, "ret_hetwt.html", title="Retina expression, het vs. wt.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Retina mutant vs. wt.
plotted <- as.data.frame(pair_mtrx[, c("retmut", "retwt")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
ret_mutwt <- ggplot(
  plotted,
  aes_string(x="retmut", y="retwt", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
ret_mutwt
ret_mutwt_clicky <- ggplotly_url(
  ret_mutwt, "ret_mutwt.html", title="Retina expression, mutant vs. wt.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## dlgn het vs. wt.
plotted <- as.data.frame(pair_mtrx[, c("dlgnhet", "dlgnwt")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
dlgn_hetwt <- ggplot(
  plotted,
  aes_string(x="dlgnhet", y="dlgnwt", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
dlgn_hetwt
dlgn_hetwt_clicky <- ggplotly_url(
  dlgn_hetwt, "dlgn_hetwt.html", title="dlgn expression, het vs. wt.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## dlgn mutant vs. wt.
plotted <- as.data.frame(pair_mtrx[, c("dlgnmut", "dlgnwt")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
dlgn_mutwt <- ggplot(
  plotted,
  aes_string(x="dlgnmut", y="dlgnwt", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
dlgn_mutwt
dlgn_mutwt_clicky <- ggplotly_url(
  dlgn_mutwt, "dlgn_mutwt.html", title="dlgn expression, mut vs. wt.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## scn mutant vs. wt.
plotted <- as.data.frame(pair_mtrx[, c("scnmut", "scnwt")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
scn_mutwt <- ggplot(
  plotted,
  aes_string(x="scnmut", y="scnwt", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
scn_mutwt
scn_mutwt_clicky <- ggplotly_url(
  scn_mutwt, "scn_mutwt.html", title="scn expression, mut vs. wt.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## x-axis: normdlgn_vs_normret or normret_vs_normdlgn,
## ^^^^
## y-axis: dlgnwt-retwt (baseline dlgn - baseline retina)
plotted <- as.data.frame(pair_mtrx[, c("normdlgn_vs_normret", "wt_dlgnret")])
red_idx <- normret > 0
## Note that this order is opposite of above.
plotted[, "color"] <- ifelse(red_idx, "black", "red")
plotted[["label"]] <- rownames(plotted)
axon_trans_ret_target <- ggplot(
  plotted,
  aes_string(x="normdlgn_vs_normret", y="wt_dlgnret", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
axon_trans_ret_target
axon_trans_ret_target_clicky <- ggplotly_url(
  axon_trans_ret_target, "axon_trans_ret_target.html", title="Axon translatome, retina target.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Normal retina vs. normal dlgn translatomes.
plotted <- as.data.frame(pair_mtrx[, c("normret", "normdlgn")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
normret_normdlgn <- ggplot(
  plotted,
  aes_string(x="normret", y="normdlgn", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
normret_normdlgn
normret_normdlgn_clicky <- ggplotly_url(
  normret_normdlgn, "normret_normdlgn.html", title="Normal retina translatome vs normal dlgn translatome.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## KO retina vs. KO dlgn translatomes.
plotted <- as.data.frame(pair_mtrx[, c("koret", "kodlgn")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
koret_kodlgn <- ggplot(
  plotted,
  aes_string(x="koret", y="kodlgn", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
koret_kodlgn
koret_kodlgn_clicky <- ggplotly_url(
  koret_kodlgn, "koret_kodlgn.html", title="KO retina translatome vs KO dlgn translatome.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## KO retina vs. KO scn translatomes.
plotted <- as.data.frame(pair_mtrx[, c("koret", "koscn")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
koret_koscn <- ggplot(
  plotted,
  aes_string(x="koret", y="koscn", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
koret_koscn
koret_koscn_clicky <- ggplotly_url(
  koret_koscn, "koret_koscn.html", title="KO retina translatome vs KO scn translatome.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Normal dlgn vs. KO dlgn translatomes.
plotted <- as.data.frame(pair_mtrx[, c("normdlgn", "kodlgn")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
normdlgn_kodlgn <- ggplot(
  plotted,
  aes_string(x="normdlgn", y="kodlgn", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
normdlgn_kodlgn
normdlgn_kodlgn_clicky <- ggplotly_url(
  normdlgn_kodlgn, "normdlgn_kodlgn.html", title="Normal dlgn translatome vs KO dlgn translatome.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Normal retina vs. KO retina translatomes.
plotted <- as.data.frame(pair_mtrx[, c("normret", "koret")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
normret_koret <- ggplot(
  plotted,
  aes_string(x="normret", y="koret", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
normret_koret
normret_koret_clicky <- ggplotly_url(
  normret_koret, "normret_koret.html", title="Normal retina translatome vs KO retina translatome.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Normal vs. KO axon translatomes (ret/dlgn ratios).
plotted <- as.data.frame(pair_mtrx[, c("normret_vs_normdlgn", "koret_vs_kodlgn")])
plotted[["label"]] <- rownames(plotted)
plotted[["color"]] <- "black"
normal_ko_axon_translatome <- ggplot(
  plotted,
  aes_string(x="normret_vs_normdlgn", y="koret_vs_kodlgn", label="label", color="color")) +
  geom_point(alpha=0.5) +
  scale_color_manual(values=c("black", "red"))
normal_ko_axon_translatome
normal_ko_axon_translatome_clicky <- ggplotly_url(
  normal_ko_axon_translatome, "normal_ko_axon_translatome.html",
  title="Normal retina ko axon translatome.",
  url_data="http://useast.ensembl.org/Mus_musculus/Gene/Summary?g={ids}")
## Scatter one translatome column of pair_mtrx against another, coloring
## genes beyond +/- lfc (y - x) as up/down and drawing the fold-change
## guide lines.
## Returns a list: mtrx (plotted data), ups/downs (gene IDs), plot (ggplot).
translatome_plotter <- function(pair_mtrx, x_axis="koret", y_axis="koscn", lfc=NULL, fc=NULL, linewidth=1.5,
                                up_color="red", down_color="#098534", line_color="#fcba03", alpha=0.5,
                                x_limit=c(-7, 7), y_limit=c(-7, 7)) {
  ## Default to a 10-fold cutoff when neither fc nor lfc is provided.
  if (is.null(fc) && is.null(lfc)) {
    message("No fc/lfc was provided, defaulting to 10 fold.")
    lfc <- log2(10)
  } else if (is.null(lfc)) {
    lfc <- log2(fc)
  }
  plotted <- as.data.frame(pair_mtrx[, c(x_axis, y_axis)])
  ## Missing values count as zero change.
  na_idx <- is.na(plotted)
  plotted[na_idx] <- 0
  up_idx <- plotted[, y_axis] - plotted[, x_axis] >= lfc
  down_idx <- plotted[, y_axis] - plotted[, x_axis] <= (lfc * -1)
  up_genes <- rownames(plotted)[up_idx]
  down_genes <- rownames(plotted)[down_idx]
  plotted[["color"]] <- "black"
  plotted[up_idx, "color"] <- up_color
  plotted[down_idx, "color"] <- down_color
  ## Map each category to its color by name.  The previous code renamed the
  ## alphabetically-sorted factor levels in place via levels()<-, which only
  ## produced the intended colors by coincidence with the default palette and
  ## broke whenever a category was empty or the colors were changed.
  plotted[["color"]] <- factor(plotted[["color"]],
                               levels=c("black", up_color, down_color))
  color_values <- c("black" = "black")
  color_values[up_color] <- up_color
  color_values[down_color] <- down_color
  ## NOTE(review): linewidth and line_color are accepted but unused; the
  ## guide lines keep the original hard-coded orange -- confirm intent.
  plt <- ggplot2::ggplot(plotted, aes_string(x=x_axis,
                                             y=y_axis,
                                             color="color")) +
    geom_abline(size=1.1, slope=1, intercept=lfc, color="orange") +
    geom_abline(size=1.1, slope=1, intercept=(-1 * lfc), color="orange") +
    scale_x_continuous(limits=c(x_limit)) +
    scale_y_continuous(limits=c(y_limit)) +
    geom_point(alpha=alpha) +
    scale_color_manual(values=color_values)
  retlist <- list(
    "mtrx" = plotted,
    "ups" = up_genes,
    "downs" = down_genes,
    "plot" = plt)
  return(retlist)
}
First plot: KO scn translatome on y axis vs. KO retina translatome on x axis.
translatome_plotter(pair_mtrx)
scnko_wrt_retko_translatome <-$plot
scnko_wrt_retko_translatome simple_gprofiler(sig_genes=scnko_wrt_retko_translatome$ups,
scnko_wrt_retko_up_go <-species="mmusculus")
simple_gprofiler(sig_genes=scnko_wrt_retko_translatome$downs,
scnko_wrt_retko_down_go <-species="mmusculus")
$go
scnko_wrt_retko_down_go$kegg
scnko_wrt_retko_down_go$corum scnko_wrt_retko_down_go
translatome_plotter(pair_mtrx,
dlgnnorm_wrt_retnorm_translatome <-x_axis="normret", y_axis="normdlgn")
$plot
dlgnnorm_wrt_retnorm_translatome simple_gprofiler(sig_genes=dlgnnorm_wrt_retnorm_translatome$ups,
dlgnnorm_wrt_retnorm_up_go <-species="mmusculus")
simple_gprofiler(sig_genes=dlgnnorm_wrt_retnorm_translatome$downs,
dlgnnorm_wrt_retnorm_down_go <-species="mmusculus")
$go
dlgnnorm_wrt_retnorm_down_go$pvalue_plots$bpp_plot_over
dlgnnorm_wrt_retnorm_down_go$pvalue_plots$ccp_plot_over dlgnnorm_wrt_retnorm_down_go
## KO dLGN translatome (y axis) vs. KO retina translatome (x axis).
dlgnko_wrt_retko_translatome <- translatome_plotter(pair_mtrx,
                                                    x_axis="koret",
                                                    y_axis="kodlgn")
dlgnko_wrt_retko_translatome$plot
## Ontology search of the genes which went up in the KO dLGN samples.
dlgnko_wrt_retko_up_go <- simple_gprofiler(sig_genes=dlgnko_wrt_retko_translatome$ups,
                                           species="mmusculus")
dlgnko_wrt_retko_up_go$go
dlgnko_wrt_retko_up_go$kegg
dlgnko_wrt_retko_up_go$reac
## ... and of the genes which went down.
dlgnko_wrt_retko_down_go <- simple_gprofiler(sig_genes=dlgnko_wrt_retko_translatome$downs,
                                             species="mmusculus")
dlgnko_wrt_retko_down_go$go
dlgnko_wrt_retko_down_go$kegg
dlgnko_wrt_retko_down_go$reac
## KO dLGN translatome (y axis) vs. normal dLGN translatome (x axis).
dlgnnorm_wrt_dlgnko_translatome <- translatome_plotter(pair_mtrx,
                                                       x_axis="normdlgn",
                                                       y_axis="kodlgn")
dlgnnorm_wrt_dlgnko_translatome$plot
## Ontology searches of the up/down genes from this comparison.
dlgnnorm_wrt_dlgnko_up_go <- simple_gprofiler(sig_genes=dlgnnorm_wrt_dlgnko_translatome$ups,
                                              species="mmusculus")
dlgnnorm_wrt_dlgnko_down_go <- simple_gprofiler(sig_genes=dlgnnorm_wrt_dlgnko_translatome$downs,
                                                species="mmusculus")
dlgnnorm_wrt_dlgnko_down_go$go
## KO SCN translatome (y axis) vs. KO dLGN translatome (x axis).
dlgnko_wrt_scnko_translatome <- translatome_plotter(pair_mtrx,
                                                    x_axis="kodlgn",
                                                    y_axis="koscn")
dlgnko_wrt_scnko_translatome$plot
## Ontology search of the genes which went up in the KO SCN samples.
dlgnko_wrt_scnko_up_go <- simple_gprofiler(sig_genes=dlgnko_wrt_scnko_translatome$ups,
                                           species="mmusculus")
dlgnko_wrt_scnko_up_go$go
dlgnko_wrt_scnko_up_go$pvalue_plots$bpp_plot_over
## ... and of the genes which went down.
dlgnko_wrt_scnko_down_go <- simple_gprofiler(sig_genes=dlgnko_wrt_scnko_translatome$downs,
                                             species="mmusculus")
dlgnko_wrt_scnko_down_go$go
As I understand it, there is some interest in an ontology search using the ratio of ratios.
## Split the ratio-of-ratios vector into genes >= 1 (up) and <= -1 (down).
## NOTE(review): genes with -1 < ror < 1 are intentionally excluded from both sets.
ror <- normko_retdlgn
up_idx <- ror >= 1
down_idx <- ror <= -1
ror_up <- ror[up_idx]
length(ror_up)
ror_down <- ror[down_idx]
length(ror_down)
## Ontology search of the ratio-of-ratios 'up' genes, written to an excel file.
## (Filename typo 'gpfoiler' is preserved so existing outputs keep matching.)
ror_gprofiler_up <- simple_gprofiler(
  sig_genes=ror_up, species="mmusculus",
  excel=glue::glue("excel/{rundate}mm_ror_gpfoiler_up-v{ver}.xlsx"))
## Over-representation plots: MF, BP, CC, transcription factors, human phenotype.
ror_gprofiler_up$pvalue_plots$mfp_plot_over
ror_gprofiler_up$pvalue_plots$bpp_plot_over
ror_gprofiler_up$pvalue_plots$ccp_plot_over
ror_gprofiler_up$pvalue_plots$tf_plot_over
ror_gprofiler_up$pvalue_plots$hp_plot_over
## Ontology search of the ratio-of-ratios 'down' genes, written to an excel file.
## (Filename typo 'gpfoiler' is preserved so existing outputs keep matching.)
ror_gprofiler_down <- simple_gprofiler(
  sig_genes=ror_down, species="mmusculus",
  excel=glue::glue("excel/{rundate}mm_ror_gpfoiler_down-v{ver}.xlsx"))
## Over-representation plots: MF, BP, Reactome, CC, transcription factors.
ror_gprofiler_down$pvalue_plots$mfp_plot_over
ror_gprofiler_down$pvalue_plots$bpp_plot_over
ror_gprofiler_down$pvalue_plots$reactome_plot_over
ror_gprofiler_down$pvalue_plots$ccp_plot_over
ror_gprofiler_down$pvalue_plots$tf_plot_over
## Record the session and save the workspace image for reproducibility.
pander::pander(sessionInfo())
message(paste0("This is hpgltools commit: ", get_git_commit()))
## Name the saved state after the Rmd file and version tag, e.g. 'foo-v1.rda.xz'.
## (Spelled out 'replacement=' to avoid partial argument matching.)
this_save <- paste0(gsub(pattern="\\.Rmd", replacement="", x=rmd_file),
                    "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
tmp <- sm(saveme(filename=this_save))
## Reload the saved state to confirm it is readable.
loadme(filename=this_save)