index.html preprocessing.html annotation.html sample_estimation.html
In ‘sample_estimation’, we did a series of analyses to try to pick out some of the surrogate variables in the data. Now we will perform a set of differential expression analyses using the results from that.
In the following, I will perform a set of differential expression analyses for all samples, once using the miRNA alignments, and once with the transcript alignments. Depending on what happens with them, I will repeat after separating the data between exosomes/cells.
I am also going to need to consider different methodologies for the DE analyses, but since the first round takes a while, I just want to see what they look like.
Let us gather the set of mature miRNA information into this table
extra_annotations <- read.table("reference/mi_mappings.tab")
## This is a set of enseble _GENE_ ids, mirbase ids, 5' accession/id, and 3' accession/ids.
rownames(extra_annotations) <- make.names(extra_annotations$ensembl_gene_id, unique=TRUE)
expressionset_columns <- as.data.frame(Biobase::fData(mmmi_small$expressionset))[, c("ensembl_gene_id","description")]
expressionset_columns$id_with_chr <- rownames(expressionset_columns)
final_extra_annotations <- merge(expressionset_columns, extra_annotations,
by.x="ensembl_gene_id", by.y="row.names")## Warning in merge.data.frame(expressionset_columns, extra_annotations, by.x =
## "ensembl_gene_id", : column name 'ensembl_gene_id' is duplicated in the result
rownames(final_extra_annotations) <- final_extra_annotations$id_with_chr
final_extra_annotations <- final_extra_annotations[, c(6, 7, 8, 9)]
initial_mature_table <- as.data.frame(Biobase::exprs(mmmi_mature$expressionset))
initial_mature_table$ID <- gsub(pattern="\\.", replacement="\\-", x=rownames(initial_mature_table))mmmi_small <- expt_subset(mmmi_small, subset="sampleid!='sHPGL0555'")
mmmi_small_filt <- normalize_expt(mmmi_small, filter=TRUE)## This function will replace the expt$expressionset slot with:
## cbcb(data)
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 1020 low-count genes (953 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
mmmi_small_simple <- normalize_expt(mmmi_small, filter="simple", thresh=1)## This function will replace the expt$expressionset slot with:
## simple(data)
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: simple
## Removing 516 low-count genes (1457 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
mmmi_small_combat <- normalize_expt(mmmi_small, filter="simple", batch="combat_scale")## This function will replace the expt$expressionset slot with:
## combat_scale(simple(data))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: simple
## Removing 625 low-count genes (1348 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: not transforming the data.
## Step 5: doing batch correction with combat_scale.
## Note to self: If you get an error like 'x contains missing values'; I think this
## means that the data has too many 0's and needs to have a better low-count filter applied.
## batch_counts: Before batch correction, 10571 entries are >= 0.
## batch_counts: Using sva::combat with a prior for batch correction and with scaling.
## The number of elements which are < 0 after batch correction is: 3902
## The variable low_to_zero sets whether to change <0 values to 0 and is: FALSE
mi_keepers <- list(
"mutvwt_cell" = c("cell_mirna_mut", "cell_mirna_wt"),
"mutvwt_exo" = c("exo_mirna_mut", "exo_mirna_wt"),
"exovcell_wt" = c("exo_mirna_wt", "cell_mirna_wt"),
"exovcell_mut" = c("exo_mirna_mut", "cell_mirna_mut"))
ups <- c("chr11_ENSMUSG00000065601", "chrX_ENSMUSG00000089357", "chr11_ENSMUSG00000092734")
downs <- c("chr14_ENSMUSG00000065403", "chrX_ENSMUSG00000065471", "chr4_ENSMUSG00000065490")
mi_comparisons_sva <- sm(all_pairwise(mmmi_small_simple, model_batch="svaseq", deseq_method="short", force=TRUE,
parallel=FALSE, edger_method="short", test_type="lrt",
which_voom="limma_weighted"))mi_abundant_sva <- sm(extract_abundant_genes(mi_comparisons_sva, n=100,
excel=paste0("excel/all_mi_svamodel_abundant-v", ver, ".xlsx")))
mi_tables_sva <- sm(combine_de_tables(mi_comparisons_sva,
keepers=mi_keepers,
excel=paste0("excel/all_mi_svamodel_comparisons-v", ver, ".xlsx"),
extra_annot=final_extra_annotations))mi_tables_sva$data$exovcell_mut[ups, ]## ensemblgeneid ensembltranscriptid
## chr11_ENSMUSG00000065601 ENSMUSG00000065601 ENSMUST00000083667
## chrX_ENSMUSG00000089357 ENSMUSG00000089357 ENSMUST00000158732
## chr11_ENSMUSG00000092734 ENSMUSG00000092734 ENSMUST00000174993
## description
## chr11_ENSMUSG00000065601 microRNA 146 [Source:MGI Symbol;Acc:MGI:2676831]
## chrX_ENSMUSG00000089357 microRNA 2137 [Source:MGI Symbol;Acc:MGI:4358923]
## chr11_ENSMUSG00000092734 microRNA 5100 [Source:MGI Symbol;Acc:MGI:4950420]
## externalgenename mirbaseaccession mirbaseid mgitranscriptname
## chr11_ENSMUSG00000065601 Mir146 MI0000170 mmu-mir-146a Mir146-201
## chrX_ENSMUSG00000089357 Mir2137 MI0010750 mmu-mir-2137 Mir2137-201
## chr11_ENSMUSG00000092734 Mir5100 MI0018008 mmu-mir-5100 Mir5100-201
## chromosomename id fivepaccession
## chr11_ENSMUSG00000065601 11 chr11_ENSMUSG00000065601 MIMAT0000158
## chrX_ENSMUSG00000089357 X chrX_ENSMUSG00000089357 MIMAT0011213
## chr11_ENSMUSG00000092734 11 chr11_ENSMUSG00000092734 MIMAT0020607
## fivepid threepaccession threepid limma_logfc
## chr11_ENSMUSG00000065601 mmu-miR-146a-5p MIMAT0016989 mmu-miR-146a-3p 4.079
## chrX_ENSMUSG00000089357 mmu-miR-2137 <NA> <NA> 5.572
## chr11_ENSMUSG00000092734 mmu-miR-5100 <NA> <NA> 2.918
## limma_adjp deseq_logfc deseq_adjp edger_logfc
## chr11_ENSMUSG00000065601 0.090270 3.389 0.001480000000 2.641
## chrX_ENSMUSG00000089357 0.001322 9.525 0.000000004464 10.270
## chr11_ENSMUSG00000092734 0.019890 4.415 0.000003054000 3.382
## edger_adjp limma_ave limma_t limma_b limma_p
## chr11_ENSMUSG00000065601 0.28850000000000 10.070 2.996 -2.7230 0.009355000
## chrX_ENSMUSG00000089357 0.00000000008273 1.008 5.942 4.0840 0.000004538
## chr11_ENSMUSG00000092734 0.00208900000000 9.510 4.229 0.3075 0.000314000
## deseq_basemean deseq_lfcse deseq_stat deseq_p
## chr11_ENSMUSG00000065601 3607.00 0.8575 3.953 0.00007726000000
## chrX_ENSMUSG00000089357 61.11 1.4120 6.747 0.00000000001513
## chr11_ENSMUSG00000092734 3800.00 0.8199 5.385 0.00000007247000
## edger_logcpm edger_lr edger_p basic_nummed
## chr11_ENSMUSG00000065601 11.130 4.525 0.03340000000000000 12.100
## chrX_ENSMUSG00000089357 4.342 56.480 0.00000000000005678 3.322
## chr11_ENSMUSG00000092734 10.750 17.700 0.00002581000000000 11.810
## basic_denmed basic_numvar basic_denvar basic_logfc basic_t
## chr11_ENSMUSG00000065601 8.778 6.357e+00 1.783e+00 3.325 -0.9937
## chrX_ENSMUSG00000089357 0.000 3.083e+00 0.000e+00 3.322 -4.9730
## chr11_ENSMUSG00000092734 9.331 4.904e+00 3.789e-01 2.483 -1.8580
## basic_p basic_adjp basic_adjp_fdr deseq_adjp_fdr
## chr11_ENSMUSG00000065601 3.582e-01 5.710e-01 5.709e-01 3.656e-03
## chrX_ENSMUSG00000089357 7.634e-03 3.335e-01 3.335e-01 1.102e-08
## chr11_ENSMUSG00000092734 1.271e-01 4.749e-01 4.748e-01 7.542e-06
## edger_adjp_fdr limma_adjp_fdr fc_meta fc_var fc_varbymed
## chr11_ENSMUSG00000065601 2.885e-01 9.027e-02 3.630 1.589e+00 4.378e-01
## chrX_ENSMUSG00000089357 8.273e-11 1.322e-03 8.347 1.143e+00 1.369e-01
## chr11_ENSMUSG00000092734 2.089e-03 1.989e-02 3.620 6.936e-01 1.916e-01
## p_meta p_var
## chr11_ENSMUSG00000065601 1.428e-02 2.958e-04
## chrX_ENSMUSG00000089357 1.513e-06 6.864e-12
## chr11_ENSMUSG00000092734 1.133e-04 3.038e-08
mi_tables_sva$data$exovcell_mut[downs, ]## ensemblgeneid ensembltranscriptid
## chr14_ENSMUSG00000065403 ENSMUSG00000065403 ENSMUST00000083469
## chrX_ENSMUSG00000065471 ENSMUSG00000065471 ENSMUST00000083537
## chr4_ENSMUSG00000065490 ENSMUSG00000065490 ENSMUST00000083556
## description
## chr14_ENSMUSG00000065403 microRNA 18 [Source:MGI Symbol;Acc:MGI:2676844]
## chrX_ENSMUSG00000065471 microRNA 222 [Source:MGI Symbol;Acc:MGI:3619118]
## chr4_ENSMUSG00000065490 microRNA 30c-1 [Source:MGI Symbol;Acc:MGI:2676909]
## externalgenename mirbaseaccession mirbaseid
## chr14_ENSMUSG00000065403 Mir18 MI0000567 mmu-mir-18a
## chrX_ENSMUSG00000065471 Mir222 MI0000710 mmu-mir-222
## chr4_ENSMUSG00000065490 Mir30c-1 MI0000547 mmu-mir-30c-1
## mgitranscriptname chromosomename id
## chr14_ENSMUSG00000065403 Mir18-201 14 chr14_ENSMUSG00000065403
## chrX_ENSMUSG00000065471 Mir222-201 X chrX_ENSMUSG00000065471
## chr4_ENSMUSG00000065490 Mir30c-1-201 4 chr4_ENSMUSG00000065490
## fivepaccession fivepid threepaccession threepid
## chr14_ENSMUSG00000065403 MIMAT0000528 mmu-miR-18a-5p MIMAT0004626 mmu-miR-18a-3p
## chrX_ENSMUSG00000065471 MIMAT0017061 mmu-miR-222-5p MIMAT0000670 mmu-miR-222-3p
## chr4_ENSMUSG00000065490 MIMAT0000514 mmu-miR-30c-5p MIMAT0004616 mmu-miR-30c-1-3p
## limma_logfc limma_adjp deseq_logfc deseq_adjp edger_logfc
## chr14_ENSMUSG00000065403 -2.6310 0.08125 -4.120 0.00000031 -3.0730
## chrX_ENSMUSG00000065471 -0.9607 0.29690 -1.650 0.04536000 -0.4827
## chr4_ENSMUSG00000065490 -1.4780 0.21690 -2.644 0.00861400 -0.9010
## edger_adjp limma_ave limma_t limma_b limma_p deseq_basemean
## chr14_ENSMUSG00000065403 0.06052 7.494 -2.971 -2.457 0.006803 516.7
## chrX_ENSMUSG00000065471 1.00000 9.111 -1.546 -5.106 0.135600 971.0
## chr4_ENSMUSG00000065490 0.86210 7.026 -1.814 -4.604 0.082610 321.0
## deseq_lfcse deseq_stat deseq_p edger_logcpm edger_lr
## chr14_ENSMUSG00000065403 0.7075 -5.823 0.00000000578 8.982 8.5760
## chrX_ENSMUSG00000065471 0.6067 -2.720 0.00653500000 10.050 0.4294
## chr4_ENSMUSG00000065490 0.7789 -3.395 0.00068620000 8.530 0.9413
## edger_p basic_nummed basic_denmed basic_numvar basic_denvar
## chr14_ENSMUSG00000065403 0.003406 4.392 11.320 7.377e+00 3.661e+00
## chrX_ENSMUSG00000065471 0.512300 6.570 11.500 1.662e+01 5.566e+00
## chr4_ENSMUSG00000065490 0.331900 5.358 9.716 1.329e+01 6.769e+00
## basic_logfc basic_t basic_p basic_adjp basic_adjp_fdr
## chr14_ENSMUSG00000065403 -6.928 4.289 3.403e-03 3.335e-01 3.335e-01
## chrX_ENSMUSG00000065471 -4.932 2.285 5.966e-02 3.915e-01 3.916e-01
## chr4_ENSMUSG00000065490 -4.358 2.935 2.107e-02 3.335e-01 3.335e-01
## deseq_adjp_fdr edger_adjp_fdr limma_adjp_fdr fc_meta fc_var
## chr14_ENSMUSG00000065403 7.656e-07 6.052e-02 8.125e-02 -4.155 6.659e+00
## chrX_ENSMUSG00000065471 1.120e-01 1.000e+00 2.969e-01 -1.298 1.667e+00
## chr4_ENSMUSG00000065490 2.127e-02 8.620e-01 2.169e-01 -2.210 4.463e+00
## fc_varbymed p_meta p_var
## chr14_ENSMUSG00000065403 -1.603e+00 3.403e-03 1.157e-05
## chrX_ENSMUSG00000065471 -1.284e+00 2.181e-01 6.906e-02
## chr4_ENSMUSG00000065490 -2.019e+00 1.384e-01 2.976e-02
mi_sig_sva <- sm(extract_significant_genes(mi_tables_sva, p_type="p",
excel=paste0("excel/all_mi_svamodel_signficant-v", ver, ".xlsx"),
according_to="all"))
nrow(mi_sig_sva$limma$ups$exovcell_mut) ## Previous 141## [1] 303
nrow(mi_sig_sva$limma$downs$exovcell_mut) ## Previous 339## [1] 88
nrow(mi_sig_sva$limma$ups$exovcell_wt) ## Previous 120## [1] 138
nrow(mi_sig_sva$limma$downs$exovcell_wt) ## Previous 421## [1] 119
nrow(mi_sig_sva$limma$ups$mutvwt_cell) ## Previous 43## [1] 31
nrow(mi_sig_sva$limma$downs$mutvwt_cell) ## Previous 119## [1] 70
nrow(mi_sig_sva$limma$ups$mutvwt_exo) ## Previous 127## [1] 48
nrow(mi_sig_sva$limma$downs$mutvwt_exo) ## Previous 105## [1] 53
mi_comparisons_batch <- sm(all_pairwise(mmmi_small_simple, model_batch=TRUE, deseq_method="short",
parallel=FALSE, edger_method="long", test_type="lrt",
which_voom="limma_weighted"))mi_abundant_batch <- sm(extract_abundant_genes(mi_comparisons_batch, n=100,
excel=paste0("excel/all_mi_batchmodel_abundant-v", ver, ".xlsx")))
mi_tables_batch <- sm(combine_de_tables(mi_comparisons_batch,
keepers=mi_keepers,
padj_type="BY",
excel=paste0("excel/all_mi_batchmodel_comparisons-v", ver, ".xlsx"),
extra_annot=final_extra_annotations))mi_tables_batch$data$exovcell_mut[ups, ]## ensemblgeneid ensembltranscriptid
## chr11_ENSMUSG00000065601 ENSMUSG00000065601 ENSMUST00000083667
## chrX_ENSMUSG00000089357 ENSMUSG00000089357 ENSMUST00000158732
## chr11_ENSMUSG00000092734 ENSMUSG00000092734 ENSMUST00000174993
## description
## chr11_ENSMUSG00000065601 microRNA 146 [Source:MGI Symbol;Acc:MGI:2676831]
## chrX_ENSMUSG00000089357 microRNA 2137 [Source:MGI Symbol;Acc:MGI:4358923]
## chr11_ENSMUSG00000092734 microRNA 5100 [Source:MGI Symbol;Acc:MGI:4950420]
## externalgenename mirbaseaccession mirbaseid mgitranscriptname
## chr11_ENSMUSG00000065601 Mir146 MI0000170 mmu-mir-146a Mir146-201
## chrX_ENSMUSG00000089357 Mir2137 MI0010750 mmu-mir-2137 Mir2137-201
## chr11_ENSMUSG00000092734 Mir5100 MI0018008 mmu-mir-5100 Mir5100-201
## chromosomename id fivepaccession
## chr11_ENSMUSG00000065601 11 chr11_ENSMUSG00000065601 MIMAT0000158
## chrX_ENSMUSG00000089357 X chrX_ENSMUSG00000089357 MIMAT0011213
## chr11_ENSMUSG00000092734 11 chr11_ENSMUSG00000092734 MIMAT0020607
## fivepid threepaccession threepid limma_logfc
## chr11_ENSMUSG00000065601 mmu-miR-146a-5p MIMAT0016989 mmu-miR-146a-3p 4.981
## chrX_ENSMUSG00000089357 mmu-miR-2137 <NA> <NA> 6.181
## chr11_ENSMUSG00000092734 mmu-miR-5100 <NA> <NA> 4.402
## limma_adjp deseq_logfc deseq_adjp edger_logfc
## chr11_ENSMUSG00000065601 0.00000006001000 3.991 0.0000009096000 3.159
## chrX_ENSMUSG00000089357 0.00000000003054 8.294 0.0000000002599 9.246
## chr11_ENSMUSG00000092734 0.00000008413000 4.357 0.0000000001266 3.797
## edger_adjp limma_ave limma_t limma_b limma_p
## chr11_ENSMUSG00000065601 0.0084360000 10.070 7.768 12.70 0.00000000052350000
## chrX_ENSMUSG00000089357 0.0000002631 1.008 10.520 21.65 0.00000000000004193
## chr11_ENSMUSG00000092734 0.0000183700 9.510 7.497 11.90 0.00000000121300000
## deseq_basemean deseq_lfcse deseq_stat deseq_p
## chr11_ENSMUSG00000065601 3607.00 0.7202 5.542 0.000000029950000
## chrX_ENSMUSG00000089357 61.11 1.1900 6.971 0.000000000003153
## chr11_ENSMUSG00000092734 3800.00 0.6143 7.093 0.000000000001317
## edger_logcpm edger_lr edger_p basic_nummed basic_denmed
## chr11_ENSMUSG00000065601 11.130 11.88 0.0005660000000 12.23 8.138
## chrX_ENSMUSG00000089357 4.464 38.52 0.0000000005417 2.98 -2.417
## chr11_ENSMUSG00000092734 10.750 27.74 0.0000001387000 13.87 7.766
## basic_numvar basic_denvar basic_logfc basic_t basic_p
## chr11_ENSMUSG00000065601 1.243e+00 3.708e+00 4.089 -4.142 5.255e-03
## chrX_ENSMUSG00000089357 1.449e+01 2.344e-09 5.397 -4.470 1.108e-02
## chr11_ENSMUSG00000092734 2.048e+00 2.715e+00 6.106 -4.817 1.400e-03
## basic_adjp basic_adjp_by deseq_adjp_by edger_adjp_by
## chr11_ENSMUSG00000065601 3.819e-01 1.000e+00 1.806e-05 6.632e-02
## chrX_ENSMUSG00000089357 3.819e-01 1.000e+00 5.159e-09 2.068e-06
## chr11_ENSMUSG00000092734 2.267e-01 1.000e+00 2.514e-09 1.444e-04
## limma_adjp_by fc_meta fc_var fc_varbymed p_meta p_var
## chr11_ENSMUSG00000065601 4.718e-07 4.096 8.703e-01 2.125e-01 1.887e-04 1.068e-07
## chrX_ENSMUSG00000089357 2.401e-10 7.769 2.223e+00 2.862e-01 1.816e-10 9.724e-20
## chr11_ENSMUSG00000092734 6.616e-07 4.185 2.054e-01 4.908e-02 4.664e-08 6.357e-15
mi_tables_batch$data$exovcell_mut[downs, ]## ensemblgeneid ensembltranscriptid
## chr14_ENSMUSG00000065403 ENSMUSG00000065403 ENSMUST00000083469
## chrX_ENSMUSG00000065471 ENSMUSG00000065471 ENSMUST00000083537
## chr4_ENSMUSG00000065490 ENSMUSG00000065490 ENSMUST00000083556
## description
## chr14_ENSMUSG00000065403 microRNA 18 [Source:MGI Symbol;Acc:MGI:2676844]
## chrX_ENSMUSG00000065471 microRNA 222 [Source:MGI Symbol;Acc:MGI:3619118]
## chr4_ENSMUSG00000065490 microRNA 30c-1 [Source:MGI Symbol;Acc:MGI:2676909]
## externalgenename mirbaseaccession mirbaseid
## chr14_ENSMUSG00000065403 Mir18 MI0000567 mmu-mir-18a
## chrX_ENSMUSG00000065471 Mir222 MI0000710 mmu-mir-222
## chr4_ENSMUSG00000065490 Mir30c-1 MI0000547 mmu-mir-30c-1
## mgitranscriptname chromosomename id
## chr14_ENSMUSG00000065403 Mir18-201 14 chr14_ENSMUSG00000065403
## chrX_ENSMUSG00000065471 Mir222-201 X chrX_ENSMUSG00000065471
## chr4_ENSMUSG00000065490 Mir30c-1-201 4 chr4_ENSMUSG00000065490
## fivepaccession fivepid threepaccession threepid
## chr14_ENSMUSG00000065403 MIMAT0000528 mmu-miR-18a-5p MIMAT0004626 mmu-miR-18a-3p
## chrX_ENSMUSG00000065471 MIMAT0017061 mmu-miR-222-5p MIMAT0000670 mmu-miR-222-3p
## chr4_ENSMUSG00000065490 MIMAT0000514 mmu-miR-30c-5p MIMAT0004616 mmu-miR-30c-1-3p
## limma_logfc limma_adjp deseq_logfc deseq_adjp edger_logfc
## chr14_ENSMUSG00000065403 -3.304 0.000025890 -3.851 0.000000252 -4.978
## chrX_ENSMUSG00000065471 -1.819 0.006672000 -1.834 0.001417000 -2.698
## chr4_ENSMUSG00000065490 -3.850 0.000003175 -2.544 0.003905000 -3.281
## edger_adjp limma_ave limma_t limma_b limma_p deseq_basemean
## chr14_ENSMUSG00000065403 0.00003081 7.494 -5.414 4.885 0.0000019020 516.7
## chrX_ENSMUSG00000065471 0.00567300 9.111 -3.106 -2.090 0.0031640000 971.0
## chr4_ENSMUSG00000065490 0.00498100 7.026 -6.211 7.552 0.0000001155 321.0
## deseq_lfcse deseq_stat deseq_p edger_logcpm edger_lr
## chr14_ENSMUSG00000065403 0.6649 -5.791 0.000000006988 8.979 26.57
## chrX_ENSMUSG00000065471 0.4775 -3.840 0.000122800000 10.050 13.51
## chr4_ENSMUSG00000065490 0.7246 -3.511 0.000446700000 8.526 13.88
## edger_p basic_nummed basic_denmed basic_numvar basic_denvar
## chr14_ENSMUSG00000065403 0.0000002538 5.087 9.464 1.094e+01 1.242e+00
## chrX_ENSMUSG00000065471 0.0002375000 8.460 10.010 2.305e+01 2.676e-01
## chr4_ENSMUSG00000065490 0.0001948000 6.400 8.186 2.239e+01 1.098e+00
## basic_logfc basic_t basic_p basic_adjp basic_adjp_by
## chr14_ENSMUSG00000065403 -4.377 3.684 1.477e-02 3.819e-01 1.000e+00
## chrX_ENSMUSG00000065471 -1.545 1.708 1.612e-01 5.963e-01 1.000e+00
## chr4_ENSMUSG00000065490 -1.787 2.461 6.401e-02 5.152e-01 1.000e+00
## deseq_adjp_by edger_adjp_by limma_adjp_by fc_meta fc_var
## chr14_ENSMUSG00000065403 5.003e-06 2.423e-04 2.036e-04 -4.080 6.616e-01
## chrX_ENSMUSG00000065471 2.813e-02 4.460e-02 5.245e-02 -2.122 3.584e-01
## chr4_ENSMUSG00000065490 7.753e-02 3.915e-02 2.496e-05 -3.446 1.716e+00
## fc_varbymed p_meta p_var
## chr14_ENSMUSG00000065403 -1.621e-01 7.209e-07 1.061e-12
## chrX_ENSMUSG00000065471 -1.689e-01 1.175e-03 2.971e-06
## chr4_ENSMUSG00000065490 -4.980e-01 2.139e-04 5.013e-08
mi_sig_batch <- sm(extract_significant_genes(mi_tables_batch, p_type="adjp",
excel=paste0("excel/all_mi_batchmodel_signficant-v", ver, ".xlsx"),
according_to="all"))
nrow(mi_sig_batch$limma$ups$exovcell_mut) ## Previous 141## [1] 804
nrow(mi_sig_batch$limma$downs$exovcell_mut) ## Previous 339## [1] 151
nrow(mi_sig_batch$limma$ups$exovcell_wt) ## Previous 120## [1] 434
nrow(mi_sig_batch$limma$downs$exovcell_wt) ## Previous 421## [1] 148
nrow(mi_sig_batch$limma$ups$mutvwt_cell) ## Previous 43## [1] 61
nrow(mi_sig_batch$limma$downs$mutvwt_cell) ## Previous 119## [1] 92
nrow(mi_sig_batch$limma$ups$mutvwt_exo) ## Previous 127## [1] 100
nrow(mi_sig_batch$limma$downs$mutvwt_exo) ## Previous 105## [1] 50
testing <- sm(normalize_expt(mmmi_small, batch="sva", filter=TRUE, low_to_zero=TRUE))
test_pairwise <- sm(all_pairwise(input=testing, model_batch=FALSE, force=TRUE, modify_p=FALSE, parallel=FALSE))## 1. Do nothing This should be our baseline worst option.
test_mmmi_small <- sm(normalize_expt(mmmi_small, filter=TRUE))
mi_comparisons <- sm(all_pairwise(input=test_mmmi_small, model_batch=FALSE))
mi_tables <- sm(combine_de_tables(mi_comparisons,
keepers=mi_keepers,
extra_annot=final_extra_annotations,
excel=NULL))
mi_tables$data$exovcell_mut[ups, ]
mi_tables$data$exovcell_mut[downs, ]
## The first one is limma-p insignificant
## 3. Add sva in the model, do nothing else.
mi_comparisons <- sm(all_pairwise(input=test_mmmi_small, model_batch="sva"))
mi_tables <- sm(combine_de_tables(mi_comparisons,
keepers=mi_keepers,
extra_annot=final_extra_annotations,
excel=NULL))
mi_tables$data$exovcell_mut[ups, ]
mi_tables$data$exovcell_mut[downs, ]
## The first one is limma-p insignificant
## 4. Add sva in the model, use f.pvalue()
mi_comparisons <- sm(all_pairwise(input=test_mmmi_small, model_batch="sva", modify_p=TRUE))
mi_tables <- sm(combine_de_tables(mi_comparisons,
keepers=mi_keepers,
extra_annot=final_extra_annotations,
excel=NULL))
mi_tables$data$exovcell_mut[ups, ]
mi_tables$data$exovcell_mut[downs, ]
## The p-values all go off the rails.
## 5. Add svaseq in the model, no f.pvalue()
mi_comparisons <- sm(all_pairwise(input=test_mmmi_small, model_batch="svaseq", modify_p=FALSE))
mi_tables <- sm(combine_de_tables(mi_comparisons,
keepers=mi_keepers,
extra_annot=final_extra_annotations,
excel=NULL))
mi_tables$data$exovcell_mut[ups, ]
mi_tables$data$exovcell_mut[downs, ]
## The first one is limma-p insig.
## 6. svaseq with f.pvalue()
mi_comparisons <- sm(all_pairwise(input=test_mmmi_small, model_batch="svaseq", modify_p=TRUE))
mi_tables <- sm(combine_de_tables(mi_comparisons,
keepers=mi_keepers,
extra_annot=final_extra_annotations,
excel=NULL))
mi_tables$data$exovcell_mut[ups, ]
mi_tables$data$exovcell_mut[downs, ]
## Same as #4 above.
## 7. ruv_supervised without f.pvalue()
## Note to self: ruv_empirical and ruv_residuals failed -- look into that.
mi_comparisons <- sm(all_pairwise(input=test_mmmi_small, model_batch="ruv_supervised", modify_p=FALSE))
mi_tables <- sm(combine_de_tables(mi_comparisons,
keepers=mi_keepers,
extra_annot=final_extra_annotations))
mi_tables$data$exovcell_mut[ups, ]
mi_tables$data$exovcell_mut[downs, ]
## All sig.
## 8. ruv_empirical with f.pvalue()
mi_comparisons <- sm(all_pairwise(mmmi_small, model_batch="ruv_supervised", modify_p=TRUE))
mi_tables <- sm(combine_de_tables(mi_comparisons,
keepers=mi_keepers,
extra_annot=final_extra_annotations))
mi_tables$data$exovcell_mut[ups, ]
mi_tables$data$exovcell_mut[downs, ]
## 9. pca
mi_comparisons <- sm(all_pairwise(mmmi_small, model_batch="pca", modify_p=FALSE))
mi_tables <- sm(combine_de_tables(mi_comparisons,
keepers=mi_keepers,
extra_annot=final_extra_annotations,
excel=NULL))
mi_tables$data$exovcell_mut[ups, ]
mi_tables$data$exovcell_mut[downs, ]
## FC values are more restrictive to sva, and the p-values are much more restrictiveGiven an expressionset of smallRNAs in both cells and exosomes. I want to query it for the set of non-zero miRNA species in cells and exosomes separately. Then I want to take those two sets of miRNA species and see where they are identical and not. I wish to visualize this via a venn diagram.
exo_samples_wt <- expt_subset(mmmi_small, subset="sampletype=='exo'&genotype=='wt'")
cell_samples_wt <- expt_subset(mmmi_small, subset="sampletype=='cell'&genotype=='wt'")
exo_samples_mut <- expt_subset(mmmi_small, subset="sampletype=='exo'&genotype=='mut'")
cell_samples_mut <- expt_subset(mmmi_small, subset="sampletype=='cell'&genotype=='mut'")
exo_samples_wtnorm <- sm(normalize_expt(exo_samples_wt, convert="cpm", norm="quant"))
exo_samples_names <- rownames(exprs(exo_samples_wt$expressionset))
exo_samples_wtnorm <- rowMedians(Biobase::exprs(exo_samples_wtnorm$expressionset))
names(exo_samples_wtnorm) <- exo_samples_names
exo_samples_mutnorm <- sm(normalize_expt(exo_samples_mut, convert="cpm", norm="quant"))
exo_samples_names <- rownames(exprs(exo_samples_mut$expressionset))
exo_samples_mutnorm <- rowMedians(Biobase::exprs(exo_samples_mutnorm$expressionset))
names(exo_samples_mutnorm) <- exo_samples_names
cell_samples_wtnorm <- sm(normalize_expt(cell_samples_wt, convert="cpm", norm="quant"))
cell_samples_names <- rownames(exprs(cell_samples_wt$expressionset))
cell_samples_wtnorm <- rowMedians(Biobase::exprs(cell_samples_wtnorm$expressionset))
names(cell_samples_wtnorm) <- cell_samples_names
cell_samples_mutnorm <- sm(normalize_expt(cell_samples_mut, convert="cpm", norm="quant"))
cell_samples_names <- rownames(exprs(cell_samples_mut$expressionset))
cell_samples_mutnorm <- rowMedians(Biobase::exprs(cell_samples_mutnorm$expressionset))
names(cell_samples_mutnorm) <- cell_samples_names
## ok, so now I have 4 sets of normalized medians/miRNA species, pick a cutoff which is equivalent to '0'
## and find how many in each category do and do not match this criterion.
##exo_samples_wtnorm > 0.1
exo_wt_df <- ifelse(exo_samples_wtnorm > 0.0, 1, 0)
exo_mut_df <- ifelse(exo_samples_mutnorm > 0.0, 1, 0)
cell_wt_df <- ifelse(cell_samples_wtnorm > 0.0, 1, 0)
cell_mut_df <- ifelse(cell_samples_mutnorm > 0.0, 1, 0)
wt_df <- merge(as.data.frame(cell_wt_df), as.data.frame(exo_wt_df), by="row.names")
mut_df <- merge(as.data.frame(cell_mut_df), as.data.frame(exo_mut_df), by="row.names")
cell_df <- merge(as.data.frame(cell_wt_df), as.data.frame(cell_mut_df), by="row.names")
exo_df <- merge(as.data.frame(exo_wt_df), as.data.frame(exo_mut_df), by="row.names")
library(Vennerable)
wt_none <- nrow(wt_df[ wt_df$cell_wt_df == 0 & wt_df$exo_wt_df == 0, ])
wt_cell_alone <- nrow(wt_df[ wt_df$cell_wt_df == 1 & wt_df$exo_wt_df == 0, ])
wt_exo_alone <- nrow(wt_df[ wt_df$cell_wt_df == 0 & wt_df$exo_wt_df == 1, ])
wt_both <- nrow(wt_df[ wt_df$cell_wt_df == 1 & wt_df$exo_wt_df == 1, ])
wt_venn <- Vennerable::Venn(SetNames = c("Cells", "Exosomes"),
Weight = c(wt_none, wt_cell_alone, wt_exo_alone, wt_both))
plot(wt_venn)mut_none <- nrow(mut_df[ mut_df$cell_mut_df == 0 & mut_df$exo_mut_df == 0, ])
mut_cell_alone <- nrow(mut_df[ mut_df$cell_mut_df == 1 & mut_df$exo_mut_df == 0, ])
mut_exo_alone <- nrow(mut_df[ mut_df$cell_mut_df == 0 & mut_df$exo_mut_df == 1, ])
mut_both <- nrow(mut_df[ mut_df$cell_mut_df == 1 & mut_df$exo_mut_df == 1, ])
mut_venn <- Vennerable::Venn(SetNames = c("Cells", "Exosomes"),
Weight = c(mut_none, mut_cell_alone, mut_exo_alone, mut_both))
plot(mut_venn)cell_none <- nrow(cell_df[ cell_df$cell_wt_df == 0 & cell_df$cell_mut_df == 0, ])
cell_wt_alone <- nrow(cell_df[ cell_df$cell_wt_df == 1 & cell_df$cell_mut_df == 0, ])
cell_mut_alone <- nrow(cell_df[ cell_df$cell_wt_df == 0 & cell_df$cell_mut_df == 1, ])
cell_both <- nrow(cell_df[ cell_df$cell_wt_df == 1 & cell_df$cell_mut_df == 1, ])
cell_venn <- Vennerable::Venn(SetNames = c("Conv.", "Infl."),
Weight = c(cell_none, cell_wt_alone, cell_mut_alone, cell_both))
plot(cell_venn)exo_none <- nrow(exo_df[ exo_df$exo_wt_df == 0 & exo_df$exo_mut_df == 0, ])
exo_wt_alone <- nrow(exo_df[ exo_df$exo_wt_df == 1 & exo_df$exo_mut_df == 0, ])
exo_mut_alone <- nrow(exo_df[ exo_df$exo_wt_df == 0 & exo_df$exo_mut_df == 1, ])
exo_both <- nrow(exo_df[ exo_df$exo_wt_df == 1 & exo_df$exo_mut_df == 1, ])
exo_venn <- Vennerable::Venn(SetNames = c("Conv.", "Infl."),
Weight = c(exo_none, exo_wt_alone, exo_mut_alone, exo_both))
plot(exo_venn)## Repeat, but take the set of things which are greater than the mean of all.
exo_wt_df <- ifelse(exo_samples_wtnorm > mean(exo_samples_wtnorm), 1, 0)
exo_mut_df <- ifelse(exo_samples_mutnorm > mean(exo_samples_mutnorm), 1, 0)
cell_wt_df <- ifelse(cell_samples_wtnorm > mean(cell_samples_wtnorm), 1, 0)
cell_mut_df <- ifelse(cell_samples_wtnorm > mean(cell_samples_mutnorm), 1, 0)
wt_df <- merge(as.data.frame(cell_wt_df), as.data.frame(exo_wt_df), by="row.names")
mut_df <- merge(as.data.frame(cell_mut_df), as.data.frame(exo_mut_df), by="row.names")
cell_df <- merge(as.data.frame(cell_wt_df), as.data.frame(cell_mut_df), by="row.names")
exo_df <- merge(as.data.frame(exo_wt_df), as.data.frame(exo_mut_df), by="row.names")
wt_none <- nrow(wt_df[ wt_df$cell_wt_df == 0 & wt_df$exo_wt_df == 0, ])
wt_cell_alone <- nrow(wt_df[ wt_df$cell_wt_df == 1 & wt_df$exo_wt_df == 0, ])
wt_exo_alone <- nrow(wt_df[ wt_df$cell_wt_df == 0 & wt_df$exo_wt_df == 1, ])
wt_both <- nrow(wt_df[ wt_df$cell_wt_df == 1 & wt_df$exo_wt_df == 1, ])
wt_venn <- Vennerable::Venn(SetNames = c("Cells", "Exosomes"),
Weight = c(wt_none, wt_cell_alone, wt_exo_alone, wt_both))
plot(wt_venn)mut_none <- nrow(mut_df[ mut_df$cell_mut_df == 0 & mut_df$exo_mut_df == 0, ])
mut_cell_alone <- nrow(mut_df[ mut_df$cell_mut_df == 1 & mut_df$exo_mut_df == 0, ])
mut_exo_alone <- nrow(mut_df[ mut_df$cell_mut_df == 0 & mut_df$exo_mut_df == 1, ])
mut_both <- nrow(mut_df[ mut_df$cell_mut_df == 1 & mut_df$exo_mut_df == 1, ])
mut_venn <- Vennerable::Venn(SetNames = c("Cells", "Exosomes"),
Weight = c(mut_none, mut_cell_alone, mut_exo_alone, mut_both))
plot(mut_venn)cell_none <- nrow(cell_df[ cell_df$cell_wt_df == 0 & cell_df$cell_mut_df == 0, ])
cell_wt_alone <- nrow(cell_df[ cell_df$cell_wt_df == 1 & cell_df$cell_mut_df == 0, ])
cell_mut_alone <- nrow(cell_df[ cell_df$cell_wt_df == 0 & cell_df$cell_mut_df == 1, ])
cell_both <- nrow(cell_df[ cell_df$cell_wt_df == 1 & cell_df$cell_mut_df == 1, ])
cell_venn <- Vennerable::Venn(SetNames = c("Conv.", "Infl."),
Weight = c(cell_none, cell_wt_alone, cell_mut_alone, cell_both))
plot(cell_venn)exo_none <- nrow(exo_df[ exo_df$exo_wt_df == 0 & exo_df$exo_mut_df == 0, ])
exo_wt_alone <- nrow(exo_df[ exo_df$exo_wt_df == 1 & exo_df$exo_mut_df == 0, ])
exo_mut_alone <- nrow(exo_df[ exo_df$exo_wt_df == 0 & exo_df$exo_mut_df == 1, ])
exo_both <- nrow(exo_df[ exo_df$exo_wt_df == 1 & exo_df$exo_mut_df == 1, ])
exo_venn <- Vennerable::Venn(SetNames = c("Conv.", "Infl."),
Weight = c(exo_none, exo_wt_alone, exo_mut_alone, exo_both))
plot(exo_venn)tt <- sm(saveme(filename=this_save))index.html preprocessing.html annotation.html sample_estimation.html