DESEq2 analysis example (p14 trascriptome to p10 transcriptome)
## example p14 to p10 axonal transcripts
library(tximport)
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ lubridate::second() masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice() masks IRanges::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
"%ni%" <- Negate("%in%")
###I. PREPARE THE COUNT MATRICES
##0. Salmon-Tximport pipeline
## sample table
sampleTable.working <- readRDS("~/downloads/axonTRAP_forSpeerC_sampleTable.RDS")
sampleTable.working
## sample tissue compartment age genotype rna group
## AX31 AX31 sc axon p14 wt tx axon_tx_p14
## AX32 AX32 sc axon p14 wt tx axon_tx_p14
## AX33 AX33 sc axon p14 wt tx axon_tx_p14
## AX41 AX41 sc axon p10 wt tx axon_tx_p10
## AX42 AX42 sc axon p10 wt tx axon_tx_p10
## AX43 AX43 sc axon p10 wt tx axon_tx_p10
## salmon files
salmon.files.working <- readRDS("~/downloads/axonTRAP_forSpeerC_salmonFiles..RDS")
salmon.files.working %>% str_extract("/q.*.sf")
## [1] "/quant.sf" "/quant.sf" "/quant.sf" "/quant.sf" "/quant.sf" "/quant.sf"
## tx2gene matrix (genecode M24)
tx2gene <- readRDS("~/downloads/axonTRAP_forSpeerC_tx2gene..RDS")
tx2gene %>% head()
## TXNAME GENEID
## 1 ENSMUST00000193812 ENSMUSG00000102693
## 2 ENSMUST00000082908 ENSMUSG00000064842
## 3 ENSMUST00000192857 ENSMUSG00000102851
## 4 ENSMUST00000161581 ENSMUSG00000089699
## 5 ENSMUST00000192183 ENSMUSG00000103147
## 6 ENSMUST00000193244 ENSMUSG00000102348
## tximport object
txi.working <- tximport(salmon.files.working, type = "salmon", tx2gene = tx2gene,
ignoreTxVersion = T)
## reading in files with read_tsv
## 1 2 3 4 5 6
## summarizing abundance
## summarizing counts
## summarizing length
## DESeq2 object
dds.working <- DESeqDataSetFromTximport(txi.working,
colData = sampleTable.working,
design = ~ group)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
## DESeq analysis
dds.working <- DESeq(dds.working)
## estimating size factors
## using 'avgTxLength' from assays(dds), correcting for library size
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## DESeq results
resultsNames(dds.working)
## [1] "Intercept" "group_axon_tx_p14_vs_axon_tx_p10"
## DESeq1 contrast(experimental to control)
res.working <- results(dds.working, contrast = c("group", "axon_tx_p14", "axon_tx_p10"))
## annotation of DESeq2 results columns
res.working$sig <- res.working$padj < 0.05
res.working$sig[is.na(res.working$sig)] <- F
res.working$log2FoldChange_sig <- res.working$log2FoldChange
res.working$log2FoldChange_sig[res.working$sig!=T] <- NA
res.working$sig[res.working$log2FoldChange_sig >0] <- "mature"
res.working$sig[res.working$log2FoldChange_sig <0] <- "wiring"
res.working$sig[res.working$sig==F] <- "not_sig"
res.working
## log2 fold change (MLE): group axon_tx_p14 vs axon_tx_p10
## Wald test p-value: group axon tx p14 vs axon tx p10
## DataFrame with 54331 rows and 8 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSMUSG00000000001 37.50038 -2.725749 1.082249 -2.518598 1.17823e-02
## ENSMUSG00000000003 0.00000 NA NA NA NA
## ENSMUSG00000000028 45.03266 1.166625 0.783449 1.489087 1.36464e-01
## ENSMUSG00000000031 606.88495 -2.308231 0.467432 -4.938109 7.88839e-07
## ENSMUSG00000000037 2.45472 0.439344 3.170820 0.138559 8.89799e-01
## ... ... ... ... ... ...
## ENSMUSG00000118636 1.34359 -3.15894 3.957 -0.798319 0.424686
## ENSMUSG00000118637 0.00000 NA NA NA NA
## ENSMUSG00000118638 0.00000 NA NA NA NA
## ENSMUSG00000118639 0.00000 NA NA NA NA
## ENSMUSG00000118640 0.00000 NA NA NA NA
## padj sig log2FoldChange_sig
## <numeric> <character> <numeric>
## ENSMUSG00000000001 9.86756e-02 not_sig NA
## ENSMUSG00000000003 NA not_sig NA
## ENSMUSG00000000028 3.97325e-01 not_sig NA
## ENSMUSG00000000031 9.17251e-05 wiring -2.30823
## ENSMUSG00000000037 NA not_sig NA
## ... ... ... ...
## ENSMUSG00000118636 NA not_sig NA
## ENSMUSG00000118637 NA not_sig NA
## ENSMUSG00000118638 NA not_sig NA
## ENSMUSG00000118639 NA not_sig NA
## ENSMUSG00000118640 NA not_sig NA
## concatenation of multiple DESeq2 results
# colnames(res.working) <- paste('eu.P14ToP10', colnames(res.working), sep = '_')
# I made several DESeq2 analyses (e.g., P99 to P14) and changed the column names as below, and "cbind" all the results
## to make the dataframe that contains all the DESeq results for each gene (ommited in this document)
res.3d <- readRDS("~/downloads/axonTRAP_forSpeerC_res.3d..RDS")
identical(res.working %>% rownames(), res.3d$ensembl)
## [1] TRUE
res.working <- cbind(res.3d[,c('symbol', 'entrez')], res.working)
res.working %>% head()
## symbol entrez baseMean log2FoldChange lfcSE
## ENSMUSG00000000001 Gnai3 14679 37.5003794 -2.7257493 1.0822487
## ENSMUSG00000000003 Pbsn 54192 0.0000000 NA NA
## ENSMUSG00000000028 Cdc45 12544 45.0326622 1.1666246 0.7834495
## ENSMUSG00000000031 H19 14955 606.8849529 -2.3082315 0.4674323
## ENSMUSG00000000037 Scml2 107815 2.4547213 0.4393441 3.1708201
## ENSMUSG00000000049 Apoh 11818 0.9366795 0.4976118 3.9438898
## stat pvalue padj sig
## ENSMUSG00000000001 -2.5185979 1.178231e-02 9.867562e-02 not_sig
## ENSMUSG00000000003 NA NA NA not_sig
## ENSMUSG00000000028 1.4890872 1.364644e-01 3.973255e-01 not_sig
## ENSMUSG00000000031 -4.9381085 7.888395e-07 9.172505e-05 wiring
## ENSMUSG00000000037 0.1385585 8.897990e-01 NA not_sig
## ENSMUSG00000000049 0.1261728 8.995951e-01 NA not_sig
## log2FoldChange_sig
## ENSMUSG00000000001 NA
## ENSMUSG00000000003 NA
## ENSMUSG00000000028 NA
## ENSMUSG00000000031 -2.308231
## ENSMUSG00000000037 NA
## ENSMUSG00000000049 NA
## color theme
col.young <- brewer_pal("seq", palette = 7)(9)[3]
col.wiring <- brewer_pal("seq", palette = 7)(9)[6]
col.mature <- brewer_pal("seq", palette = 7)(9)[9]
# plot in figure 2H
p.p14.to.p10 <- res.3d %>% data.frame() %>%
dplyr::filter(tpm.axonalTx_p10 + tpm.axonalTx_p14 + tpm.axonalTx_p99> 10) %>%
dplyr::mutate(this = eu.P14ToP10_sig) %>%
dplyr::arrange(this) %>%
ggplot2::ggplot(ggplot2::aes(x= tpm.axonalTx_p10, y=tpm.axonalTx_p14)) + ## wt janeseq3 to facsseq
scale_x_log10() +
scale_y_log10() +
geom_point(aes(col = this), alpha = .4, size = 1) +
geom_hline(yintercept = 1, linetype = "dashed", size = .5, alpha = .5) +
geom_vline(xintercept = 1, linetype = "dashed", size = .5, alpha = .5) +
ggplot2::ggtitle(bquote("Transport (day 10 vs day 14))")) +
scale_x_log10(labels = scientific) +
scale_y_log10(labels = scientific) +
xlab("Wiring") + ylab("Mature") +
ggtitle("Wiring vs Mature") +
theme(strip.text = element_blank(), strip.background = element_blank()) +
theme(axis.text = element_text(size = 10)) +
theme(axis.title = element_text(size = 16)) +
theme(plot.title = element_text(size = 16, face = "plain", hjust = .5)) +
theme(panel.grid.minor.x = element_blank()) +
theme(panel.grid.major.x = element_blank()) +
theme(panel.grid.minor.y = element_blank()) +
scale_color_manual(values = c("grey", col.wiring, col.young)) +
scale_alpha_manual(values = c(.1, .8, .8)) +
scale_size_manual(values = c(1, 2,2)) +
theme(axis.text.y = element_text(angle = 90, hjust =.5)) +
theme(panel.background = element_rect(fill = "white", colour = "black"),
legend.direction = 'horizontal',
legend.background = element_rect(color = 'black', size = .2),
legend.text = element_text(face = 'plain'),
legend.margin = unit(x= c(t=0.1,r=1,b=0.2,l=0.2), unit ='pt')) +
theme(legend.position = "none")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `legend.margin` must be specified using `margin()`
## ℹ For the old behavior use `legend.spacing`