DESEq2 analysis example (p14 trascriptome to p10 transcriptome)

## example p14 to p10 axonal transcripts
library(tximport)
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::collapse()     masks IRanges::collapse()
## ✖ dplyr::combine()      masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count()        masks matrixStats::count()
## ✖ dplyr::desc()         masks IRanges::desc()
## ✖ tidyr::expand()       masks S4Vectors::expand()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ dplyr::first()        masks S4Vectors::first()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ ggplot2::Position()   masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce()       masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename()       masks S4Vectors::rename()
## ✖ lubridate::second()   masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice()        masks IRanges::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
"%ni%" <- Negate("%in%")

###I. PREPARE THE COUNT MATRICES
##0. Salmon-Tximport pipeline

## sample table
sampleTable.working <- readRDS("~/downloads/axonTRAP_forSpeerC_sampleTable.RDS")
sampleTable.working
##      sample tissue compartment age genotype rna       group
## AX31   AX31     sc        axon p14       wt  tx axon_tx_p14
## AX32   AX32     sc        axon p14       wt  tx axon_tx_p14
## AX33   AX33     sc        axon p14       wt  tx axon_tx_p14
## AX41   AX41     sc        axon p10       wt  tx axon_tx_p10
## AX42   AX42     sc        axon p10       wt  tx axon_tx_p10
## AX43   AX43     sc        axon p10       wt  tx axon_tx_p10
## salmon files
salmon.files.working <- readRDS("~/downloads/axonTRAP_forSpeerC_salmonFiles..RDS")
salmon.files.working  %>% str_extract("/q.*.sf")
## [1] "/quant.sf" "/quant.sf" "/quant.sf" "/quant.sf" "/quant.sf" "/quant.sf"
## tx2gene matrix (genecode M24)
tx2gene <- readRDS("~/downloads/axonTRAP_forSpeerC_tx2gene..RDS")
tx2gene %>% head() 
##               TXNAME             GENEID
## 1 ENSMUST00000193812 ENSMUSG00000102693
## 2 ENSMUST00000082908 ENSMUSG00000064842
## 3 ENSMUST00000192857 ENSMUSG00000102851
## 4 ENSMUST00000161581 ENSMUSG00000089699
## 5 ENSMUST00000192183 ENSMUSG00000103147
## 6 ENSMUST00000193244 ENSMUSG00000102348
## tximport object
txi.working <- tximport(salmon.files.working, type = "salmon", tx2gene = tx2gene,
                                   ignoreTxVersion = T)
## reading in files with read_tsv
## 1 2 3 4 5 6 
## summarizing abundance
## summarizing counts
## summarizing length
## DESeq2 object
dds.working <- DESeqDataSetFromTximport(txi.working,
                                            colData = sampleTable.working,
                                            design = ~ group)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
## DESeq analysis
dds.working <- DESeq(dds.working)
## estimating size factors
## using 'avgTxLength' from assays(dds), correcting for library size
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## DESeq results
resultsNames(dds.working)
## [1] "Intercept"                        "group_axon_tx_p14_vs_axon_tx_p10"
## DESeq1 contrast(experimental to control)
res.working <- results(dds.working, contrast = c("group", "axon_tx_p14", "axon_tx_p10")) 

## annotation of DESeq2 results columns
res.working$sig <- res.working$padj < 0.05
res.working$sig[is.na(res.working$sig)] <- F
res.working$log2FoldChange_sig <- res.working$log2FoldChange
res.working$log2FoldChange_sig[res.working$sig!=T]  <- NA
res.working$sig[res.working$log2FoldChange_sig >0] <- "mature"
res.working$sig[res.working$log2FoldChange_sig <0] <- "wiring"
res.working$sig[res.working$sig==F] <- "not_sig"
res.working
## log2 fold change (MLE): group axon_tx_p14 vs axon_tx_p10 
## Wald test p-value: group axon tx p14 vs axon tx p10 
## DataFrame with 54331 rows and 8 columns
##                     baseMean log2FoldChange     lfcSE      stat      pvalue
##                    <numeric>      <numeric> <numeric> <numeric>   <numeric>
## ENSMUSG00000000001  37.50038      -2.725749  1.082249 -2.518598 1.17823e-02
## ENSMUSG00000000003   0.00000             NA        NA        NA          NA
## ENSMUSG00000000028  45.03266       1.166625  0.783449  1.489087 1.36464e-01
## ENSMUSG00000000031 606.88495      -2.308231  0.467432 -4.938109 7.88839e-07
## ENSMUSG00000000037   2.45472       0.439344  3.170820  0.138559 8.89799e-01
## ...                      ...            ...       ...       ...         ...
## ENSMUSG00000118636   1.34359       -3.15894     3.957 -0.798319    0.424686
## ENSMUSG00000118637   0.00000             NA        NA        NA          NA
## ENSMUSG00000118638   0.00000             NA        NA        NA          NA
## ENSMUSG00000118639   0.00000             NA        NA        NA          NA
## ENSMUSG00000118640   0.00000             NA        NA        NA          NA
##                           padj         sig log2FoldChange_sig
##                      <numeric> <character>          <numeric>
## ENSMUSG00000000001 9.86756e-02     not_sig                 NA
## ENSMUSG00000000003          NA     not_sig                 NA
## ENSMUSG00000000028 3.97325e-01     not_sig                 NA
## ENSMUSG00000000031 9.17251e-05      wiring           -2.30823
## ENSMUSG00000000037          NA     not_sig                 NA
## ...                        ...         ...                ...
## ENSMUSG00000118636          NA     not_sig                 NA
## ENSMUSG00000118637          NA     not_sig                 NA
## ENSMUSG00000118638          NA     not_sig                 NA
## ENSMUSG00000118639          NA     not_sig                 NA
## ENSMUSG00000118640          NA     not_sig                 NA
## concatenation of multiple DESeq2 results
# colnames(res.working) <- paste('eu.P14ToP10', colnames(res.working), sep = '_')  

# I made several DESeq2 analyses (e.g., P99 to P14) and changed the column names as below, and "cbind" all the results 
## to make the dataframe that contains all the DESeq results for each gene (ommited in this document)
res.3d <- readRDS("~/downloads/axonTRAP_forSpeerC_res.3d..RDS")

identical(res.working %>% rownames(), res.3d$ensembl) 
## [1] TRUE
res.working <- cbind(res.3d[,c('symbol', 'entrez')], res.working) 
res.working %>% head()
##                    symbol entrez    baseMean log2FoldChange     lfcSE
## ENSMUSG00000000001  Gnai3  14679  37.5003794     -2.7257493 1.0822487
## ENSMUSG00000000003   Pbsn  54192   0.0000000             NA        NA
## ENSMUSG00000000028  Cdc45  12544  45.0326622      1.1666246 0.7834495
## ENSMUSG00000000031    H19  14955 606.8849529     -2.3082315 0.4674323
## ENSMUSG00000000037  Scml2 107815   2.4547213      0.4393441 3.1708201
## ENSMUSG00000000049   Apoh  11818   0.9366795      0.4976118 3.9438898
##                          stat       pvalue         padj     sig
## ENSMUSG00000000001 -2.5185979 1.178231e-02 9.867562e-02 not_sig
## ENSMUSG00000000003         NA           NA           NA not_sig
## ENSMUSG00000000028  1.4890872 1.364644e-01 3.973255e-01 not_sig
## ENSMUSG00000000031 -4.9381085 7.888395e-07 9.172505e-05  wiring
## ENSMUSG00000000037  0.1385585 8.897990e-01           NA not_sig
## ENSMUSG00000000049  0.1261728 8.995951e-01           NA not_sig
##                    log2FoldChange_sig
## ENSMUSG00000000001                 NA
## ENSMUSG00000000003                 NA
## ENSMUSG00000000028                 NA
## ENSMUSG00000000031          -2.308231
## ENSMUSG00000000037                 NA
## ENSMUSG00000000049                 NA
## color theme
col.young <-  brewer_pal("seq", palette = 7)(9)[3]
col.wiring <- brewer_pal("seq", palette = 7)(9)[6]
col.mature <- brewer_pal("seq", palette = 7)(9)[9]


# plot in figure 2H
p.p14.to.p10 <- res.3d %>% data.frame() %>% 
  dplyr::filter(tpm.axonalTx_p10 +  tpm.axonalTx_p14 + tpm.axonalTx_p99> 10) %>% 
  dplyr::mutate(this = eu.P14ToP10_sig) %>% 
  dplyr::arrange(this) %>% 
  ggplot2::ggplot(ggplot2::aes(x= tpm.axonalTx_p10, y=tpm.axonalTx_p14)) + ## wt janeseq3 to facsseq
  scale_x_log10() + 
  scale_y_log10() +
  geom_point(aes(col = this), alpha = .4, size = 1) +
  geom_hline(yintercept = 1, linetype = "dashed", size = .5, alpha = .5) +
  geom_vline(xintercept = 1, linetype = "dashed", size = .5, alpha = .5) +
  ggplot2::ggtitle(bquote("Transport (day 10 vs day 14))")) +
  scale_x_log10(labels = scientific) +
  scale_y_log10(labels = scientific) +
  xlab("Wiring") + ylab("Mature") +
  ggtitle("Wiring vs Mature") +
  theme(strip.text = element_blank(), strip.background = element_blank()) +
  theme(axis.text = element_text(size = 10)) +
  theme(axis.title = element_text(size = 16)) +
  theme(plot.title = element_text(size = 16, face = "plain", hjust = .5)) +
  theme(panel.grid.minor.x = element_blank()) +
  theme(panel.grid.major.x = element_blank()) +
  theme(panel.grid.minor.y = element_blank()) +
  scale_color_manual(values = c("grey", col.wiring, col.young)) + 
  scale_alpha_manual(values = c(.1, .8, .8)) +
  scale_size_manual(values = c(1, 2,2)) + 
  theme(axis.text.y = element_text(angle = 90, hjust =.5)) + 
  theme(panel.background = element_rect(fill = "white", colour = "black"),
        legend.direction = 'horizontal',
        legend.background  = element_rect(color = 'black', size = .2),
        legend.text = element_text(face = 'plain'),
        legend.margin = unit(x= c(t=0.1,r=1,b=0.2,l=0.2), unit ='pt')) +
  theme(legend.position = "none")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `legend.margin` must be specified using `margin()`
## ℹ For the old behavior use `legend.spacing`

Visualization

## Warning in scale_x_log10(labels = scientific): log-10 transformation introduced
## infinite values.
## Warning in scale_y_log10(labels = scientific): log-10 transformation introduced
## infinite values.