I am going to intersperse the original comments and code, then compare with my own analyses.
#metacyclic to 4-hr amastigote transition and the amastigote samples across timepoints. #This log is for parasite samples only using the count table restricted to CDS only. #Count table generation is described in logs: #lminfectome_dillonl_20141001_TopHat_HTSeq_Count_Table_hg19_Lamazonensis_Lmajor_beads_HPGL0434-446_452-472_cecilia.log #lminfectome_dillonl_20141211_TopHat_HTSeq_Count_Table_hg19_Lamazonensis_Lmajor_beads_HPGL0491-510_cecilia.log #Lmexicana81 (L. amazonensis) samples #Name condition batch #HPGL0435 metac D #HPGL0437 amastLA4 D #HPGL0440 amastLA24 D #HPGL0443 amastLA48 D #HPGL0446 amastLA72 D #HPGL0454 metac E #HPGL0458 amastLA4 E #HPGL0462 amastLA24 E #HPGL0466 amastLA48 E #HPGL0470 amastLA72 E #HPGL0492 metac F #HPGL0496 amastLA4 F #HPGL0500 amastLA24 F #HPGL0504 amastLA48 F #HPGL0508 amastLA72 F #Count table is: #20141211_Lmexicana81_434-435_437_440_443_446_453-454_458_462_466_470_491-492_496_500_504_508_CDSonly.count #Includes headers #7-28-15 #L. amazonensis #Create diagnostic plots #Hector recommended that all diagnostics be done using the rawest data possible. For correlation analyses between samples, #use countsTable data without filtering out lowly expressed genes and without normalization. Size factor normalization #will be used to show the distribution of gene expression levels. #This analysis uses count tables that have not been restricted to _CDS only.
library(cbcbSEQ)
## Loading required package: limma
## Loading required package: corpcor
## Loading required package: preprocessCore
## Loading required package: sva
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-23. For overview type 'help("mgcv-package")'.
## Loading required package: genefilter
## Loading required package: BiocParallel
##
## Attaching package: 'cbcbSEQ'
## The following object is masked from 'package:hpgltools':
##
## pcRes
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(matrixStats)
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:genefilter':
##
## rowSds, rowVars
library(siggenes)
## Loading required package: Biobase
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:limma':
##
## plotMA
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, cbind, colMeans,
## colnames, colSums, do.call, duplicated, eval, evalq, Filter,
## Find, get, grep, grepl, intersect, is.unsorted, lapply,
## lengths, Map, mapply, match, mget, order, paste, pmax,
## pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
## rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
## tapply, union, unique, unsplit, which, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
## Loading required package: multtest
##
## Attaching package: 'multtest'
## The following object is masked from 'package:gplots':
##
## wapply
## Loading required package: splines
library(ReportingTools)
## Loading required package: knitr
##
##
library(hwriter)
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:gplots':
##
## space
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:nlme':
##
## collapse
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: DelayedArray
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:base':
##
## apply
library(RColorBrewer)
#Read in counts table
countsTable <- read.table("preprocessing/dillonl/20141211_Lmexicana81_434-435_437_440_443_446_453-454_458_462_466_470_491-492_496_500_504_508_CDSonly.count.xz", header=TRUE)
#Remove id as an actual column, use as row names instead
rownames(countsTable) <- countsTable$id
countsTable <- countsTable[,-1]
#Restrict to samples of interest (exclude procyclic promastigote samples)
countsTable <- countsTable[,-c(1, 7, 13)]
head(countsTable, n=3L)
## HPGL0435 HPGL0437 HPGL0440 HPGL0443 HPGL0446 HPGL0454
## LmxM.01.0010-1 1910 154 34 38 56 2255
## LmxM.01.0020-1 1103 370 119 162 277 1639
## LmxM.01.0030-1 849 211 60 100 195 789
## HPGL0458 HPGL0462 HPGL0466 HPGL0470 HPGL0492 HPGL0496
## LmxM.01.0010-1 264 151 217 203 1512 51
## LmxM.01.0020-1 1153 1519 1315 1345 1528 153
## LmxM.01.0030-1 475 668 735 689 785 88
## HPGL0500 HPGL0504 HPGL0508
## LmxM.01.0010-1 52 13 8
## LmxM.01.0020-1 225 62 66
## LmxM.01.0030-1 135 29 48
# HPGL0435 HPGL0437 HPGL0440 HPGL0443 HPGL0446 HPGL0454 HPGL0458 HPGL0462 HPGL0466 HPGL0470 HPGL0492 HPGL0496 HPGL0500 HPGL0504 HPGL0508
#LmxM.01.0010-1 1910 154 34 38 56 2255 264 151 217 203 1512 51 52 13 8
#LmxM.01.0020-1 1103 370 119 162 277 1639 1153 1519 1315 1345 1528 153 225 62 66
#LmxM.01.0030-1 849 211 60 100 195 789 475 668 735 689 785 88 135 29 48
#Establish metadata for samples
sampleID <- colnames(countsTable)
condition <- rep(c("metac", "amast4", "amast24", "amast48", "amast72"), times=3)
batch <- rep(c("D", "E", "F"), each=5)
#Create condition and batch factor
condition <- factor(condition, levels=c("metac", "amast4", "amast24", "amast48", "amast72"))
batch <- factor(batch, levels=c("D", "E", "F"))
design <- data.frame(sampleID=sampleID, condition=condition, batch=batch)
design
## sampleID condition batch
## 1 HPGL0435 metac D
## 2 HPGL0437 amast4 D
## 3 HPGL0440 amast24 D
## 4 HPGL0443 amast48 D
## 5 HPGL0446 amast72 D
## 6 HPGL0454 metac E
## 7 HPGL0458 amast4 E
## 8 HPGL0462 amast24 E
## 9 HPGL0466 amast48 E
## 10 HPGL0470 amast72 E
## 11 HPGL0492 metac F
## 12 HPGL0496 amast4 F
## 13 HPGL0500 amast24 F
## 14 HPGL0504 amast48 F
## 15 HPGL0508 amast72 F
# sampleID condition batch
#1 HPGL0435 metac D
#2 HPGL0437 amast4 D
#3 HPGL0440 amast24 D
#4 HPGL0443 amast48 D
#5 HPGL0446 amast72 D
#6 HPGL0454 metac E
#7 HPGL0458 amast4 E
#8 HPGL0462 amast24 E
#9 HPGL0466 amast48 E
#10 HPGL0470 amast72 E
#11 HPGL0492 metac F
#12 HPGL0496 amast4 F
#13 HPGL0500 amast24 F
#14 HPGL0504 amast48 F
#15 HPGL0508 amast72 F
colnames(countsTable) <- paste(condition, batch, 1:length(condition), sep=".")
barplot(colSums(countsTable), las=3, ylim=c(0,3e+07))
#Normalize raw counts using DESeq size factors
df <- data.frame(cond=(condition))
dds <- DESeqDataSetFromMatrix(countData=countsTable, colData = df, design = ~ cond)
dds <- estimateSizeFactors(dds)
ncts <- counts(dds, normalized=TRUE)
y <- log(ncts + 1)
#Determine median and quantiles of the size factor normalized, log2 counts
median(y)
## [1] 5.789
#[1] 5.788727
quantile(y)
## 0% 25% 50% 75% 100%
## 0.000 5.331 5.789 6.257 10.702
# 0% 25% 50% 75% 100%
# 0.000000 5.330752 5.788727 6.256596 10.701582
#Determine number of genes per sample with less than log2 of 2 counts (less than 4 counts per mill)
ydf <- as.data.frame(y)
colnames(ydf) <- sampleID
col.nam <- paste(condition, batch, 1:length(condition), sep=".")
#Boxplot of per sample log of size-factor-normalized counts (+ 1)
par(mar=c(10.5,4.5,2,1))
par(oma=c(0,0,0,0))
boxplot(y, names=col.nam, las=3)
#Heatmap of Pearson correlation between samples
#(delete Rowv, Colv, and dendrogram parameters if want samples to sort and show dendrogram)
#Correlation analysis (Pearson is default for cor unless othewise specified)
#Using raw counts
datCor <- cor(countsTable)
heatmap.2(datCor, Rowv=NA, Colv=NA,
margins=c(10, 10),
labRow=col.nam,
labCol=col.nam,
dendrogram="none",
scale="none",
trace="none",
srtCol=45)
#Median pairwise correlation
#Using raw counts
corM <- matrixStats::rowMedians(cor(countsTable))
qs <- quantile(corM,p=c(1,3)/4)
iqr <- diff(qs)
outLimit <- qs[1] - 1.5 * iqr
ylim <- c(pmin(min(corM),outLimit),max(corM))
col <- ifelse(condition=="amast4", "cornflowerblue",
ifelse(condition=="amast24", "gold",
ifelse(condition=="amast48", "green3",
ifelse(condition=="amast72", "coral", "mediumorchid4"))))
plot(corM, xaxt="n", ylim=ylim, ylab="Median Pairwise Correlation", xlab="", main="", col=col, pch=16, cex=2.2)
axis(side=1,at=seq(along=corM),labels=paste(condition,batch,sep=":"),las=2)
abline(h=outLimit,lty=2)
abline(v=1:length(col.nam), lty=3, col="black")
#Plot without IQR cutoff
#Use filterCounts method to filter for low counts
#Define filterCounts function
filterCounts = function (counts, lib.size = NULL, thresh = 1, minSamples = 2) {
cpms <- 2^log2CPM(counts, lib.size = lib.size)$y
keep <- rowSums(cpms > thresh) >= minSamples
counts <- counts[keep, ]
counts
}
x <- table(condition)
dim(countsTable)
## [1] 8336 15
#[1] 8336 15
counts <- filterCounts(countsTable, thresh=1, minSamples=min(x))
dim(counts)
## [1] 8310 15
#[1] 8310 15
#Quantile normalize counts
countsSubQ <- qNorm(counts)
#Explore data for batch effects
#Transform data to log2 counts per million
x <- log2CPM(countsSubQ)
#Compute principal components
s <- makeSVD(x$y)
#Compute variance of each PC and how they correlate with batch and condition
pcRes(s$v, s$d, condition, batch)
## propVar cumPropVar cond.R2 batch.R2
## 1 59.12 59.12 96.62 2.72
## 2 9.04 68.16 75.86 11.13
## 3 6.50 74.66 21.37 38.43
## 4 5.03 79.69 14.54 67.85
## 5 4.03 83.72 1.61 48.30
## 6 3.36 87.08 26.63 17.28
## 7 2.85 89.93 18.80 5.67
## 8 2.52 92.45 48.05 0.46
## 9 1.77 94.22 12.92 3.09
## 10 1.67 95.89 19.51 0.70
## 11 1.37 97.26 9.44 0.57
## 12 1.21 98.47 16.94 0.59
## 13 1.10 99.57 11.11 3.03
## 14 0.44 100.01 26.60 0.18
# propVar cumPropVar cond.R2 batch.R2
#1 59.12 59.12 96.62 2.72
#2 9.04 68.16 75.86 11.13
#3 6.50 74.66 21.37 38.43
#4 5.03 79.69 14.54 67.85
#5 4.03 83.72 1.61 48.30
#6 3.36 87.08 26.63 17.28
#7 2.85 89.93 18.80 5.67
#8 2.52 92.45 48.05 0.46
#9 1.77 94.22 12.92 3.09
#10 1.67 95.89 19.51 0.70
#11 1.37 97.26 9.44 0.57
#12 1.21 98.47 16.94 0.59
#13 1.10 99.57 11.11 3.03
#14 0.44 100.01 26.60 0.18
#Plot PC1 vs. PC2, with black outlines and color fills
#Save as eps
condnum <- as.numeric(condition)
plotPC(s$v,
s$d,
col="black",
pch=ifelse(batch=="D", 25, ifelse(batch=="B", 21, ifelse(batch=="C", 22, ifelse(batch=="E", 24, 23)))),
bg=ifelse(condnum==1, "mediumorchid4", ifelse(condnum==2, "cornflowerblue", ifelse(condnum==3, "gold", ifelse(condnum==4, "green3", "coral")))), cex=2.6
)
legend(x=-0.23, y=0.39, legend=c("metac", "amast4", "amast24", "amast48", "amast72"), pch=22, col=0,
pt.bg=c("mediumorchid4", "cornflowerblue", "gold", "green3", "coral"), pt.cex=2.6, bty="n")
text(s$v[,1], s$v[,2], colnames(countsTable), cex=.7, pos=4)
#View as a Euclidean distance heatmap
dists <- dist(t(counts))
mat <- as.matrix(dists)
rownames(mat) <- colnames(mat) <- with(design, paste(colnames(countsTable)))
hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
vec.batch <- rainbow(nlevels(batch), start=0, end=.8)
batch.color <- rep(0, length(batch))
for (i in 1:length(batch)) {
batch.color[i] <- vec.batch[batch[i]==levels(batch)]
}
vec.condition <- c("mediumorchid4", "cornflowerblue", "gold", "green3", "coral")
condition.color <- rep(0, length(condition))
for (i in 1:length(condition)) {
condition.color[i] <- vec.condition[condition[i]==levels(condition)]
}
heatmap <- heatmap.2(mat, trace="none", col = rev(hmcol), margin=c(11,11), ColSideColors=condition.color,
RowSideColors=batch.color, key="FALSE", srtCol=45)
dev.off()
## null device
## 1
#I would like to create a PCA plot and heatmap to reflect the use of batch in the limma model
#Specify model
mod <- model.matrix(~batch)
#Use voom to determine weights for fitting the mean-variance trend for each gene
v <- voom(countsSubQ, mod)
#Fit the linear model for each gene
fit <- lmFit(v)
#Get the residual (i.e. everything except for the batch effect)
newData <- residuals(fit, v)
#Explore data for batch effects
#Compute principal components
s <- makeSVD(newData)
#Compute variance of each PC and how they correlate with batch and condition
pcRes(s$v, s$d, condition, batch)
## propVar cumPropVar cond.R2 batch.R2
## 1 65.02 65.02 99.34 0
## 2 9.65 74.67 89.05 0
## 3 6.18 80.85 15.30 0
## 4 4.18 85.03 31.59 0
## 5 3.31 88.34 22.22 0
## 6 2.85 91.19 50.41 0
## 7 2.11 93.30 12.60 0
## 8 1.91 95.21 19.87 0
## 9 1.57 96.78 6.37 0
## 10 1.40 98.18 17.29 0
## 11 1.33 99.51 10.67 0
## 12 0.51 100.02 25.31 0
## 13 0.00 100.02 0.00 100
## 14 0.00 100.02 0.00 100
# propVar cumPropVar cond.R2 batch.R2
#1 65.02 65.02 99.34 0
#2 9.65 74.67 89.05 0
#3 6.18 80.85 15.30 0
#4 4.18 85.03 31.59 0
#5 3.31 88.34 22.22 0
#6 2.85 91.19 50.41 0
#7 2.11 93.30 12.60 0
#8 1.91 95.21 19.87 0
#9 1.57 96.78 6.37 0
#10 1.40 98.18 17.29 0
#11 1.33 99.51 10.67 0
#12 0.51 100.02 25.31 0
#13 0.00 100.02 0.00 100
#14 0.00 100.02 0.00 100
#Plot PC1 vs. PC2, with black outlines and color fills
condnum <- as.numeric(condition)
plotPC(s$v,
s$d,
col="black",
pch=ifelse(batch=="D", 25, ifelse(batch=="B", 21, ifelse(batch=="C", 22, ifelse(batch=="E", 24, 23)))),
bg=ifelse(condnum==1, "mediumorchid4", ifelse(condnum==2, "cornflowerblue", ifelse(condnum==3, "gold", ifelse(condnum==4, "green3", "coral")))), cex=2.6
)
legend(x=-0.48, y=-0.24, legend=c("metac", "amast4", "amast24", "amast48", "amast72"), pch=22, col=0,
pt.bg=c("mediumorchid4", "cornflowerblue", "gold", "green3", "coral"), pt.cex=2.6, bty="n")
text(s$v[,1], s$v[,2], colnames(countsTable), cex=.7, pos=4)
#View as a Euclidean distance heatmap
dists <- dist(t(newData))
mat <- as.matrix(dists)
rownames(mat) <- colnames(mat) <- with(design, paste(colnames(countsTable)))
hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
vec.batch <- rainbow(nlevels(batch), start=0, end=.8)
batch.color <- rep(0, length(batch))
for (i in 1:length(batch)) {
batch.color[i] <- vec.batch[batch[i]==levels(batch)]
}
vec.condition <- c("mediumorchid4", "cornflowerblue", "gold", "green3", "coral")
condition.color <- rep(0, length(condition))
for (i in 1:length(condition)) {
condition.color[i] <- vec.condition[condition[i]==levels(condition)]
}
heatmap <- heatmap.2(mat, trace="none", col = rev(hmcol), margin=c(11,11), ColSideColors=condition.color,
RowSideColors=batch.color, key="FALSE", srtCol=45)
#############################
#############################
#DE analysis
#Quantile normalize counts
countsSubQ <- qNorm(counts)
#Specify model
mod = model.matrix(~0+condition+batch)
#View mean-variance trend
voom(countsSubQ, mod, plot=TRUE)
## An object of class "EList"
## $E
## metac.D.1 amast4.D.2 amast24.D.3 amast48.D.4 amast72.D.5
## LmxM.01.0010-1 6.872 5.124 4.686 4.055 4.147
## LmxM.01.0020-1 6.183 6.444 6.524 6.220 6.580
## LmxM.01.0030-1 5.842 5.599 5.498 5.496 6.041
## LmxM.01.0040-1 5.248 5.579 5.340 4.670 4.986
## LmxM.01.0050-1 6.555 6.640 6.931 6.445 6.731
## metac.E.6 amast4.E.7 amast24.E.8 amast48.E.9 amast72.E.10
## LmxM.01.0010-1 6.862 4.549 3.743 3.998 4.062
## LmxM.01.0020-1 6.435 6.784 7.028 6.728 6.909
## LmxM.01.0030-1 5.414 5.428 5.754 5.858 5.875
## LmxM.01.0040-1 5.578 5.712 5.873 5.459 5.637
## LmxM.01.0050-1 6.487 6.679 6.877 6.861 7.005
## metac.F.11 amast4.F.12 amast24.F.13 amast48.F.14
## LmxM.01.0010-1 6.479 4.832 4.295 4.253
## LmxM.01.0020-1 6.495 6.449 6.411 6.373
## LmxM.01.0030-1 5.568 5.630 5.681 5.313
## LmxM.01.0040-1 5.368 4.744 5.248 4.985
## LmxM.01.0050-1 6.524 6.647 6.826 6.565
## amast72.F.15
## LmxM.01.0010-1 3.286
## LmxM.01.0020-1 6.173
## LmxM.01.0030-1 5.695
## LmxM.01.0040-1 5.261
## LmxM.01.0050-1 6.744
## 8305 more rows ...
##
## $weights
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11]
## [1,] 38.25 21.44 14.88 13.67 11.61 36.74 17.46 12.15 11.18 9.523 36.67
## [2,] 34.43 35.62 36.18 34.89 35.59 36.75 37.69 38.06 37.11 37.662 34.36
## [3,] 28.87 28.23 29.28 28.27 31.66 28.53 27.86 28.94 27.90 31.367 27.46
## [4,] 23.88 23.21 25.02 19.29 22.53 29.79 29.18 30.71 25.49 28.604 23.34
## [5,] 35.90 36.65 37.76 36.49 37.52 36.59 37.28 38.23 37.13 38.027 35.91
## [,12] [,13] [,14] [,15]
## [1,] 17.32 12.05 11.09 9.445
## [2,] 35.56 36.12 34.83 35.530
## [3,] 26.81 27.90 26.85 30.459
## [4,] 22.63 24.46 18.79 21.965
## [5,] 36.66 37.76 36.49 37.523
## 8305 more rows ...
##
## $design
## conditionmetac conditionamast4 conditionamast24 conditionamast48
## 1 1 0 0 0
## 2 0 1 0 0
## 3 0 0 1 0
## 4 0 0 0 1
## 5 0 0 0 0
## conditionamast72 batchE batchF
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 1 0 0
## 10 more rows ...
##
## $targets
## lib.size
## metac.D.1 7666699
## amast4.D.2 7666679
## amast24.D.3 7666716
## amast48.D.4 7666698
## amast72.D.5 7666711
## 10 more rows ...
#Use voom to transform quantile-normalized count data to log2-counts per million, estimate mean-variance relationship
#and use m-v relationship to computer appropriate observational-level weights
v <- voom(countsSubQ, mod)
#Fit a linear model for each gene using the specified design contained in v
fit <- lmFit(v)
##metac v amast4##
#eBayes finds an F-statistic from the set of t-statistics for that gene
metac.amast4.contr.mat <- makeContrasts(metac_v_amast4=conditionamast4-conditionmetac, levels=v$design)
metac.amast4.fit <- contrasts.fit(fit, metac.amast4.contr.mat)
metac.amast4.eb <- eBayes(metac.amast4.fit)
metac.amast4.topTab <- topTable(metac.amast4.eb, coef="metac_v_amast4", number=nrow(v$E))
#View the list of DE genes
head(metac.amast4.topTab, n=3L)
## logFC AveExpr t P.Value adj.P.Val B
## LmxM.31.1680-1 -3.044 5.086 -20.99 2.381e-15 1.978e-11 24.65
## LmxM.30.1450partial-1 2.429 9.485 18.74 2.181e-14 5.010e-11 23.02
## LmxM.29.2850-1 -2.509 6.005 -18.79 2.072e-14 5.010e-11 22.95
# logFC AveExpr t P.Value adj.P.Val B
#LmxM.31.1680-1 -3.043546 5.085537 -20.98676 2.380772e-15 1.978422e-11 24.65466
#LmxM.30.1450partial-1 2.429366 9.484824 18.73963 2.181083e-14 5.009522e-11 23.02019
metac.amast4.topTab$fold_change <- 2 ^ metac.amast4.topTab$logFC
write.csv(file="csv/lamazonensis_metac_vs_amast4.csv", x=metac.amast4.topTab)
#Limit list to genes with an adjusted p value < 0.05
metac.amast4.sigGenes <- metac.amast4.topTab[metac.amast4.topTab$adj.P.Val <0.05, ]
length(metac.amast4.sigGenes$logFC)
## [1] 3896
#3896
#Filter out rows with less than 2-fold change (log2 fold change of > 1)
metac.amast4.sigGenesFold1 <- subset(metac.amast4.sigGenes, abs(logFC) > 1)
length(metac.amast4.sigGenesFold1$logFC)
## [1] 649
#649
#Filter out rows with less than 4-fold change (log2 fold change of > 2)
metac.amast4.sigGenesFold2 <- subset(metac.amast4.sigGenes, abs(logFC) > 2)
length(metac.amast4.sigGenesFold2$logFC)
## [1] 44
#44
metac.amast4.sigGenes <- metac.amast4.sigGenes[order(-metac.amast4.sigGenes$logFC), ]
#Make an MA plot
sel = metac.amast4.topTab$adj.P.Val < 0.05
top = metac.amast4.topTab
sub = paste("No. of sig. genes: ", sum(sel),"/",length(sel))
cpm = v$E
plot(rowMeans(cpm[rownames(top),]), top$logFC, pch=16, cex=0.5,col="darkgrey",
main="metac_amast4 model batch adjusted",
ylab="log FC", xlab="Average Expression",
ylim=c(-3,3), sub=sub)
points(rowMeans(cpm[rownames(top),])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1,0,1), col="red")
##amast4 v amast24##
#eBayes finds an F-statistic from the set of t-statistics for that gene
amast4.amast24.contr.mat <- makeContrasts(amast4_v_amast24=conditionamast24-conditionamast4, levels=v$design)
amast4.amast24.fit <- contrasts.fit(fit, amast4.amast24.contr.mat)
amast4.amast24.eb <- eBayes(amast4.amast24.fit)
amast4.amast24.topTab <- topTable(amast4.amast24.eb, coef="amast4_v_amast24", number=nrow(v$E))
#View the list of DE genes
head(amast4.amast24.topTab, n=3L)
## logFC AveExpr t P.Value adj.P.Val B
## LmxM.09.0060-1 1.543 9.332 10.945 5.039e-10 3.34e-06 12.916
## LmxM.17.0890-1 -1.712 7.707 -10.658 8.040e-10 3.34e-06 12.499
## LmxM.23.1665-1 -1.647 5.517 -9.207 9.855e-09 2.73e-05 9.985
# logFC AveExpr t P.Value adj.P.Val B
#LmxM.09.0060-1 1.543364 9.332460 10.944657 5.038502e-10 3.340466e-06 12.915671
#LmxM.17.0890-1 -1.712316 7.707216 -10.657815 8.039628e-10 3.340466e-06 12.499147
#LmxM.23.1665-1 -1.646699 5.517480 -9.207189 9.854612e-09 2.729727e-05 9.985394
#Limit list to genes with an adjusted p value < 0.05
amast4.amast24.sigGenes <- amast4.amast24.topTab[amast4.amast24.topTab$adj.P.Val <0.05, ]
length(amast4.amast24.sigGenes$logFC)
## [1] 577
#577
#Filter out rows with less than 2-fold change (log2 fold change of > 1)
amast4.amast24.sigGenesFold1 <- subset(amast4.amast24.sigGenes, abs(logFC) > 1)
length(amast4.amast24.sigGenesFold1$logFC)
## [1] 104
#104
#Filter out rows with less than 4-fold change (log2 fold change of > 2)
amast4.amast24.sigGenesFold2 <- subset(amast4.amast24.sigGenes, abs(logFC) > 2)
length(amast4.amast24.sigGenesFold2$logFC)
## [1] 1
#1
amast4.amast24.sigGenes <- amast4.amast24.sigGenes[order(-amast4.amast24.sigGenes$logFC), ]
#Make an MA plot
sel = amast4.amast24.topTab$adj.P.Val < 0.05
top = amast4.amast24.topTab
sub = paste("No. of sig. genes: ", sum(sel),"/",length(sel))
cpm = v$E
plot(rowMeans(cpm[rownames(top),]), top$logFC, pch=16, cex=0.5,col="darkgrey",
main="amast4_amast24 model batch adjusted",
ylab="log FC", xlab="Average Expression",
ylim=c(-3,3), sub=sub)
points(rowMeans(cpm[rownames(top),])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1,0,1), col="red")
##amast24 v amast48##
#eBayes finds an F-statistic from the set of t-statistics for that gene
amast24.amast48.contr.mat <- makeContrasts(amast24_v_amast48=conditionamast48-conditionamast24, levels=v$design)
amast24.amast48.fit <- contrasts.fit(fit, amast24.amast48.contr.mat)
amast24.amast48.eb <- eBayes(amast24.amast48.fit)
amast24.amast48.topTab <- topTable(amast24.amast48.eb, coef="amast24_v_amast48", number=nrow(v$E))
#View the list of DE genes
head(amast24.amast48.topTab, n=3L)
## logFC AveExpr t P.Value adj.P.Val B
## LmxM.02.0460-1 -1.0270 6.482 -7.943 1.082e-07 0.0008988 4.933
## LmxM.30.1165-1 1.0818 5.463 7.038 6.838e-07 0.0028414 3.130
## LmxM.30.1190-1 0.8976 6.136 5.998 6.518e-06 0.0180550 2.413
# logFC AveExpr t P.Value adj.P.Val B
#LmxM.02.0460-1 -1.0270442 6.481687 -7.943049 1.081563e-07 0.0008987787 4.933005
#LmxM.30.1165-1 1.0817746 5.463023 7.038413 6.838410e-07 0.0028413593 3.129606
#LmxM.30.1190-1 0.8976157 6.135557 5.998049 6.518059e-06 0.0180550222 2.413080
#Limit list to genes with an adjusted p value < 0.05
amast24.amast48.sigGenes <- amast24.amast48.topTab[amast24.amast48.topTab$adj.P.Val <0.05, ]
length(amast24.amast48.sigGenes$logFC)
## [1] 3
#3
#Filter out rows with less than 2-fold change (log2 fold change of > 1)
amast24.amast48.sigGenesFold1 <- subset(amast24.amast48.sigGenes, abs(logFC) > 1)
length(amast24.amast48.sigGenesFold1$logFC)
## [1] 2
#2
#Filter out rows with less than 4-fold change (log2 fold change of > 2)
amast24.amast48.sigGenesFold2 <- subset(amast24.amast48.sigGenes, abs(logFC) > 2)
length(amast24.amast48.sigGenesFold2$logFC)
## [1] 0
#0
amast24.amast48.sigGenes <- amast24.amast48.sigGenes[order(-amast24.amast48.sigGenes$logFC), ]
#Make an MA plot
sel = amast24.amast48.topTab$adj.P.Val < 0.05
top = amast24.amast48.topTab
sub = paste("No. of sig. genes: ", sum(sel),"/",length(sel))
cpm = v$E
plot(rowMeans(cpm[rownames(top),]), top$logFC, pch=16, cex=0.5,col="darkgrey",
main="amast24_amast48 model batch adjusted",
ylab="log FC", xlab="Average Expression",
ylim=c(-3,3), sub=sub)
points(rowMeans(cpm[rownames(top),])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1,0,1), col="red")
##amast48 v amast72##
#eBayes finds an F-statistic from the set of t-statistics for that gene
amast48.amast72.contr.mat <- makeContrasts(amast48_v_amast72=conditionamast72-conditionamast48, levels=v$design)
amast48.amast72.fit <- contrasts.fit(fit, amast48.amast72.contr.mat)
amast48.amast72.eb <- eBayes(amast48.amast72.fit)
amast48.amast72.topTab <- topTable(amast48.amast72.eb, coef="amast48_v_amast72", number=nrow(v$E))
#View the list of DE genes
head(amast48.amast72.topTab, n=3L)
## logFC AveExpr t P.Value adj.P.Val B
## LmxM.02.0460-1 -0.6433 6.482 -4.585 0.000169 0.9987 -2.970
## LmxM.13.0390-1 -0.4710 12.405 -3.565 0.001878 0.9987 -3.190
## LmxM.08_29.0251-1 -0.7466 6.216 -3.701 0.001365 0.9987 -3.248
# logFC AveExpr t P.Value adj.P.Val B
#LmxM.02.0460-1 -0.6433210 6.481687 -4.584932 0.0001689891 0.9987343 -2.970448
#LmxM.13.0390-1 -0.4709751 12.404965 -3.565121 0.0018782419 0.9987343 -3.190347
#LmxM.08_29.0251-1 -0.7466192 6.215922 -3.700966 0.0013650425 0.9987343 -3.248311
#Limit list to genes with an adjusted p value < 0.05
amast48.amast72.sigGenes <- amast48.amast72.topTab[amast48.amast72.topTab$adj.P.Val <0.05, ]
length(amast48.amast72.sigGenes$logFC)
## [1] 0
#0
#Make an MA plot
sel = amast48.amast72.topTab$adj.P.Val < 0.05
top = amast48.amast72.topTab
sub = paste("No. of sig. genes: ", sum(sel),"/",length(sel))
cpm = v$E
plot(rowMeans(cpm[rownames(top),]), top$logFC, pch=16, cex=0.5,col="darkgrey",
main="amast48_amast72 model batch adjusted",
ylab="log FC", xlab="Average Expression",
ylim=c(-3,3), sub=sub)
points(rowMeans(cpm[rownames(top),])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1,0,1), col="red")