I am going to intersperse the original comments and code, then compare with my own analyses.
lminfectome_dillonl_20150609_PCA_heatmap_samplediagnostics_limmaac_amast_CDSonly_mouse_forpublication.log
#Complete batches are named B, D, and E for historical reasons. #Samples #Name condition batch #HPGL0165 metac (Ficoll) B #HPGL0167 amast4 B #HPGL0169 amast24 B #HPGL0171 amast48 B #HPGL0173 amast72 B #HPGL0230 metac (Ficoll) D #HPGL0232 amast4 D #HPGL0234 amast24 D #HPGL0236 amast48 D #HPGL0238 amast72 D #HPGL0329 metac (Ficoll) E #HPGL0389 amast4 E #HPGL0391 amast24 E #HPGL0393 amast48 E #HPGL0395 amast72 E #Counts in counts tables: #/cbcb/nelsayed-scratch/dillonl/count_tables #20140501_Lmajor60_167_169_171_173_191_232_234_236_238_389_391_393_395_CDSonly.count #20140127_Lmajor60_proc_metac_096_097_098_164_165_192_193_228_229_230_324_325_326_finalset_CDSonly.count #HPGL0329_Lmajor60_20150609_CDSonly.count #Includes headers
tt <- sm(library(DESeq2))
tt <- sm(library(preprocessCore))
tt <- sm(library(cbcbSEQ))
tt <- sm(library(edgeR))
tt <- sm(library(RColorBrewer))
tt <- sm(library(gplots))
##Amastigote samples
countsTable <- read.table("preprocessing/dillonl/20140501_Lmajor60_167_169_171_173_191_232_234_236_238_389_391_393_395_CDSonly.count.xz", header=TRUE)
#Remove batch C (HPGL0191)
countsTable <- countsTable[,-6]
#Remove "exon_" from gene IDs
countsTable$id <- gsub("exon_", "", countsTable$id)
#Metacyclic samples
metac <- read.table("preprocessing/dillonl/20140127_Lmajor60_proc_metac_096_097_098_164_165_192_193_228_229_230_324_325_326_finalset_CDSonly.count.xz", header=TRUE)
#Restrict to HPGL0165 and HPGL0230
metac <- metac[,c(1,6,11)]
HPGL0329 <- read.table("preprocessing/dillonl/HPGL0329_Lmajor60_20150609_CDSonly.count.xz", header=FALSE)
colnames(HPGL0329) <- c("id", "HPGL0329")
#Remove "exon_" from gene IDs
HPGL0329$id <- gsub("exon_", "", HPGL0329$id)
metac <- merge(metac, HPGL0329, by="id", all=TRUE)
#Create single count table
countsTable <- merge(countsTable, metac, by="id", all=TRUE)
#Remove id as an actual column, use as row names instead
rownames(countsTable) <- countsTable$id
countsTable <- countsTable[,-1]
head(countsTable, n=3L)
## HPGL0167 HPGL0169 HPGL0171 HPGL0173 HPGL0232 HPGL0234
## LmjF.01.0010-1 715 407 302 90 779 553
## LmjF.01.0020-1 678 428 327 105 954 658
## LmjF.01.0030-1 401 344 341 100 736 640
## HPGL0236 HPGL0238 HPGL0389 HPGL0391 HPGL0393 HPGL0395
## LmjF.01.0010-1 314 231 291 189 90 68
## LmjF.01.0020-1 279 248 286 218 120 86
## LmjF.01.0030-1 231 203 233 177 98 80
## HPGL0165 HPGL0230 HPGL0329
## LmjF.01.0010-1 1554 1905 1835
## LmjF.01.0020-1 480 951 980
## LmjF.01.0030-1 345 1219 953
# HPGL0167 HPGL0169 HPGL0171 HPGL0173 HPGL0232 HPGL0234 HPGL0236 HPGL0238 HPGL0389 HPGL0391 HPGL0393 HPGL0395 HPGL0165 HPGL0230 HPGL0329
#LmjF.01.0010-1 715 407 302 90 779 553 314 231 291 189 90 68 1554 1905 1835
#LmjF.01.0020-1 678 428 327 105 954 658 279 248 286 218 120 86 480 951 980
#LmjF.01.0030-1 401 344 341 100 736 640 231 203 233 177 98 80 345 1219 953
#Reorder into batches
countsTable <- countsTable[,c(13,1:4,14,5:8,15,9:12)]
head(countsTable, n=3L)
## HPGL0165 HPGL0167 HPGL0169 HPGL0171 HPGL0173 HPGL0230
## LmjF.01.0010-1 1554 715 407 302 90 1905
## LmjF.01.0020-1 480 678 428 327 105 951
## LmjF.01.0030-1 345 401 344 341 100 1219
## HPGL0232 HPGL0234 HPGL0236 HPGL0238 HPGL0329 HPGL0389
## LmjF.01.0010-1 779 553 314 231 1835 291
## LmjF.01.0020-1 954 658 279 248 980 286
## LmjF.01.0030-1 736 640 231 203 953 233
## HPGL0391 HPGL0393 HPGL0395
## LmjF.01.0010-1 189 90 68
## LmjF.01.0020-1 218 120 86
## LmjF.01.0030-1 177 98 80
# HPGL0165 HPGL0167 HPGL0169 HPGL0171 HPGL0173 HPGL0230 HPGL0232 HPGL0234 HPGL0236 HPGL0238 HPGL0329 HPGL0389 HPGL0391 HPGL0393 HPGL0395
#LmjF.01.0010-1 1554 715 407 302 90 1905 779 553 314 231 1835 291 189 90 68
#Establish metadata for samples
sampleID <- colnames(countsTable)
condition <- rep(c("metac", "amast4", "amast24", "amast48", "amast72"), times=3)
batch <- rep(c("B", "D", "E"), each=5)
#Create condition and batch factor
condition <- factor(condition, levels=c("metac", "amast4", "amast24", "amast48", "amast72"))
batch <- factor(batch, levels=c("B", "D", "E"))
design <- data.frame(sampleID=sampleID, condition=condition, batch=batch)
design
## sampleID condition batch
## 1 HPGL0165 metac B
## 2 HPGL0167 amast4 B
## 3 HPGL0169 amast24 B
## 4 HPGL0171 amast48 B
## 5 HPGL0173 amast72 B
## 6 HPGL0230 metac D
## 7 HPGL0232 amast4 D
## 8 HPGL0234 amast24 D
## 9 HPGL0236 amast48 D
## 10 HPGL0238 amast72 D
## 11 HPGL0329 metac E
## 12 HPGL0389 amast4 E
## 13 HPGL0391 amast24 E
## 14 HPGL0393 amast48 E
## 15 HPGL0395 amast72 E
## sampleID condition batch
#1 HPGL0165 metac B
#2 HPGL0167 amast4 B
#3 HPGL0169 amast24 B
#4 HPGL0171 amast48 B
#5 HPGL0173 amast72 B
#6 HPGL0230 metac D
#7 HPGL0232 amast4 D
#8 HPGL0234 amast24 D
#9 HPGL0236 amast48 D
#10 HPGL0238 amast72 D
#11 HPGL0329 metac E
#12 HPGL0389 amast4 E
#13 HPGL0391 amast24 E
#14 HPGL0393 amast48 E
#15 HPGL0395 amast72 E
colnames(countsTable) <- paste(condition, batch, 1:length(condition), sep=".")
head(countsTable, n=3L)
## metac.B.1 amast4.B.2 amast24.B.3 amast48.B.4 amast72.B.5
## LmjF.01.0010-1 1554 715 407 302 90
## LmjF.01.0020-1 480 678 428 327 105
## LmjF.01.0030-1 345 401 344 341 100
## metac.D.6 amast4.D.7 amast24.D.8 amast48.D.9 amast72.D.10
## LmjF.01.0010-1 1905 779 553 314 231
## LmjF.01.0020-1 951 954 658 279 248
## LmjF.01.0030-1 1219 736 640 231 203
## metac.E.11 amast4.E.12 amast24.E.13 amast48.E.14
## LmjF.01.0010-1 1835 291 189 90
## LmjF.01.0020-1 980 286 218 120
## LmjF.01.0030-1 953 233 177 98
## amast72.E.15
## LmjF.01.0010-1 68
## LmjF.01.0020-1 86
## LmjF.01.0030-1 80
# metac.B.1 amast4.B.2 amast24.B.3 amast48.B.4 amast72.B.5 metac.D.6 amast4.D.7 amast24.D.8 amast48.D.9
#LmjF.01.0010-1 1554 715 407 302 90 1905 779 553 314
#LmjF.01.0020-1 480 678 428 327 105 951 954 658 279
#LmjF.01.0030-1 345 401 344 341 100 1219 736 640 231
# amast72.D.10 metac.E.11 amast4.E.12 amast24.E.13 amast48.E.14 amast72.E.15
#LmjF.01.0010-1 231 1835 291 189 90 68
#LmjF.01.0020-1 248 980 286 218 120 86
#LmjF.01.0030-1 203 953 233 177 98 80
#View barplot of raw counts by sample
barplot(colSums(countsTable), las=3, ylim=c(0,2.5e+07))
#Normalize raw counts using DESeq size factors
df <- data.frame(cond=(condition))
dds <- DESeqDataSetFromMatrix(countData=countsTable, colData = df, design = ~ cond)
dds <- estimateSizeFactors(dds)
ncts <- counts(dds, normalized=TRUE)
y <- log(ncts + 1)
#Determine median and quantiles of the size factor normalized, log2 counts
median(y)
## [1] 6.035
#[1] 6.035387
#compared to 5.776822 without metac samples included
quantile(y)
## 0% 25% 50% 75% 100%
## 0.000 5.598 6.035 6.471 11.901
# 0% 25% 50% 75% 100%
# 0.000000 5.597645 6.035387 6.470512 11.900627
#compare to:
# 0% 25% 50% 75% 100%
# 0.000000 5.348148 5.776822 6.196398 11.638203
#without metac samples included
#Determine number of genes per sample with less than log2 of 2 counts (less than 4 counts per mill)
ydf <- as.data.frame(y)
colnames(ydf) <- sampleID
colnames(ydf)
## [1] "HPGL0165" "HPGL0167" "HPGL0169" "HPGL0171" "HPGL0173" "HPGL0230"
## [7] "HPGL0232" "HPGL0234" "HPGL0236" "HPGL0238" "HPGL0329" "HPGL0389"
## [13] "HPGL0391" "HPGL0393" "HPGL0395"
# [1] "HPGL0165" "HPGL0167" "HPGL0169" "HPGL0171" "HPGL0173" "HPGL0230" "HPGL0232" "HPGL0234" "HPGL0236" "HPGL0238"
#[11] "HPGL0329" "HPGL0389" "HPGL0391" "HPGL0393" "HPGL0395"
col.nam <- paste(condition, batch, 1:length(condition), sep=".")
#Boxplot of per sample log of size-factor-normalized counts (+ 1)
par(mar=c(10.5,4.5,2,1))
par(oma=c(0,0,0,0))
boxplot(y, names=col.nam, las=3)
#Heatmap of Pearson correlation between samples
#(delete Rowv, Colv, and dendrogram parameters if want samples to sort and show dendrogram)
#Correlation analysis (Pearson is default for cor unless othewise specified)
#Using raw counts
datCor <- cor(countsTable)
heatmap.2(datCor, Rowv=NA, Colv=NA,
margins=c(10, 10),
labRow=col.nam,
labCol=col.nam,
dendrogram="none",
scale="none",
trace="none",
srtCol=45)
#Median pairwise correlation
#Using raw counts
corM <- matrixStats::rowMedians(cor(countsTable))
qs <- quantile(corM,p=c(1,3)/4)
iqr <- diff(qs)
outLimit <- qs[1] - 1.5 * iqr
ylim <- c(pmin(min(corM),outLimit),max(corM))
col <- ifelse(condition=="amast4", "cornflowerblue",
ifelse(condition=="amast24", "gold",
ifelse(condition=="amast48", "green3",
ifelse(condition=="amast72", "coral", "mediumorchid4"))))
plot(corM, xaxt="n", ylim=ylim, ylab="Median Pairwise Correlation", xlab="", main="", col=col, pch=16, cex=2.2)
axis(side=1,at=seq(along=corM),labels=paste(condition,batch,sep=":"),las=2)
abline(h=outLimit,lty=2)
abline(v=1:length(col.nam), lty=3, col="black")
#Plot without IQR cutoff
#Use filterCounts method to filter for low counts
#Define filterCounts function
filterCounts = function (counts, lib.size = NULL, thresh = 1, minSamples = 2) {
cpms <- 2^log2CPM(counts, lib.size = lib.size)$y
keep <- rowSums(cpms > thresh) >= minSamples
counts <- counts[keep, ]
counts
}
x <- table(condition)
dim(countsTable)
## [1] 8486 15
#[1] 8486 15
counts <- filterCounts(countsTable, thresh=1, minSamples=min(x))
dim(counts)
## [1] 8479 15
#[1] 8479 15
#Same number after filtering as when metac is not included
#Quantile normalize counts
countsSubQ <- qNorm(counts)
#Explore data for batch effects
##Transform data to log2 counts per million
x <- cbcbSEQ::log2CPM(countsSubQ)
#Compute principal components
s <- makeSVD(x$y)
#Compute variance of each PC and how they correlate with batch and condition
pcRes(s$v, s$d, condition, batch)
## propVar cumPropVar cond.R2 batch.R2
## 1 41.84 41.84 97.04 0.20
## 2 16.51 58.35 1.25 91.84
## 3 11.34 69.69 70.11 6.92
## 4 7.43 77.12 58.10 23.26
## 5 5.84 82.96 47.21 29.73
## 6 4.20 87.16 15.35 2.93
## 7 2.81 89.97 13.41 1.59
## 8 2.21 92.18 12.63 17.54
## 9 1.90 94.08 17.81 10.47
## 10 1.61 95.69 26.25 0.42
## 11 1.31 97.00 18.15 12.73
## 12 1.16 98.16 3.36 0.26
## 13 0.97 99.13 17.20 0.29
## 14 0.85 99.98 2.10 1.82
# propVar cumPropVar cond.R2 batch.R2
#1 41.84 41.84 97.04 0.20
#2 16.51 58.35 1.25 91.84
#3 11.34 69.69 70.11 6.92
#4 7.43 77.12 58.10 23.26
#5 5.84 82.96 47.21 29.73
#6 4.20 87.16 15.35 2.93
#7 2.81 89.97 13.41 1.59
#8 2.21 92.18 12.63 17.54
#9 1.90 94.08 17.81 10.47
#10 1.61 95.69 26.25 0.42
#11 1.31 97.00 18.15 12.73
#12 1.16 98.16 3.36 0.26
#13 0.97 99.13 17.20 0.29
#14 0.85 99.98 2.10 1.82
#Plot PC1 vs. PC2, with black outlines and color fills
#Save as eps
condnum <- as.numeric(condition)
plotPC(s$v,
s$d,
col="black",
pch=ifelse(batch=="B", 25, ifelse(batch=="D", 21, ifelse(batch=="E", 22, ifelse(batch=="A", 24, 23)))),
bg=ifelse(condnum==1, "mediumorchid4", ifelse(condnum==2, "cornflowerblue", ifelse(condnum==3, "gold", ifelse(condnum==4, "green3", "coral")))), cex=2.6
)
legend(x=-0.34, y=0.45, legend=c("metac", "amast4", "amast24", "amast48", "amast72"), pch=22, col=0,
pt.bg=c("mediumorchid4", "cornflowerblue", "gold", "green3", "coral"), pt.cex=2.6, bty="n")
text(s$v[,1], s$v[,2], colnames(countsTable), cex=.7, pos=4)
#View as a Euclidean distance heatmap
dists <- dist(t(counts))
mat <- as.matrix(dists)
rownames(mat) <- colnames(mat) <- with(design, paste(colnames(countsTable)))
hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
vec.batch <- rainbow(nlevels(batch), start=0, end=.8)
batch.color <- rep(0, length(batch))
for (i in 1:length(batch)) {
batch.color[i] <- vec.batch[batch[i]==levels(batch)]
}
vec.condition <- c("mediumorchid4", "cornflowerblue", "gold", "green3", "coral")
condition.color <- rep(0, length(condition))
for (i in 1:length(condition)) {
condition.color[i] <- vec.condition[condition[i]==levels(condition)]
}
heatmap <- heatmap.2(mat, trace="none", col = rev(hmcol), margin=c(11,11), ColSideColors=condition.color,
RowSideColors=batch.color, key="FALSE", srtCol=45)
#I would like to create a PCA plot and heatmap to reflect the use of batch in the limma model
#Specify model
mod <- model.matrix(~batch)
#Use voom to determine weights for fitting the mean-variance trend for each gene
v <- voom(countsSubQ, mod)
#Fit the linear model for each gene
fit <- lmFit(v)
#Get the residual (i.e. everything except for the batch effect)
newData <- residuals(fit, v)
#Explore data for batch effects
#Compute principal components
s <- makeSVD(newData)
#Compute variance of each PC and how they correlate with batch and condition
pcRes(s$v, s$d, condition, batch)
## propVar cumPropVar cond.R2 batch.R2
## 1 52.52 52.52 97.16 0
## 2 13.73 66.25 78.69 0
## 3 9.09 75.34 79.68 0
## 4 5.75 81.09 21.45 0
## 5 4.93 86.02 13.54 0
## 6 3.58 89.60 17.97 0
## 7 2.56 92.16 19.05 0
## 8 2.04 94.20 11.38 0
## 9 1.93 96.13 36.76 0
## 10 1.47 97.60 3.75 0
## 11 1.27 98.87 14.08 0
## 12 1.12 99.99 6.48 0
## 13 0.00 99.99 0.00 100
## 14 0.00 99.99 0.00 100
# propVar cumPropVar cond.R2 batch.R2
#1 52.52 52.52 97.16 0
#2 13.73 66.25 78.69 0
#3 9.09 75.34 79.68 0
#4 5.75 81.09 21.45 0
#5 4.93 86.02 13.54 0
#6 3.58 89.60 17.97 0
#7 2.56 92.16 19.05 0
#8 2.04 94.20 11.38 0
#9 1.93 96.13 36.76 0
#10 1.47 97.60 3.75 0
#11 1.27 98.87 14.08 0
#12 1.12 99.99 6.48 0
#13 0.00 99.99 0.00 100
#14 0.00 99.99 0.00 100
#Plot PC1 vs. PC2, with black outlines and color fills
#Save as eps
condnum <- as.numeric(condition)
plotPC(s$v,
s$d,
col="black",
pch=ifelse(batch=="B", 25, ifelse(batch=="D", 21, ifelse(batch=="E", 22, ifelse(batch=="A", 24, 23)))),
bg=ifelse(condnum==1, "mediumorchid4", ifelse(condnum==2, "cornflowerblue", ifelse(condnum==3, "gold", ifelse(condnum==4, "green3", "coral")))), cex=2.6
)
legend(x=-0.48, y=-0.24, legend=c("metac", "amast4", "amast24", "amast48", "amast72"), pch=22, col=0,
pt.bg=c("mediumorchid4", "cornflowerblue", "gold", "green3", "coral"), pt.cex=2.6, bty="n")
text(s$v[,1], s$v[,2], colnames(countsTable), cex=.7, pos=4)
#View as a Euclidean distance heatmap
dists <- dist(t(newData))
mat <- as.matrix(dists)
rownames(mat) <- colnames(mat) <- with(design, paste(colnames(countsTable)))
hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
vec.batch <- rainbow(nlevels(batch), start=0, end=.8)
batch.color <- rep(0, length(batch))
for (i in 1:length(batch)) {
batch.color[i] <- vec.batch[batch[i]==levels(batch)]
}
vec.condition <- c("mediumorchid4", "cornflowerblue", "gold", "green3", "coral")
condition.color <- rep(0, length(condition))
for (i in 1:length(condition)) {
condition.color[i] <- vec.condition[condition[i]==levels(condition)]
}
heatmap <- heatmap.2(mat, trace="none", col = rev(hmcol), margin=c(11,11), ColSideColors=condition.color,
RowSideColors=batch.color, key="FALSE", srtCol=45)
#############################
#############################
#DE analysis
#Quantile normalize counts
countsSubQ <- qNorm(counts)
#Specify model
mod = model.matrix(~0+condition+batch)
#View mean-variance trend
voom(countsSubQ, mod, plot=TRUE)
## An object of class "EList"
## $E
## metac.B.1 amast4.B.2 amast24.B.3 amast48.B.4 amast72.B.5
## LmjF.01.0010-1 7.113 6.130 5.969 5.958 5.934
## LmjF.01.0020-1 5.599 6.048 6.043 6.087 6.166
## LmjF.01.0030-1 5.117 5.231 5.721 6.149 6.094
## LmjF.01.0040-1 5.008 5.140 5.632 5.465 5.849
## LmjF.01.0050-1 7.415 6.845 6.640 6.595 6.950
## metac.D.6 amast4.D.7 amast24.D.8 amast48.D.9 amast72.D.10
## LmjF.01.0010-1 6.542 5.695 5.784 6.101 5.930
## LmjF.01.0020-1 5.588 6.000 6.037 5.919 6.035
## LmjF.01.0030-1 5.925 5.608 5.999 5.648 5.741
## LmjF.01.0040-1 4.559 4.875 5.109 5.483 5.450
## LmjF.01.0050-1 7.009 6.339 6.738 7.124 6.953
## metac.E.11 amast4.E.12 amast24.E.13 amast48.E.14
## LmjF.01.0010-1 6.784 6.006 5.920 5.870
## LmjF.01.0020-1 5.930 5.982 6.130 6.316
## LmjF.01.0030-1 5.888 5.692 5.820 6.004
## LmjF.01.0040-1 5.103 5.130 5.144 5.545
## LmjF.01.0050-1 7.254 6.639 7.117 6.812
## amast72.E.15
## LmjF.01.0010-1 5.820
## LmjF.01.0020-1 6.162
## LmjF.01.0030-1 6.060
## LmjF.01.0040-1 5.560
## LmjF.01.0050-1 6.833
## 8474 more rows ...
##
## $weights
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11]
## [1,] 39.19 34.48 34.01 34.77 34.04 38.80 32.35 31.74 32.71 31.78 38.96
## [2,] 30.51 33.89 34.44 34.77 34.90 29.65 33.12 33.75 34.12 34.24 31.85
## [3,] 28.51 26.85 30.97 31.95 32.32 29.99 28.37 32.35 33.28 33.61 31.28
## [4,] 22.45 24.43 27.50 29.97 31.42 18.82 20.50 23.45 25.99 27.51 20.99
## [5,] 38.74 38.45 39.05 39.06 39.17 38.93 38.23 38.94 38.96 39.08 38.53
## [,12] [,13] [,14] [,15]
## [1,] 33.09 32.54 33.43 32.59
## [2,] 34.93 35.44 35.74 35.85
## [3,] 29.70 33.50 34.35 34.63
## [4,] 22.90 25.97 28.48 29.97
## [5,] 38.61 39.11 39.13 39.21
## 8474 more rows ...
##
## $design
## conditionmetac conditionamast4 conditionamast24 conditionamast48
## 1 1 0 0 0
## 2 0 1 0 0
## 3 0 0 1 0
## 4 0 0 0 1
## 5 0 0 0 0
## conditionamast72 batchD batchE
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 1 0 0
## 10 more rows ...
##
## $targets
## lib.size
## metac.B.1 7548103
## amast4.B.2 7548074
## amast24.B.3 7548098
## amast48.B.4 7548094
## amast72.B.5 7548024
## 10 more rows ...
#Use voom to transform quantile-normalized count data to log2-counts per million, estimate mean-variance relationship
#and use m-v relationship to computer appropriate observational-level weights
v <- voom(countsSubQ, mod)
#Fit a linear model for each gene using the specified design contained in v
fit <- lmFit(v)
##metac v amast4##
#eBayes finds an F-statistic from the set of t-statistics for that gene
metac.amast4.contr.mat <- makeContrasts(metac_v_amast4=conditionamast4-conditionmetac, levels=v$design)
metac.amast4.fit <- contrasts.fit(fit, metac.amast4.contr.mat)
metac.amast4.eb <- eBayes(metac.amast4.fit)
metac.amast4.topTab <- topTable(metac.amast4.eb, coef="metac_v_amast4", number=nrow(v$E))
write.csv(file="csv/lmajor_metac_vs_amast4_mmusculus.csv", x=metac.amast4.topTab)
#View the list of DE genes
head(metac.amast4.topTab, n=3L)
## logFC AveExpr t P.Value adj.P.Val B
## LmjF.12.0940-1 2.810 10.85 14.54 4.900e-11 2.472e-07 15.40
## LmjF.12.1000-1 2.671 11.47 14.37 5.909e-11 2.472e-07 15.19
## LmjF.31.0350-1 -1.733 10.23 -13.98 9.146e-11 2.472e-07 14.76
# logFC AveExpr t P.Value adj.P.Val B
#LmjF.12.0940-1 2.810295 10.85477 14.54169 4.899879e-11 2.472428e-07 15.39693
#LmjF.12.1000-1 2.671061 11.47061 14.37061 5.908732e-11 2.472428e-07 15.18740
#LmjF.31.0350-1 -1.733422 10.22653 -13.97803 9.146495e-11 2.472428e-07 14.75628
#Limit list to genes with an adjusted p value < 0.05
metac.amast4.sigGenes <- metac.amast4.topTab[metac.amast4.topTab$adj.P.Val <0.05, ]
length(metac.amast4.sigGenes$logFC)
## [1] 2962
#2962
#Filter out rows with less than 2-fold change (log2 fold change of > 1)
metac.amast4.sigGenesFold1 <- subset(metac.amast4.sigGenes, abs(logFC) > 1)
length(metac.amast4.sigGenesFold1$logFC)
## [1] 299
#299
#Filter out rows with less than 4-fold change (log2 fold change of > 2)
metac.amast4.sigGenesFold2 <- subset(metac.amast4.sigGenes, abs(logFC) > 2)
length(metac.amast4.sigGenesFold2$logFC)
## [1] 18
#18
metac.amast4.sigGenes <- metac.amast4.sigGenes[order(-metac.amast4.sigGenes$logFC), ]
#Make an MA plot
sel = metac.amast4.topTab$adj.P.Val < 0.05
top = metac.amast4.topTab
sub = paste("No. of sig. genes: ", sum(sel),"/",length(sel))
cpm = v$E
plot(rowMeans(cpm[rownames(top),]), top$logFC, pch=16, cex=0.5,col="darkgrey",
main="metac_amast4 model batch adjusted",
ylab="log FC", xlab="Average Expression",
ylim=c(-3,3), sub=sub)
points(rowMeans(cpm[rownames(top),])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1,0,1), col="red")
##amast4 v amast24##
##eBayes finds an F-statistic from the set of t-statistics for that gene
amast4.amast24.contr.mat <- makeContrasts(amast4_v_amast24=conditionamast24-conditionamast4, levels=v$design)
amast4.amast24.fit <- contrasts.fit(fit, amast4.amast24.contr.mat)
amast4.amast24.eb <- eBayes(amast4.amast24.fit)
amast4.amast24.topTab <- topTable(amast4.amast24.eb, coef="amast4_v_amast24", number=nrow(v$E))
#View the list of DE genes
head(amast4.amast24.topTab, n=3L)
## logFC AveExpr t P.Value adj.P.Val B
## LmjF.12.0920-1 2.103 8.008 14.28 6.537e-11 2.666e-07 14.23
## LmjF.12.1040-1 2.035 7.566 14.06 8.299e-11 2.666e-07 14.02
## LmjF.12.1020-1 2.148 7.649 13.85 1.053e-10 2.666e-07 13.83
#Limit list to genes with an adjusted p value < 0.05
amast4.amast24.sigGenes <- amast4.amast24.topTab[amast4.amast24.topTab$adj.P.Val <0.05, ]
length(amast4.amast24.sigGenes$logFC)
## [1] 301
#301
#281 before metac included
#Filter out rows with less than 2-fold change (log2 fold change of > 1)
amast4.amast24.sigGenesFold1 <- subset(amast4.amast24.sigGenes, abs(logFC) > 1)
length(amast4.amast24.sigGenesFold1$logFC)
## [1] 34
#34
#27 before metac included
#Filter out rows with less than 4-fold change (log2 fold change of > 2)
amast4.amast24.sigGenesFold2 <- subset(amast4.amast24.sigGenes, abs(logFC) > 2)
length(amast4.amast24.sigGenesFold2$logFC)
## [1] 6
#6
#1 before metac included
amast4.amast24.sigGenes <- amast4.amast24.sigGenes[order(-amast4.amast24.sigGenes$logFC), ]
#Make an MA plot
sel = amast4.amast24.topTab$adj.P.Val < 0.05
top = amast4.amast24.topTab
sub = paste("No. of sig. genes: ", sum(sel),"/",length(sel))
cpm = v$E
plot(rowMeans(cpm[rownames(top),]), top$logFC, pch=16, cex=0.5,col="darkgrey",
main="amast4_amast24 model batch adjusted",
ylab="log FC", xlab="Average Expression",
ylim=c(-3,3), sub=sub)
points(rowMeans(cpm[rownames(top),])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1,0,1), col="red")
##amast24 v amast48##
#eBayes finds an F-statistic from the set of t-statistics for that gene
amast24.amast48.contr.mat <- makeContrasts(amast24_v_amast48=conditionamast48-conditionamast24, levels=v$design)
amast24.amast48.fit <- contrasts.fit(fit, amast24.amast48.contr.mat)
amast24.amast48.eb <- eBayes(amast24.amast48.fit)
amast24.amast48.topTab <- topTable(amast24.amast48.eb, coef="amast24_v_amast48", number=nrow(v$E))
#View the list of DE genes
head(amast24.amast48.topTab, n=3L)
## logFC AveExpr t P.Value adj.P.Val B
## LmjF.16.0500-1 0.9526 6.184 5.831 1.985e-05 0.05165 2.881
## LmjF.36.2350-1 1.6293 8.059 5.612 3.083e-05 0.05165 2.513
## LmjF.31.1700-1 0.6852 7.338 5.528 3.662e-05 0.05165 2.379
# logFC AveExpr t P.Value adj.P.Val B
#LmjF.16.0500-1 0.9526423 6.184427 5.831439 1.985098e-05 0.05165267 2.881463
#LmjF.36.2350-1 1.6292766 8.058532 5.612429 3.082699e-05 0.05165267 2.513140
#LmjF.31.1700-1 0.6852255 7.338441 5.527579 3.661813e-05 0.05165267 2.379194
#Limit list to genes with an adjusted p value < 0.05
amast24.amast48.sigGenes <- amast24.amast48.topTab[amast24.amast48.topTab$adj.P.Val <0.05, ]
length(amast24.amast48.sigGenes$logFC)
## [1] 0
#0
#0 before metac included
#Make an MA plot
sel = amast24.amast48.topTab$adj.P.Val < 0.05
top = amast24.amast48.topTab
sub = paste("No. of sig. genes: ", sum(sel),"/",length(sel))
cpm = v$E
plot(rowMeans(cpm[rownames(top),]), top$logFC, pch=16, cex=0.5,col="darkgrey",
main="amast24_amast48 model batch adjusted",
ylab="log FC", xlab="Average Expression",
ylim=c(-3,3), sub=sub)
points(rowMeans(cpm[rownames(top),])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1,0,1), col="red")
##amast48 v amast72##
#eBayes finds an F-statistic from the set of t-statistics for that gene
amast48.amast72.contr.mat <- makeContrasts(amast48_v_amast72=conditionamast72-conditionamast48, levels=v$design)
amast48.amast72.fit <- contrasts.fit(fit, amast48.amast72.contr.mat)
amast48.amast72.eb <- eBayes(amast48.amast72.fit)
amast48.amast72.topTab <- topTable(amast48.amast72.eb, coef="amast48_v_amast72", number=nrow(v$E))
#View the list of DE genes
head(amast48.amast72.topTab, n=3L)
## logFC AveExpr t P.Value adj.P.Val B
## LmjF.36.5250-1 0.8051 7.689 5.757 2.306e-05 0.1273 0.8850
## LmjF.15.0440-5 1.1439 7.165 5.625 3.004e-05 0.1273 0.8114
## LmjF.05.0240-1 0.5681 6.693 5.272 6.179e-05 0.1747 0.5072
# logFC AveExpr t P.Value adj.P.Val B
#LmjF.36.5250-1 0.8051206 7.688751 5.756536 2.305982e-05 0.1273414 0.8849812
#LmjF.15.0440-5 1.1439317 7.165058 5.625265 3.003690e-05 0.1273414 0.8114074
#LmjF.05.0240-1 0.5680832 6.693406 5.272282 6.179471e-05 0.1746524 0.5072436
#Limit list to genes with an adjusted p value < 0.05
amast48.amast72.sigGenes <- amast48.amast72.topTab[amast48.amast72.topTab$adj.P.Val <0.05, ]
length(amast48.amast72.sigGenes$logFC)
## [1] 0
#0
#0 before metac included
#Make an MA plot
sel = amast48.amast72.topTab$adj.P.Val < 0.05
top = amast48.amast72.topTab
sub = paste("No. of sig. genes: ", sum(sel),"/",length(sel))
cpm = v$E
plot(rowMeans(cpm[rownames(top),]), top$logFC, pch=16, cex=0.5,col="darkgrey",
main="amast48_amast72 model batch adjusted",
ylab="log FC", xlab="Average Expression",
ylim=c(-3,3), sub=sub)
points(rowMeans(cpm[rownames(top),])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1,0,1), col="red")