Libraries

library(devtools)
library(tximport)
library(biomaRt)
library(hpgltools)
library(DESeq2)
library(gplots)
library(cbcbSEQ)
library(RColorBrewer)
library(Vennerable)
library(edgeR)
library(calibrate)
library(scales)

Set working directory, import metadata and abundance files

design <- read.table('MetaData only 4 hour.txt', header=T, sep='\t')
files <- file.path("kallisto abundance files/", design$HPGL.Identifier, "abundance.tsv")
names(files) <- paste0("HPGL09", c(12:31, 42:60))

Convert transcript ID to gene ID

ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
tx2gene <- getBM(attributes=c("ensembl_transcript_id", "ensembl_gene_id", "chromosome_name"), mart=ensembl)
## Cache found
good_idx <- grepl(x=tx2gene[["chromosome_name"]], pattern="^[[:alnum:]]{1,2}$")
good_ones <- tx2gene[good_idx, -3]

Create counts table

txi.kallisto.tsv <- tximport(files, type="kallisto", tx2gene=good_ones, countsFromAbundance="lengthScaledTPM")
## Note: importing `abundance.h5` is typically faster than `abundance.tsv`
## reading in files with read_tsv
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 
## transcripts missing from tx2gene: 14613
## summarizing abundance
## summarizing counts
## summarizing length
nrow(txi.kallisto.tsv$counts)
## [1] 34431
write.table(txi.kallisto.tsv$counts, "TPM_MvGM_CDS_20180711.txt", col.names=T, row.names=F, quote=F)
write.csv(txi.kallisto.tsv$counts, "TPM_MvGM_20180711.csv", row.names=T, quote=F)

Create DESeqDataSet

Not sure what this is actually doing

df <- data.frame(condition=paste(design$HPGL.Identifier, design$Growth, design$Stimulation, design$Patient, sep="_"))
rownames(df)=colnames(txi.kallisto.tsv$counts)
dds <- DESeqDataSetFromTximport(txi.kallisto.tsv, df, ~condition)
## using just counts from tximport
nrow(dds)
## [1] 34431

Bar Plot of Counts

par(mar=c(10, 4.5, 3.5, 1))
par(oma=c(0, 0, 0, 0))
barplot(colSums(txi.kallisto.tsv$counts), las=3, main='Raw Counts By Sample')

Box Plot of Counts

dds <- estimateSizeFactors(dds)
ncts <- counts(dds, normalized=TRUE)
y <- log(ncts + 1)

par(mar=c(5, 5, 2, 1))
par(oma=c(0, 0, 0, 0))
boxplot(y, names=colnames(txi.kallisto.tsv$counts), las=3, main='Per Sample Log of Size-factor Counts')

Heatmap of Pearson Correlation

datCor <- cor(txi.kallisto.tsv$counts)
heatmap.2(datCor, Rowv=NA, Colv=NA,
          margins=c(10, 10),
          labRow=df$condition,
          labCol=df$condition,
          dendrogram="none",
          scale="none",
          trace="none",
          srtCol=45, main='Pearson Correlation')

Median Pairwise Correlation

corM <- matrixStats::rowMedians(cor(txi.kallisto.tsv$counts))
qs <- quantile(corM, p=c(1, 3)/4)
iqr <- diff(qs)
outLimit <- qs[1] - 1.5 * iqr
ylim <- c(pmin(min(corM), outLimit), max(corM))
cond <- paste(design$Growth, design$Stimulation, sep="_")
col <- ifelse(cond=="M_NS", "gray60",
              ifelse(cond=="M_LPS", "deeppink3",
                     ifelse(cond=="M_LA", "darkseagreen2",
                            ifelse(cond=="M_LP", "lavender",
                                   ifelse(cond=="GM_NS", "lightpink1",
                                          ifelse(cond=="GM_LPS", "moccasin",
                                                 ifelse(cond=="GM_LA", "black",
                                                        ifelse(cond=="GM_LP", "springgreen4",
                                                               "blue2"))))))))
par(mar=c(5, 4.5, 2, 1))
plot(corM, xaxt="n", ylim=ylim, ylab="Median Pairwise Correlation", xlab="", main="", col=col, pch=16, cex=1.5)
axis(side=1, at=seq(along=corM), labels=colnames(txi.kallisto.tsv$counts), las=2)
abline(h=outLimit, lty=2)
abline(v=1:ncol(txi.kallisto.tsv$counts), lty=3, col="black")

Filter and Normalize Counts

filterCounts = function (counts, lib.size = NULL, thresh = 1, minSamples = 2) {
  cpms <- 2^log2CPM(counts, lib.size = lib.size)$y
  keep <- rowSums(cpms > thresh) >= minSamples
  counts <- counts[keep, ]
  counts
}
x <- table(design$Stimulation)
dim(txi.kallisto.tsv$counts)
## [1] 34431    39
counts <- filterCounts(txi.kallisto.tsv$counts, thresh=1, minSamples=min(x))
dim(counts)
## [1] 12858    39
countsSubQ <- qNorm(counts)
x <- log2CPM(countsSubQ)
s <- makeSVD(x$y)

PCA

pcRes(s$v, s$d, cond, design$Batch)[1:5, ]
##   propVar cumPropVar cond.R2 batch.R2
## 1   28.82      28.82   69.66    25.21
## 2   13.93      42.75   41.92    44.39
## 3   10.96      53.71   73.10    18.68
## 4    7.39      61.10    9.06    75.08
## 5    5.80      66.90    5.43    78.47
pcRes(s$v, s$d, cond, design$Patient)[1:5, ]
##   propVar cumPropVar cond.R2 batch.R2
## 1   28.82      28.82   69.66    30.95
## 2   13.93      42.75   41.92    46.07
## 3   10.96      53.71   73.10    20.98
## 4    7.39      61.10    9.06    80.68
## 5    5.80      66.90    5.43    91.16

Plot PC1 v PC2

condnum <- as.numeric(design$Stimulation)
condnum <- ifelse(condnum==4, "green",
                  ifelse(condnum==3, "lightblue",
                         ifelse(condnum==2, "pink",
                                ifelse(condnum==1, "purple", "black"))))
patnum <- as.numeric(design$Patient)
patnum <- ifelse(patnum==4, 21,
                 ifelse(patnum==3, 22,
                        ifelse(patnum==2, 23,
                               ifelse(patnum==1, 24,
                                      ifelse(patnum==5, 25, 1)))))
plotPC(s$v, s$d, col="black", pch=patnum, bg=condnum)

legend(x=0.2, y=0.2, legend=unique(design$Stimulation), pch=22, col=0,
       pt.bg=c("green","lightblue","pink","purple"), pt.cex=1.5, cex=0.5, bty="n")
legend(x=0.2, y=-0.1, legend=unique(design$Patient), pch=unique(patnum), col=0,
       pt.bg="gray90", pt.cex=1.5, cex=0.5, bty="n")

my_design <- design
rownames(my_design) <- my_design[[1]]
colnames(my_design) <- tolower(colnames(my_design))
my_design[["condition"]] <- my_design[["stimulation"]]
my_design[["batch"]] <- my_design[["patient"]]
data <- x[["y"]]

another <- plot_pca(data=data, design=my_design)
## Warning in RColorBrewer::brewer.pal(12, "Dark2"): n too large, allowed maximum for palette Dark2 is 8
## Returning the palette you asked for with that many colors

Euclidian Distance Heat Map

dists <- dist(t(counts))
mat <- as.matrix(dists)
rownames(mat) <- colnames(mat) <- cond
hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
vec.patient <- rainbow(nlevels(design$Patient), start=0, end=.8)
patient.color <- rep(0, length(design$Patient))
for (i in 1:length(design$Patient))
  patient.color[i] <- vec.patient[design$Patient[i]==levels(design$Patient)]
vec.condition <- c("green", "lightblue", "pink", "purple")
condition.color <- rep(0, length(design$Stimulation))
for (i in 1:length(design$Stimulation))
  condition.color[i] <- vec.condition[design$Stimulation[i]==levels(design$Stimulation)]

heatmap <- heatmap.2(mat, trace="none", col = rev(hmcol), margin=c(11, 11), ColSideColors=condition.color,
                     RowSideColors=patient.color, key="FALSE", srtCol=45)

Correct for Patient in Limma model

mod <- model.matrix(~design$Patient)
v <- voom(countsSubQ, mod)
fit <- lmFit(v)
newData <- residuals(fit, v)
s <- makeSVD(newData)

pcRes(s$v, s$d, cond, design$Batch)[1:5, ]
##   propVar cumPropVar cond.R2 batch.R2
## 1   36.04      36.04   95.47     5.38
## 2   17.81      53.85   91.87     6.83
## 3    8.05      61.90   29.00    25.52
## 4    5.39      67.29   78.12    21.01
## 5    3.06      70.35    9.63     8.75
pcRes(s$v, s$d, cond, design$Patient)[1:5, ]
##   propVar cumPropVar cond.R2 batch.R2
## 1   36.04      36.04   95.47        0
## 2   17.81      53.85   91.87        0
## 3    8.05      61.90   29.00        0
## 4    5.39      67.29   78.12        0
## 5    3.06      70.35    9.63        0

Plot PC1 and PC2 with patient correction

gronum <- as.numeric(design$Growth)
gronum <- ifelse(gronum==2, 19,
                 ifelse(gronum==1, 15, 1))
patnum <- as.numeric(design$Patient)
patnum <- ifelse(patnum==1, "pink",
                ifelse(patnum==2, "green",
                        ifelse(patnum==3, "blue",
                              ifelse(patnum==4, "yellow",
                                      ifelse(patnum==5, "black", "orange")))))
samplenum <- as.numeric(design$Stimulation)
samplenum <- ifelse(samplenum==4, "green",
                ifelse(samplenum==3, "blue",
                        ifelse(samplenum==1, "black",
                              ifelse(samplenum==2, "yellow", "grey"))))
plotPC(s$v, s$d, pch=gronum, col=samplenum, cex=2)

legend(x=0.05, y=0.3, legend=c("NS", "LPS", "LP", "LA"), pch=22, col=0,
       pt.bg=c("green", "blue", "black", "yellow"), pt.cex=1.5, cex=1, bty="n")
legend(x=0.05, y=0.1, legend=unique(design$Growth), pch=unique(gronum), col="black",
       pt.bg="black", pt.cex=1.0, cex=1, bty="n")

another_v2 <- plot_pca(data=newData, design=my_design)
## Warning in RColorBrewer::brewer.pal(12, "Dark2"): n too large, allowed maximum for palette Dark2 is 8
## Returning the palette you asked for with that many colors
another_v2$plot

Heatmap with patient correction??

dists <- dist(t(newData))
mat <- as.matrix(dists)
rownames(mat) <- colnames(mat) <- cond
heatmap <- heatmap.2(mat, trace="none", col = rev(hmcol), margin=c(11, 11), ColSideColors=condition.color,
                     RowSideColors=patient.color, key="FALSE", srtCol=45)

DE analysis

countsSubQ <- qNorm(counts)
patient <- design$Patient
mod <- model.matrix(~0+cond+patient, data=design)
voom(countsSubQ, mod, plot=TRUE)

## An object of class "EList"
## $E
##                  HPGL0912   HPGL0913  HPGL0914  HPGL0915   HPGL0916  HPGL0917
## ENSG00000000419  5.122947  5.1270568  5.024619  4.473779  4.7167417  5.045765
## ENSG00000000457  3.298952  3.1666572  3.050232  3.024500  3.1611143  3.545003
## ENSG00000000460  2.399415  2.5290360  2.601569  2.435568  2.6406067  2.059072
## ENSG00000000938 10.041971  9.2476291  9.983340  9.741007  9.3622950  9.819416
## ENSG00000000971 -2.495677 -0.7789675 -1.031357 -1.765510 -0.1931129 -1.383149
##                  HPGL0918  HPGL0919   HPGL0920  HPGL0921   HPGL0922 HPGL0923
## ENSG00000000419 5.2660078  5.299251  4.6931386  4.855997  5.1076756 5.370218
## ENSG00000000457 4.0989374  3.419582  3.6972710  4.106408  3.6040736 3.993299
## ENSG00000000460 1.6421080  2.213396  1.9492232  1.795535  2.6303621 2.609675
## ENSG00000000938 8.8113183  9.567086  9.2411743  9.073456  9.4465028 8.449657
## ENSG00000000971 0.4631257 -2.026586 -0.7620683 -1.940476 -0.4053762 1.069277
##                   HPGL0924   HPGL0925   HPGL0926   HPGL0927  HPGL0928  HPGL0929
## ENSG00000000419  5.2902881  4.8084011  5.3797921  5.0693210 5.3190135  4.968390
## ENSG00000000457  3.3724325  3.7876025  3.9403856  3.8096880 3.8024362  3.782917
## ENSG00000000460  2.2422507  2.5160580  1.9281870  2.4927624 2.1826216  2.360953
## ENSG00000000938  9.6454386  8.8593722  8.4518594  9.4225328 8.4160319  9.485652
## ENSG00000000971 -0.1317729 -0.0409803 -0.5388732 -0.1522648 0.6414257 -1.201507
##                  HPGL0930   HPGL0931  HPGL0942  HPGL0943  HPGL0944  HPGL0945
## ENSG00000000419 4.9496118  4.7623073  5.102506  5.085227  4.874986  4.821552
## ENSG00000000457 4.0062619  4.3876971  3.224322  3.306427  3.496547  3.745348
## ENSG00000000460 2.0858879  2.1599903  2.750830  2.455631  3.031471  2.247731
## ENSG00000000938 8.5774052  8.2823716 10.605055 10.368233 10.030512 10.403797
## ENSG00000000971 0.3219542 -0.5602922 -1.046947 -2.082412  1.534188  1.090770
##                   HPGL0946 HPGL0947   HPGL0948 HPGL0949 HPGL0950   HPGL0951
## ENSG00000000419  5.2504270 5.259219  5.0182394 5.253533 5.071247  5.4802737
## ENSG00000000457  3.5212117 3.316758  3.2636573 3.172614 3.587398  3.5583401
## ENSG00000000460  2.3072570 1.611214  2.3985966 2.789688 2.524471  2.6507018
## ENSG00000000938 10.3682327 9.790238 10.3477295 9.189028 9.698073 10.2340652
## ENSG00000000971 -0.2224197 1.582295 -0.3171797 2.535629 2.737828 -0.1407236
##                 HPGL0952  HPGL0953 HPGL0954 HPGL0955   HPGL0956 HPGL0957
## ENSG00000000419 5.276192  5.194370 5.100043 5.218377  5.5547019 5.241558
## ENSG00000000457 3.551739  3.364224 3.631766 3.900960  3.6441932 3.645204
## ENSG00000000460 2.332391  2.304077 2.944163 2.609675  2.1551877 1.914653
## ENSG00000000938 9.430261 10.179109 9.337255 9.247627 10.2083643 9.380096
## ENSG00000000971 2.251412 -2.233885 1.842173 2.605888 -0.6588511 2.057933
##                  HPGL0958 HPGL0959 HPGL0960
## ENSG00000000419  5.194370 5.326119 5.215988
## ENSG00000000457  3.364224 3.834283 3.953882
## ENSG00000000460  2.304077 3.124578 2.645245
## ENSG00000000938 10.179109 9.012222 9.083070
## ENSG00000000971 -2.233885 2.871823 3.217145
## 12853 more rows ...
## 
## $weights
##            [,1]      [,2]       [,3]      [,4]      [,5]      [,6]      [,7]
## [1,] 19.1059000 19.258043 18.7306002 17.801781 18.134656 19.730621 19.870590
## [2,] 10.0686874 10.557347  9.3679651 10.322597 11.535487 12.996538 13.503159
## [3,]  8.0202059  7.010903  7.7570491  8.713326  7.608745  6.087962  5.333521
## [4,] 22.3245686 22.798531 22.3502695 22.758846 22.787224 22.528289 23.007849
## [5,]  0.9630185  2.058562  0.7704206  1.865260  1.853443  1.009789  2.173354
##            [,8]      [,9]     [,10]     [,11]     [,12]     [,13]     [,14]
## [1,] 19.3574504 18.448827 18.777358 20.388771 20.526170 20.045140 19.178730
## [2,] 12.2387548 13.253788 14.517731 12.837533 13.342036 12.084195 13.096368
## [3,]  5.8931482  6.614489  5.783267  7.523290  6.579483  7.277250  8.182506
## [4,] 22.5562332 22.971908 22.997609 22.720587 23.161967 22.749266 23.134740
## [5,]  0.8062579  1.967092  1.954514  1.583556  3.684342  1.242177  3.294770
##          [,15]     [,16]     [,17]     [,18]     [,19]     [,20]     [,21]
## [1,] 19.493027 19.646916 19.796030 19.275722 18.364469 18.691121 19.614978
## [2,] 14.353982 13.865867 14.374896 13.107199 14.124509 15.400927 11.496517
## [3,]  7.137670  7.080653  6.197931  6.851029  7.701533  6.720204  8.183313
## [4,] 23.154211 22.799311 23.209435 22.828122 23.188854 23.203608 22.026152
## [5,]  3.271077  1.469362  3.370888  1.155981  3.019060  2.997721  1.666861
##          [,22]     [,23]     [,24]     [,25]     [,26]     [,27]     [,28]
## [1,] 19.244538 18.332296 18.658230 20.301537 20.446349 19.959742 19.087208
## [2,] 10.759964 11.753832 13.008774 11.125048 11.619008 10.396805 11.378765
## [3,]  7.916568  8.889067  7.764450  7.317115  6.401535  7.077906  7.959203
## [4,] 22.050056 22.420407 22.446872 22.218989 22.678985 22.243941 22.639588
## [5,]  1.304883  3.498360  3.473078  2.775562  6.771951  2.117703  6.093439
##          [,29]     [,30]     [,31]     [,32]     [,33]     [,34]     [,35]
## [1,] 19.399628 20.627465 20.751759 20.305567 19.458187 19.772874 20.818403
## [2,] 12.619953 12.184772 12.692794 11.438331 12.442525 13.705569 12.590627
## [3,]  6.943077  8.197591  7.166980  7.930801  8.904568  7.778342  7.677800
## [4,] 22.667759 22.319107 22.792388 22.344713 22.752717 22.781085 22.379319
## [5,]  6.051935  2.310816  5.630274  1.782177  5.059937  5.024486  2.514568
##          [,36]     [,37]     [,38]     [,39]
## [1,] 20.944138 20.520953 19.697478 19.988184
## [2,] 13.103076 11.841095 12.859185 14.110883
## [3,]  6.714326  7.428369  8.347636  7.285165
## [4,] 22.858414 22.405679 22.819376 22.847500
## [5,]  6.135406  1.930129  5.522918  5.485154
## 12853 more rows ...
## 
## $design
##   condGM_LA condGM_LP condGM_LPS condGM_NS condM_LA condM_LP condM_LPS condM_NS
## 1         0         0          0         0        0        0         0        1
## 2         0         0          0         0        0        0         0        1
## 3         0         0          0         0        0        0         0        1
## 4         0         0          0         0        0        0         0        1
## 5         0         0          0         0        0        0         0        1
##   patientH2 patientH3 patientH4 patientH5
## 1         0         0         0         0
## 2         1         0         0         0
## 3         0         1         0         0
## 4         0         0         1         0
## 5         0         0         0         1
## 34 more rows ...
## 
## $targets
##          lib.size
## HPGL0912 20054310
## HPGL0913 20054287
## HPGL0914 20054287
## HPGL0915 20054311
## HPGL0916 20054294
## 34 more rows ...
v <- voom(countsSubQ, mod)
fit <- lmFit(v)

M_LPS v M_NS

eBayes finds an F-statistic from the set of t-statistics for that gene

M_LPS.M_NS.contr.mat <- makeContrasts(M_LPSvM_NS=(condM_LPS-condM_NS), levels=v$design)
M_LPS.M_NS.fit <- contrasts.fit(fit, M_LPS.M_NS.contr.mat)
M_LPS.M_NS.eb <- eBayes(M_LPS.M_NS.fit)
M_LPS.M_NS.topTab <- topTable(M_LPS.M_NS.eb, coef="M_LPSvM_NS", number=nrow(v$E))
M_LPS.M_NS.topTab <- cbind(rownames(M_LPS.M_NS.topTab), M_LPS.M_NS.topTab)
colnames(M_LPS.M_NS.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(M_LPS.M_NS.topTab) <- c(1:nrow(M_LPS.M_NS.topTab))

write.csv(M_LPS.M_NS.topTab, file="topTab_M_LPSvM_NS_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)

Limit list to genes with an adjusted p value < 0.05

M_LPS.M_NS.sigGenes <- M_LPS.M_NS.topTab[M_LPS.M_NS.topTab$adj.P.Val <0.05, ]
length(M_LPS.M_NS.sigGenes$ID)
## [1] 4840

Filter out rows with less than 2-fold change (log2 fold change of > 1)

M_LPS.M_NS.sigGenesFold1 <- subset(M_LPS.M_NS.sigGenes, abs(logFC) > 1)
length(M_LPS.M_NS.sigGenesFold1$ID)
## [1] 1350

33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M35;17M35;17M35;17M35;17M35;17M35;17MFilter out rows with less than 4-fold change (log2 fold change of > 2)

M_LPS.M_NS.sigGenesFold2 <- subset(M_LPS.M_NS.sigGenes, abs(logFC) > 2)
length(M_LPS.M_NS.sigGenesFold2$ID)
## [1] 431

Make an MA plot

sel <- M_LPS.M_NS.topTab$adj.P.Val < 0.05
top <- M_LPS.M_NS.topTab
sub <- paste("No. of sig. genes: ", sum(sel), "/", length(sel))
cpm <- v$E

plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
     main="M_LPSvM_NS adjusted",
     ylab="log FC", xlab="Average Expression",
     sub=sub)
points(rowMeans(cpm[top$ID, ])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")

Annotate sigGenes list using Biomart

M_LPS.M_NS.sigGenes <- M_LPS.M_NS.sigGenes[order(-M_LPS.M_NS.sigGenes$logFC), ]

sigGenes <- M_LPS.M_NS.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
ids <- sigGenes$ID

desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id", values=ids, mart=ensembl)
## Cache found
colnames(desc)=c("ID", "Symbol", "Description", "Type")

desc$Description <- gsub(",", "", desc$Description)

DEG_LPS <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG_LPS <- subset(DEG_LPS, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG_LPS <- DEG_LPS[order(-DEG_LPS$logFC), ]

##Filter out Genes -1<FC<1
DEG_LPS <- subset(DEG_LPS, abs(DEG_LPS$logFC)>1)

##Save DE genes
write.table(DEG_LPS, "DEG_M_LPSvM_NS_CDS_limmabatchcorrection_20171120.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG_LPS, "DEG_M_LPSvM_NS_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)

M_LPS v M_LA

#eBayes finds an F-statistic from the set of t-statistics for that gene
M_LPS.M_LA.contr.mat <- makeContrasts(M_LPSvM_LA=((condM_LA-condM_NS)-(condM_LPS-condM_NS)), levels=v$design)
M_LPS.M_LA.fit <- contrasts.fit(fit, M_LPS.M_LA.contr.mat)
M_LPS.M_LA.eb <- eBayes(M_LPS.M_LA.fit)
M_LPS.M_LA.topTab <- topTable(M_LPS.M_LA.eb, coef="M_LPSvM_LA", number=nrow(v$E))
M_LPS.M_LA.topTab <- cbind(rownames(M_LPS.M_LA.topTab), M_LPS.M_LA.topTab)
colnames(M_LPS.M_LA.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(M_LPS.M_LA.topTab) <- c(1:nrow(M_LPS.M_LA.topTab))

write.csv(M_LPS.M_LA.topTab, file="topTab_M_LPSvM_LA_CDS_limmabatchcorrection_20171120rev.csv", row.names=F, quote=F)

##Limit list to genes with an adjusted p value < 0.05
M_LPS.M_LA.sigGenes <- M_LPS.M_LA.topTab[M_LPS.M_LA.topTab$adj.P.Val <0.05, ]
length(M_LPS.M_LA.sigGenes$ID)
## [1] 955
##[1] 617

##Filter out rows with less than 2-fold change (log2 fold change of > 1)
M_LPS.M_LA.sigGenesFold1 <- subset(M_LPS.M_LA.sigGenes, abs(logFC) > 1)
length(M_LPS.M_LA.sigGenesFold1$ID)
## [1] 256
##[1] 240

##Filter out rows with less than 4-fold change (log2 fold change of > 2)
M_LPS.M_LA.sigGenesFold2 <- subset(M_LPS.M_LA.sigGenes, abs(logFC) > 2)
length(M_LPS.M_LA.sigGenesFold2$ID)
## [1] 57
##[1] 69

##Make an MA plot
sel <- M_LPS.M_LA.topTab$adj.P.Val < 0.05
top <- M_LPS.M_LA.topTab
sub <- paste("No. of sig. genes: ", sum(sel), "/", length(sel))
cpm <- v$E

plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
     main="M_LPSvM_LA adjusted",
     ylab="log FC", xlab="Average Expression",
     sub=sub)
points(rowMeans(cpm[top$ID, ])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")

dev.copy(pdf, "MAplot_M_LPSvM_LA_CDS_limmabatchcorrection_20171120rev.pdf", width=8, height=8)
## pdf 
##   3
dev.off()
## png 
##   2
dev.copy(png, "MAplot_M_LPSvM_LA_CDS_limmabatchcorrection_20171120rev.png", width=700,
         height=700)
## png 
##   3
dev.off()
## png 
##   2
M_LPS.M_LA.sigGenes <- M_LPS.M_LA.sigGenes[order(-M_LPS.M_LA.sigGenes$logFC), ]

##Annotate sigGenes list using Biomart
sigGenes <- M_LPS.M_LA.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
ids <- sigGenes$ID

desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
              values=ids, mart=ensembl)
## Cache found
colnames(desc)=c("ID", "Symbol", "Description", "Type")

##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)

DEG <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG <- subset(DEG, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG <- DEG[order(-DEG$logFC), ]

##Filter out Genes -1<FC<1
DEG <- subset(DEG, abs(DEG$logFC)>1)

##Save DE genes
write.table(DEG, "DEG_M_LPSvM_LA_CDS_limmabatchcorrection_20171120rev.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG, "DEG_M_LPSvM_LA_CDS_limmabatchcorrection_20171120rev.csv", row.names=F, quote=F)

M_LPS v M_LP

#eBayes finds an F-statistic from the set of t-statistics for that gene
M_LPS.M_LP.contr.mat <- makeContrasts(M_LPSvM_LP=((condM_LP-condM_NS)-(condM_LPS-condM_NS)), levels=v$design)
M_LPS.M_LP.fit <- contrasts.fit(fit, M_LPS.M_LP.contr.mat)
M_LPS.M_LP.eb <- eBayes(M_LPS.M_LP.fit)
M_LPS.M_LP.topTab <- topTable(M_LPS.M_LP.eb, coef="M_LPSvM_LP", number=nrow(v$E))
M_LPS.M_LP.topTab <- cbind(rownames(M_LPS.M_LP.topTab), M_LPS.M_LP.topTab)
colnames(M_LPS.M_LP.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(M_LPS.M_LP.topTab) <- c(1:nrow(M_LPS.M_LP.topTab))

write.csv(M_LPS.M_LP.topTab, file="topTab_M_LPSvM_LP_CDS_limmabatchcorrection_20171120rev.csv",
          row.names=F, quote=F)

##Limit list to genes with an adjusted p value < 0.05
M_LPS.M_LP.sigGenes <- M_LPS.M_LP.topTab[M_LPS.M_LP.topTab$adj.P.Val <0.05, ]
length(M_LPS.M_LP.sigGenes$ID)
## [1] 1993
##[1] 1473

##Filter out rows with less than 2-fold change (log2 fold change of > 1)
M_LPS.M_LP.sigGenesFold1 <- subset(M_LPS.M_LP.sigGenes, abs(logFC) > 1)
length(M_LPS.M_LP.sigGenesFold1$ID)
## [1] 489
##[1] 467

##Filter out rows with less than 4-fold change (log2 fold change of > 2)
M_LPS.M_LP.sigGenesFold2 <- subset(M_LPS.M_LP.sigGenes, abs(logFC) > 2)
length(M_LPS.M_LP.sigGenesFold2$ID)
## [1] 91
##[1] 125

##Make an MA plot
sel <- M_LPS.M_LP.topTab$adj.P.Val < 0.05
top <- M_LPS.M_LP.topTab
sub <- paste("No. of sig. genes: ", sum(sel),"/",length(sel))
cpm <- v$E

plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
     main="M_LPSvM_LP adjusted",
     ylab="log FC", xlab="Average Expression",
     sub=sub)
points(rowMeans(cpm[top$ID, ])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")

dev.copy(pdf, "MAplot_M_LPSvM_LP_CDS_limmabatchcorrection_20171120rev.pdf", width=8, height=8)
## pdf 
##   3
dev.off()
## png 
##   2
dev.copy(png, "MAplot_M_LPSvM_LP_CDS_limmabatchcorrection_20171120rev.png", width=700,
         height=700)
## png 
##   3
dev.off()
## png 
##   2
M_LPS.M_LP.sigGenes <- M_LPS.M_LP.sigGenes[order(-M_LPS.M_LP.sigGenes$logFC), ]

##Annotate sigGenes list using Biomart
sigGenes <- M_LPS.M_LP.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
ids <- sigGenes$ID

##To see possibilities for attributes, use head(listAttributes(ensembl), n=20L)
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
              values=ids, mart=ensembl)
## Cache found
colnames(desc)=c("ID", "Symbol", "Description", "Type")

##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)

DEG <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG <- subset(DEG, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG <- DEG[order(-DEG$logFC), ]

##Filter out Genes -1<FC<1
DEG <- subset(DEG, abs(DEG$logFC)>1)

##Save DE genes
write.table(DEG, "DEG_M_LPSvM_LP_CDS_limmabatchcorrection_20171120rev.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG, "DEG_M_LPSvM_LP_CDS_limmabatchcorrection_20171120rev.csv", row.names=F, quote=F)

M_LP v M_NS

##eBayes finds an F-statistic from the set of t-statistics for that gene
M_LP.M_NS.contr.mat <- makeContrasts(M_LPvM_NS=(condM_LP-condM_NS), levels=v$design)
M_LP.M_NS.fit <- contrasts.fit(fit, M_LP.M_NS.contr.mat)
M_LP.M_NS.eb <- eBayes(M_LP.M_NS.fit)
M_LP.M_NS.topTab <- topTable(M_LP.M_NS.eb, coef="M_LPvM_NS", number=nrow(v$E))
M_LP.M_NS.topTab <- cbind(rownames(M_LP.M_NS.topTab), M_LP.M_NS.topTab)
colnames(M_LP.M_NS.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(M_LP.M_NS.topTab) <- c(1:nrow(M_LP.M_NS.topTab))

write.csv(M_LP.M_NS.topTab, file="topTab_M_LPvM_NS_CDS_limmabatchcorrection_20171018.csv", row.names=F, quote=F)

##Limit list to genes with an adjusted p value < 0.05
M_LP.M_NS.sigGenes <- M_LP.M_NS.topTab[M_LP.M_NS.topTab$adj.P.Val <0.05, ]
length(M_LP.M_NS.sigGenes$ID)
## [1] 4603
##[1] 4386

##Filter out rows with less than 2-fold change (log2 fold change of > 1)
M_LP.M_NS.sigGenesFold1 <- subset(M_LP.M_NS.sigGenes, abs(logFC) > 1)
length(M_LP.M_NS.sigGenesFold1$ID)
## [1] 1422
##[1] 1455

##Filter out rows with less than 4-fold change (log2 fold change of > 2)
M_LP.M_NS.sigGenesFold2 <- subset(M_LP.M_NS.sigGenes, abs(logFC) > 2)
length(M_LP.M_NS.sigGenesFold2$ID)
## [1] 448
##[1] 519

##Make an MA plot
sel <- M_LP.M_NS.topTab$adj.P.Val < 0.05
top <- M_LP.M_NS.topTab
sub <- paste("No. of sig. genes: ", sum(sel), "/", length(sel))
cpm <- v$E

plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
     main="M_LPvM_NS adjusted",
     ylab="log FC", xlab="Average Expression",
     sub=sub)
points(rowMeans(cpm[top$ID,])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")

dev.copy(pdf, "MAplot_M_LPvM_NS_CDS_limmabatchcorrection_20171018.pdf", width=8, height=8)
## pdf 
##   3
dev.off()
## png 
##   2
dev.copy(png, "MAplot_M_LPvM_NS_CDS_limmabatchcorrection_20171018.png", width=700, height=700)
## png 
##   3
dev.off()
## png 
##   2
M_LP.M_NS.sigGenes <- M_LP.M_NS.sigGenes[order(-M_LP.M_NS.sigGenes$logFC), ]

##Annotate sigGenes list using Biomart
sigGenes <- M_LP.M_NS.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
ids <- sigGenes$ID

##To see possibilities for attributes, use head(listAttributes(ensembl), n=20L)
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
              values=ids, mart=ensembl)
## Cache found
colnames(desc)=c("ID", "Symbol", "Description", "Type")

##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)

DEG_LP4 <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG_LP4 <- subset(DEG_LP4, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG_LP4 <- DEG_LP4[order(-DEG_LP4$logFC), ]

##Filter out Genes -1<FC<1
DEG_LP4 <- subset(DEG_LP4, abs(DEG_LP4$logFC)>1)

##Save DE genes
write.table(DEG_LP4, "DEG_M_LPvM_NS_CDS_limmabatchcorrection_20171018.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG_LP4, "DEG_M_LPvM_NS_CDS_limmabatchcorrection_20171018.csv", row.names=F, quote=F)

M_LA v M_NS

#eBayes finds an F-statistic from the set of t-statistics for that gene
M_LA.M_NS.contr.mat <- makeContrasts(M_LAvM_NS=(condM_LA-condM_NS), levels=v$design)
M_LA.M_NS.fit <- contrasts.fit(fit, M_LA.M_NS.contr.mat)
M_LA.M_NS.eb <- eBayes(M_LA.M_NS.fit)
M_LA.M_NS.topTab <- topTable(M_LA.M_NS.eb, coef="M_LAvM_NS", number=nrow(v$E))
M_LA.M_NS.topTab <- cbind(rownames(M_LA.M_NS.topTab), M_LA.M_NS.topTab)
colnames(M_LA.M_NS.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(M_LA.M_NS.topTab) <- c(1:nrow(M_LA.M_NS.topTab))

write.csv(M_LA.M_NS.topTab, file="topTab_M_LAvM_NS_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)

##Limit list to genes with an adjusted p value < 0.05
M_LA.M_NS.sigGenes <- M_LA.M_NS.topTab[M_LA.M_NS.topTab$adj.P.Val <0.05, ]
length(M_LA.M_NS.sigGenes$ID)
## [1] 4922
##[1] 617

##Filter out rows with less than 2-fold change (log2 fold change of > 1)
M_LA.M_NS.sigGenesFold1 <- subset(M_LA.M_NS.sigGenes, abs(logFC) > 1)
length(M_LA.M_NS.sigGenesFold1$ID)
## [1] 1493
##[1] 240

##Filter out rows with less than 4-fold change (log2 fold change of > 2)
M_LA.M_NS.sigGenesFold2 <- subset(M_LA.M_NS.sigGenes, abs(logFC) > 2)
length(M_LA.M_NS.sigGenesFold2$ID)
## [1] 478
##[1] 69

##Make an MA plot
sel <- M_LA.M_NS.topTab$adj.P.Val < 0.05
top <- M_LA.M_NS.topTab
sub <- paste("No. of sig. genes: ", sum(sel), "/", length(sel))
cpm <- v$E

plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
     main="M_LAvM_NS adjusted",
     ylab="log FC", xlab="Average Expression",
     sub=sub)
points(rowMeans(cpm[top$ID, ])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")

dev.copy(pdf, "MAplot_M_LAvM_NS_CDS_limmabatchcorrection_20171120.pdf", width=8, height=8)
## pdf 
##   3
dev.off()
## png 
##   2
dev.copy(png, "MAplot_M_LAvM_NS_CDS_limmabatchcorrection_20171120.png", width=700, height=700)
## png 
##   3
dev.off()
## png 
##   2
M_LA.M_NS.sigGenes <- M_LA.M_NS.sigGenes[order(-M_LA.M_NS.sigGenes$logFC), ]

##Annotate sigGenes list using Biomart
sigGenes <- M_LA.M_NS.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
ids <- sigGenes$ID

##To see possibilities for attributes, use head(listAttributes(ensembl), n=20L)
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
              values=ids, mart=ensembl)
## Cache found
colnames(desc)=c("ID", "Symbol", "Description", "Type")

##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)

DEG_LA4 <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG_LA4 <- subset(DEG_LA4, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG_LA4 <- DEG_LA4[order(-DEG_LA4$logFC), ]

##Filter out Genes -1<FC<1
DEG_LA4 <- subset(DEG_LA4, abs(DEG_LA4$logFC)>1)

##Save DE genes
write.table(DEG_LA4, "DEG_M_LAvM_NS_CDS_limmabatchcorrection_20171120.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG_LA4, "DEG_M_LAvM_NS_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)

##GM_LPS v. GM_NS

#eBayes finds an F-statistic from the set of t-statistics for that gene
GM_LPS.GM_NS.contr.mat <- makeContrasts(GM_LPSvGM_NS=(condGM_LPS-condGM_NS), levels=v$design)
GM_LPS.GM_NS.fit <- contrasts.fit(fit, GM_LPS.GM_NS.contr.mat)
GM_LPS.GM_NS.eb <- eBayes(GM_LPS.GM_NS.fit)
GM_LPS.GM_NS.topTab <- topTable(GM_LPS.GM_NS.eb, coef="GM_LPSvGM_NS", number=nrow(v$E))
GM_LPS.GM_NS.topTab <- cbind(rownames(GM_LPS.GM_NS.topTab), GM_LPS.GM_NS.topTab)
colnames(GM_LPS.GM_NS.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(GM_LPS.GM_NS.topTab) <- c(1:nrow(GM_LPS.GM_NS.topTab))

write.csv(GM_LPS.GM_NS.topTab, file="topTab_GM_LPSvGM_NS_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)

##Limit list to genes with an adjusted p value < 0.05
GM_LPS.GM_NS.sigGenes <- GM_LPS.GM_NS.topTab[GM_LPS.GM_NS.topTab$adj.P.Val <0.05, ]
length(GM_LPS.GM_NS.sigGenes$ID)
## [1] 653
##[1] 302

##Filter out rows with less than 2-fold change (log2 fold change of > 1)
GM_LPS.GM_NS.sigGenesFold1 <- subset(GM_LPS.GM_NS.sigGenes, abs(logFC) > 1)
length(GM_LPS.GM_NS.sigGenesFold1$ID)
## [1] 260
##[1] 197

##Filter out rows with less than 4-fold change (log2 fold change of > 2)
GM_LPS.GM_NS.sigGenesFold2 <- subset(GM_LPS.GM_NS.sigGenes, abs(logFC) > 2)
length(GM_LPS.GM_NS.sigGenesFold2$ID)
## [1] 99
##[1] 101

##Make an MA plot
sel <- GM_LPS.GM_NS.topTab$adj.P.Val < 0.05
top <- GM_LPS.GM_NS.topTab
sub <- paste("No. of sig. genes: ", sum(sel), "/", length(sel))
cpm <- v$E

plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
     main="GM_LPSvGM_NS adjusted",
     ylab="log FC", xlab="Average Expression",
     sub=sub)
points(rowMeans(cpm[top$ID, ])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")

dev.copy(pdf, "MAplot_GM_LPSvGM_NS_CDS_limmabatchcorrection_20171120.pdf", width=8, height=8)
## pdf 
##   3
dev.off()
## png 
##   2
dev.copy(png, "MAplot_GM_LPSvGM_NS_CDS_limmabatchcorrection_20171120.png", width=700, height=700)
## png 
##   3
dev.off()
## png 
##   2
GM_LPS.GM_NS.sigGenes <- GM_LPS.GM_NS.sigGenes[order(-GM_LPS.GM_NS.sigGenes$logFC), ]

##Annotate sigGenes list using Biomart
sigGenes <- GM_LPS.GM_NS.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
ids <- sigGenes$ID

##To see possibilities for attributes, use head(listAttributes(ensembl), n=20L)
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
              values=ids, mart=ensembl)
## Cache found
colnames(desc)=c("ID", "Symbol", "Description", "Type")

##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)

DEG <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG <- subset(DEG, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG <- DEG[order(-DEG$logFC), ]

##Filter out Genes -1<FC<1
DEG <- subset(DEG, abs(DEG$logFC)>1)

##Save DE genes
write.table(DEG, "DEG_GM_LPSvGM_NS_CDS_limmabatchcorrection_20171120.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG, "DEG_GM_LPSvGM_NS_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)

GM_LPS v GM_LP

#eBayes finds an F-statistic from the set of t-statistics for that gene
GM_LPS.GM_LP.contr.mat <- makeContrasts(GM_LPSvGM_LP=((condGM_LPS-condGM_NS)-(condGM_LP-condGM_NS)), levels=v$design)
GM_LPS.GM_LP.fit <- contrasts.fit(fit, GM_LPS.GM_LP.contr.mat)
GM_LPS.GM_LP.eb <- eBayes(GM_LPS.GM_LP.fit)
GM_LPS.GM_LP.topTab <- topTable(GM_LPS.GM_LP.eb, coef="GM_LPSvGM_LP", number=nrow(v$E))
GM_LPS.GM_LP.topTab <- cbind(rownames(GM_LPS.GM_LP.topTab), GM_LPS.GM_LP.topTab)
colnames(GM_LPS.GM_LP.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(GM_LPS.GM_LP.topTab) <- c(1:nrow(GM_LPS.GM_LP.topTab))

write.csv(GM_LPS.GM_LP.topTab, file="topTab_GM_LPSvGM_LP_CDS_limmabatchcorrection_20171023.csv", row.names=F, quote=F)

##Limit list to genes with an adjusted p value < 0.05
GM_LPS.GM_LP.sigGenes <- GM_LPS.GM_LP.topTab[GM_LPS.GM_LP.topTab$adj.P.Val <0.05, ]
length(GM_LPS.GM_LP.sigGenes$ID)
## [1] 350
##[1] 194

##Filter out rows with less than 2-fold change (log2 fold change of > 1)
GM_LPS.GM_LP.sigGenesFold1 <- subset(GM_LPS.GM_LP.sigGenes, abs(logFC) > 1)
length(GM_LPS.GM_LP.sigGenesFold1$ID)
## [1] 126
##[1] 114

##Filter out rows with less than 4-fold change (log2 fold change of > 2)
GM_LPS.GM_LP.sigGenesFold2 <- subset(GM_LPS.GM_LP.sigGenes, abs(logFC) > 2)
length(GM_LPS.GM_LP.sigGenesFold2$ID)
## [1] 28
##[1] 27

##Make an MA plot
sel <- GM_LPS.GM_LP.topTab$adj.P.Val < 0.05
top <- GM_LPS.GM_LP.topTab
sub <- paste("No. of sig. genes: ", sum(sel), "/", length(sel))
cpm <- v$E

plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
     main="GM_LPSvGM_LP adjusted",
     ylab="log FC", xlab="Average Expression",
     sub=sub)
points(rowMeans(cpm[top$ID, ])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")

dev.copy(pdf, "MAplot_GM_LPSvGM_LP_CDS_limmabatchcorrection_20171120.pdf", width=8, height=8)
## pdf 
##   3
dev.off()
## png 
##   2
dev.copy(png, "MAplot_GM_LPSvGM_LP_CDS_limmabatchcorrection_20171120.png", width=700,
         height=700)
## png 
##   3
dev.off()
## png 
##   2
GM_LPS.GM_LP.sigGenes <- GM_LPS.GM_LP.sigGenes[order(-GM_LPS.GM_LP.sigGenes$logFC), ]

##Annotate sigGenes list using Biomart
sigGenes <- GM_LPS.GM_LP.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
ids <- sigGenes$ID

##To see possibilities for attributes, use head(listAttributes(ensembl), n=20L)
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
              values=ids, mart=ensembl)
## Cache found
colnames(desc)=c("ID", "Symbol", "Description", "Type")

##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)

DEG <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG <- subset(DEG, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG <- DEG[order(-DEG$logFC), ]

##Filter out Genes -1<FC<1
DEG <- subset(DEG, abs(DEG$logFC)>1)

##Save DE genes
write.table(DEG, "DEG_GM_LPSvGM_LP_CDS_limmabatchcorrection_20171120.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG, "DEG_GM_LPSvGM_LP_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)

##GM_LPS v GM_LA

#eBayes finds an F-statistic from the set of t-statistics for that gene
GM_LPS.GM_LA.contr.mat <- makeContrasts(GM_LPSvGM_LA=((condGM_LPS-condGM_NS)-(condGM_LA-condGM_NS)), levels=v$design)
GM_LPS.GM_LA.fit <- contrasts.fit(fit, GM_LPS.GM_LA.contr.mat)
GM_LPS.GM_LA.eb <- eBayes(GM_LPS.GM_LA.fit)
GM_LPS.GM_LA.topTab <- topTable(GM_LPS.GM_LA.eb, coef="GM_LPSvGM_LA", number=nrow(v$E))
GM_LPS.GM_LA.topTab <- cbind(rownames(GM_LPS.GM_LA.topTab), GM_LPS.GM_LA.topTab)
colnames(GM_LPS.GM_LA.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(GM_LPS.GM_LA.topTab) <- c(1:nrow(GM_LPS.GM_LA.topTab))

write.csv(GM_LPS.GM_LA.topTab, file="topTab_GM_LPSvGM_LA_CDS_limmabatchcorrection_20171023.csv", row.names=F, quote=F)

##Limit list to genes with an adjusted p value < 0.05
GM_LPS.GM_LA.sigGenes <- GM_LPS.GM_LA.topTab[GM_LPS.GM_LA.topTab$adj.P.Val <0.05, ]
length(GM_LPS.GM_LA.sigGenes$ID)
## [1] 12
##[1] 17

##Filter out rows with less than 2-fold change (log2 fold change of > 1)
GM_LPS.GM_LA.sigGenesFold1 <- subset(GM_LPS.GM_LA.sigGenes, abs(logFC) > 1)
length(GM_LPS.GM_LA.sigGenesFold1$ID)
## [1] 7
##[1] 15

##Filter out rows with less than 4-fold change (log2 fold change of > 2)
GM_LPS.GM_LA.sigGenesFold2 <- subset(GM_LPS.GM_LA.sigGenes, abs(logFC) > 2)
length(GM_LPS.GM_LA.sigGenesFold2$ID)
## [1] 1
##[1] 7

##Make an MA plot
sel <- GM_LPS.GM_LA.topTab$adj.P.Val < 0.05
top <- GM_LPS.GM_LA.topTab
sub <- paste("No. of sig. genes: ", sum(sel),"/", length(sel))
cpm <- v$E

plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
     main="GM_LPSvGM_LA adjusted",
     ylab="log FC", xlab="Average Expression",
     sub=sub)
points(rowMeans(cpm[top$ID,])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")

dev.copy(pdf, "MAplot_GM_LPSvGM_LA_CDS_limmabatchcorrection_20171120.pdf", width=8, height=8)
## pdf 
##   3
dev.off()
## png 
##   2
dev.copy(png, "MAplot_GM_LPSvGM_LA_CDS_limmabatchcorrection_20171120.png", width=700, height=700)
## png 
##   3
dev.off()
## png 
##   2
GM_LPS.GM_LA.sigGenes <- GM_LPS.GM_LA.sigGenes[order(-GM_LPS.GM_LA.sigGenes$logFC), ]

##Annotate sigGenes list using Biomart
sigGenes <- GM_LPS.GM_LA.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
ids <- sigGenes$ID

##To see possibilities for attributes, use head(listAttributes(ensembl), n=20L)
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
              values=ids, mart=ensembl)
## Cache found
colnames(desc)=c("ID", "Symbol", "Description", "Type")

##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)

DEG <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG <- subset(DEG, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG <- DEG[order(-DEG$logFC), ]

##Filter out Genes -1<FC<1
DEG <- subset(DEG, abs(DEG$logFC)>1)

##Save DE genes
write.table(DEG, "DEG_GM_LPSvGM_LA_CDS_limmabatchcorrection_20171120.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG, "DEG_GM_LPSvGM_LA_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)

Venn Diagram of shared DEG between M-CSF LPS v LA v LP

Upregulated genes compared to NS

v_data <- list(DEG_LA4[DEG_LA4$logFC >0, ]$ID, DEG_LP4[DEG_LP4$logFC >0, ]$ID, DEG_LPS[DEG_LPS$logFC >0, ]$ID)
v_data <- Venn(v_data, SetNames=c("LPS+Ado", "LPS+PGE2", "LPS"), numberOfSets=3)
plot(v_data, doWeights=F)

##dev.copy(png, "VennDiagram Upregulated LPS LA LP", width=700, height=700)
##dev.off();

Downregulated genes compared to NS

v_data <- list(DEG_LA4[DEG_LA4$logFC <0, ]$ID, DEG_LP4[DEG_LP4$logFC <0, ]$ID, DEG_LPS[DEG_LPS$logFC <0, ]$ID)
v_data <- Venn(v_data, SetNames=c("LPS+Ado", "LPS+PGE2", "LPS"), numberOfSets=3)
plot(v_data, doWeights=F)

##dev.copy(png, "VennDiagram Downregulated LPS LA LP", width=700, height=700)
##dev.off();

Volcano Plots

M_LPS v M_LA

res <- read.table("topTab_M_LPSvM_LA_CDS_limmabatchcorrection_20171120.txt", header=TRUE)
head(res)
with(res, plot(logFC, -log10(P.Value), cex=0.8, pch=20, main="Volcano plot M_LPS v M_LA", xlim=c(-7, 7)))

## Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, adj.P.Val<.05 ), points(logFC, -log10(P.Value), cex=0.8, pch=20, col="violetred2"))
with(subset(res, abs(logFC)>1), points(logFC, -log10(P.Value), cex=0.8, pch=20, col="orange"))
with(subset(res, adj.P.Val<.05 & abs(logFC)>1), points(logFC, -log10(P.Value), cex=0.8, pch=20, col="turquoise3"))
##dev.copy(png, "VolcanoPlot M_LPS v M_LA", width=700, height=700)
##dev.off();

M_LPS v M_LP

res <- read.csv("topTab_M_LPSvM_LP_CDS_limmabatchcorrection_20171120rev.csv", header=TRUE)
head(res)
with(res, plot(logFC, -log10(P.Value), cex=0.8, pch=20, main="Volcano plot M_LPS v M_LP", xlim=c(-7, 7)))

## Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, adj.P.Val<.05 ), points(logFC, -log10(P.Value), cex=0.8, pch=20, col="violetred2"))
with(subset(res, abs(logFC)>1), points(logFC, -log10(P.Value), cex=0.8, pch=20, col="orange"))
with(subset(res, adj.P.Val<.05 & abs(logFC)>1), points(logFC, -log10(P.Value), cex=0.8, pch=20, col="turquoise3"))
##dev.copy(png, "VolcanoPlot M_LPS v M_LP", width=700, height=700)
##dev.off();

Rich Factor Graphs

GO terms shared LA LP

GOSharedpathways <- read.table("GO MF FDR .05 TOP 5 only-2.txt", header=TRUE, sep='\t')
GOSharedpathways <- GOSharedpathways[order(GOSharedpathways$p.Value, decreasing=TRUE), ]

colorRamp <- colorRampPalette(c("turquoise1", "mediumblue"))(50)
par(mar=c(5, 18, 3, 8))
plot(GOSharedpathways$Rich.Factor, c(1:5), axes=F,
     panel.first=abline(h=1:5, v=seq(0.1, 0.6, 0.1), col="grey90"),
     cex=as.numeric(GOSharedpathways$Gene.Number)/4,
     col=colorRamp[cut(as.numeric(GOSharedpathways$p.Value), breaks=50)],
     pch=20, ylab=" ", xlab="Rich Factor", main="Pathways Enriched",
     xlim=c(0, 0.04))
axis(side=2, at=1:5, labels=as.character(GOSharedpathways$GeneSet), las=2, cex.axis=0.5)
axis(side=1, tick=GOSharedpathways[["Rich.Factor"]])

KEGG LA

LApathways <- read.table("Cytoscape Pathways/LA v LPS.txt", header=TRUE, sep='\t')
LApathways <- LApathways[order(LApathways$p.Value, decreasing=TRUE), ]
#plot x v. y, change size, assign color to p-value
colorRamp <- colorRampPalette(c("turquoise1", "mediumblue"))(50)
par(mar=c(5, 18, 3, 8))
plot(LApathways$Rich.Factor, c(1:28), axes=F,
     panel.first=abline(h=1:28, v=seq(0.1, 0.4, 0.1), col="grey90"),
     cex=as.numeric(LApathways$Gene.Number)/10,
     col=colorRamp[cut(as.numeric(LApathways$p.Value), breaks=50)],
     pch=20, ylab=" ", xlab="Rich Factor", main="Pathways Enriched",
     xlim=c(0, 0.4))
axis(side=2, at=1:28, labels=as.character(LApathways$Pathway), las=2, cex.axis=0.8)
axis(side=1, tick=LApathways$Rich.Factor, pos=0.4)

GO LA

GOLApathways <- read.table("Cytoscape Pathways/GO BP LA whole network.txt", header=TRUE, sep='\t')
GOLApathways <- GOLApathways[order(GOLApathways$p.Value, decreasing=TRUE), ]
##plot x v. y, change size, assign color to p-value
colorRamp <- colorRampPalette(c("turquoise1", "mediumblue"))(50)
par(mar=c(5, 20, 3, 8))
plot(GOLApathways$Rich.Factor, c(1:57), axes=F,
     panel.first=abline(h=1:57, v=seq(0.1, 0.5, 0.1), col="grey90"),
     cex=as.numeric(GOLApathways$Gene.Number)/10,
     col=colorRamp[cut(as.numeric(GOLApathways$p.Value), breaks=50)],
     pch=20, ylab=" ", xlab="Rich Factor", main="Pathways Enriched",
     xlim=c(0, 0.5))
axis(side=2,at=1:57, labels=as.character(GOLApathways$Pathway), las=2, cex.axis=0.8)
axis(side=1, tick=GOLApathways$Rich.Factor, pos=0.4)

GO LP

GOLPpathways <- read.table("Cytoscape Pathways/GO_BP_LPvLPS.txt", header=TRUE, sep='\t')
GOLPpathways <- GOLPpathways[order(GOLPpathways$p.Value, decreasing=TRUE), ]

colorRamp <- colorRampPalette(c("turquoise1", "mediumblue"))(50)
par(mar=c(5, 18, 3, 8))
plot(GOLPpathways$Rich.Factor, c(1:35), axes=F,
     panel.first=abline(h=1:35, v=seq(0.1, 0.6, 0.1), col="grey90"),
     cex=as.numeric(GOLPpathways$Gene.Number)/20,
     col=colorRamp[cut(as.numeric(GOLPpathways$p.Value), breaks=50)],
     pch=20, ylab=" ", xlab="Rich Factor", main="Pathways Enriched",
     xlim=c(0, 0.6))
axis(side=2, at=1:35, labels=as.character(GOLPpathways$Pathway), las=2, cex.axis=0.5)
axis(side=1, tick=GOLPpathways$Rich.Factor, pos=0.4)