Human Macrophages: M-CSF v GM-CSF: LPS, LPS+Adenosine, LPS+PGE2

Libraries

library(devtools)
library(tximport)
library(biomaRt)
library(hpgltools)
library(DESeq2)
library(gplots)
library(cbcbSEQ)
library(RColorBrewer)
library(Vennerable)
library(edgeR)
library(calibrate)
library(scales)

Set working directory, import metadata and abundance files

design <- read.table('MetaData only 4 hour.txt', header=T, sep='\t')
files <- file.path("kallisto abundance files/", design$HPGL.Identifier, "abundance.tsv")
names(files) <- paste0("HPGL09", c(12:31, 42:60))

Convert transcript ID to gene ID

ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
tx2gene <- getBM(attributes=c("ensembl_transcript_id", "ensembl_gene_id", "chromosome_name"), mart=ensembl)

## Cache found

good_idx <- grepl(x=tx2gene[["chromosome_name"]], pattern="^[[:alnum:]]{1,2}$")
good_ones <- tx2gene[good_idx, -3]

Create counts table

txi.kallisto.tsv <- tximport(files, type="kallisto", tx2gene=good_ones, countsFromAbundance="lengthScaledTPM")

## Note: importing `abundance.h5` is typically faster than `abundance.tsv`

## reading in files with read_tsv

## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 
## transcripts missing from tx2gene: 14613
## summarizing abundance
## summarizing counts
## summarizing length

nrow(txi.kallisto.tsv$counts)

## [1] 34431

write.table(txi.kallisto.tsv$counts, "TPM_MvGM_CDS_20180711.txt", col.names=T, row.names=F, quote=F)
write.csv(txi.kallisto.tsv$counts, "TPM_MvGM_20180711.csv", row.names=T, quote=F)

Create DESeqDataSet

Not sure what this is actually doing

df <- data.frame(condition=paste(design$HPGL.Identifier, design$Growth, design$Stimulation, design$Patient, sep="_"))
rownames(df)=colnames(txi.kallisto.tsv$counts)
dds <- DESeqDataSetFromTximport(txi.kallisto.tsv, df, ~condition)

## using just counts from tximport

nrow(dds)

## [1] 34431

Bar Plot of Counts

par(mar=c(10, 4.5, 3.5, 1))
par(oma=c(0, 0, 0, 0))
barplot(colSums(txi.kallisto.tsv$counts), las=3, main='Raw Counts By Sample')

Box Plot of Counts

dds <- estimateSizeFactors(dds)
ncts <- counts(dds, normalized=TRUE)
y <- log(ncts + 1)

par(mar=c(5, 5, 2, 1))
par(oma=c(0, 0, 0, 0))
boxplot(y, names=colnames(txi.kallisto.tsv$counts), las=3, main='Per Sample Log of Size-factor Counts')

Heatmap of Pearson Correlation

datCor <- cor(txi.kallisto.tsv$counts)
heatmap.2(datCor, Rowv=NA, Colv=NA,
          margins=c(10, 10),
          labRow=df$condition,
          labCol=df$condition,
          dendrogram="none",
          scale="none",
          trace="none",
          srtCol=45, main='Pearson Correlation')

Median Pairwise Correlation

corM <- matrixStats::rowMedians(cor(txi.kallisto.tsv$counts))
qs <- quantile(corM, p=c(1, 3)/4)
iqr <- diff(qs)
outLimit <- qs[1] - 1.5 * iqr
ylim <- c(pmin(min(corM), outLimit), max(corM))
cond <- paste(design$Growth, design$Stimulation, sep="_")
col <- ifelse(cond=="M_NS", "gray60",
              ifelse(cond=="M_LPS", "deeppink3",
                     ifelse(cond=="M_LA", "darkseagreen2",
                            ifelse(cond=="M_LP", "lavender",
                                   ifelse(cond=="GM_NS", "lightpink1",
                                          ifelse(cond=="GM_LPS", "moccasin",
                                                 ifelse(cond=="GM_LA", "black",
                                                        ifelse(cond=="GM_LP", "springgreen4",
                                                               "blue2"))))))))
par(mar=c(5, 4.5, 2, 1))
plot(corM, xaxt="n", ylim=ylim, ylab="Median Pairwise Correlation", xlab="", main="", col=col, pch=16, cex=1.5)
axis(side=1, at=seq(along=corM), labels=colnames(txi.kallisto.tsv$counts), las=2)
abline(h=outLimit, lty=2)
abline(v=1:ncol(txi.kallisto.tsv$counts), lty=3, col="black")

Filter and Normalize Counts

filterCounts = function (counts, lib.size = NULL, thresh = 1, minSamples = 2) {
  cpms <- 2^log2CPM(counts, lib.size = lib.size)$y
  keep <- rowSums(cpms > thresh) >= minSamples
  counts <- counts[keep, ]
  counts
}
x <- table(design$Stimulation)
dim(txi.kallisto.tsv$counts)

## [1] 34431    39

counts <- filterCounts(txi.kallisto.tsv$counts, thresh=1, minSamples=min(x))
dim(counts)

## [1] 12858    39

countsSubQ <- qNorm(counts)
x <- log2CPM(countsSubQ)
s <- makeSVD(x$y)

PCA

pcRes(s$v, s$d, cond, design$Batch)[1:5, ]

##   propVar cumPropVar cond.R2 batch.R2
## 1   28.82      28.82   69.66    25.21
## 2   13.93      42.75   41.92    44.39
## 3   10.96      53.71   73.10    18.68
## 4    7.39      61.10    9.06    75.08
## 5    5.80      66.90    5.43    78.47

pcRes(s$v, s$d, cond, design$Patient)[1:5, ]

##   propVar cumPropVar cond.R2 batch.R2
## 1   28.82      28.82   69.66    30.95
## 2   13.93      42.75   41.92    46.07
## 3   10.96      53.71   73.10    20.98
## 4    7.39      61.10    9.06    80.68
## 5    5.80      66.90    5.43    91.16

Plot PC1 v PC2

condnum <- as.numeric(design$Stimulation)
condnum <- ifelse(condnum==4, "green",
                  ifelse(condnum==3, "lightblue",
                         ifelse(condnum==2, "pink",
                                ifelse(condnum==1, "purple", "black"))))
patnum <- as.numeric(design$Patient)
patnum <- ifelse(patnum==4, 21,
                 ifelse(patnum==3, 22,
                        ifelse(patnum==2, 23,
                               ifelse(patnum==1, 24,
                                      ifelse(patnum==5, 25, 1)))))
plotPC(s$v, s$d, col="black", pch=patnum, bg=condnum)

legend(x=0.2, y=0.2, legend=unique(design$Stimulation), pch=22, col=0,
       pt.bg=c("green","lightblue","pink","purple"), pt.cex=1.5, cex=0.5, bty="n")
legend(x=0.2, y=-0.1, legend=unique(design$Patient), pch=unique(patnum), col=0,
       pt.bg="gray90", pt.cex=1.5, cex=0.5, bty="n")

my_design <- design
rownames(my_design) <- my_design[[1]]
colnames(my_design) <- tolower(colnames(my_design))
my_design[["condition"]] <- my_design[["stimulation"]]
my_design[["batch"]] <- my_design[["patient"]]
data <- x[["y"]]

another <- plot_pca(data=data, design=my_design)

## Warning in RColorBrewer::brewer.pal(12, "Dark2"): n too large, allowed maximum for palette Dark2 is 8
## Returning the palette you asked for with that many colors

Euclidian Distance Heat Map

dists <- dist(t(counts))
mat <- as.matrix(dists)
rownames(mat) <- colnames(mat) <- cond
hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
vec.patient <- rainbow(nlevels(design$Patient), start=0, end=.8)
patient.color <- rep(0, length(design$Patient))
for (i in 1:length(design$Patient))
  patient.color[i] <- vec.patient[design$Patient[i]==levels(design$Patient)]
vec.condition <- c("green", "lightblue", "pink", "purple")
condition.color <- rep(0, length(design$Stimulation))
for (i in 1:length(design$Stimulation))
  condition.color[i] <- vec.condition[design$Stimulation[i]==levels(design$Stimulation)]

heatmap <- heatmap.2(mat, trace="none", col = rev(hmcol), margin=c(11, 11), ColSideColors=condition.color,
                     RowSideColors=patient.color, key="FALSE", srtCol=45)

Correct for Patient in Limma model

mod <- model.matrix(~design$Patient)
v <- voom(countsSubQ, mod)
fit <- lmFit(v)
newData <- residuals(fit, v)
s <- makeSVD(newData)

pcRes(s$v, s$d, cond, design$Batch)[1:5, ]

##   propVar cumPropVar cond.R2 batch.R2
## 1   36.04      36.04   95.47     5.38
## 2   17.81      53.85   91.87     6.83
## 3    8.05      61.90   29.00    25.52
## 4    5.39      67.29   78.12    21.01
## 5    3.06      70.35    9.63     8.75

pcRes(s$v, s$d, cond, design$Patient)[1:5, ]

##   propVar cumPropVar cond.R2 batch.R2
## 1   36.04      36.04   95.47        0
## 2   17.81      53.85   91.87        0
## 3    8.05      61.90   29.00        0
## 4    5.39      67.29   78.12        0
## 5    3.06      70.35    9.63        0

Plot PC1 and PC2 with patient correction

gronum <- as.numeric(design$Growth)
gronum <- ifelse(gronum==2, 19,
                 ifelse(gronum==1, 15, 1))
patnum <- as.numeric(design$Patient)
patnum <- ifelse(patnum==1, "pink",
                ifelse(patnum==2, "green",
                        ifelse(patnum==3, "blue",
                              ifelse(patnum==4, "yellow",
                                      ifelse(patnum==5, "black", "orange")))))
samplenum <- as.numeric(design$Stimulation)
samplenum <- ifelse(samplenum==4, "green",
                ifelse(samplenum==3, "blue",
                        ifelse(samplenum==1, "black",
                              ifelse(samplenum==2, "yellow", "grey"))))
plotPC(s$v, s$d, pch=gronum, col=samplenum, cex=2)

legend(x=0.05, y=0.3, legend=c("NS", "LPS", "LP", "LA"), pch=22, col=0,
       pt.bg=c("green", "blue", "black", "yellow"), pt.cex=1.5, cex=1, bty="n")
legend(x=0.05, y=0.1, legend=unique(design$Growth), pch=unique(gronum), col="black",
       pt.bg="black", pt.cex=1.0, cex=1, bty="n")

another_v2 <- plot_pca(data=newData, design=my_design)

## Warning in RColorBrewer::brewer.pal(12, "Dark2"): n too large, allowed maximum for palette Dark2 is 8
## Returning the palette you asked for with that many colors

another_v2$plot

Heatmap with patient correction??

dists <- dist(t(newData))
mat <- as.matrix(dists)
rownames(mat) <- colnames(mat) <- cond
heatmap <- heatmap.2(mat, trace="none", col = rev(hmcol), margin=c(11, 11), ColSideColors=condition.color,
                     RowSideColors=patient.color, key="FALSE", srtCol=45)

DE analysis

countsSubQ <- qNorm(counts)
patient <- design$Patient
mod <- model.matrix(~0+cond+patient, data=design)
voom(countsSubQ, mod, plot=TRUE)

## An object of class "EList"
## $E
##                  HPGL0912   HPGL0913  HPGL0914  HPGL0915   HPGL0916  HPGL0917
## ENSG00000000419  5.122947  5.1270568  5.024619  4.473779  4.7167417  5.045765
## ENSG00000000457  3.298952  3.1666572  3.050232  3.024500  3.1611143  3.545003
## ENSG00000000460  2.399415  2.5290360  2.601569  2.435568  2.6406067  2.059072
## ENSG00000000938 10.041971  9.2476291  9.983340  9.741007  9.3622950  9.819416
## ENSG00000000971 -2.495677 -0.7789675 -1.031357 -1.765510 -0.1931129 -1.383149
##                  HPGL0918  HPGL0919   HPGL0920  HPGL0921   HPGL0922 HPGL0923
## ENSG00000000419 5.2660078  5.299251  4.6931386  4.855997  5.1076756 5.370218
## ENSG00000000457 4.0989374  3.419582  3.6972710  4.106408  3.6040736 3.993299
## ENSG00000000460 1.6421080  2.213396  1.9492232  1.795535  2.6303621 2.609675
## ENSG00000000938 8.8113183  9.567086  9.2411743  9.073456  9.4465028 8.449657
## ENSG00000000971 0.4631257 -2.026586 -0.7620683 -1.940476 -0.4053762 1.069277
##                   HPGL0924   HPGL0925   HPGL0926   HPGL0927  HPGL0928  HPGL0929
## ENSG00000000419  5.2902881  4.8084011  5.3797921  5.0693210 5.3190135  4.968390
## ENSG00000000457  3.3724325  3.7876025  3.9403856  3.8096880 3.8024362  3.782917
## ENSG00000000460  2.2422507  2.5160580  1.9281870  2.4927624 2.1826216  2.360953
## ENSG00000000938  9.6454386  8.8593722  8.4518594  9.4225328 8.4160319  9.485652
## ENSG00000000971 -0.1317729 -0.0409803 -0.5388732 -0.1522648 0.6414257 -1.201507
##                  HPGL0930   HPGL0931  HPGL0942  HPGL0943  HPGL0944  HPGL0945
## ENSG00000000419 4.9496118  4.7623073  5.102506  5.085227  4.874986  4.821552
## ENSG00000000457 4.0062619  4.3876971  3.224322  3.306427  3.496547  3.745348
## ENSG00000000460 2.0858879  2.1599903  2.750830  2.455631  3.031471  2.247731
## ENSG00000000938 8.5774052  8.2823716 10.605055 10.368233 10.030512 10.403797
## ENSG00000000971 0.3219542 -0.5602922 -1.046947 -2.082412  1.534188  1.090770
##                   HPGL0946 HPGL0947   HPGL0948 HPGL0949 HPGL0950   HPGL0951
## ENSG00000000419  5.2504270 5.259219  5.0182394 5.253533 5.071247  5.4802737
## ENSG00000000457  3.5212117 3.316758  3.2636573 3.172614 3.587398  3.5583401
## ENSG00000000460  2.3072570 1.611214  2.3985966 2.789688 2.524471  2.6507018
## ENSG00000000938 10.3682327 9.790238 10.3477295 9.189028 9.698073 10.2340652
## ENSG00000000971 -0.2224197 1.582295 -0.3171797 2.535629 2.737828 -0.1407236
##                 HPGL0952  HPGL0953 HPGL0954 HPGL0955   HPGL0956 HPGL0957
## ENSG00000000419 5.276192  5.194370 5.100043 5.218377  5.5547019 5.241558
## ENSG00000000457 3.551739  3.364224 3.631766 3.900960  3.6441932 3.645204
## ENSG00000000460 2.332391  2.304077 2.944163 2.609675  2.1551877 1.914653
## ENSG00000000938 9.430261 10.179109 9.337255 9.247627 10.2083643 9.380096
## ENSG00000000971 2.251412 -2.233885 1.842173 2.605888 -0.6588511 2.057933
##                  HPGL0958 HPGL0959 HPGL0960
## ENSG00000000419  5.194370 5.326119 5.215988
## ENSG00000000457  3.364224 3.834283 3.953882
## ENSG00000000460  2.304077 3.124578 2.645245
## ENSG00000000938 10.179109 9.012222 9.083070
## ENSG00000000971 -2.233885 2.871823 3.217145
## 12853 more rows ...
## 
## $weights
##            [,1]      [,2]       [,3]      [,4]      [,5]      [,6]      [,7]
## [1,] 19.1059000 19.258043 18.7306002 17.801781 18.134656 19.730621 19.870590
## [2,] 10.0686874 10.557347  9.3679651 10.322597 11.535487 12.996538 13.503159
## [3,]  8.0202059  7.010903  7.7570491  8.713326  7.608745  6.087962  5.333521
## [4,] 22.3245686 22.798531 22.3502695 22.758846 22.787224 22.528289 23.007849
## [5,]  0.9630185  2.058562  0.7704206  1.865260  1.853443  1.009789  2.173354
##            [,8]      [,9]     [,10]     [,11]     [,12]     [,13]     [,14]
## [1,] 19.3574504 18.448827 18.777358 20.388771 20.526170 20.045140 19.178730
## [2,] 12.2387548 13.253788 14.517731 12.837533 13.342036 12.084195 13.096368
## [3,]  5.8931482  6.614489  5.783267  7.523290  6.579483  7.277250  8.182506
## [4,] 22.5562332 22.971908 22.997609 22.720587 23.161967 22.749266 23.134740
## [5,]  0.8062579  1.967092  1.954514  1.583556  3.684342  1.242177  3.294770
##          [,15]     [,16]     [,17]     [,18]     [,19]     [,20]     [,21]
## [1,] 19.493027 19.646916 19.796030 19.275722 18.364469 18.691121 19.614978
## [2,] 14.353982 13.865867 14.374896 13.107199 14.124509 15.400927 11.496517
## [3,]  7.137670  7.080653  6.197931  6.851029  7.701533  6.720204  8.183313
## [4,] 23.154211 22.799311 23.209435 22.828122 23.188854 23.203608 22.026152
## [5,]  3.271077  1.469362  3.370888  1.155981  3.019060  2.997721  1.666861
##          [,22]     [,23]     [,24]     [,25]     [,26]     [,27]     [,28]
## [1,] 19.244538 18.332296 18.658230 20.301537 20.446349 19.959742 19.087208
## [2,] 10.759964 11.753832 13.008774 11.125048 11.619008 10.396805 11.378765
## [3,]  7.916568  8.889067  7.764450  7.317115  6.401535  7.077906  7.959203
## [4,] 22.050056 22.420407 22.446872 22.218989 22.678985 22.243941 22.639588
## [5,]  1.304883  3.498360  3.473078  2.775562  6.771951  2.117703  6.093439
##          [,29]     [,30]     [,31]     [,32]     [,33]     [,34]     [,35]
## [1,] 19.399628 20.627465 20.751759 20.305567 19.458187 19.772874 20.818403
## [2,] 12.619953 12.184772 12.692794 11.438331 12.442525 13.705569 12.590627
## [3,]  6.943077  8.197591  7.166980  7.930801  8.904568  7.778342  7.677800
## [4,] 22.667759 22.319107 22.792388 22.344713 22.752717 22.781085 22.379319
## [5,]  6.051935  2.310816  5.630274  1.782177  5.059937  5.024486  2.514568
##          [,36]     [,37]     [,38]     [,39]
## [1,] 20.944138 20.520953 19.697478 19.988184
## [2,] 13.103076 11.841095 12.859185 14.110883
## [3,]  6.714326  7.428369  8.347636  7.285165
## [4,] 22.858414 22.405679 22.819376 22.847500
## [5,]  6.135406  1.930129  5.522918  5.485154
## 12853 more rows ...
## 
## $design
##   condGM_LA condGM_LP condGM_LPS condGM_NS condM_LA condM_LP condM_LPS condM_NS
## 1         0         0          0         0        0        0         0        1
## 2         0         0          0         0        0        0         0        1
## 3         0         0          0         0        0        0         0        1
## 4         0         0          0         0        0        0         0        1
## 5         0         0          0         0        0        0         0        1
##   patientH2 patientH3 patientH4 patientH5
## 1         0         0         0         0
## 2         1         0         0         0
## 3         0         1         0         0
## 4         0         0         1         0
## 5         0         0         0         1
## 34 more rows ...
## 
## $targets
##          lib.size
## HPGL0912 20054310
## HPGL0913 20054287
## HPGL0914 20054287
## HPGL0915 20054311
## HPGL0916 20054294
## 34 more rows ...

v <- voom(countsSubQ, mod)
fit <- lmFit(v)

M_LPS v M_NS

eBayes finds an F-statistic from the set of t-statistics for that gene

M_LPS.M_NS.contr.mat <- makeContrasts(M_LPSvM_NS=(condM_LPS-condM_NS), levels=v$design)
M_LPS.M_NS.fit <- contrasts.fit(fit, M_LPS.M_NS.contr.mat)
M_LPS.M_NS.eb <- eBayes(M_LPS.M_NS.fit)
M_LPS.M_NS.topTab <- topTable(M_LPS.M_NS.eb, coef="M_LPSvM_NS", number=nrow(v$E))
M_LPS.M_NS.topTab <- cbind(rownames(M_LPS.M_NS.topTab), M_LPS.M_NS.topTab)
colnames(M_LPS.M_NS.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(M_LPS.M_NS.topTab) <- c(1:nrow(M_LPS.M_NS.topTab))

write.csv(M_LPS.M_NS.topTab, file="topTab_M_LPSvM_NS_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)

Limit list to genes with an adjusted p value < 0.05

M_LPS.M_NS.sigGenes <- M_LPS.M_NS.topTab[M_LPS.M_NS.topTab$adj.P.Val <0.05, ]
length(M_LPS.M_NS.sigGenes$ID)

## [1] 4840

Filter out rows with less than 2-fold change (log2 fold change of > 1)

M_LPS.M_NS.sigGenesFold1 <- subset(M_LPS.M_NS.sigGenes, abs(logFC) > 1)
length(M_LPS.M_NS.sigGenesFold1$ID)

## [1] 1350

33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M35;17M35;17M35;17M35;17M35;17M35;17MFilter out rows with less than 4-fold change (log2 fold change of > 2)

M_LPS.M_NS.sigGenesFold2 <- subset(M_LPS.M_NS.sigGenes, abs(logFC) > 2)
length(M_LPS.M_NS.sigGenesFold2$ID)

## [1] 431

Make an MA plot

sel <- M_LPS.M_NS.topTab$adj.P.Val < 0.05
top <- M_LPS.M_NS.topTab
sub <- paste("No. of sig. genes: ", sum(sel), "/", length(sel))
cpm <- v$E

plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
     main="M_LPSvM_NS adjusted",
     ylab="log FC", xlab="Average Expression",
     sub=sub)
points(rowMeans(cpm[top$ID, ])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")

Annotate sigGenes list using Biomart

M_LPS.M_NS.sigGenes <- M_LPS.M_NS.sigGenes[order(-M_LPS.M_NS.sigGenes$logFC), ]

sigGenes <- M_LPS.M_NS.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
ids <- sigGenes$ID

desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id", values=ids, mart=ensembl)

## Cache found

colnames(desc)=c("ID", "Symbol", "Description", "Type")

desc$Description <- gsub(",", "", desc$Description)

DEG_LPS <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG_LPS <- subset(DEG_LPS, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG_LPS <- DEG_LPS[order(-DEG_LPS$logFC), ]

##Filter out Genes -1<FC<1
DEG_LPS <- subset(DEG_LPS, abs(DEG_LPS$logFC)>1)

##Save DE genes
write.table(DEG_LPS, "DEG_M_LPSvM_NS_CDS_limmabatchcorrection_20171120.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG_LPS, "DEG_M_LPSvM_NS_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)

M_LPS v M_LA

#eBayes finds an F-statistic from the set of t-statistics for that gene
M_LPS.M_LA.contr.mat <- makeContrasts(M_LPSvM_LA=((condM_LA-condM_NS)-(condM_LPS-condM_NS)), levels=v$design)
M_LPS.M_LA.fit <- contrasts.fit(fit, M_LPS.M_LA.contr.mat)
M_LPS.M_LA.eb <- eBayes(M_LPS.M_LA.fit)
M_LPS.M_LA.topTab <- topTable(M_LPS.M_LA.eb, coef="M_LPSvM_LA", number=nrow(v$E))
M_LPS.M_LA.topTab <- cbind(rownames(M_LPS.M_LA.topTab), M_LPS.M_LA.topTab)
colnames(M_LPS.M_LA.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(M_LPS.M_LA.topTab) <- c(1:nrow(M_LPS.M_LA.topTab))

write.csv(M_LPS.M_LA.topTab, file="topTab_M_LPSvM_LA_CDS_limmabatchcorrection_20171120rev.csv", row.names=F, quote=F)

##Limit list to genes with an adjusted p value < 0.05
M_LPS.M_LA.sigGenes <- M_LPS.M_LA.topTab[M_LPS.M_LA.topTab$adj.P.Val <0.05, ]
length(M_LPS.M_LA.sigGenes$ID)

## [1] 955

##[1] 617

##Filter out rows with less than 2-fold change (log2 fold change of > 1)
M_LPS.M_LA.sigGenesFold1 <- subset(M_LPS.M_LA.sigGenes, abs(logFC) > 1)
length(M_LPS.M_LA.sigGenesFold1$ID)

## [1] 256

##[1] 240

##Filter out rows with less than 4-fold change (log2 fold change of > 2)
M_LPS.M_LA.sigGenesFold2 <- subset(M_LPS.M_LA.sigGenes, abs(logFC) > 2)
length(M_LPS.M_LA.sigGenesFold2$ID)

## [1] 57

##[1] 69

##Make an MA plot
sel <- M_LPS.M_LA.topTab$adj.P.Val < 0.05
top <- M_LPS.M_LA.topTab
sub <- paste("No. of sig. genes: ", sum(sel), "/", length(sel))
cpm <- v$E

plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
     main="M_LPSvM_LA adjusted",
     ylab="log FC", xlab="Average Expression",
     sub=sub)
points(rowMeans(cpm[top$ID, ])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")

dev.copy(pdf, "MAplot_M_LPSvM_LA_CDS_limmabatchcorrection_20171120rev.pdf", width=8, height=8)

## pdf 
##   3

dev.off()

## png 
##   2

dev.copy(png, "MAplot_M_LPSvM_LA_CDS_limmabatchcorrection_20171120rev.png", width=700,
         height=700)

## png 
##   3

dev.off()

## png 
##   2

M_LPS.M_LA.sigGenes <- M_LPS.M_LA.sigGenes[order(-M_LPS.M_LA.sigGenes$logFC), ]

##Annotate sigGenes list using Biomart
sigGenes <- M_LPS.M_LA.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
ids <- sigGenes$ID

desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
              values=ids, mart=ensembl)

## Cache found

colnames(desc)=c("ID", "Symbol", "Description", "Type")

##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)

DEG <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG <- subset(DEG, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG <- DEG[order(-DEG$logFC), ]

##Filter out Genes -1<FC<1
DEG <- subset(DEG, abs(DEG$logFC)>1)

##Save DE genes
write.table(DEG, "DEG_M_LPSvM_LA_CDS_limmabatchcorrection_20171120rev.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG, "DEG_M_LPSvM_LA_CDS_limmabatchcorrection_20171120rev.csv", row.names=F, quote=F)

M_LPS v M_LP

#eBayes finds an F-statistic from the set of t-statistics for that gene
M_LPS.M_LP.contr.mat <- makeContrasts(M_LPSvM_LP=((condM_LP-condM_NS)-(condM_LPS-condM_NS)), levels=v$design)
M_LPS.M_LP.fit <- contrasts.fit(fit, M_LPS.M_LP.contr.mat)
M_LPS.M_LP.eb <- eBayes(M_LPS.M_LP.fit)
M_LPS.M_LP.topTab <- topTable(M_LPS.M_LP.eb, coef="M_LPSvM_LP", number=nrow(v$E))
M_LPS.M_LP.topTab <- cbind(rownames(M_LPS.M_LP.topTab), M_LPS.M_LP.topTab)
colnames(M_LPS.M_LP.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(M_LPS.M_LP.topTab) <- c(1:nrow(M_LPS.M_LP.topTab))

write.csv(M_LPS.M_LP.topTab, file="topTab_M_LPSvM_LP_CDS_limmabatchcorrection_20171120rev.csv",
          row.names=F, quote=F)

##Limit list to genes with an adjusted p value < 0.05
M_LPS.M_LP.sigGenes <- M_LPS.M_LP.topTab[M_LPS.M_LP.topTab$adj.P.Val <0.05, ]
length(M_LPS.M_LP.sigGenes$ID)

## [1] 1993

##[1] 1473

##Filter out rows with less than 2-fold change (log2 fold change of > 1)
M_LPS.M_LP.sigGenesFold1 <- subset(M_LPS.M_LP.sigGenes, abs(logFC) > 1)
length(M_LPS.M_LP.sigGenesFold1$ID)

## [1] 489

##[1] 467

##Filter out rows with less than 4-fold change (log2 fold change of > 2)
M_LPS.M_LP.sigGenesFold2 <- subset(M_LPS.M_LP.sigGenes, abs(logFC) > 2)
length(M_LPS.M_LP.sigGenesFold2$ID)

## [1] 91

##[1] 125

##Make an MA plot
sel <- M_LPS.M_LP.topTab$adj.P.Val < 0.05
top <- M_LPS.M_LP.topTab
sub <- paste("No. of sig. genes: ", sum(sel),"/",length(sel))
cpm <- v$E

plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
     main="M_LPSvM_LP adjusted",
     ylab="log FC", xlab="Average Expression",
     sub=sub)
points(rowMeans(cpm[top$ID, ])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")

dev.copy(pdf, "MAplot_M_LPSvM_LP_CDS_limmabatchcorrection_20171120rev.pdf", width=8, height=8)

## pdf 
##   3

dev.off()

## png 
##   2

dev.copy(png, "MAplot_M_LPSvM_LP_CDS_limmabatchcorrection_20171120rev.png", width=700,
         height=700)

## png 
##   3

dev.off()

## png 
##   2

M_LPS.M_LP.sigGenes <- M_LPS.M_LP.sigGenes[order(-M_LPS.M_LP.sigGenes$logFC), ]

##Annotate sigGenes list using Biomart
sigGenes <- M_LPS.M_LP.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
ids <- sigGenes$ID

##To see possibilities for attributes, use head(listAttributes(ensembl), n=20L)
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
              values=ids, mart=ensembl)

## Cache found

colnames(desc)=c("ID", "Symbol", "Description", "Type")

##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)

DEG <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG <- subset(DEG, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG <- DEG[order(-DEG$logFC), ]

##Filter out Genes -1<FC<1
DEG <- subset(DEG, abs(DEG$logFC)>1)

##Save DE genes
write.table(DEG, "DEG_M_LPSvM_LP_CDS_limmabatchcorrection_20171120rev.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG, "DEG_M_LPSvM_LP_CDS_limmabatchcorrection_20171120rev.csv", row.names=F, quote=F)

M_LP v M_NS

##eBayes finds an F-statistic from the set of t-statistics for that gene
M_LP.M_NS.contr.mat <- makeContrasts(M_LPvM_NS=(condM_LP-condM_NS), levels=v$design)
M_LP.M_NS.fit <- contrasts.fit(fit, M_LP.M_NS.contr.mat)
M_LP.M_NS.eb <- eBayes(M_LP.M_NS.fit)
M_LP.M_NS.topTab <- topTable(M_LP.M_NS.eb, coef="M_LPvM_NS", number=nrow(v$E))
M_LP.M_NS.topTab <- cbind(rownames(M_LP.M_NS.topTab), M_LP.M_NS.topTab)
colnames(M_LP.M_NS.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(M_LP.M_NS.topTab) <- c(1:nrow(M_LP.M_NS.topTab))

write.csv(M_LP.M_NS.topTab, file="topTab_M_LPvM_NS_CDS_limmabatchcorrection_20171018.csv", row.names=F, quote=F)

##Limit list to genes with an adjusted p value < 0.05
M_LP.M_NS.sigGenes <- M_LP.M_NS.topTab[M_LP.M_NS.topTab$adj.P.Val <0.05, ]
length(M_LP.M_NS.sigGenes$ID)

## [1] 4603

##[1] 4386

##Filter out rows with less than 2-fold change (log2 fold change of > 1)
M_LP.M_NS.sigGenesFold1 <- subset(M_LP.M_NS.sigGenes, abs(logFC) > 1)
length(M_LP.M_NS.sigGenesFold1$ID)

## [1] 1422

##[1] 1455

##Filter out rows with less than 4-fold change (log2 fold change of > 2)
M_LP.M_NS.sigGenesFold2 <- subset(M_LP.M_NS.sigGenes, abs(logFC) > 2)
length(M_LP.M_NS.sigGenesFold2$ID)

## [1] 448

##[1] 519

##Make an MA plot
sel <- M_LP.M_NS.topTab$adj.P.Val < 0.05
top <- M_LP.M_NS.topTab
sub <- paste("No. of sig. genes: ", sum(sel), "/", length(sel))
cpm <- v$E

plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
     main="M_LPvM_NS adjusted",
     ylab="log FC", xlab="Average Expression",
     sub=sub)
points(rowMeans(cpm[top$ID,])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")

dev.copy(pdf, "MAplot_M_LPvM_NS_CDS_limmabatchcorrection_20171018.pdf", width=8, height=8)

## pdf 
##   3

dev.off()

## png 
##   2

dev.copy(png, "MAplot_M_LPvM_NS_CDS_limmabatchcorrection_20171018.png", width=700, height=700)

## png 
##   3

dev.off()

## png 
##   2

M_LP.M_NS.sigGenes <- M_LP.M_NS.sigGenes[order(-M_LP.M_NS.sigGenes$logFC), ]

##Annotate sigGenes list using Biomart
sigGenes <- M_LP.M_NS.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
ids <- sigGenes$ID

##To see possibilities for attributes, use head(listAttributes(ensembl), n=20L)
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
              values=ids, mart=ensembl)

## Cache found

colnames(desc)=c("ID", "Symbol", "Description", "Type")

##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)

DEG_LP4 <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG_LP4 <- subset(DEG_LP4, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG_LP4 <- DEG_LP4[order(-DEG_LP4$logFC), ]

##Filter out Genes -1<FC<1
DEG_LP4 <- subset(DEG_LP4, abs(DEG_LP4$logFC)>1)

##Save DE genes
write.table(DEG_LP4, "DEG_M_LPvM_NS_CDS_limmabatchcorrection_20171018.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG_LP4, "DEG_M_LPvM_NS_CDS_limmabatchcorrection_20171018.csv", row.names=F, quote=F)

M_LA v M_NS

#eBayes finds an F-statistic from the set of t-statistics for that gene
M_LA.M_NS.contr.mat <- makeContrasts(M_LAvM_NS=(condM_LA-condM_NS), levels=v$design)
M_LA.M_NS.fit <- contrasts.fit(fit, M_LA.M_NS.contr.mat)
M_LA.M_NS.eb <- eBayes(M_LA.M_NS.fit)
M_LA.M_NS.topTab <- topTable(M_LA.M_NS.eb, coef="M_LAvM_NS", number=nrow(v$E))
M_LA.M_NS.topTab <- cbind(rownames(M_LA.M_NS.topTab), M_LA.M_NS.topTab)
colnames(M_LA.M_NS.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(M_LA.M_NS.topTab) <- c(1:nrow(M_LA.M_NS.topTab))

write.csv(M_LA.M_NS.topTab, file="topTab_M_LAvM_NS_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)

##Limit list to genes with an adjusted p value < 0.05
M_LA.M_NS.sigGenes <- M_LA.M_NS.topTab[M_LA.M_NS.topTab$adj.P.Val <0.05, ]
length(M_LA.M_NS.sigGenes$ID)

## [1] 4922

##[1] 617

##Filter out rows with less than 2-fold change (log2 fold change of > 1)
M_LA.M_NS.sigGenesFold1 <- subset(M_LA.M_NS.sigGenes, abs(logFC) > 1)
length(M_LA.M_NS.sigGenesFold1$ID)

## [1] 1493

##[1] 240

##Filter out rows with less than 4-fold change (log2 fold change of > 2)
M_LA.M_NS.sigGenesFold2 <- subset(M_LA.M_NS.sigGenes, abs(logFC) > 2)
length(M_LA.M_NS.sigGenesFold2$ID)

## [1] 478

##[1] 69

##Make an MA plot
sel <- M_LA.M_NS.topTab$adj.P.Val < 0.05
top <- M_LA.M_NS.topTab
sub <- paste("No. of sig. genes: ", sum(sel), "/", length(sel))
cpm <- v$E

plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
     main="M_LAvM_NS adjusted",
     ylab="log FC", xlab="Average Expression",
     sub=sub)
points(rowMeans(cpm[top$ID, ])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")

dev.copy(pdf, "MAplot_M_LAvM_NS_CDS_limmabatchcorrection_20171120.pdf", width=8, height=8)

## pdf 
##   3

dev.off()

## png 
##   2

dev.copy(png, "MAplot_M_LAvM_NS_CDS_limmabatchcorrection_20171120.png", width=700, height=700)

## png 
##   3

dev.off()

## png 
##   2

M_LA.M_NS.sigGenes <- M_LA.M_NS.sigGenes[order(-M_LA.M_NS.sigGenes$logFC), ]

##Annotate sigGenes list using Biomart
sigGenes <- M_LA.M_NS.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
ids <- sigGenes$ID

##To see possibilities for attributes, use head(listAttributes(ensembl), n=20L)
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
              values=ids, mart=ensembl)

## Cache found

colnames(desc)=c("ID", "Symbol", "Description", "Type")

##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)

DEG_LA4 <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG_LA4 <- subset(DEG_LA4, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG_LA4 <- DEG_LA4[order(-DEG_LA4$logFC), ]

##Filter out Genes -1<FC<1
DEG_LA4 <- subset(DEG_LA4, abs(DEG_LA4$logFC)>1)

##Save DE genes
write.table(DEG_LA4, "DEG_M_LAvM_NS_CDS_limmabatchcorrection_20171120.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG_LA4, "DEG_M_LAvM_NS_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)

##GM_LPS v. GM_NS

#eBayes finds an F-statistic from the set of t-statistics for that gene
GM_LPS.GM_NS.contr.mat <- makeContrasts(GM_LPSvGM_NS=(condGM_LPS-condGM_NS), levels=v$design)
GM_LPS.GM_NS.fit <- contrasts.fit(fit, GM_LPS.GM_NS.contr.mat)
GM_LPS.GM_NS.eb <- eBayes(GM_LPS.GM_NS.fit)
GM_LPS.GM_NS.topTab <- topTable(GM_LPS.GM_NS.eb, coef="GM_LPSvGM_NS", number=nrow(v$E))
GM_LPS.GM_NS.topTab <- cbind(rownames(GM_LPS.GM_NS.topTab), GM_LPS.GM_NS.topTab)
colnames(GM_LPS.GM_NS.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(GM_LPS.GM_NS.topTab) <- c(1:nrow(GM_LPS.GM_NS.topTab))

write.csv(GM_LPS.GM_NS.topTab, file="topTab_GM_LPSvGM_NS_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)

##Limit list to genes with an adjusted p value < 0.05
GM_LPS.GM_NS.sigGenes <- GM_LPS.GM_NS.topTab[GM_LPS.GM_NS.topTab$adj.P.Val <0.05, ]
length(GM_LPS.GM_NS.sigGenes$ID)

## [1] 653

##[1] 302

##Filter out rows with less than 2-fold change (log2 fold change of > 1)
GM_LPS.GM_NS.sigGenesFold1 <- subset(GM_LPS.GM_NS.sigGenes, abs(logFC) > 1)
length(GM_LPS.GM_NS.sigGenesFold1$ID)

## [1] 260

##[1] 197

##Filter out rows with less than 4-fold change (log2 fold change of > 2)
GM_LPS.GM_NS.sigGenesFold2 <- subset(GM_LPS.GM_NS.sigGenes, abs(logFC) > 2)
length(GM_LPS.GM_NS.sigGenesFold2$ID)

## [1] 99

##[1] 101

##Make an MA plot
sel <- GM_LPS.GM_NS.topTab$adj.P.Val < 0.05
top <- GM_LPS.GM_NS.topTab
sub <- paste("No. of sig. genes: ", sum(sel), "/", length(sel))
cpm <- v$E

plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
     main="GM_LPSvGM_NS adjusted",
     ylab="log FC", xlab="Average Expression",
     sub=sub)
points(rowMeans(cpm[top$ID, ])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")

dev.copy(pdf, "MAplot_GM_LPSvGM_NS_CDS_limmabatchcorrection_20171120.pdf", width=8, height=8)

## pdf 
##   3

dev.off()

## png 
##   2

dev.copy(png, "MAplot_GM_LPSvGM_NS_CDS_limmabatchcorrection_20171120.png", width=700, height=700)

## png 
##   3

dev.off()

## png 
##   2

GM_LPS.GM_NS.sigGenes <- GM_LPS.GM_NS.sigGenes[order(-GM_LPS.GM_NS.sigGenes$logFC), ]

##Annotate sigGenes list using Biomart
sigGenes <- GM_LPS.GM_NS.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
ids <- sigGenes$ID

##To see possibilities for attributes, use head(listAttributes(ensembl), n=20L)
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
              values=ids, mart=ensembl)

## Cache found

colnames(desc)=c("ID", "Symbol", "Description", "Type")

##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)

DEG <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG <- subset(DEG, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG <- DEG[order(-DEG$logFC), ]

##Filter out Genes -1<FC<1
DEG <- subset(DEG, abs(DEG$logFC)>1)

##Save DE genes
write.table(DEG, "DEG_GM_LPSvGM_NS_CDS_limmabatchcorrection_20171120.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG, "DEG_GM_LPSvGM_NS_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)

GM_LPS v GM_LP

#eBayes finds an F-statistic from the set of t-statistics for that gene
GM_LPS.GM_LP.contr.mat <- makeContrasts(GM_LPSvGM_LP=((condGM_LPS-condGM_NS)-(condGM_LP-condGM_NS)), levels=v$design)
GM_LPS.GM_LP.fit <- contrasts.fit(fit, GM_LPS.GM_LP.contr.mat)
GM_LPS.GM_LP.eb <- eBayes(GM_LPS.GM_LP.fit)
GM_LPS.GM_LP.topTab <- topTable(GM_LPS.GM_LP.eb, coef="GM_LPSvGM_LP", number=nrow(v$E))
GM_LPS.GM_LP.topTab <- cbind(rownames(GM_LPS.GM_LP.topTab), GM_LPS.GM_LP.topTab)
colnames(GM_LPS.GM_LP.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(GM_LPS.GM_LP.topTab) <- c(1:nrow(GM_LPS.GM_LP.topTab))

write.csv(GM_LPS.GM_LP.topTab, file="topTab_GM_LPSvGM_LP_CDS_limmabatchcorrection_20171023.csv", row.names=F, quote=F)

##Limit list to genes with an adjusted p value < 0.05
GM_LPS.GM_LP.sigGenes <- GM_LPS.GM_LP.topTab[GM_LPS.GM_LP.topTab$adj.P.Val <0.05, ]
length(GM_LPS.GM_LP.sigGenes$ID)

## [1] 350

##[1] 194

##Filter out rows with less than 2-fold change (log2 fold change of > 1)
GM_LPS.GM_LP.sigGenesFold1 <- subset(GM_LPS.GM_LP.sigGenes, abs(logFC) > 1)
length(GM_LPS.GM_LP.sigGenesFold1$ID)

## [1] 126

##[1] 114

##Filter out rows with less than 4-fold change (log2 fold change of > 2)
GM_LPS.GM_LP.sigGenesFold2 <- subset(GM_LPS.GM_LP.sigGenes, abs(logFC) > 2)
length(GM_LPS.GM_LP.sigGenesFold2$ID)

## [1] 28

##[1] 27

##Make an MA plot
sel <- GM_LPS.GM_LP.topTab$adj.P.Val < 0.05
top <- GM_LPS.GM_LP.topTab
sub <- paste("No. of sig. genes: ", sum(sel), "/", length(sel))
cpm <- v$E

plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
     main="GM_LPSvGM_LP adjusted",
     ylab="log FC", xlab="Average Expression",
     sub=sub)
points(rowMeans(cpm[top$ID, ])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")

dev.copy(pdf, "MAplot_GM_LPSvGM_LP_CDS_limmabatchcorrection_20171120.pdf", width=8, height=8)

## pdf 
##   3

dev.off()

## png 
##   2

dev.copy(png, "MAplot_GM_LPSvGM_LP_CDS_limmabatchcorrection_20171120.png", width=700,
         height=700)

## png 
##   3

dev.off()

## png 
##   2

GM_LPS.GM_LP.sigGenes <- GM_LPS.GM_LP.sigGenes[order(-GM_LPS.GM_LP.sigGenes$logFC), ]

##Annotate sigGenes list using Biomart
sigGenes <- GM_LPS.GM_LP.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
ids <- sigGenes$ID

##To see possibilities for attributes, use head(listAttributes(ensembl), n=20L)
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
              values=ids, mart=ensembl)

## Cache found

colnames(desc)=c("ID", "Symbol", "Description", "Type")

##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)

DEG <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG <- subset(DEG, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG <- DEG[order(-DEG$logFC), ]

##Filter out Genes -1<FC<1
DEG <- subset(DEG, abs(DEG$logFC)>1)

##Save DE genes
write.table(DEG, "DEG_GM_LPSvGM_LP_CDS_limmabatchcorrection_20171120.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG, "DEG_GM_LPSvGM_LP_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)

##GM_LPS v GM_LA

#eBayes finds an F-statistic from the set of t-statistics for that gene
GM_LPS.GM_LA.contr.mat <- makeContrasts(GM_LPSvGM_LA=((condGM_LPS-condGM_NS)-(condGM_LA-condGM_NS)), levels=v$design)
GM_LPS.GM_LA.fit <- contrasts.fit(fit, GM_LPS.GM_LA.contr.mat)
GM_LPS.GM_LA.eb <- eBayes(GM_LPS.GM_LA.fit)
GM_LPS.GM_LA.topTab <- topTable(GM_LPS.GM_LA.eb, coef="GM_LPSvGM_LA", number=nrow(v$E))
GM_LPS.GM_LA.topTab <- cbind(rownames(GM_LPS.GM_LA.topTab), GM_LPS.GM_LA.topTab)
colnames(GM_LPS.GM_LA.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(GM_LPS.GM_LA.topTab) <- c(1:nrow(GM_LPS.GM_LA.topTab))

write.csv(GM_LPS.GM_LA.topTab, file="topTab_GM_LPSvGM_LA_CDS_limmabatchcorrection_20171023.csv", row.names=F, quote=F)

##Limit list to genes with an adjusted p value < 0.05
GM_LPS.GM_LA.sigGenes <- GM_LPS.GM_LA.topTab[GM_LPS.GM_LA.topTab$adj.P.Val <0.05, ]
length(GM_LPS.GM_LA.sigGenes$ID)

## [1] 12

##[1] 17

##Filter out rows with less than 2-fold change (log2 fold change of > 1)
GM_LPS.GM_LA.sigGenesFold1 <- subset(GM_LPS.GM_LA.sigGenes, abs(logFC) > 1)
length(GM_LPS.GM_LA.sigGenesFold1$ID)

## [1] 7

##[1] 15

##Filter out rows with less than 4-fold change (log2 fold change of > 2)
GM_LPS.GM_LA.sigGenesFold2 <- subset(GM_LPS.GM_LA.sigGenes, abs(logFC) > 2)
length(GM_LPS.GM_LA.sigGenesFold2$ID)

## [1] 1

##[1] 7

##Make an MA plot
sel <- GM_LPS.GM_LA.topTab$adj.P.Val < 0.05
top <- GM_LPS.GM_LA.topTab
sub <- paste("No. of sig. genes: ", sum(sel),"/", length(sel))
cpm <- v$E

plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
     main="GM_LPSvGM_LA adjusted",
     ylab="log FC", xlab="Average Expression",
     sub=sub)
points(rowMeans(cpm[top$ID,])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")

dev.copy(pdf, "MAplot_GM_LPSvGM_LA_CDS_limmabatchcorrection_20171120.pdf", width=8, height=8)

## pdf 
##   3

dev.off()

## png 
##   2

dev.copy(png, "MAplot_GM_LPSvGM_LA_CDS_limmabatchcorrection_20171120.png", width=700, height=700)

## png 
##   3

dev.off()

## png 
##   2

GM_LPS.GM_LA.sigGenes <- GM_LPS.GM_LA.sigGenes[order(-GM_LPS.GM_LA.sigGenes$logFC), ]

##Annotate sigGenes list using Biomart
sigGenes <- GM_LPS.GM_LA.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
ids <- sigGenes$ID

##To see possibilities for attributes, use head(listAttributes(ensembl), n=20L)
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
              values=ids, mart=ensembl)

## Cache found

colnames(desc)=c("ID", "Symbol", "Description", "Type")

##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)

DEG <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG <- subset(DEG, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG <- DEG[order(-DEG$logFC), ]

##Filter out Genes -1<FC<1
DEG <- subset(DEG, abs(DEG$logFC)>1)

##Save DE genes
write.table(DEG, "DEG_GM_LPSvGM_LA_CDS_limmabatchcorrection_20171120.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG, "DEG_GM_LPSvGM_LA_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)

Venn Diagram of shared DEG between M-CSF LPS v LA v LP

Upregulated genes compared to NS

v_data <- list(DEG_LA4[DEG_LA4$logFC >0, ]$ID, DEG_LP4[DEG_LP4$logFC >0, ]$ID, DEG_LPS[DEG_LPS$logFC >0, ]$ID)
v_data <- Venn(v_data, SetNames=c("LPS+Ado", "LPS+PGE2", "LPS"), numberOfSets=3)
plot(v_data, doWeights=F)

##dev.copy(png, "VennDiagram Upregulated LPS LA LP", width=700, height=700)
##dev.off();

Downregulated genes compared to NS

v_data <- list(DEG_LA4[DEG_LA4$logFC <0, ]$ID, DEG_LP4[DEG_LP4$logFC <0, ]$ID, DEG_LPS[DEG_LPS$logFC <0, ]$ID)
v_data <- Venn(v_data, SetNames=c("LPS+Ado", "LPS+PGE2", "LPS"), numberOfSets=3)
plot(v_data, doWeights=F)

##dev.copy(png, "VennDiagram Downregulated LPS LA LP", width=700, height=700)
##dev.off();

Volcano Plots

M_LPS v M_LA

res <- read.table("topTab_M_LPSvM_LA_CDS_limmabatchcorrection_20171120.txt", header=TRUE)
head(res)
with(res, plot(logFC, -log10(P.Value), cex=0.8, pch=20, main="Volcano plot M_LPS v M_LA", xlim=c(-7, 7)))

## Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, adj.P.Val<.05 ), points(logFC, -log10(P.Value), cex=0.8, pch=20, col="violetred2"))
with(subset(res, abs(logFC)>1), points(logFC, -log10(P.Value), cex=0.8, pch=20, col="orange"))
with(subset(res, adj.P.Val<.05 & abs(logFC)>1), points(logFC, -log10(P.Value), cex=0.8, pch=20, col="turquoise3"))
##dev.copy(png, "VolcanoPlot M_LPS v M_LA", width=700, height=700)
##dev.off();

M_LPS v M_LP

res <- read.csv("topTab_M_LPSvM_LP_CDS_limmabatchcorrection_20171120rev.csv", header=TRUE)
head(res)
with(res, plot(logFC, -log10(P.Value), cex=0.8, pch=20, main="Volcano plot M_LPS v M_LP", xlim=c(-7, 7)))

## Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, adj.P.Val<.05 ), points(logFC, -log10(P.Value), cex=0.8, pch=20, col="violetred2"))
with(subset(res, abs(logFC)>1), points(logFC, -log10(P.Value), cex=0.8, pch=20, col="orange"))
with(subset(res, adj.P.Val<.05 & abs(logFC)>1), points(logFC, -log10(P.Value), cex=0.8, pch=20, col="turquoise3"))
##dev.copy(png, "VolcanoPlot M_LPS v M_LP", width=700, height=700)
##dev.off();

Rich Factor Graphs

GO terms shared LA LP

GOSharedpathways <- read.table("GO MF FDR .05 TOP 5 only-2.txt", header=TRUE, sep='\t')
GOSharedpathways <- GOSharedpathways[order(GOSharedpathways$p.Value, decreasing=TRUE), ]

colorRamp <- colorRampPalette(c("turquoise1", "mediumblue"))(50)
par(mar=c(5, 18, 3, 8))
plot(GOSharedpathways$Rich.Factor, c(1:5), axes=F,
     panel.first=abline(h=1:5, v=seq(0.1, 0.6, 0.1), col="grey90"),
     cex=as.numeric(GOSharedpathways$Gene.Number)/4,
     col=colorRamp[cut(as.numeric(GOSharedpathways$p.Value), breaks=50)],
     pch=20, ylab=" ", xlab="Rich Factor", main="Pathways Enriched",
     xlim=c(0, 0.04))
axis(side=2, at=1:5, labels=as.character(GOSharedpathways$GeneSet), las=2, cex.axis=0.5)
axis(side=1, tick=GOSharedpathways[["Rich.Factor"]])

KEGG LA

LApathways <- read.table("Cytoscape Pathways/LA v LPS.txt", header=TRUE, sep='\t')
LApathways <- LApathways[order(LApathways$p.Value, decreasing=TRUE), ]
#plot x v. y, change size, assign color to p-value
colorRamp <- colorRampPalette(c("turquoise1", "mediumblue"))(50)
par(mar=c(5, 18, 3, 8))
plot(LApathways$Rich.Factor, c(1:28), axes=F,
     panel.first=abline(h=1:28, v=seq(0.1, 0.4, 0.1), col="grey90"),
     cex=as.numeric(LApathways$Gene.Number)/10,
     col=colorRamp[cut(as.numeric(LApathways$p.Value), breaks=50)],
     pch=20, ylab=" ", xlab="Rich Factor", main="Pathways Enriched",
     xlim=c(0, 0.4))
axis(side=2, at=1:28, labels=as.character(LApathways$Pathway), las=2, cex.axis=0.8)
axis(side=1, tick=LApathways$Rich.Factor, pos=0.4)

GO LA

GOLApathways <- read.table("Cytoscape Pathways/GO BP LA whole network.txt", header=TRUE, sep='\t')
GOLApathways <- GOLApathways[order(GOLApathways$p.Value, decreasing=TRUE), ]
##plot x v. y, change size, assign color to p-value
colorRamp <- colorRampPalette(c("turquoise1", "mediumblue"))(50)
par(mar=c(5, 20, 3, 8))
plot(GOLApathways$Rich.Factor, c(1:57), axes=F,
     panel.first=abline(h=1:57, v=seq(0.1, 0.5, 0.1), col="grey90"),
     cex=as.numeric(GOLApathways$Gene.Number)/10,
     col=colorRamp[cut(as.numeric(GOLApathways$p.Value), breaks=50)],
     pch=20, ylab=" ", xlab="Rich Factor", main="Pathways Enriched",
     xlim=c(0, 0.5))
axis(side=2,at=1:57, labels=as.character(GOLApathways$Pathway), las=2, cex.axis=0.8)
axis(side=1, tick=GOLApathways$Rich.Factor, pos=0.4)

GO LP

GOLPpathways <- read.table("Cytoscape Pathways/GO_BP_LPvLPS.txt", header=TRUE, sep='\t')
GOLPpathways <- GOLPpathways[order(GOLPpathways$p.Value, decreasing=TRUE), ]

colorRamp <- colorRampPalette(c("turquoise1", "mediumblue"))(50)
par(mar=c(5, 18, 3, 8))
plot(GOLPpathways$Rich.Factor, c(1:35), axes=F,
     panel.first=abline(h=1:35, v=seq(0.1, 0.6, 0.1), col="grey90"),
     cex=as.numeric(GOLPpathways$Gene.Number)/20,
     col=colorRamp[cut(as.numeric(GOLPpathways$p.Value), breaks=50)],
     pch=20, ylab=" ", xlab="Rich Factor", main="Pathways Enriched",
     xlim=c(0, 0.6))
axis(side=2, at=1:35, labels=as.character(GOLPpathways$Pathway), las=2, cex.axis=0.5)
axis(side=1, tick=GOLPpathways$Rich.Factor, pos=0.4)

Human Macrophages: M-CSF v GM-CSF: LPS, LPS+Adenosine, LPS+PGE2

Kajal Hamidzadeh

July 11, 2018

Libraries

Set working directory, import metadata and abundance files

Convert transcript ID to gene ID

Create counts table

Create DESeqDataSet

Bar Plot of Counts

Box Plot of Counts

Heatmap of Pearson Correlation

Median Pairwise Correlation

Filter and Normalize Counts

PCA

Plot PC1 v PC2

Euclidian Distance Heat Map

Correct for Patient in Limma model

Plot PC1 and PC2 with patient correction

Heatmap with patient correction??

DE analysis

M_LPS v M_NS

M_LPS v M_LA

M_LPS v M_LP

M_LP v M_NS

M_LA v M_NS

GM_LPS v GM_LP

Venn Diagram of shared DEG between M-CSF LPS v LA v LP

Volcano Plots

Rich Factor Graphs

GO terms shared LA LP

KEGG LA

GO LA

GO LP