In this version of Kajal’s document, I will repeat each step with some changes to see if I can make things a little easier for future modification.
I will therefore leave Kajal’s work unchanged and follow each block with an alternative.
design <- read.table('MetaData only 4 hour.txt', header=T, sep='\t')
design[["Patient"]] <- as.factor(design[["Patient"]])
design[["Stimulation"]] <- as.factor(design[["Stimulation"]])
design[["Batch"]] <- as.factor(design[["Batch"]])
design[["Growth"]] <- as.factor(design[["Growth"]])
files <- file.path("kallisto abundance files/", design$HPGL.Identifier, "abundance.tsv")
names(files) <- paste0("HPGL09", c(12:31, 42:60))
rownames(design) <- design[[1]]
stim_design_idx <- design[["Stimulation"]] != "NS"
stim_design <- design[stim_design_idx, ]
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
tx2gene <- getBM(attributes=c("ensembl_transcript_id", "ensembl_gene_id", "chromosome_name"), mart=ensembl)
## Cache found
## Note: importing `abundance.h5` is typically faster than `abundance.tsv`
## reading in files with read_tsv
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
## transcripts missing from tx2gene: 14613
## summarizing abundance
## summarizing counts
## summarizing length
## [1] 34431
write.table(txi.kallisto.tsv$counts, "TPM_MvGM_CDS_20180711.txt", col.names=T, row.names=F, quote=F)
write.csv(txi.kallisto.tsv$counts, "TPM_MvGM_20180711.csv", row.names=T, quote=F)
stim_counts <- txi.kallisto.tsv$counts
stim_counts_idx <- colnames(stim_counts) %in% rownames(stim_design)
stim_counts <- stim_counts[, stim_counts_idx]
This is a potentially important difference in how I and Kajal treated the data. This is because I created the expressionset without using ‘lengthScaledTPM’, but let tximport use the raw counts so that the various normalizations are not affected. E.g. using these scaled numbers results in less variance in the data and may lead to some confusion for the downstream tools (edger/limma/deseq/etc). On the other hand, it may lead to cleaner plots, I am not sure.
Not sure what this is actually doing
df <- data.frame(condition=paste(design$HPGL.Identifier, design$Growth,
design$Stimulation, design$Patient, sep="_"))
rownames(df)=colnames(txi.kallisto.tsv$counts)
dds <- DESeqDataSetFromTximport(txi.kallisto.tsv, df, ~condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using just counts from tximport
## [1] 34431
my_design <- design
rownames(my_design) <- design[[1]]
my_design[["condition"]] <- my_design[["Stimulation"]]
my_design[["file"]] <- glue::glue("preprocessing/{rownames(my_design)}/abundance.tsv")
colnames(my_design) <- tolower(colnames(my_design))
gene_info <- load_biomart_annotations(host="useast.ensembl.org")$annotation
## The biomart annotations file already exists, loading from it.
rownames(gene_info) <- make.names(gene_info[["ensembl_gene_id"]], unique=TRUE)
tx_gene_map <- gene_info[, c("ensembl_transcript_id", "ensembl_gene_id")]
hs_expt <- create_expt(metadata=my_design, gene_info=gene_info, tx_gene_map=tx_gene_map)
## Reading the sample metadata.
## The sample definitions comprises: 39 rows(samples) and 8 columns(metadata fields).
## Reading count tables.
## Using the transcript to gene mapping.
## Reading kallisto data with tximport.
## Finished reading count data.
## Matched 34431 annotations and counts.
## Bringing together the count matrix and gene information.
## The mapped IDs are not the rownames of your gene information, changing them now.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 34431 rows and 39 columns.
hs_expt <- set_expt_batches(hs_expt, fact="growth")
stim_expt <- subset_expt(hs_expt, subset="stimulation!='NS'")
## Using a subset expression.
## Warning in if (class(subset_design[[col]]) == "factor") {: the condition has
## length > 1 and only the first element will be used
## There were 39, now there are 30 samples.
par(mar=c(10, 4.5, 3.5, 1))
par(oma=c(0, 0, 0, 0))
barplot(colSums(txi.kallisto.tsv$counts), las=3, main='Raw Counts By Sample')
## condition min 1st median mean 3rd max
## 1: NS 14327794 16797487 17086171 18433743 17427077 25536756
## 2: LPS 14788518 17005543 18507594 19201561 20567000 26005826
## 3: LA 16014957 18662352 19800400 21804785 23207183 35215275
## 4: LP 15864738 17917203 19813259 20710341 22113833 28681140
dds <- estimateSizeFactors(dds)
ncts <- counts(dds, normalized=TRUE)
y <- log(ncts + 1)
par(mar=c(5, 5, 2, 1))
par(oma=c(0, 0, 0, 0))
boxplot(y, names=colnames(txi.kallisto.tsv$counts), las=3, main='Per Sample Log of Size-factor Counts')
dds_stim <- estimateSizeFactors(dds_stim)
stim_ncts <- counts(dds_stim, normalized=TRUE)
stim_y <- log(stim_ncts + 1)
par(mar=c(5, 5, 2, 1))
par(oma=c(0, 0, 0, 0))
boxplot(stim_y, names=colnames(stim_counts), las=3,
main='Per Sample Log of Size-factor Counts')
## This data will benefit from being displayed on the log scale.
## If this is not desired, set scale='raw'
## Some entries are 0. We are on log scale, adding 1 to the data.
## Changed 645787 zero count features.
datCor <- cor(txi.kallisto.tsv$counts)
heatmap.2(datCor, Rowv=NA, Colv=NA,
margins=c(10, 10),
labRow=df$condition,
labCol=df$condition,
dendrogram="none",
scale="none",
trace="none",
srtCol=45, main='Pearson Correlation')
stim_datCor <- cor(stim_counts)
heatmap.2(stim_datCor, Rowv=NA, Colv=NA,
margins=c(10, 10),
labRow=df$condition,
labCol=df$condition,
dendrogram="none",
scale="none",
trace="none",
srtCol=45, main='Pearson Correlation')
corM <- matrixStats::rowMedians(cor(txi.kallisto.tsv$counts))
qs <- quantile(corM, p=c(1, 3)/4)
iqr <- diff(qs)
outLimit <- qs[1] - 1.5 * iqr
ylim <- c(pmin(min(corM), outLimit), max(corM))
cond <- paste(design$Growth, design$Stimulation, sep="_")
col <- ifelse(cond=="M_NS", "gray60",
ifelse(cond=="M_LPS", "deeppink3",
ifelse(cond=="M_LA", "darkseagreen2",
ifelse(cond=="M_LP", "lavender",
ifelse(cond=="GM_NS", "lightpink1",
ifelse(cond=="GM_LPS", "moccasin",
ifelse(cond=="GM_LA", "black",
ifelse(cond=="GM_LP", "springgreen4",
"blue2"))))))))
par(mar=c(5, 4.5, 2, 1))
plot(corM, xaxt="n", ylim=ylim, ylab="Median Pairwise Correlation", xlab="", main="", col=col, pch=16, cex=1.5)
axis(side=1, at=seq(along=corM), labels=colnames(txi.kallisto.tsv$counts), las=2)
abline(h=outLimit, lty=2)
abline(v=1:ncol(txi.kallisto.tsv$counts), lty=3, col="black")
stim_corM <- matrixStats::rowMedians(cor(stim_counts))
stim_qs <- quantile(stim_corM, p=c(1, 3)/4)
stim_iqr <- diff(stim_qs)
stim_outLimit <- stim_qs[1] - (1.5 * iqr)
stim_ylim <- c(pmin(min(stim_corM), stim_outLimit), max(stim_corM))
stim_cond <- paste(stim_design$Growth, stim_design$Stimulation, sep="_")
col <- ifelse(cond=="M_NS", "gray60",
ifelse(cond=="M_LPS", "deeppink3",
ifelse(cond=="M_LA", "darkseagreen2",
ifelse(cond=="M_LP", "lavender",
ifelse(cond=="GM_NS", "lightpink1",
ifelse(cond=="GM_LPS", "moccasin",
ifelse(cond=="GM_LA", "black",
ifelse(cond=="GM_LP", "springgreen4",
"blue2"))))))))
par(mar=c(5, 4.5, 2, 1))
plot(stim_corM, xaxt="n", ylim=ylim, ylab="Median Pairwise Correlation", xlab="", main="", col=col, pch=16, cex=1.5)
axis(side=1, at=seq(along=stim_corM), labels=colnames(stim_counts), las=2)
abline(h=stim_outLimit, lty=2)
abline(v=1:ncol(stim_counts), lty=3, col="black")
## Performing correlation.
## Performing correlation.
filterCounts = function (counts, lib.size = NULL, thresh = 1, minSamples = 2) {
cpms <- 2^log2CPM(counts, lib.size = lib.size)$y
keep <- rowSums(cpms > thresh) >= minSamples
counts <- counts[keep, ]
counts
}
x_table <- table(design$Stimulation)
dim(txi.kallisto.tsv$counts)
## [1] 34431 39
## [1] 12858 39
countsSubQ <- qNorm(counts)
x <- log2CPM(countsSubQ)
s <- makeSVD(x$y)
stim_table <- table(stim_design$Stimulation)
stim_counts <- filterCounts(stim_counts, thresh=1, minSamples=min(stim_table))
dim(stim_counts)
## [1] 34431 30
## propVar cumPropVar cond.R2 batch.R2
## 1 28.82 28.82 69.66 25.21
## 2 13.93 42.75 41.92 44.39
## 3 10.96 53.71 73.10 18.68
## 4 7.39 61.10 9.06 75.08
## 5 5.80 66.90 5.43 78.47
## propVar cumPropVar cond.R2 batch.R2
## 1 28.82 28.82 69.66 30.95
## 2 13.93 42.75 41.92 46.07
## 3 10.96 53.71 73.10 20.98
## 4 7.39 61.10 9.06 80.68
## 5 5.80 66.90 5.43 91.16
## propVar cumPropVar cond.R2 batch.R2
## 1 19.48 19.48 63.12 32.88
## 2 11.01 30.49 26.93 54.19
## 3 6.91 37.40 1.42 70.89
## 4 5.79 43.19 2.32 70.10
## 5 5.08 48.27 4.32 81.90
## propVar cumPropVar cond.R2 batch.R2
## 1 19.48 19.48 63.12 35.52
## 2 11.01 30.49 26.93 53.86
## 3 6.91 37.40 1.42 88.21
## 4 5.79 43.19 2.32 94.87
## 5 5.08 48.27 4.32 82.44
condnum <- as.numeric(as.factor(design$Stimulation))
condnum <- ifelse(condnum==4, "green",
ifelse(condnum==3, "lightblue",
ifelse(condnum==2, "pink",
ifelse(condnum==1, "purple", "black"))))
patnum <- as.numeric(as.factor(design$Patient))
patnum <- ifelse(patnum==4, 21,
ifelse(patnum==3, 22,
ifelse(patnum==2, 23,
ifelse(patnum==1, 24,
ifelse(patnum==5, 25, 1)))))
cbcbSEQ::plotPC(s$v, s$d, col="black", pch=patnum, bg=condnum)
legend(x=0.2, y=0.2, legend=unique(design$Stimulation), pch=22, col=0,
pt.bg=c("green","lightblue","pink","purple"), pt.cex=1.5, cex=0.5, bty="n")
legend(x=0.2, y=-0.1, legend=unique(design$Patient), pch=unique(patnum), col=0,
pt.bg="gray90", pt.cex=1.5, cex=0.5, bty="n")
stim_condnum <- as.numeric(as.factor(stim_design$Stimulation))
stim_condnum <- ifelse(stim_condnum==4, "green",
ifelse(stim_condnum==3, "lightblue",
ifelse(stim_condnum==2, "pink",
ifelse(stim_condnum==1, "purple", "black"))))
stim_patnum <- as.numeric(as.factor(stim_design$Patient))
stim_patnum <- ifelse(stim_patnum==4, 21,
ifelse(stim_patnum==3, 22,
ifelse(stim_patnum==2, 23,
ifelse(stim_patnum==1, 24,
ifelse(stim_patnum==5, 25, 1)))))
cbcbSEQ::plotPC(stim_s$v, stim_s$d, col="black", pch=stim_patnum, bg=stim_condnum)
legend(x=0.2, y=0.2, legend=unique(stim_design$Stimulation), pch=22, col=0,
pt.bg=c("green","lightblue","pink","purple"), pt.cex=1.5, cex=0.5, bty="n")
legend(x=0.2, y=-0.1, legend=unique(stim_design$Patient), pch=unique(stim_patnum), col=0,
pt.bg="gray90", pt.cex=1.5, cex=0.5, bty="n")
## This function will replace the expt$expressionset slot with:
## log2(quant(cbcb(data)))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 21885 low-count genes (12546 remaining).
## Step 2: normalizing the data with quant.
## Step 3: not converting the data.
## Step 4: transforming the data with log2.
## transform_counts: Found 841 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
## Not putting labels on the plot.
## This function will replace the expt$expressionset slot with:
## log2(quant(cbcb(data)))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 22066 low-count genes (12365 remaining).
## Step 2: normalizing the data with quant.
## Step 3: not converting the data.
## Step 4: transforming the data with log2.
## transform_counts: Found 644 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
## Not putting labels on the plot.
dists <- dist(t(counts))
mat <- as.matrix(dists)
rownames(mat) <- colnames(mat) <- cond
hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
vec.patient <- rainbow(nlevels(design$Patient), start=0, end=.8)
patient.color <- rep(0, length(design$Patient))
for (i in 1:length(design$Patient))
patient.color[i] <- vec.patient[design$Patient[i]==levels(design$Patient)]
vec.condition <- c("green", "lightblue", "pink", "purple")
condition.color <- rep(0, length(design$Stimulation))
for (i in 1:length(design$Stimulation))
condition.color[i] <- vec.condition[design$Stimulation[i]==levels(design$Stimulation)]
heatmap <- heatmap.2(mat, trace="none", col = rev(hmcol), margin=c(11, 11), ColSideColors=condition.color,
RowSideColors=patient.color, key="FALSE", srtCol=45)
stim_dists <- dist(t(stim_counts))
stim_mat <- as.matrix(stim_dists)
rownames(stim_mat) <- colnames(stim_mat) <- stim_cond
stim_hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
stim_vec.patient <- rainbow(nlevels(stim_design$Patient), start=0, end=.8)
stim_patient.color <- rep(0, length(stim_design$Patient))
for (i in 1:length(stim_design$Patient))
stim_patient.color[i] <- stim_vec.patient[stim_design$Patient[i]==levels(stim_design$Patient)]
stim_vec.condition <- c("green", "lightblue", "pink", "purple")
stim_condition.color <- rep(0, length(stim_design$Stimulation))
for (i in 1:length(stim_design$Stimulation))
stim_condition.color[i] <- stim_vec.condition[stim_design$Stimulation[i]==levels(stim_design$Stimulation)]
heatmap <- heatmap.2(stim_mat, trace="none", col = rev(hmcol), margin=c(11, 11),
ColSideColors=stim_condition.color,
RowSideColors=stim_patient.color, key="FALSE", srtCol=45)
mod <- model.matrix(~design$Patient)
v <- voom(countsSubQ, mod)
fit <- lmFit(v)
newData <- residuals(fit, v)
s <- makeSVD(newData)
pcRes(s$v, s$d, cond, design$Batch)[1:5, ]
## propVar cumPropVar cond.R2 batch.R2
## 1 36.04 36.04 95.47 5.38
## 2 17.81 53.85 91.87 6.83
## 3 8.05 61.90 29.00 25.52
## 4 5.39 67.29 78.12 21.01
## 5 3.06 70.35 9.63 8.75
## propVar cumPropVar cond.R2 batch.R2
## 1 36.04 36.04 95.47 0
## 2 17.81 53.85 91.87 0
## 3 8.05 61.90 29.00 0
## 4 5.39 67.29 78.12 0
## 5 3.06 70.35 9.63 0
stim_table <- table(stim_design$Stimulation)
stim_counts <- filterCounts(stim_counts, thresh=2, minSamples=min(stim_table))
dim(stim_counts)
## [1] 34431 30
stim_countsSubQ <- qNorm(stim_counts)
stim_mod <- model.matrix(~stim_design$Patient)
stim_v <- voom(stim_countsSubQ, stim_mod)
stim_fit <- lmFit(stim_v)
stim_newData <- residuals(stim_fit, stim_v)
stim_s <- makeSVD(stim_newData)
pcRes(stim_s$v, stim_s$d, stim_cond, stim_design$Batch)[1:5, ]
## propVar cumPropVar cond.R2 batch.R2
## 1 23.26 23.26 95.02 13.72
## 2 8.03 31.29 18.28 26.78
## 3 6.01 37.30 81.15 22.10
## 4 4.60 41.90 13.36 4.61
## 5 4.35 46.25 6.15 28.65
## propVar cumPropVar cond.R2 batch.R2
## 1 23.26 23.26 95.02 0
## 2 8.03 31.29 18.28 0
## 3 6.01 37.30 81.15 0
## 4 4.60 41.90 13.36 0
## 5 4.35 46.25 6.15 0
gronum <- as.numeric(as.factor(design$Growth))
gronum <- ifelse(gronum==2, 19,
ifelse(gronum==1, 15, 1))
patnum <- as.numeric(as.factor(design$Patient))
patnum <- ifelse(patnum==1, "pink",
ifelse(patnum==2, "green",
ifelse(patnum==3, "blue",
ifelse(patnum==4, "yellow",
ifelse(patnum==5, "black", "orange")))))
samplenum <- as.numeric(as.factor(design$Stimulation))
samplenum <- ifelse(samplenum==4, "green",
ifelse(samplenum==3, "blue",
ifelse(samplenum==1, "black",
ifelse(samplenum==2, "yellow", "grey"))))
plotPC(s$v, s$d, pch=gronum, col=samplenum, cex=2)
legend(x=0.05, y=0.3, legend=c("NS", "LPS", "LP", "LA"), pch=22, col=0,
pt.bg=c("green", "blue", "black", "yellow"), pt.cex=1.5, cex=1, bty="n")
legend(x=0.05, y=0.1, legend=unique(design$Growth), pch=unique(gronum), col="black",
pt.bg="black", pt.cex=1.0, cex=1, bty="n")
stim_gronum <- as.numeric(as.factor(stim_design$Growth))
stim_gronum <- ifelse(stim_gronum==2, 19,
ifelse(stim_gronum==1, 15, 1))
stim_patnum <- as.numeric(as.factor(stim_design$Patient))
stim_patnum <- ifelse(stim_patnum==1, "pink",
ifelse(stim_patnum==2, "green",
ifelse(stim_patnum==3, "blue",
ifelse(stim_patnum==4, "yellow",
ifelse(stim_patnum==5, "black", "orange")))))
stim_samplenum <- as.numeric(as.factor(stim_design$Stimulation))
stim_samplenum <- ifelse(stim_samplenum==3, "green",
ifelse(stim_samplenum==2, "blue",
ifelse(stim_samplenum==1, "black", "grey")))
plotPC(stim_s$v, stim_s$d, pch=stim_gronum, col=stim_samplenum, cex=2)
legend(x=0.05, y=0.3, legend=c("LPS", "LP", "LA"), pch=22, col=0,
pt.bg=c("green", "blue", "black", "yellow"), pt.cex=1.5, cex=1, bty="n")
legend(x=0.05, y=0.1, legend=unique(stim_design$Growth), pch=unique(stim_gronum), col="black",
pt.bg="black", pt.cex=1.0, cex=1, bty="n")
##stim_counts <- filterCounts(stim_counts, thresh=1, minSamples=min(x))
##dim(stim_counts)
##stim_countsSubQ <- qNorm(stim_counts)
##stim_x <- log2CPM(stim_countsSubQ)
##stim_s <- makeSVD(stim_x$y)
hss <- set_expt_batches(stim_expt, fact="patient")
hss <- sm(normalize_expt(hss, filter=TRUE))
hss <- sm(normalize_expt(hss, norm="quant"))
hss <- sm(normalize_expt(hss, convert="cpm"))
hss <- sm(normalize_expt(hss, transform="log2"))
hss <- sm(normalize_expt(hss, batch="limma"))
hss <- sm(set_expt_batches(hss, fact="growth"))
plot_pca(hss, plot_labels=FALSE)$plot
## Not putting labels on the plot.
dists <- dist(t(newData))
mat <- as.matrix(dists)
rownames(mat) <- colnames(mat) <- cond
heatmap <- heatmap.2(mat, trace="none", col = rev(hmcol), margin=c(11, 11),
ColSideColors=condition.color,
RowSideColors=patient.color, key="FALSE", srtCol=45)
countsSubQ <- qNorm(counts)
patient <- design$Patient
mod <- model.matrix(~0+cond+patient, data=design)
voom(countsSubQ, mod, plot=TRUE)
## An object of class "EList"
## $E
## HPGL0912 HPGL0913 HPGL0914 HPGL0915 HPGL0916 HPGL0917 HPGL0918
## ENSG00000000419 5.123 5.127 5.025 4.474 4.7167 5.046 5.2660
## ENSG00000000457 3.299 3.167 3.050 3.025 3.1611 3.545 4.0989
## ENSG00000000460 2.399 2.529 2.602 2.436 2.6406 2.059 1.6421
## ENSG00000000938 10.042 9.248 9.983 9.741 9.3623 9.819 8.8113
## ENSG00000000971 -2.496 -0.779 -1.031 -1.766 -0.1931 -1.383 0.4631
## HPGL0919 HPGL0920 HPGL0921 HPGL0922 HPGL0923 HPGL0924 HPGL0925
## ENSG00000000419 5.299 4.6931 4.856 5.1077 5.370 5.2903 4.80840
## ENSG00000000457 3.420 3.6973 4.106 3.6041 3.993 3.3724 3.78760
## ENSG00000000460 2.213 1.9492 1.796 2.6304 2.610 2.2423 2.51606
## ENSG00000000938 9.567 9.2412 9.073 9.4465 8.450 9.6454 8.85937
## ENSG00000000971 -2.027 -0.7621 -1.940 -0.4054 1.069 -0.1318 -0.04098
## HPGL0926 HPGL0927 HPGL0928 HPGL0929 HPGL0930 HPGL0931 HPGL0942
## ENSG00000000419 5.3798 5.0693 5.3190 4.968 4.950 4.7623 5.103
## ENSG00000000457 3.9404 3.8097 3.8024 3.783 4.006 4.3877 3.224
## ENSG00000000460 1.9282 2.4928 2.1826 2.361 2.086 2.1600 2.751
## ENSG00000000938 8.4519 9.4225 8.4160 9.486 8.577 8.2824 10.605
## ENSG00000000971 -0.5389 -0.1523 0.6414 -1.202 0.322 -0.5603 -1.047
## HPGL0943 HPGL0944 HPGL0945 HPGL0946 HPGL0947 HPGL0948 HPGL0949
## ENSG00000000419 5.085 4.875 4.822 5.2504 5.259 5.0182 5.254
## ENSG00000000457 3.306 3.497 3.745 3.5212 3.317 3.2637 3.173
## ENSG00000000460 2.456 3.031 2.248 2.3073 1.611 2.3986 2.790
## ENSG00000000938 10.368 10.031 10.404 10.3682 9.790 10.3477 9.189
## ENSG00000000971 -2.082 1.534 1.091 -0.2224 1.582 -0.3172 2.536
## HPGL0950 HPGL0951 HPGL0952 HPGL0953 HPGL0954 HPGL0955 HPGL0956
## ENSG00000000419 5.071 5.4803 5.276 5.194 5.100 5.218 5.5547
## ENSG00000000457 3.587 3.5583 3.552 3.364 3.632 3.901 3.6442
## ENSG00000000460 2.524 2.6507 2.332 2.304 2.944 2.610 2.1552
## ENSG00000000938 9.698 10.2341 9.430 10.179 9.337 9.248 10.2084
## ENSG00000000971 2.738 -0.1407 2.251 -2.234 1.842 2.606 -0.6589
## HPGL0957 HPGL0958 HPGL0959 HPGL0960
## ENSG00000000419 5.242 5.194 5.326 5.216
## ENSG00000000457 3.645 3.364 3.834 3.954
## ENSG00000000460 1.915 2.304 3.125 2.645
## ENSG00000000938 9.380 10.179 9.012 9.083
## ENSG00000000971 2.058 -2.234 2.872 3.217
## 12853 more rows ...
##
## $weights
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 19.106 19.258 18.7306 17.802 18.135 19.731 19.871 19.3575 18.449 18.777
## [2,] 10.069 10.557 9.3680 10.323 11.535 12.997 13.503 12.2388 13.254 14.518
## [3,] 8.020 7.011 7.7570 8.713 7.609 6.088 5.334 5.8931 6.614 5.783
## [4,] 22.325 22.799 22.3503 22.759 22.787 22.528 23.008 22.5562 22.972 22.998
## [5,] 0.963 2.059 0.7704 1.865 1.853 1.010 2.173 0.8063 1.967 1.955
## [,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20]
## [1,] 20.389 20.526 20.045 19.179 19.493 19.647 19.796 19.276 18.364 18.691
## [2,] 12.838 13.342 12.084 13.096 14.354 13.866 14.375 13.107 14.125 15.401
## [3,] 7.523 6.579 7.277 8.183 7.138 7.081 6.198 6.851 7.702 6.720
## [4,] 22.721 23.162 22.749 23.135 23.154 22.799 23.209 22.828 23.189 23.204
## [5,] 1.584 3.684 1.242 3.295 3.271 1.469 3.371 1.156 3.019 2.998
## [,21] [,22] [,23] [,24] [,25] [,26] [,27] [,28] [,29] [,30]
## [1,] 19.615 19.245 18.332 18.658 20.302 20.446 19.960 19.087 19.400 20.627
## [2,] 11.497 10.760 11.754 13.009 11.125 11.619 10.397 11.379 12.620 12.185
## [3,] 8.183 7.917 8.889 7.764 7.317 6.402 7.078 7.959 6.943 8.198
## [4,] 22.026 22.050 22.420 22.447 22.219 22.679 22.244 22.640 22.668 22.319
## [5,] 1.667 1.305 3.498 3.473 2.776 6.772 2.118 6.093 6.052 2.311
## [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38] [,39]
## [1,] 20.752 20.306 19.458 19.773 20.818 20.944 20.521 19.697 19.988
## [2,] 12.693 11.438 12.443 13.706 12.591 13.103 11.841 12.859 14.111
## [3,] 7.167 7.931 8.905 7.778 7.678 6.714 7.428 8.348 7.285
## [4,] 22.792 22.345 22.753 22.781 22.379 22.858 22.406 22.819 22.848
## [5,] 5.630 1.782 5.060 5.024 2.515 6.135 1.930 5.523 5.485
## 12853 more rows ...
##
## $design
## condGM_LA condGM_LP condGM_LPS condGM_NS condM_LA condM_LP condM_LPS
## HPGL0912 0 0 0 0 0 0 0
## HPGL0913 0 0 0 0 0 0 0
## HPGL0914 0 0 0 0 0 0 0
## HPGL0915 0 0 0 0 0 0 0
## HPGL0916 0 0 0 0 0 0 0
## condM_NS patientH2 patientH3 patientH4 patientH5
## HPGL0912 1 0 0 0 0
## HPGL0913 1 1 0 0 0
## HPGL0914 1 0 1 0 0
## HPGL0915 1 0 0 1 0
## HPGL0916 1 0 0 0 1
## 34 more rows ...
##
## $targets
## lib.size
## HPGL0912 20054310
## HPGL0913 20054287
## HPGL0914 20054287
## HPGL0915 20054311
## HPGL0916 20054294
## 34 more rows ...
eBayes finds an F-statistic from the set of t-statistics for that gene
M_LPS.M_NS.contr.mat <- makeContrasts(M_LPSvM_NS=(condM_LPS-condM_NS), levels=v$design)
M_LPS.M_NS.fit <- contrasts.fit(fit, M_LPS.M_NS.contr.mat)
M_LPS.M_NS.eb <- eBayes(M_LPS.M_NS.fit)
M_LPS.M_NS.topTab <- topTable(M_LPS.M_NS.eb, coef="M_LPSvM_NS", number=nrow(v$E))
M_LPS.M_NS.topTab <- cbind(rownames(M_LPS.M_NS.topTab), M_LPS.M_NS.topTab)
colnames(M_LPS.M_NS.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(M_LPS.M_NS.topTab) <- c(1:nrow(M_LPS.M_NS.topTab))
write.csv(M_LPS.M_NS.topTab, file="topTab_M_LPSvM_NS_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)
Limit list to genes with an adjusted p value < 0.05
M_LPS.M_NS.sigGenes <- M_LPS.M_NS.topTab[M_LPS.M_NS.topTab$adj.P.Val <0.05, ]
length(M_LPS.M_NS.sigGenes$ID)
## [1] 4840
Filter out rows with less than 2-fold change (log2 fold change of > 1)
M_LPS.M_NS.sigGenesFold1 <- subset(M_LPS.M_NS.sigGenes, abs(logFC) > 1)
length(M_LPS.M_NS.sigGenesFold1$ID)
## [1] 1350
33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M33;33M35;17M35;17M35;17M35;17M35;17M35;17MFilter out rows with less than 4-fold change (log2 fold change of > 2)
M_LPS.M_NS.sigGenesFold2 <- subset(M_LPS.M_NS.sigGenes, abs(logFC) > 2)
length(M_LPS.M_NS.sigGenesFold2$ID)
## [1] 431
Make an MA plot
sel <- M_LPS.M_NS.topTab$adj.P.Val < 0.05
top <- M_LPS.M_NS.topTab
sub <- paste("No. of sig. genes: ", sum(sel), "/", length(sel))
cpm <- v$E
plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
main="M_LPSvM_NS adjusted",
ylab="log FC", xlab="Average Expression",
sub=sub)
points(rowMeans(cpm[top$ID, ])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")
Annotate sigGenes list using Biomart
M_LPS.M_NS.sigGenes <- M_LPS.M_NS.sigGenes[order(-M_LPS.M_NS.sigGenes$logFC), ]
sigGenes <- M_LPS.M_NS.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
## Error in bmRequest(request = request, verbose = verbose): Internal Server Error (HTTP 500).
ids <- sigGenes$ID
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id", values=ids, mart=ensembl)
## Cache found
colnames(desc)=c("ID", "Symbol", "Description", "Type")
desc$Description <- gsub(",", "", desc$Description)
DEG_LPS <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG_LPS <- subset(DEG_LPS, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG_LPS <- DEG_LPS[order(-DEG_LPS$logFC), ]
##Filter out Genes -1<FC<1
DEG_LPS <- subset(DEG_LPS, abs(DEG_LPS$logFC)>1)
##Save DE genes
write.table(DEG_LPS, "DEG_M_LPSvM_NS_CDS_limmabatchcorrection_20171120.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG_LPS, "DEG_M_LPSvM_NS_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)
#eBayes finds an F-statistic from the set of t-statistics for that gene
M_LPS.M_LA.contr.mat <- makeContrasts(M_LPSvM_LA=((condM_LA-condM_NS)-(condM_LPS-condM_NS)), levels=v$design)
M_LPS.M_LA.fit <- contrasts.fit(fit, M_LPS.M_LA.contr.mat)
M_LPS.M_LA.eb <- eBayes(M_LPS.M_LA.fit)
M_LPS.M_LA.topTab <- topTable(M_LPS.M_LA.eb, coef="M_LPSvM_LA", number=nrow(v$E))
M_LPS.M_LA.topTab <- cbind(rownames(M_LPS.M_LA.topTab), M_LPS.M_LA.topTab)
colnames(M_LPS.M_LA.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(M_LPS.M_LA.topTab) <- c(1:nrow(M_LPS.M_LA.topTab))
write.csv(M_LPS.M_LA.topTab, file="topTab_M_LPSvM_LA_CDS_limmabatchcorrection_20171120rev.csv", row.names=F, quote=F)
##Limit list to genes with an adjusted p value < 0.05
M_LPS.M_LA.sigGenes <- M_LPS.M_LA.topTab[M_LPS.M_LA.topTab$adj.P.Val <0.05, ]
length(M_LPS.M_LA.sigGenes$ID)
## [1] 955
##[1] 617
##Filter out rows with less than 2-fold change (log2 fold change of > 1)
M_LPS.M_LA.sigGenesFold1 <- subset(M_LPS.M_LA.sigGenes, abs(logFC) > 1)
length(M_LPS.M_LA.sigGenesFold1$ID)
## [1] 256
##[1] 240
##Filter out rows with less than 4-fold change (log2 fold change of > 2)
M_LPS.M_LA.sigGenesFold2 <- subset(M_LPS.M_LA.sigGenes, abs(logFC) > 2)
length(M_LPS.M_LA.sigGenesFold2$ID)
## [1] 57
##[1] 69
##Make an MA plot
sel <- M_LPS.M_LA.topTab$adj.P.Val < 0.05
top <- M_LPS.M_LA.topTab
sub <- paste("No. of sig. genes: ", sum(sel), "/", length(sel))
cpm <- v$E
plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
main="M_LPSvM_LA adjusted",
ylab="log FC", xlab="Average Expression",
sub=sub)
points(rowMeans(cpm[top$ID, ])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")
## pdf
## 3
## png
## 2
## png
## 3
## png
## 2
M_LPS.M_LA.sigGenes <- M_LPS.M_LA.sigGenes[order(-M_LPS.M_LA.sigGenes$logFC), ]
##Annotate sigGenes list using Biomart
sigGenes <- M_LPS.M_LA.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
## Error in bmRequest(request = request, verbose = verbose): Internal Server Error (HTTP 500).
ids <- sigGenes$ID
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
values=ids, mart=ensembl)
## Cache found
colnames(desc)=c("ID", "Symbol", "Description", "Type")
##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)
DEG <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG <- subset(DEG, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG <- DEG[order(-DEG$logFC), ]
##Filter out Genes -1<FC<1
DEG <- subset(DEG, abs(DEG$logFC)>1)
##Save DE genes
write.table(DEG, "DEG_M_LPSvM_LA_CDS_limmabatchcorrection_20171120rev.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG, "DEG_M_LPSvM_LA_CDS_limmabatchcorrection_20171120rev.csv", row.names=F, quote=F)
#eBayes finds an F-statistic from the set of t-statistics for that gene
M_LPS.M_LP.contr.mat <- makeContrasts(M_LPSvM_LP=((condM_LP-condM_NS)-(condM_LPS-condM_NS)), levels=v$design)
M_LPS.M_LP.fit <- contrasts.fit(fit, M_LPS.M_LP.contr.mat)
M_LPS.M_LP.eb <- eBayes(M_LPS.M_LP.fit)
M_LPS.M_LP.topTab <- topTable(M_LPS.M_LP.eb, coef="M_LPSvM_LP", number=nrow(v$E))
M_LPS.M_LP.topTab <- cbind(rownames(M_LPS.M_LP.topTab), M_LPS.M_LP.topTab)
colnames(M_LPS.M_LP.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(M_LPS.M_LP.topTab) <- c(1:nrow(M_LPS.M_LP.topTab))
write.csv(M_LPS.M_LP.topTab, file="topTab_M_LPSvM_LP_CDS_limmabatchcorrection_20171120rev.csv",
row.names=F, quote=F)
##Limit list to genes with an adjusted p value < 0.05
M_LPS.M_LP.sigGenes <- M_LPS.M_LP.topTab[M_LPS.M_LP.topTab$adj.P.Val <0.05, ]
length(M_LPS.M_LP.sigGenes$ID)
## [1] 1993
##[1] 1473
##Filter out rows with less than 2-fold change (log2 fold change of > 1)
M_LPS.M_LP.sigGenesFold1 <- subset(M_LPS.M_LP.sigGenes, abs(logFC) > 1)
length(M_LPS.M_LP.sigGenesFold1$ID)
## [1] 489
##[1] 467
##Filter out rows with less than 4-fold change (log2 fold change of > 2)
M_LPS.M_LP.sigGenesFold2 <- subset(M_LPS.M_LP.sigGenes, abs(logFC) > 2)
length(M_LPS.M_LP.sigGenesFold2$ID)
## [1] 91
##[1] 125
##Make an MA plot
sel <- M_LPS.M_LP.topTab$adj.P.Val < 0.05
top <- M_LPS.M_LP.topTab
sub <- paste("No. of sig. genes: ", sum(sel),"/",length(sel))
cpm <- v$E
plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
main="M_LPSvM_LP adjusted",
ylab="log FC", xlab="Average Expression",
sub=sub)
points(rowMeans(cpm[top$ID, ])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")
## pdf
## 3
## png
## 2
## png
## 3
## png
## 2
M_LPS.M_LP.sigGenes <- M_LPS.M_LP.sigGenes[order(-M_LPS.M_LP.sigGenes$logFC), ]
##Annotate sigGenes list using Biomart
sigGenes <- M_LPS.M_LP.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
## Error in bmRequest(request = request, verbose = verbose): Internal Server Error (HTTP 500).
ids <- sigGenes$ID
##To see possibilities for attributes, use head(listAttributes(ensembl), n=20L)
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
values=ids, mart=ensembl)
## Cache found
colnames(desc)=c("ID", "Symbol", "Description", "Type")
##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)
DEG <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG <- subset(DEG, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG <- DEG[order(-DEG$logFC), ]
##Filter out Genes -1<FC<1
DEG <- subset(DEG, abs(DEG$logFC)>1)
##Save DE genes
write.table(DEG, "DEG_M_LPSvM_LP_CDS_limmabatchcorrection_20171120rev.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG, "DEG_M_LPSvM_LP_CDS_limmabatchcorrection_20171120rev.csv", row.names=F, quote=F)
##eBayes finds an F-statistic from the set of t-statistics for that gene
M_LP.M_NS.contr.mat <- makeContrasts(M_LPvM_NS=(condM_LP-condM_NS), levels=v$design)
M_LP.M_NS.fit <- contrasts.fit(fit, M_LP.M_NS.contr.mat)
M_LP.M_NS.eb <- eBayes(M_LP.M_NS.fit)
M_LP.M_NS.topTab <- topTable(M_LP.M_NS.eb, coef="M_LPvM_NS", number=nrow(v$E))
M_LP.M_NS.topTab <- cbind(rownames(M_LP.M_NS.topTab), M_LP.M_NS.topTab)
colnames(M_LP.M_NS.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(M_LP.M_NS.topTab) <- c(1:nrow(M_LP.M_NS.topTab))
write.csv(M_LP.M_NS.topTab, file="topTab_M_LPvM_NS_CDS_limmabatchcorrection_20171018.csv", row.names=F, quote=F)
##Limit list to genes with an adjusted p value < 0.05
M_LP.M_NS.sigGenes <- M_LP.M_NS.topTab[M_LP.M_NS.topTab$adj.P.Val <0.05, ]
length(M_LP.M_NS.sigGenes$ID)
## [1] 4603
##[1] 4386
##Filter out rows with less than 2-fold change (log2 fold change of > 1)
M_LP.M_NS.sigGenesFold1 <- subset(M_LP.M_NS.sigGenes, abs(logFC) > 1)
length(M_LP.M_NS.sigGenesFold1$ID)
## [1] 1422
##[1] 1455
##Filter out rows with less than 4-fold change (log2 fold change of > 2)
M_LP.M_NS.sigGenesFold2 <- subset(M_LP.M_NS.sigGenes, abs(logFC) > 2)
length(M_LP.M_NS.sigGenesFold2$ID)
## [1] 448
##[1] 519
##Make an MA plot
sel <- M_LP.M_NS.topTab$adj.P.Val < 0.05
top <- M_LP.M_NS.topTab
sub <- paste("No. of sig. genes: ", sum(sel), "/", length(sel))
cpm <- v$E
plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
main="M_LPvM_NS adjusted",
ylab="log FC", xlab="Average Expression",
sub=sub)
points(rowMeans(cpm[top$ID,])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")
## pdf
## 3
## png
## 2
## png
## 3
## png
## 2
M_LP.M_NS.sigGenes <- M_LP.M_NS.sigGenes[order(-M_LP.M_NS.sigGenes$logFC), ]
##Annotate sigGenes list using Biomart
sigGenes <- M_LP.M_NS.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
## Error in bmRequest(request = request, verbose = verbose): Internal Server Error (HTTP 500).
ids <- sigGenes$ID
##To see possibilities for attributes, use head(listAttributes(ensembl), n=20L)
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
values=ids, mart=ensembl)
## Cache found
colnames(desc)=c("ID", "Symbol", "Description", "Type")
##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)
DEG_LP4 <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG_LP4 <- subset(DEG_LP4, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG_LP4 <- DEG_LP4[order(-DEG_LP4$logFC), ]
##Filter out Genes -1<FC<1
DEG_LP4 <- subset(DEG_LP4, abs(DEG_LP4$logFC)>1)
##Save DE genes
write.table(DEG_LP4, "DEG_M_LPvM_NS_CDS_limmabatchcorrection_20171018.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG_LP4, "DEG_M_LPvM_NS_CDS_limmabatchcorrection_20171018.csv", row.names=F, quote=F)
#eBayes finds an F-statistic from the set of t-statistics for that gene
M_LA.M_NS.contr.mat <- makeContrasts(M_LAvM_NS=(condM_LA-condM_NS), levels=v$design)
M_LA.M_NS.fit <- contrasts.fit(fit, M_LA.M_NS.contr.mat)
M_LA.M_NS.eb <- eBayes(M_LA.M_NS.fit)
M_LA.M_NS.topTab <- topTable(M_LA.M_NS.eb, coef="M_LAvM_NS", number=nrow(v$E))
M_LA.M_NS.topTab <- cbind(rownames(M_LA.M_NS.topTab), M_LA.M_NS.topTab)
colnames(M_LA.M_NS.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(M_LA.M_NS.topTab) <- c(1:nrow(M_LA.M_NS.topTab))
write.csv(M_LA.M_NS.topTab, file="topTab_M_LAvM_NS_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)
##Limit list to genes with an adjusted p value < 0.05
M_LA.M_NS.sigGenes <- M_LA.M_NS.topTab[M_LA.M_NS.topTab$adj.P.Val <0.05, ]
length(M_LA.M_NS.sigGenes$ID)
## [1] 4922
##[1] 617
##Filter out rows with less than 2-fold change (log2 fold change of > 1)
M_LA.M_NS.sigGenesFold1 <- subset(M_LA.M_NS.sigGenes, abs(logFC) > 1)
length(M_LA.M_NS.sigGenesFold1$ID)
## [1] 1493
##[1] 240
##Filter out rows with less than 4-fold change (log2 fold change of > 2)
M_LA.M_NS.sigGenesFold2 <- subset(M_LA.M_NS.sigGenes, abs(logFC) > 2)
length(M_LA.M_NS.sigGenesFold2$ID)
## [1] 478
##[1] 69
##Make an MA plot
sel <- M_LA.M_NS.topTab$adj.P.Val < 0.05
top <- M_LA.M_NS.topTab
sub <- paste("No. of sig. genes: ", sum(sel), "/", length(sel))
cpm <- v$E
plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
main="M_LAvM_NS adjusted",
ylab="log FC", xlab="Average Expression",
sub=sub)
points(rowMeans(cpm[top$ID, ])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")
## pdf
## 3
## png
## 2
## png
## 3
## png
## 2
M_LA.M_NS.sigGenes <- M_LA.M_NS.sigGenes[order(-M_LA.M_NS.sigGenes$logFC), ]
##Annotate sigGenes list using Biomart
sigGenes <- M_LA.M_NS.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
## Error in bmRequest(request = request, verbose = verbose): Internal Server Error (HTTP 500).
ids <- sigGenes$ID
##To see possibilities for attributes, use head(listAttributes(ensembl), n=20L)
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
values=ids, mart=ensembl)
## Cache found
colnames(desc)=c("ID", "Symbol", "Description", "Type")
##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)
DEG_LA4 <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG_LA4 <- subset(DEG_LA4, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG_LA4 <- DEG_LA4[order(-DEG_LA4$logFC), ]
##Filter out Genes -1<FC<1
DEG_LA4 <- subset(DEG_LA4, abs(DEG_LA4$logFC)>1)
##Save DE genes
write.table(DEG_LA4, "DEG_M_LAvM_NS_CDS_limmabatchcorrection_20171120.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG_LA4, "DEG_M_LAvM_NS_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)
##GM_LPS v. GM_NS
#eBayes finds an F-statistic from the set of t-statistics for that gene
GM_LPS.GM_NS.contr.mat <- makeContrasts(GM_LPSvGM_NS=(condGM_LPS-condGM_NS), levels=v$design)
GM_LPS.GM_NS.fit <- contrasts.fit(fit, GM_LPS.GM_NS.contr.mat)
GM_LPS.GM_NS.eb <- eBayes(GM_LPS.GM_NS.fit)
GM_LPS.GM_NS.topTab <- topTable(GM_LPS.GM_NS.eb, coef="GM_LPSvGM_NS", number=nrow(v$E))
GM_LPS.GM_NS.topTab <- cbind(rownames(GM_LPS.GM_NS.topTab), GM_LPS.GM_NS.topTab)
colnames(GM_LPS.GM_NS.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(GM_LPS.GM_NS.topTab) <- c(1:nrow(GM_LPS.GM_NS.topTab))
write.csv(GM_LPS.GM_NS.topTab, file="topTab_GM_LPSvGM_NS_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)
##Limit list to genes with an adjusted p value < 0.05
GM_LPS.GM_NS.sigGenes <- GM_LPS.GM_NS.topTab[GM_LPS.GM_NS.topTab$adj.P.Val <0.05, ]
length(GM_LPS.GM_NS.sigGenes$ID)
## [1] 653
##[1] 302
##Filter out rows with less than 2-fold change (log2 fold change of > 1)
GM_LPS.GM_NS.sigGenesFold1 <- subset(GM_LPS.GM_NS.sigGenes, abs(logFC) > 1)
length(GM_LPS.GM_NS.sigGenesFold1$ID)
## [1] 260
##[1] 197
##Filter out rows with less than 4-fold change (log2 fold change of > 2)
GM_LPS.GM_NS.sigGenesFold2 <- subset(GM_LPS.GM_NS.sigGenes, abs(logFC) > 2)
length(GM_LPS.GM_NS.sigGenesFold2$ID)
## [1] 99
##[1] 101
##Make an MA plot
sel <- GM_LPS.GM_NS.topTab$adj.P.Val < 0.05
top <- GM_LPS.GM_NS.topTab
sub <- paste("No. of sig. genes: ", sum(sel), "/", length(sel))
cpm <- v$E
plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
main="GM_LPSvGM_NS adjusted",
ylab="log FC", xlab="Average Expression",
sub=sub)
points(rowMeans(cpm[top$ID, ])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")
## pdf
## 3
## png
## 2
## png
## 3
## png
## 2
GM_LPS.GM_NS.sigGenes <- GM_LPS.GM_NS.sigGenes[order(-GM_LPS.GM_NS.sigGenes$logFC), ]
##Annotate sigGenes list using Biomart
sigGenes <- GM_LPS.GM_NS.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
## Error in bmRequest(request = request, verbose = verbose): Internal Server Error (HTTP 500).
ids <- sigGenes$ID
##To see possibilities for attributes, use head(listAttributes(ensembl), n=20L)
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
values=ids, mart=ensembl)
## Cache found
colnames(desc)=c("ID", "Symbol", "Description", "Type")
##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)
DEG <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG <- subset(DEG, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG <- DEG[order(-DEG$logFC), ]
##Filter out Genes -1<FC<1
DEG <- subset(DEG, abs(DEG$logFC)>1)
##Save DE genes
write.table(DEG, "DEG_GM_LPSvGM_NS_CDS_limmabatchcorrection_20171120.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG, "DEG_GM_LPSvGM_NS_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)
#eBayes finds an F-statistic from the set of t-statistics for that gene
GM_LPS.GM_LP.contr.mat <- makeContrasts(GM_LPSvGM_LP=((condGM_LPS-condGM_NS)-(condGM_LP-condGM_NS)), levels=v$design)
GM_LPS.GM_LP.fit <- contrasts.fit(fit, GM_LPS.GM_LP.contr.mat)
GM_LPS.GM_LP.eb <- eBayes(GM_LPS.GM_LP.fit)
GM_LPS.GM_LP.topTab <- topTable(GM_LPS.GM_LP.eb, coef="GM_LPSvGM_LP", number=nrow(v$E))
GM_LPS.GM_LP.topTab <- cbind(rownames(GM_LPS.GM_LP.topTab), GM_LPS.GM_LP.topTab)
colnames(GM_LPS.GM_LP.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(GM_LPS.GM_LP.topTab) <- c(1:nrow(GM_LPS.GM_LP.topTab))
write.csv(GM_LPS.GM_LP.topTab, file="topTab_GM_LPSvGM_LP_CDS_limmabatchcorrection_20171023.csv", row.names=F, quote=F)
##Limit list to genes with an adjusted p value < 0.05
GM_LPS.GM_LP.sigGenes <- GM_LPS.GM_LP.topTab[GM_LPS.GM_LP.topTab$adj.P.Val <0.05, ]
length(GM_LPS.GM_LP.sigGenes$ID)
## [1] 350
##[1] 194
##Filter out rows with less than 2-fold change (log2 fold change of > 1)
GM_LPS.GM_LP.sigGenesFold1 <- subset(GM_LPS.GM_LP.sigGenes, abs(logFC) > 1)
length(GM_LPS.GM_LP.sigGenesFold1$ID)
## [1] 126
##[1] 114
##Filter out rows with less than 4-fold change (log2 fold change of > 2)
GM_LPS.GM_LP.sigGenesFold2 <- subset(GM_LPS.GM_LP.sigGenes, abs(logFC) > 2)
length(GM_LPS.GM_LP.sigGenesFold2$ID)
## [1] 28
##[1] 27
##Make an MA plot
sel <- GM_LPS.GM_LP.topTab$adj.P.Val < 0.05
top <- GM_LPS.GM_LP.topTab
sub <- paste("No. of sig. genes: ", sum(sel), "/", length(sel))
cpm <- v$E
plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
main="GM_LPSvGM_LP adjusted",
ylab="log FC", xlab="Average Expression",
sub=sub)
points(rowMeans(cpm[top$ID, ])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")
## pdf
## 3
## png
## 2
## png
## 3
## png
## 2
GM_LPS.GM_LP.sigGenes <- GM_LPS.GM_LP.sigGenes[order(-GM_LPS.GM_LP.sigGenes$logFC), ]
##Annotate sigGenes list using Biomart
sigGenes <- GM_LPS.GM_LP.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
## Error in bmRequest(request = request, verbose = verbose): Internal Server Error (HTTP 500).
ids <- sigGenes$ID
##To see possibilities for attributes, use head(listAttributes(ensembl), n=20L)
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
values=ids, mart=ensembl)
## Cache found
colnames(desc)=c("ID", "Symbol", "Description", "Type")
##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)
DEG <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG <- subset(DEG, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG <- DEG[order(-DEG$logFC), ]
##Filter out Genes -1<FC<1
DEG <- subset(DEG, abs(DEG$logFC)>1)
##Save DE genes
write.table(DEG, "DEG_GM_LPSvGM_LP_CDS_limmabatchcorrection_20171120.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG, "DEG_GM_LPSvGM_LP_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)
##GM_LPS v GM_LA
#eBayes finds an F-statistic from the set of t-statistics for that gene
GM_LPS.GM_LA.contr.mat <- makeContrasts(GM_LPSvGM_LA=((condGM_LPS-condGM_NS)-(condGM_LA-condGM_NS)), levels=v$design)
GM_LPS.GM_LA.fit <- contrasts.fit(fit, GM_LPS.GM_LA.contr.mat)
GM_LPS.GM_LA.eb <- eBayes(GM_LPS.GM_LA.fit)
GM_LPS.GM_LA.topTab <- topTable(GM_LPS.GM_LA.eb, coef="GM_LPSvGM_LA", number=nrow(v$E))
GM_LPS.GM_LA.topTab <- cbind(rownames(GM_LPS.GM_LA.topTab), GM_LPS.GM_LA.topTab)
colnames(GM_LPS.GM_LA.topTab) <- c("ID", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "B")
rownames(GM_LPS.GM_LA.topTab) <- c(1:nrow(GM_LPS.GM_LA.topTab))
write.csv(GM_LPS.GM_LA.topTab, file="topTab_GM_LPSvGM_LA_CDS_limmabatchcorrection_20171023.csv", row.names=F, quote=F)
##Limit list to genes with an adjusted p value < 0.05
GM_LPS.GM_LA.sigGenes <- GM_LPS.GM_LA.topTab[GM_LPS.GM_LA.topTab$adj.P.Val <0.05, ]
length(GM_LPS.GM_LA.sigGenes$ID)
## [1] 12
##[1] 17
##Filter out rows with less than 2-fold change (log2 fold change of > 1)
GM_LPS.GM_LA.sigGenesFold1 <- subset(GM_LPS.GM_LA.sigGenes, abs(logFC) > 1)
length(GM_LPS.GM_LA.sigGenesFold1$ID)
## [1] 7
##[1] 15
##Filter out rows with less than 4-fold change (log2 fold change of > 2)
GM_LPS.GM_LA.sigGenesFold2 <- subset(GM_LPS.GM_LA.sigGenes, abs(logFC) > 2)
length(GM_LPS.GM_LA.sigGenesFold2$ID)
## [1] 1
##[1] 7
##Make an MA plot
sel <- GM_LPS.GM_LA.topTab$adj.P.Val < 0.05
top <- GM_LPS.GM_LA.topTab
sub <- paste("No. of sig. genes: ", sum(sel),"/", length(sel))
cpm <- v$E
plot(rowMeans(cpm[top$ID,]), top$logFC, pch=16, cex=0.5, col="darkgrey",
main="GM_LPSvGM_LA adjusted",
ylab="log FC", xlab="Average Expression",
sub=sub)
points(rowMeans(cpm[top$ID,])[sel], top$logFC[sel], col="red", cex=0.5)
abline(h=c(-1, 0, 1), col="red")
## pdf
## 3
## png
## 2
## png
## 3
## png
## 2
GM_LPS.GM_LA.sigGenes <- GM_LPS.GM_LA.sigGenes[order(-GM_LPS.GM_LA.sigGenes$logFC), ]
##Annotate sigGenes list using Biomart
sigGenes <- GM_LPS.GM_LA.sigGenes
ensembl <- useMart("ENSEMBL_MART_ENSEMBL", dataset="hsapiens_gene_ensembl", host="www.ensembl.org")
## Error in bmRequest(request = request, verbose = verbose): Internal Server Error (HTTP 500).
ids <- sigGenes$ID
##To see possibilities for attributes, use head(listAttributes(ensembl), n=20L)
desc <- getBM(attributes=c("ensembl_gene_id", "hgnc_symbol", "description", "gene_biotype"), filters="ensembl_gene_id",
values=ids, mart=ensembl)
## Cache found
colnames(desc)=c("ID", "Symbol", "Description", "Type")
##Remove commas from description
desc$Description <- gsub(",", "", desc$Description)
DEG <- merge(sigGenes, desc, by="ID", all=TRUE)
DEG <- subset(DEG, select=c(ID, Symbol, Description, logFC, adj.P.Val, AveExpr, Type), FS="/t")
DEG <- DEG[order(-DEG$logFC), ]
##Filter out Genes -1<FC<1
DEG <- subset(DEG, abs(DEG$logFC)>1)
##Save DE genes
write.table(DEG, "DEG_GM_LPSvGM_LA_CDS_limmabatchcorrection_20171120.txt", col.names=T, row.names=F, quote=F)
write.csv(DEG, "DEG_GM_LPSvGM_LA_CDS_limmabatchcorrection_20171120.csv", row.names=F, quote=F)
M_LPS v M_LA
res <- read.table("topTab_M_LPSvM_LA_CDS_limmabatchcorrection_20171120.txt", header=TRUE)
head(res)
with(res, plot(logFC, -log10(P.Value), cex=0.8, pch=20, main="Volcano plot M_LPS v M_LA", xlim=c(-7, 7)))
## Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, adj.P.Val<.05 ), points(logFC, -log10(P.Value), cex=0.8, pch=20, col="violetred2"))
with(subset(res, abs(logFC)>1), points(logFC, -log10(P.Value), cex=0.8, pch=20, col="orange"))
with(subset(res, adj.P.Val<.05 & abs(logFC)>1), points(logFC, -log10(P.Value), cex=0.8, pch=20, col="turquoise3"))
##dev.copy(png, "VolcanoPlot M_LPS v M_LA", width=700, height=700)
##dev.off();
M_LPS v M_LP
res <- read.csv("topTab_M_LPSvM_LP_CDS_limmabatchcorrection_20171120rev.csv", header=TRUE)
head(res)
with(res, plot(logFC, -log10(P.Value), cex=0.8, pch=20, main="Volcano plot M_LPS v M_LP", xlim=c(-7, 7)))
## Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, adj.P.Val<.05 ), points(logFC, -log10(P.Value), cex=0.8, pch=20, col="violetred2"))
with(subset(res, abs(logFC)>1), points(logFC, -log10(P.Value), cex=0.8, pch=20, col="orange"))
with(subset(res, adj.P.Val<.05 & abs(logFC)>1), points(logFC, -log10(P.Value), cex=0.8, pch=20, col="turquoise3"))
##dev.copy(png, "VolcanoPlot M_LPS v M_LP", width=700, height=700)
##dev.off();
res <- read.csv("topTab_M_LPSvM_LP_CDS_limmabatchcorrection_20171120rev.csv",
header=TRUE)
head(res)
## ID logFC AveExpr t P.Value adj.P.Val B
## 1 ENSG00000121966 4.167 5.148 13.64 9.189e-15 1.182e-10 23.50
## 2 ENSG00000182568 2.467 3.922 13.00 3.316e-14 2.132e-10 22.23
## 3 ENSG00000176595 4.467 1.729 11.43 9.698e-13 4.157e-09 18.60
## 4 ENSG00000198814 2.443 6.435 11.18 1.695e-12 4.986e-09 18.53
## 5 ENSG00000112394 2.502 5.836 10.96 2.791e-12 5.982e-09 18.04
## 6 ENSG00000109321 4.500 1.516 11.12 1.939e-12 4.986e-09 17.86
with(res, plot(logFC, -log10(P.Value), cex=0.8,
pch=20, main="Volcano plot M_LPS v M_LP", xlim=c(-7, 7)))
## Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, adj.P.Val<.05 ), points(logFC, -log10(P.Value), cex=0.8,
pch=20, col="violetred2"))
with(subset(res, abs(logFC)>1), points(logFC, -log10(P.Value), cex=0.8,
pch=20, col="orange"))
with(subset(res, adj.P.Val<.05 & abs(logFC)>1),
points(logFC, -log10(P.Value),
cex=0.8, pch=20, col="turquoise3"))
## png
## 3
## png
## 2
## Warning in file(file, "rt"): cannot open file
## 'topTab_M_LPSvM_LA_CDS_limmabatchcorrection_20171120.txt': No such file or
## directory
## Error in file(file, "rt"): cannot open the connection
## ID logFC AveExpr t P.Value adj.P.Val B
## 1 ENSG00000121966 4.167 5.148 13.64 9.189e-15 1.182e-10 23.50
## 2 ENSG00000182568 2.467 3.922 13.00 3.316e-14 2.132e-10 22.23
## 3 ENSG00000176595 4.467 1.729 11.43 9.698e-13 4.157e-09 18.60
## 4 ENSG00000198814 2.443 6.435 11.18 1.695e-12 4.986e-09 18.53
## 5 ENSG00000112394 2.502 5.836 10.96 2.791e-12 5.982e-09 18.04
## 6 ENSG00000109321 4.500 1.516 11.12 1.939e-12 4.986e-09 17.86
with(res, plot(logFC, -log10(P.Value), cex=0.8, pch=20,
main="Volcano plot M_LPS v M_LA", xlim=c(-7, 7)))
## Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, adj.P.Val<.05 ), points(logFC, -log10(P.Value), cex=0.8,
pch=20, col="violetred2"))
with(subset(res, abs(logFC)>1), points(logFC, -log10(P.Value), cex=0.8,
pch=20, col="orange"))
with(subset(res, adj.P.Val<.05 & abs(logFC)>1),
points(logFC, -log10(P.Value),
cex=0.8, pch=20, col="turquoise3"))
## png
## 3
## png
## 2
res <- read.table("GM_LPS v GM_LA/topTab_GM_LPSvGM_LA_CDS_limmabatchcorrection_20171023.txt",
header=TRUE)
## Warning in file(file, "rt"): cannot open file 'GM_LPS v GM_LA/
## topTab_GM_LPSvGM_LA_CDS_limmabatchcorrection_20171023.txt': No such file or
## directory
## Error in file(file, "rt"): cannot open the connection
## ID logFC AveExpr t P.Value adj.P.Val B
## 1 ENSG00000121966 4.167 5.148 13.64 9.189e-15 1.182e-10 23.50
## 2 ENSG00000182568 2.467 3.922 13.00 3.316e-14 2.132e-10 22.23
## 3 ENSG00000176595 4.467 1.729 11.43 9.698e-13 4.157e-09 18.60
## 4 ENSG00000198814 2.443 6.435 11.18 1.695e-12 4.986e-09 18.53
## 5 ENSG00000112394 2.502 5.836 10.96 2.791e-12 5.982e-09 18.04
## 6 ENSG00000109321 4.500 1.516 11.12 1.939e-12 4.986e-09 17.86
with(res, plot(logFC, -log10(P.Value), cex=0.8, pch=20,
main="Volcano plot GM_LPS_4 v GM_LA_4", xlim=c(-7, 7)))
## Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, adj.P.Val<.05 ), points(logFC, -log10(P.Value), cex=0.8,
pch=20, col="violetred2"))
with(subset(res, abs(logFC)>1), points(logFC, -log10(P.Value), cex=0.8,
pch=20, col="orange"))
with(subset(res, adj.P.Val<.05 & abs(logFC)>1),
points(logFC, -log10(P.Value), cex=0.8, pch=20, col="turquoise3"))
## png
## 3
## png
## 2
## Warning in file(file, "rt"): cannot open file
## 'topTab_GM_LPSvGM_LP_CDS_limmabatchcorrection_20171023.txt': No such file or
## directory
## Error in file(file, "rt"): cannot open the connection
## ID logFC AveExpr t P.Value adj.P.Val B
## 1 ENSG00000121966 4.167 5.148 13.64 9.189e-15 1.182e-10 23.50
## 2 ENSG00000182568 2.467 3.922 13.00 3.316e-14 2.132e-10 22.23
## 3 ENSG00000176595 4.467 1.729 11.43 9.698e-13 4.157e-09 18.60
## 4 ENSG00000198814 2.443 6.435 11.18 1.695e-12 4.986e-09 18.53
## 5 ENSG00000112394 2.502 5.836 10.96 2.791e-12 5.982e-09 18.04
## 6 ENSG00000109321 4.500 1.516 11.12 1.939e-12 4.986e-09 17.86
with(res, plot(logFC, -log10(P.Value), cex=0.8, pch=20,
main="Volcano plot GM_LPS v GM_LP", xlim=c(-7, 7)))
## Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, adj.P.Val<.05 ), points(logFC, -log10(P.Value), cex=0.8,
pch=20, col="violetred2"))
with(subset(res, abs(logFC)>1), points(logFC, -log10(P.Value), cex=0.8,
pch=20, col="orange"))
with(subset(res, adj.P.Val<.05 & abs(logFC)>1),
points(logFC, -log10(P.Value), cex=0.8, pch=20, col="turquoise3"))
## png
## 3
## png
## 2