The samples contain 5 mutants and a wild-type (WT) control grown in several media and instilled in mice for 1 hour. This document uses the mouse raw reverse strand counts.
# https://haozhu233.github.io/kableExtra/awesome_table_in_html.html
#setwd("/Volumes/VTL_Lab_HQ/System/Volumes/Data/Users/VTL_Lab1/Documents/El-Husseini_Nour/rnaseq/ed")
library(kableExtra)
library(dplyr)
dt <- read.csv("ed_sample_table.csv")
dt %>%
filter(Media == "Mouse instilled") %>%
# filter(Media == "LB" | Media == "LB + 0.5 M urea") %>%
# filter(Media == "PBS-T") %>%
# filter(Media == "Mice urine") %>%
# filter(Strain == "WT") %>%
# filter(Media == "LB" | Media == "LB + 0.5 M urea") %>%
kbl() %>%
kable_classic(full_width = T, html_font = "Computer modern") | Sample | Strain | Media | Batch |
|---|---|---|---|
| SM043 | PA14 WT | Mouse instilled | 1 |
| SM044 | PA14 WT | Mouse instilled | 2 |
| SM045 | PA14 WT | Mouse instilled | 3 |
| SM046 | ∆eda | Mouse instilled | 1 |
| SM047 | ∆eda | Mouse instilled | 2 |
| SM048 | ∆eda | Mouse instilled | 3 |
| SM049 | ∆edd | Mouse instilled | 1 |
| SM050 | ∆edd | Mouse instilled | 2 |
| SM051 | ∆edd | Mouse instilled | 3 |
| SM052 | ∆gcd | Mouse instilled | 1 |
| SM053 | ∆gcd | Mouse instilled | 2 |
| SM054 | ∆gcd | Mouse instilled | 3 |
| SM055 | ∆pgl | Mouse instilled | 1 |
| SM056 | ∆pgl | Mouse instilled | 2 |
| SM057 | ∆pgl | Mouse instilled | 3 |
| SM058 | ∆zwf | Mouse instilled | 1 |
| SM059 | ∆zwf | Mouse instilled | 2 |
| SM060 | ∆zwf | Mouse instilled | 3 |
library(DESeq2)
library(ggplot2)
library(dplyr)
metadata=read.csv("metadata_ed.csv", row.names = 1, header = TRUE)
# these are the upside down counts
# rawcounts=read.csv("htseq_unfiltered_ed.csv", row.names =1 )
#rawcounts=read.csv("htseq_SM_unfiltered_reverse_strand_count.csv", row.names =1 )
# mus counts
rawcounts <- read.csv("htseq_unfiltered_reverse_strand_count_mus.csv", header = TRUE)
rawcounts <- filter(rawcounts,rowSums(rawcounts[,2:19])> 0, na.rm = TRUE)
# append b to batch column
metadata[["batch"]] <- paste0("b",metadata[["batch"]])
# filter samples
### Mice instilled
#metadata <- filter(metadata, media != "LB" & media != "LB_0.5 M_urea")
#metadata <- filter(metadata, media == "PBS-T")
metadata <- filter(metadata, media == "mice_instilled")
#metadata <- filter(metadata, media == "mice_urine")
#metadata <- filter(metadata, strain == "WT")
# filter poor quality samples
metadata <- filter(metadata, !rownames(metadata) %in% c("SM040", "SM048", "SM053"))
samples <- rownames(metadata)
rawcounts <- select(rawcounts, all_of(samples))
# check that the samples are in order
all(rownames(metadata) == colnames(rawcounts))## [1] TRUE
# filter 0's
# check that the samples are in order
dds=DESeqDataSetFromMatrix(countData = rawcounts, colData =metadata, design = ~ condition + batch) # boxplot of average raw counts per sample
boxplot(log10(assays(dds)[["counts"]]), las=2, ylab="log10(assays(dds)[[counts]])")bp <- barplot(colSums(rawcounts),
# ylim = c(0,4000000),
las=2,)
mtext("Top axis", side=2, line=4)library(geomtextpath)
dat <- stack(rawcounts)
ggplot(dat, aes(x = log2(values+1), fill=ind, label = ind )) +
geom_density(alpha = 0.5, position = "identity") +
ylab("Density") +
geom_textdensity(hjust = "ymax", vjust = 0,
text_only = TRUE, text_smoothing = 20) +
theme(panel.grid = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(fill = NA),
legend.position = "none",
legend.key = element_rect(fill = NA)) +
coord_cartesian(xlim = c(0, 20))x <- colSums(rawcounts)
y <- colSums(rawcounts != 0)
df <- data.frame(x,y)
df$ind <- rownames(df)
ggplot(data = df, aes(x = x, y = y, color = ind, label = ind)) + geom_point() +
geom_text(hjust=-0.1, vjust=-0.1) +
scale_x_continuous(limits = c(0,7e6), breaks = c(0, 1e6,2e6,3e6,4e6, 5e6, 6e6, 7e6), labels = (c(0,1,2,3,4,5,6, 7))) +
xlab("CPM") + ylab("Number of non-zero genes observed") +
theme(panel.grid = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(fill = NA),
legend.position = "none",
legend.key = element_rect(fill = NA)) # variance stabilizing transformations (VST)
vsd <- vst(dds, blind=TRUE)library(ggplot2)
# PCA plot
pcaData <- plotPCA(vsd, intgroup=c("condition", "batch", "strain", "media"), returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
pca <- ggplot(pcaData, aes(PC1, PC2, color=strain, shape=media)) +
geom_point(size=3) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
scale_colour_brewer(palette = "Dark2") +
#scale_shape_manual(values=c(16,17,15,18,7,8,3,10)) +
geom_rug() +
# ylim(-40, 40) +
# xlim(-50,60) +
coord_fixed() +
labs(color = "Strain", shape = "Strain") +
theme(panel.grid = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(fill = NA),
legend.key = element_rect(fill = NA))
pcalibrary("RColorBrewer")
library("pheatmap")
sampleDists <- dist(t(assay(vsd)))
sampleDistMatrix <- as.matrix(sampleDists)
df <- as.data.frame(colData(dds)[,c("condition","batch")])
anno_col <- select(metadata, all_of(c("strain")))
pheatmap(sampleDistMatrix,
clustering_distance_rows=sampleDists,
clustering_distance_cols=sampleDists,
labels_row = colnames(rawcounts),
labels_col = colnames(rawcounts),
angle_col = 45,
show_colnames = TRUE)