Vince would like to plot the various intersections/unions of some existing RNASeq data from mice infected with Pseudomonas aeruginosa. The table in question is rather complex, comprising samples of mouse bladder data, LB with various additions, etc. Thus the great majority of what follows is my attempt to simplify the table so that it is possible to follow; I then load it into one of R’s Venn libraries and print some images.
In order to load this most easily, I am changing the following:
Save it as shrunken.csv
Unfortunately, I have no guarantee that ID’s order and lbu_ID’s order are the same, so split this into two sets and merge them back.
## Read the simplified table.
all_together <- read.csv(file = "external_data/shrunken.csv")
## 3 rows are missing data; dropping them outright is left disabled here.
##all_together <- all_together[complete.cases(all_together), ]
## Use the ID column as row names (made syntactically valid and unique).
rownames(all_together) <- make.names(names = all_together[["ID"]], unique = TRUE)
## Force the LB + urea fold changes to numeric; unparseable entries become NA.
all_together[["lbu_lfc"]] <- as.numeric(all_together[["lbu_lfc"]])
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Replace every NA in the table with 0.
## NOTE(review): this also zero-fills any missing adjusted p-values, and 0
## passes the `<= 0.05` filters used below -- confirm that is intended.
all_together[is.na(all_together)] <- 0
## Human urine, PBS-T up/down (hupbst_stuff)
## The goal is a 4-fold change, so a fold-change cutoff of 2 on the log scale
## (presumably log2 -- TODO confirm against the source sheet).
## First pull the up
hupbst_up <- all_together[all_together[["hupbst_lfc"]] >= 2, ]
## Vince's sheet shows 161 here
dim(hupbst_up)
## [1] 161 16
## Take only the low adjusted p-values
hupbst_pup <- hupbst_up[hupbst_up[["hupbst_padj"]] <= 0.05, ]
## How many?
dim(hupbst_pup)
## [1] 160 16
## Get the names of these genes
hupbst_up_genes <- rownames(hupbst_up)
hupbst_pup_genes <- rownames(hupbst_pup)
## Repeat going down: at least a 4-fold decrease (lfc <= -2)
hupbst_down <- all_together[all_together[["hupbst_lfc"]] <= -2, ]
hupbst_pdown <- hupbst_down[hupbst_down[["hupbst_padj"]] <= 0.05, ]
## Row counts before and after the adjusted p-value filter
dim(hupbst_down)
## [1] 275 16
dim(hupbst_pdown)
## [1] 274 16
hupbst_pdown_genes <- rownames(hupbst_pdown)
## Now repeat the above for Mouse urine, PBS-T (mupbst_stuff)
## Up: fold-change cutoff of 2 on the log scale (lfc >= 2)
mupbst_up <- all_together[all_together[["mupbst_lfc"]] >= 2, ]
## Take only the low adjusted p-values
mupbst_pup <- mupbst_up[mupbst_up[["mupbst_padj"]] <= 0.05, ]
## How many, before and after the adjusted p-value filter?
dim(mupbst_up)
## [1] 321 16
dim(mupbst_pup)
## [1] 321 16
## Get the names of these genes
mupbst_pup_genes <- rownames(mupbst_pup)
## Repeat going down: at least a 4-fold decrease (lfc <= -2)
mupbst_down <- all_together[all_together[["mupbst_lfc"]] <= -2, ]
mupbst_pdown <- mupbst_down[mupbst_down[["mupbst_padj"]] <= 0.05, ]
dim(mupbst_down)
## [1] 611 16
dim(mupbst_pdown)
## [1] 611 16
mupbst_pdown_genes <- rownames(mupbst_pdown)
## Repeat for mouse bladder (mbpbst_stuff)
## Up: fold-change cutoff of 2 on the log scale (lfc >= 2)
mbpbst_up <- all_together[all_together[["mbpbst_lfc"]] >= 2, ]
## Take only the low adjusted p-values
mbpbst_pup <- mbpbst_up[mbpbst_up[["mbpbst_padj"]] <= 0.05, ]
## How many, before and after the adjusted p-value filter?
dim(mbpbst_up)
## [1] 435 16
dim(mbpbst_pup)
## [1] 434 16
## Get the names of these genes
mbpbst_pup_genes <- rownames(mbpbst_pup)
## Repeat going down: at least a 4-fold decrease (lfc <= -2)
mbpbst_down <- all_together[all_together[["mbpbst_lfc"]] <= -2, ]
mbpbst_pdown <- mbpbst_down[mbpbst_down[["mbpbst_padj"]] <= 0.05, ]
dim(mbpbst_down)
## [1] 432 16
dim(mbpbst_pdown)
## [1] 432 16
mbpbst_pdown_genes <- rownames(mbpbst_pdown)
## And last, LB + urea (lbu_stuff)
## Up: fold-change cutoff of 2 on the log scale (lfc >= 2).
## The lbu columns are wrapped in as.numeric() in both directions so the
## comparisons are numeric even if a column was read in as text.
lbu_up <- all_together[as.numeric(all_together[["lbu_lfc"]]) >= 2, ]
## Take only the low adjusted p-values
## Note: this column is named lbu_adjp, not lbu_padj like the other conditions.
lbu_pup <- lbu_up[as.numeric(lbu_up[["lbu_adjp"]]) <= 0.05, ]
## How many, before and after the adjusted p-value filter?
dim(lbu_up)
## [1] 192 16
dim(lbu_pup)
## [1] 184 16
## Get the names of these genes
lbu_pup_genes <- rownames(lbu_pup)
## Repeat going down: at least a 4-fold decrease (lfc <= -2)
lbu_down <- all_together[as.numeric(all_together[["lbu_lfc"]]) <= -2, ]
lbu_pdown <- lbu_down[as.numeric(lbu_down[["lbu_adjp"]]) <= 0.05, ]
dim(lbu_down)
## [1] 543 16
dim(lbu_pdown)
## [1] 540 16
## Names of the significantly down genes; the cross-condition filtering
## further down reads lbu_pdown_genes, so it has to be defined here.
lbu_pdown_genes <- rownames(lbu_pdown)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
## Now make sure that the genes in the up sets are not in any down set, and
## vice versa: a gene that is significantly up in one condition is dropped if
## it is significantly down in any of the four conditions (and the reverse).

## Return the elements of `genes` that do not occur in `exclude`, keeping the
## original order.  Base-R equivalent of `genes[genes %nin% exclude]`, so this
## section does not depend on Hmisc being attached.
drop_genes <- function(genes, exclude) {
  genes[!(genes %in% exclude)]
}

## Every gene called significantly down (resp. up) in at least one condition.
## Filtering against this pooled vector gives the same result as filtering
## against each condition's list in turn.
any_pdown_genes <- c(hupbst_pdown_genes, mupbst_pdown_genes,
                     mbpbst_pdown_genes, lbu_pdown_genes)
any_pup_genes <- c(hupbst_pup_genes, mupbst_pup_genes,
                   mbpbst_pup_genes, lbu_pup_genes)

## Up-regulated genes that are never significantly down anywhere.
unique_hupbst_up <- drop_genes(hupbst_pup_genes, any_pdown_genes)
unique_mupbst_up <- drop_genes(mupbst_pup_genes, any_pdown_genes)
unique_mbpbst_up <- drop_genes(mbpbst_pup_genes, any_pdown_genes)
unique_lbu_up <- drop_genes(lbu_pup_genes, any_pdown_genes)

## Down-regulated genes that are never significantly up anywhere.
unique_hupbst_down <- drop_genes(hupbst_pdown_genes, any_pup_genes)
unique_mupbst_down <- drop_genes(mupbst_pdown_genes, any_pup_genes)
unique_mbpbst_down <- drop_genes(mbpbst_pdown_genes, any_pup_genes)
unique_lbu_down <- drop_genes(lbu_pdown_genes, any_pup_genes)
## Letter codes for the four conditions:
##   a is hupbst
##   b is mupbst
##   c is mbpbst
##   d is lbu
library(Vennerable)
## Four-way comparison of the filtered up-regulated gene lists.
input <- list(
  human_urine = unique_hupbst_up,
  mouse_urine = unique_mupbst_up,
  mouse_bladder = unique_mbpbst_up,
  lb_urea = unique_lbu_up
)
venn_fun <- Venn(input)
## Keep the region weights of the up-regulated Venn object.
start <- Weights(venn_fun)
venn_fun
## A Venn object on 4 sets named
## human_urine,mouse_urine,mouse_bladder,lb_urea
## 0000 1000 0100 1100 0010 1010 0110 1110 0001 1001 0101 1101 0011 1011 0111
## 0 51 131 43 223 4 60 18 93 3 17 16 13 4 16
## 1111
## 12
## Same four-way comparison for the filtered down-regulated gene lists.
pinput <- list(
"human_urine" = unique_hupbst_down,
"mouse_urine" = unique_mupbst_down,
"mouse_bladder" = unique_mbpbst_down,
"lb_urea" = unique_lbu_down)
## Build the Venn object from the down lists (pinput).  This used to pass
## `input` again, which simply rebuilt the up-regulated diagram.
venn_fun <- Venn(pinput)
venn_fun
## NOTE: the output previously recorded here was identical to the
## up-regulated result (51, 131, 43, 223, ...) because of the wrong argument;
## re-run this chunk to regenerate the down-regulated counts.
In order to use these, I think the venneuler package will be the best, as it is able to work with intersection names and numbers rather than the sets of items.
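| Region | Description | down | up |
|---|---|---|---|
| A | Human Urine only | 13 | 51 |
| B | Mouse Urine only | 150 | 131 |
| C | Bladder (mouse) | 190 | 222 |
| D | Urea | 210 | 93 |
| A+B | Human and mouse | 59 | 43 |
| A+C | Human + Bladder | 8 | 4 |
| A+D | Human+Urea | 7 | 3 |
| B+C | Mouse + Bladder | 101 | 59 |
| B+D | Mouse+Urea | 73 | 17 |
| C+D | Urea+Bladder | 14 | 13 |
| A+B+C | All but urea | 44 | 18 |
| A+B+D | All but bladder | 90 | 16 |
| A+C+D | All but mouse | 1 | 4 |
| B+C+D | All but human | 26 | 16 |
| A+B+C+D | All | 38 | 12 |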
## Loading required package: rJava
## venneuler() is called below; attach the package here so this section runs
## even if it was not loaded earlier.
library(venneuler)
## Region sizes for the down-regulated genes, keyed by venneuler combination
## names (A = human urine, B = mouse urine, C = mouse bladder, D = LB + urea).
## The values are copied from the "down" column of the table above.
down_sets <- c(
  "A" = 13,
  "B" = 150,
  ## Bladder-only is 190 in the table; the 101 that used to be here is the
  ## B&C value.
  "C" = 190,
  "D" = 210,
  "A&B" = 59,
  "A&C" = 8,
  "A&D" = 7,
  "B&C" = 101,
  "B&D" = 73,
  "C&D" = 14,
  "A&B&C" = 44,
  "A&B&D" = 90,
  "A&C&D" = 1,
  "B&C&D" = 26,
  "A&B&C&D" = 38)
## Fit and draw the diagram for the down-regulated genes.
down_vince <- venneuler(down_sets)
plot(down_vince)
## Region sizes for the up-regulated genes, keyed by venneuler combination
## names (A = human urine, B = mouse urine, C = mouse bladder, D = LB + urea).
## The values are copied from the "up" column of the table above.
up_sets <- c(
  "A" = 51,
  "B" = 131,
  "C" = 222,
  "D" = 93,
  "A&B" = 43,
  "A&C" = 4,
  "A&D" = 3,
  "B&C" = 59,
  "B&D" = 17,
  "C&D" = 13,
  "A&B&C" = 18,
  "A&B&D" = 16,
  "A&C&D" = 4,
  "B&C&D" = 16,
  "A&B&C&D" = 12)
## Fit the diagram from the up counts.  This used to be fitted from
## down_sets, so the "up" plot was a second copy of the "down" plot.
up_vince <- venneuler(up_sets)
plot(up_vince)