1 Introduction

Vince would like to plot the various intersections/unions of some existing RNASeq data from mice infected with Pseudomonas aeruginosa. The table in question is rather complex, comprising samples of mouse bladder data, LB with various additions, and so on. The great majority of what follows is therefore my attempt to simplify the table so that it is possible to follow; I then load it into one of R’s Venn libraries and print some images.

2 Step 1, look at table

In order to load this most easily, I am changing the following:

  • Remove top rows
  • Shorten names to a combination of condition + column, e.g.:
    • ID (gene id)
    • hupbst_basemean – human urine vs. pbs-t
    • hupbst_lfc
    • hupbst_p
    • hupbst_padj
    • mupbst_lfc – mouse urine vs. pbs-t
    • mupbst_p
    • mupbst_padj
    • mbpbst_lfc – mouse bladder vs. pbs-t
    • mbpbst_p
    • mbpbst_padj
    • lbu_ID – LB with LB urine
    • lbu_basemean
    • lbu_lfc
    • lbu_p
    • lbu_adjp
  • Remove blank columns

Save it as shrunken.csv

3 Step 2, load it up

Unfortunately, I have no guarantee that the order of ID and the order of lbu_ID are the same, so I split the table into two sets and merge them back together.

## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## [1] 161  16
## [1] 160  16
## [1] 275  16
## [1] 274  16
## [1] 321  16
## [1] 321  16
## [1] 611  16
## [1] 611  16
## [1] 435  16
## [1] 434  16
## [1] 432  16
## [1] 432  16
## [1] 192  16
## [1] 184  16
## [1] 543  16
## [1] 540  16

4 Make some venns…

## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Now make sure that the sets in the ups are not in the downs and vice versa:
## every unique_<condition>_up vector keeps only the genes that appear in none
## of the four 'pdown' sets (its own condition's included), and every
## unique_<condition>_down vector keeps only the genes that appear in none of
## the four 'pup' sets.
all_pdown_genes <- list(hupbst_pdown_genes, mupbst_pdown_genes,
                        mbpbst_pdown_genes, lbu_pdown_genes)
all_pup_genes <- list(hupbst_pup_genes, mupbst_pup_genes,
                      mbpbst_pup_genes, lbu_pup_genes)

## Return the elements of 'genes' that occur in none of 'excluded_sets'.
## The exclusion sets are applied one after another, in list order, which is
## the same hupbst -> mupbst -> mbpbst -> lbu order the filters were
## previously written out in by hand.  %nin% is the "not in" operator that is
## already in scope for this chunk.
drop_listed <- function(genes, excluded_sets) {
    Reduce(function(kept, excluded) kept[kept %nin% excluded],
           excluded_sets, genes)
}

unique_hupbst_up <- drop_listed(hupbst_pup_genes, all_pdown_genes)
unique_mupbst_up <- drop_listed(mupbst_pup_genes, all_pdown_genes)
unique_mbpbst_up <- drop_listed(mbpbst_pup_genes, all_pdown_genes)
unique_lbu_up <- drop_listed(lbu_pup_genes, all_pdown_genes)

unique_hupbst_down <- drop_listed(hupbst_pdown_genes, all_pup_genes)
unique_mupbst_down <- drop_listed(mupbst_pdown_genes, all_pup_genes)
unique_mbpbst_down <- drop_listed(mbpbst_pdown_genes, all_pup_genes)
unique_lbu_down <- drop_listed(lbu_pdown_genes, all_pup_genes)


## Key to the set letters:
##   a = hupbst, b = mupbst, c = mbpbst, d = lbu
library(Vennerable)
## Named list of the filtered 'up' gene sets; the names become the set labels
## of the Venn object.
input <- setNames(
    list(unique_hupbst_up, unique_mupbst_up, unique_mbpbst_up, unique_lbu_up),
    c("human_urine", "mouse_urine", "mouse_bladder", "lb_urea"))
venn_fun <- Venn(input)
## Keep the weights of the Venn object in 'start'
start <- Weights(venn_fun)
## Print the Venn object
venn_fun
## A Venn object on 4 sets named
## human_urine,mouse_urine,mouse_bladder,lb_urea 
## 0000 1000 0100 1100 0010 1010 0110 1110 0001 1001 0101 1101 0011 1011 0111 
##    0   51  131   43  223    4   60   18   93    3   17   16   13    4   16 
## 1111 
##   12

## A Venn object on 4 sets named
## human_urine,mouse_urine,mouse_bladder,lb_urea 
## 0000 1000 0100 1100 0010 1010 0110 1110 0001 1001 0101 1101 0011 1011 0111 
##    0   51  131   43  223    4   60   18   93    3   17   16   13    4   16 
## 1111 
##   12

5 Vince’s numbers

To use these numbers, I think the venneuler package is the best choice, as it can work with intersection names and counts rather than with the sets of items themselves.

Set      Description        down   up
A        Human Urine only     13   51
B        Mouse Urine only    150  131
C        Bladder (mouse)     190  222
D        Urea                210   93
A+B      Human and mouse      59   43
A+C      Human + Bladder       8    4
A+D      Human+Urea            7    3
B+C      Mouse + Bladder     101   59
B+D      Mouse+Urea           73   17
C+D      Urea+Bladder         14   13
A+B+C    All but urea         44   18
A+B+D    All but bladder      90   16
A+C+D    All but mouse         1    4
B+C+D    All but human        26   16
A+B+C+D  All                  38   12

## Loading required package: rJava

