Load essentiality data
Note I edited the headers of the tables before loading them. They now look like: “Orf t3_k t3_n t3_r t3_s t3_zbar”
t0 = read.table("data/essentiality/old/t0_essentiality.txt", skip=7)
colnames(t0) = c("Orf","t0_k","t0_n","t0_r","t0_s","t0_zbar")
t1 = read.table("data/essentiality/old/t1_essentiality.txt", skip=7)
colnames(t1) = c("Orf","t1_k","t1_n","t1_r","t1_s","t1_zbar")
t2 = read.table("data/essentiality/old/t2_essentiality.txt")
colnames(t2) = c("Orf","t2_k","t2_n","t2_r","t2_s","t2_zbar")
t3 = read.table("data/essentiality/old/t3_essentiality.txt")
colnames(t3) = c("Orf","t3_k","t3_n","t3_r","t3_s","t3_zbar")
essentiality_data = merge(t0, t1, by.x="Orf", by.y="Orf")
essentiality_data = merge(essentiality_data, t2, by.x="Orf", by.y="Orf")
essentiality_data = merge(essentiality_data, t3, by.x="Orf", by.y="Orf")
XLConnect::createSheet(workbook, name="essentiality_all")
XLConnect::writeWorksheet(workbook, essentiality_data, sheet="essentiality_all")
XLConnect::saveWorkbook(workbook)
Yoann asked about a venn diagram of this data. Here are 3 methods, one including a size-weighted which is completely silly because most of the hits are in the intersection of all 3.
t0_essential = t0
t1_essential = t1
t2_essential = t2
t0_essential$answer = ifelse(t0$t0_zbar > 0.99, 1, 0)
t1_essential$answer = ifelse(t1$t1_zbar > 0.99, 1, 0)
t2_essential$answer = ifelse(t2$t2_zbar > 0.99, 1, 0)
t0_essential = t0_essential[, c(1, 7)]
t1_essential = t1_essential[, c(1, 7)]
t2_essential = t2_essential[, c(1, 7)]
rownames(t0_essential) = t0_essential$Orf
##t0_essential = t0_essential[, -1]
rownames(t1_essential) = t1_essential$Orf
##t1_essential = t1_essential[, -1]
rownames(t2_essential) = t2_essential$Orf
##t2_essential = t2_essential[, -1]
essential_results = merge(t0_essential, t1_essential, by='row.names')
rownames(essential_results) = essential_results$Row.names
essential_results = essential_results[,-1]
essential_results = merge(essential_results, t2_essential, by='row.names')
rownames(essential_results) = essential_results$Row.names
essential_results = essential_results[, -1]
essential_results <- essential_results[, c("answer.x", "answer.y", "answer")]
## This is using limma's vennDiagram function
colnames(essential_results) = c("t0", "t1", "t2")
essential_counts = limma::vennCounts(essential_results)
limma::vennDiagram(essential_counts,
counts.col = c("blue", "red", "green"))

## I can use that to easily make the variables for the VennDiagram library...
i1 = 1
i2 = 8
i3 = 61
i12 = 1
i13 = 4
i23 = 85
i123 = 230
area1 = i1 + i12 + i13 + i123
area2 = i2 + i12 + i23 + i123
area3 = i3 + i13 + i23 + i123
n123 = i123
n12 = i12 + i123
n13 = i13 + i123
n23 = i23 + i123
test = Vennerable::Venn(SetNames = c("t0","t1","t2"), Weight =
c('100' = i1, '010' = i2, '001' = i3, ## The loners
'110' = i12, '011' = i23, '101' = i13, ## The doubles
'111' = i123))
Vennerable::plot(test, doWeights = TRUE, type = "circles")

Vennerable::plot(test, doWeights = FALSE, type = "circles")
draw.triple.venn(area1, area2, area3, n12, n23, n13, n123,
category = rep("", 3), rotation = 1, reverse = FALSE, euler.d = TRUE,
scaled = TRUE, lwd = rep(2, 3), lty = rep("solid", 3),
col = rep("black", 3), fill = NULL, alpha = rep(0.5, 3),
label.col = rep("black", 7), cex = rep(1, 7), fontface = rep("plain", 7),
fontfamily = rep("serif", 7), cat.pos = c(-40, 40, 180),
cat.dist = c(0.05, 0.05, 0.025), cat.col = rep("black", 3),
cat.cex = rep(1, 3), cat.fontface = rep("plain", 3),
cat.fontfamily = rep("serif", 3),
cat.just = list(c(0.5, 1), c(0.5, 1), c(0.5, 0)), cat.default.pos = "outer",
cat.prompts = FALSE, rotation.degree = 0, rotation.centre = c(0.5, 0.5),
ind = TRUE, sep.dist = 0.05, offset = 0)

## (polygon[GRID.polygon.702], polygon[GRID.polygon.703], polygon[GRID.polygon.704], polygon[GRID.polygon.705], polygon[GRID.polygon.706], polygon[GRID.polygon.707], text[GRID.text.708], text[GRID.text.709], text[GRID.text.710], text[GRID.text.711], text[GRID.text.712], text[GRID.text.713], text[GRID.text.714], text[GRID.text.715], text[GRID.text.716], text[GRID.text.717])
Repeat for the Alabama and NZ131 strains.
Add annotation information for Alabama and NZ131 into the excel workbook
Run essentiality and add the essentiality output to the excel workbook…
I created an alabama directory inside the essentiality directory. It took me a few tries to get it correct, and I had to make some small edits to the perl script tnseq_TAs to accomodate the new strains, but I ran the perlscript in the same fashion. (Caveat: The perl script needs to be able to cross reference the name of the fasta sequence against the bam sequence name. This tripped me up terribly.
Upon completion, I wrote a tiny shell script to run the python essentiality package for each output file.
cd essentiality/alabama/
./README.sh
## This prints out the appropriate command lines for the TNseq_TAs.pl and gumbelMH.py commands.
That completed in some hours, providing the raw outputs from essentiality, which I again edited slightly to include the timepoint values. Let us load them into the excel spreadsheet.
alabama_t0 = read.table("data/essentiality/old/Alab49_t0_essentiality.out", skip=7, header=TRUE)
alabama_t1 = read.table("data/essentiality/old/Alab49_t1_essentiality.out", skip=7, header=TRUE)
alabama_t2 = read.table("data/essentiality/old/Alab49_t2_essentiality.out", skip=7, header=TRUE)
alabama_t3 = read.table("data/essentiality/old/Alab49_t3_essentiality.out", skip=7, header=TRUE)
nz_t0 = read.table("data/essentiality/old/NZ131_t0_essentiality.out", skip=7, header=TRUE)
nz_t1 = read.table("data/essentiality/old/NZ131_t1_essentiality.out", skip=7, header=TRUE)
nz_t2 = read.table("data/essentiality/old/NZ131_t2_essentiality.out", skip=7, header=TRUE)
nz_t3 = read.table("data/essentiality/old/NZ131_t3_essentiality.out", skip=7, header=TRUE)
alabama_essentials = merge(alabama_t0, alabama_t1, by.x="Orf", by.y="Orf")
alabama_essentials = merge(alabama_essentials, alabama_t2, by.x="Orf", by.y="Orf")
alabama_essentials = merge(alabama_essentials, alabama_t3, by.x="Orf", by.y="Orf")
nz_essentials = merge(nz_t0, nz_t1, by.x="Orf", by.y="Orf")
nz_essentials = merge(nz_essentials, nz_t2, by.x="Orf", by.y="Orf")
nz_essentials = merge(nz_essentials, nz_t3, by.x="Orf", by.y="Orf")
head(nz_essentials)
## Orf t0_k t0_n t0_r t0_s t0_zbar t1_k t1_n t1_r t1_s t1_zbar t2_k
## 1 Spy49_0001 1 105 56 657 1.000 14 105 13 150 0.001 3
## 2 Spy49_0002 2 99 97 1100 1.000 9 99 28 311 1.000 1
## 3 Spy49_0003 3 15 5 73 0.000 12 15 1 2 0.000 12
## 4 Spy49_0004 21 80 11 97 0.001 65 80 2 9 0.000 54
## 5 Spy49_0005 1 56 43 414 0.999 12 56 14 161 0.034 2
## 6 Spy49_0006 74 256 14 179 0.001 191 256 4 42 0.000 111
## t2_n t2_r t2_s t2_zbar t3_k t3_n t3_r t3_s t3_zbar
## 1 105 39 537 0.998 4 105 39 537 0.102
## 2 99 78 910 1.000 6 99 24 255 0.002
## 3 15 1 2 0.000 4 15 6 51 0.001
## 4 80 2 9 0.000 28 80 11 97 0.001
## 5 56 28 240 0.189 5 56 22 252 0.003
## 6 256 10 141 0.000 61 256 21 311 0.024
XLConnect::createSheet(workbook, name="alabama_essentiality")
XLConnect::writeWorksheet(workbook, alabama_essentials, sheet="alabama_essentiality")
XLConnect::saveWorkbook(workbook)
XLConnect::createSheet(workbook, name="nz131_essentiality")
XLConnect::writeWorksheet(workbook, nz_essentials, sheet="nz131_essentiality")
XLConnect::saveWorkbook(workbook)
---
title: "2015: The first TNSeq of S.pyogenes"
author: "atb abelew@gmail.com"
date: "`r Sys.Date()`"
output:
 html_document:
  code_download: true
  code_folding: show
  fig_caption: true
  fig_height: 7
  fig_width: 7
  highlight: default
  keep_md: false
  mode: selfcontained
  number_sections: true
  self_contained: true
  theme: readable
  toc: true
  toc_float:
    collapsed: false
    smooth_scroll: false
---

<style>
  body .main-container {
    max-width: 1600px;
  }
</style>

```{r options, include=FALSE}
if (!isTRUE(get0("skip_load"))) {
  library(hpgltools)
  tt <- devtools::load_all("~/hpgltools")
  knitr::opts_knit$set(progress=TRUE,
                       verbose=TRUE,
                       width=90,
                       echo=TRUE)
  knitr::opts_chunk$set(error=TRUE,
                        fig.width=8,
                        fig.height=8,
                        dpi=96)
  old_options <- options(digits=4,
                         stringsAsFactors=FALSE,
                         knitr.duplicate.label="allow")
  ggplot2::theme_set(ggplot2::theme_bw(base_size=10))
  ver <- "20171102"
  previous_file <- "index.Rmd"

  tmp <- try(sm(loadme(filename=paste0(gsub(pattern="\\.Rmd", replace="", x=previous_file), "-v", ver, ".rda.xz"))))
  rmd_file <- "essentiality.Rmd"
}
```

# Quick caveat:  This was one of my earliest analyses and it therefore is... bad.

I am therefore doing to bare minimum required to be able to once again glean
useful information from it.

```{r load, include=FALSE}
library(hpgltools)
please_install("VennDiagram")
load("RData")
workbook = XLConnect::loadWorkbook("excel/tnseq_workbook.xls", create=TRUE)
```

[last](cbcbSEQ.html),
<a href="#" onclick="history.go(-1)">go back</a>
[index](index.html), [next](essentiality.html)


# Essentiality

This is run with the following command line and prints out a table of z scores.
This requires a file TA_data.txt which in turn comes from a small perl script
which reads over the bam files and prints the # of insertions which fall exactly
in the correct orientation at each TA position in the genome.

```{r essentiality_running, engine='bash', eval=FALSE}
for dir in t0 t1 t2 t3; do
  perl tnseq_TAs.pl -i ../run3/libs/random_1mismatch/lib1_${dir}_sorted.bam 2>${dir}/tnseq_TAs.err 1>${dir}/tnseq_TAs.out
  python gumbelMH.py -f ${dir}/tnseq_TAs.out -m 1 -s 1000 2>${dir}/essentiality.err 1>${dir}/essentiality.out
done
```

Upon completion, I have 4 directories containing the essentiality.out files.
These are copied into [data/essentiality/](data/essentiality/).

# Load essentiality data

Note I edited the headers of the tables before loading them.  They now look like:
"Orf	t3_k	t3_n	t3_r	t3_s	t3_zbar"

```{r load_essentiality}
t0 = read.table("data/essentiality/old/t0_essentiality.txt", skip=7)
colnames(t0) = c("Orf","t0_k","t0_n","t0_r","t0_s","t0_zbar")
t1 = read.table("data/essentiality/old/t1_essentiality.txt", skip=7)
colnames(t1) = c("Orf","t1_k","t1_n","t1_r","t1_s","t1_zbar")
t2 = read.table("data/essentiality/old/t2_essentiality.txt")
colnames(t2) = c("Orf","t2_k","t2_n","t2_r","t2_s","t2_zbar")
t3 = read.table("data/essentiality/old/t3_essentiality.txt")
colnames(t3) = c("Orf","t3_k","t3_n","t3_r","t3_s","t3_zbar")
essentiality_data = merge(t0, t1, by.x="Orf", by.y="Orf")
essentiality_data = merge(essentiality_data, t2, by.x="Orf", by.y="Orf")
essentiality_data = merge(essentiality_data, t3, by.x="Orf", by.y="Orf")

XLConnect::createSheet(workbook, name="essentiality_all")
XLConnect::writeWorksheet(workbook, essentiality_data, sheet="essentiality_all")
XLConnect::saveWorkbook(workbook)
```

Yoann asked about a venn diagram of this data.  Here are 3 methods, one
including a size-weighted which is completely silly because most of the hits are
in the intersection of all 3.

```{r essentiality_venn}
t0_essential = t0
t1_essential = t1
t2_essential = t2
t0_essential$answer = ifelse(t0$t0_zbar > 0.99, 1, 0)
t1_essential$answer = ifelse(t1$t1_zbar > 0.99, 1, 0)
t2_essential$answer = ifelse(t2$t2_zbar > 0.99, 1, 0)
t0_essential = t0_essential[, c(1, 7)]
t1_essential = t1_essential[, c(1, 7)]
t2_essential = t2_essential[, c(1, 7)]
rownames(t0_essential) = t0_essential$Orf
##t0_essential = t0_essential[, -1]
rownames(t1_essential) = t1_essential$Orf
##t1_essential = t1_essential[, -1]
rownames(t2_essential) = t2_essential$Orf
##t2_essential = t2_essential[, -1]
essential_results = merge(t0_essential, t1_essential, by='row.names')
rownames(essential_results) = essential_results$Row.names
essential_results = essential_results[,-1]
essential_results = merge(essential_results, t2_essential, by='row.names')
rownames(essential_results) = essential_results$Row.names
essential_results = essential_results[, -1]
essential_results <- essential_results[, c("answer.x", "answer.y", "answer")]
## This is using limma's vennDiagram function
colnames(essential_results) = c("t0", "t1", "t2")
essential_counts = limma::vennCounts(essential_results)
limma::vennDiagram(essential_counts,
                   counts.col = c("blue", "red", "green"))
## I can use that to easily make the variables for the VennDiagram library...

i1 = 1
i2 = 8
i3 = 61
i12 = 1
i13 = 4
i23 = 85
i123 = 230

area1 = i1 + i12 + i13 + i123
area2 = i2 + i12 + i23 + i123
area3 = i3 + i13 + i23 + i123

n123 = i123
n12 = i12 + i123
n13 = i13 + i123
n23 = i23 + i123

test = Vennerable::Venn(SetNames = c("t0","t1","t2"), Weight =
    c('100' = i1, '010' = i2, '001' = i3, ## The loners
      '110' = i12, '011' = i23, '101' = i13,  ## The doubles
      '111' = i123))
Vennerable::plot(test, doWeights = TRUE, type = "circles")
Vennerable::plot(test, doWeights = FALSE, type = "circles")

draw.triple.venn(area1, area2, area3, n12, n23, n13, n123,
                 category = rep("", 3), rotation = 1, reverse = FALSE, euler.d = TRUE,
                 scaled = TRUE, lwd = rep(2, 3), lty = rep("solid", 3),
                 col = rep("black", 3), fill = NULL, alpha = rep(0.5, 3),
                 label.col = rep("black", 7), cex = rep(1, 7), fontface = rep("plain", 7),
                 fontfamily = rep("serif", 7), cat.pos = c(-40, 40, 180),
                 cat.dist = c(0.05, 0.05, 0.025), cat.col = rep("black", 3),
                 cat.cex = rep(1, 3), cat.fontface = rep("plain", 3),
                 cat.fontfamily = rep("serif", 3),
                 cat.just = list(c(0.5, 1), c(0.5, 1), c(0.5, 0)), cat.default.pos = "outer",
                 cat.prompts = FALSE, rotation.degree = 0, rotation.centre = c(0.5, 0.5),
                 ind = TRUE, sep.dist = 0.05, offset = 0)
```

# Repeat for the Alabama and NZ131 strains.

## Add annotation information for Alabama and NZ131 into the excel workbook

## Run essentiality and add the essentiality output to the excel workbook...

I created an alabama directory inside the essentiality directory.  It took me a few tries to get it correct, and I had to make some small edits to the perl script tnseq_TAs to accomodate the new strains, but I ran the perlscript in the same fashion.  (Caveat:  The perl script needs to be able to cross reference the name of the _fasta_ sequence against the bam sequence name.  This tripped me up terribly.

Upon completion, I wrote a tiny shell script to run the python essentiality package for each output file.

```{r alabama_essentiality, enging='bash', eval=FALSE}
cd essentiality/alabama/
./README.sh
## This prints out the appropriate command lines for the TNseq_TAs.pl and gumbelMH.py commands.
```

That completed in some hours, providing the raw outputs from essentiality, which I again edited slightly to include the timepoint values.  Let us load them into the excel spreadsheet.

```{r load_alabama_essentiality}
alabama_t0 = read.table("data/essentiality/old/Alab49_t0_essentiality.out", skip=7, header=TRUE)
alabama_t1 = read.table("data/essentiality/old/Alab49_t1_essentiality.out", skip=7, header=TRUE)
alabama_t2 = read.table("data/essentiality/old/Alab49_t2_essentiality.out", skip=7, header=TRUE)
alabama_t3 = read.table("data/essentiality/old/Alab49_t3_essentiality.out", skip=7, header=TRUE)
nz_t0 = read.table("data/essentiality/old/NZ131_t0_essentiality.out", skip=7, header=TRUE)
nz_t1 = read.table("data/essentiality/old/NZ131_t1_essentiality.out", skip=7, header=TRUE)
nz_t2 = read.table("data/essentiality/old/NZ131_t2_essentiality.out", skip=7, header=TRUE)
nz_t3 = read.table("data/essentiality/old/NZ131_t3_essentiality.out", skip=7, header=TRUE)
alabama_essentials = merge(alabama_t0, alabama_t1, by.x="Orf", by.y="Orf")
alabama_essentials = merge(alabama_essentials, alabama_t2, by.x="Orf", by.y="Orf")
alabama_essentials = merge(alabama_essentials, alabama_t3, by.x="Orf", by.y="Orf")
nz_essentials = merge(nz_t0, nz_t1, by.x="Orf", by.y="Orf")
nz_essentials = merge(nz_essentials, nz_t2, by.x="Orf", by.y="Orf")
nz_essentials = merge(nz_essentials, nz_t3, by.x="Orf", by.y="Orf")
head(nz_essentials)

XLConnect::createSheet(workbook, name="alabama_essentiality")
XLConnect::writeWorksheet(workbook, alabama_essentials, sheet="alabama_essentiality")
XLConnect::saveWorkbook(workbook)
XLConnect::createSheet(workbook, name="nz131_essentiality")
XLConnect::writeWorksheet(workbook, nz_essentials, sheet="nz131_essentiality")
XLConnect::saveWorkbook(workbook)
```

# Return

[last](cbcbSEQ.html),
<a href="#" onclick="history.go(-1)">go back</a>,
[index](index.html), [next](essentiality.html)

```{r run_date, results='asis', echo=FALSE}
email = "<a href='mailto:abelew@umd.edu'>Ashton Trey Belew</a>"
last_update = format(Sys.time(), "(<time>%Y-%m-%d</time>)")
cat(paste(email, last_update))
```

```{r save, include=FALSE}
save(list = ls(all=TRUE), file = "RData")
```
