1 Quick caveat: This was one of my earliest analyses and it therefore is… bad.

I am therefore doing to bare minimum required to be able to once again glean useful information from it.

last, go back index, next

2 Essentiality

This is run with the following command line and prints out a table of z scores. This requires a file TA_data.txt which in turn comes from a small perl script which reads over the bam files and prints the # of insertions which fall exactly in the correct orientation at each TA position in the genome.

for dir in t0 t1 t2 t3; do
  perl tnseq_TAs.pl -i ../run3/libs/random_1mismatch/lib1_${dir}_sorted.bam 2>${dir}/tnseq_TAs.err 1>${dir}/tnseq_TAs.out
  python gumbelMH.py -f ${dir}/tnseq_TAs.out -m 1 -s 1000 2>${dir}/essentiality.err 1>${dir}/essentiality.out
done

Upon completion, I have 4 directories containing the essentiality.out files. These are copied into data/essentiality/.

3 Load essentiality data

Note I edited the headers of the tables before loading them. They now look like: “Orf t3_k t3_n t3_r t3_s t3_zbar”

t0 = read.table("data/essentiality/old/t0_essentiality.txt", skip=7)
colnames(t0) = c("Orf","t0_k","t0_n","t0_r","t0_s","t0_zbar")
t1 = read.table("data/essentiality/old/t1_essentiality.txt", skip=7)
colnames(t1) = c("Orf","t1_k","t1_n","t1_r","t1_s","t1_zbar")
t2 = read.table("data/essentiality/old/t2_essentiality.txt")
colnames(t2) = c("Orf","t2_k","t2_n","t2_r","t2_s","t2_zbar")
t3 = read.table("data/essentiality/old/t3_essentiality.txt")
colnames(t3) = c("Orf","t3_k","t3_n","t3_r","t3_s","t3_zbar")
essentiality_data = merge(t0, t1, by.x="Orf", by.y="Orf")
essentiality_data = merge(essentiality_data, t2, by.x="Orf", by.y="Orf")
essentiality_data = merge(essentiality_data, t3, by.x="Orf", by.y="Orf")

XLConnect::createSheet(workbook, name="essentiality_all")
XLConnect::writeWorksheet(workbook, essentiality_data, sheet="essentiality_all")
XLConnect::saveWorkbook(workbook)

Yoann asked about a venn diagram of this data. Here are 3 methods, one including a size-weighted which is completely silly because most of the hits are in the intersection of all 3.

t0_essential = t0
t1_essential = t1
t2_essential = t2
t0_essential$answer = ifelse(t0$t0_zbar > 0.99, 1, 0)
t1_essential$answer = ifelse(t1$t1_zbar > 0.99, 1, 0)
t2_essential$answer = ifelse(t2$t2_zbar > 0.99, 1, 0)
t0_essential = t0_essential[, c(1, 7)]
t1_essential = t1_essential[, c(1, 7)]
t2_essential = t2_essential[, c(1, 7)]
rownames(t0_essential) = t0_essential$Orf
##t0_essential = t0_essential[, -1]
rownames(t1_essential) = t1_essential$Orf
##t1_essential = t1_essential[, -1]
rownames(t2_essential) = t2_essential$Orf
##t2_essential = t2_essential[, -1]
essential_results = merge(t0_essential, t1_essential, by='row.names')
rownames(essential_results) = essential_results$Row.names
essential_results = essential_results[,-1]
essential_results = merge(essential_results, t2_essential, by='row.names')
rownames(essential_results) = essential_results$Row.names
essential_results = essential_results[, -1]
essential_results <- essential_results[, c("answer.x", "answer.y", "answer")]
## This is using limma's vennDiagram function
colnames(essential_results) = c("t0", "t1", "t2")
essential_counts = limma::vennCounts(essential_results)
limma::vennDiagram(essential_counts,
                   counts.col = c("blue", "red", "green"))

## I can use that to easily make the variables for the VennDiagram library...

i1 = 1
i2 = 8
i3 = 61
i12 = 1
i13 = 4
i23 = 85
i123 = 230

area1 = i1 + i12 + i13 + i123
area2 = i2 + i12 + i23 + i123
area3 = i3 + i13 + i23 + i123

n123 = i123
n12 = i12 + i123
n13 = i13 + i123
n23 = i23 + i123

test = Vennerable::Venn(SetNames = c("t0","t1","t2"), Weight =
    c('100' = i1, '010' = i2, '001' = i3, ## The loners
      '110' = i12, '011' = i23, '101' = i13,  ## The doubles
      '111' = i123))
Vennerable::plot(test, doWeights = TRUE, type = "circles")

Vennerable::plot(test, doWeights = FALSE, type = "circles")

draw.triple.venn(area1, area2, area3, n12, n23, n13, n123,
                 category = rep("", 3), rotation = 1, reverse = FALSE, euler.d = TRUE,
                 scaled = TRUE, lwd = rep(2, 3), lty = rep("solid", 3),
                 col = rep("black", 3), fill = NULL, alpha = rep(0.5, 3),
                 label.col = rep("black", 7), cex = rep(1, 7), fontface = rep("plain", 7),
                 fontfamily = rep("serif", 7), cat.pos = c(-40, 40, 180),
                 cat.dist = c(0.05, 0.05, 0.025), cat.col = rep("black", 3),
                 cat.cex = rep(1, 3), cat.fontface = rep("plain", 3),
                 cat.fontfamily = rep("serif", 3),
                 cat.just = list(c(0.5, 1), c(0.5, 1), c(0.5, 0)), cat.default.pos = "outer",
                 cat.prompts = FALSE, rotation.degree = 0, rotation.centre = c(0.5, 0.5),
                 ind = TRUE, sep.dist = 0.05, offset = 0)

## (polygon[GRID.polygon.702], polygon[GRID.polygon.703], polygon[GRID.polygon.704], polygon[GRID.polygon.705], polygon[GRID.polygon.706], polygon[GRID.polygon.707], text[GRID.text.708], text[GRID.text.709], text[GRID.text.710], text[GRID.text.711], text[GRID.text.712], text[GRID.text.713], text[GRID.text.714], text[GRID.text.715], text[GRID.text.716], text[GRID.text.717])

4 Repeat for the Alabama and NZ131 strains.

4.1 Add annotation information for Alabama and NZ131 into the excel workbook

4.2 Run essentiality and add the essentiality output to the excel workbook…

I created an alabama directory inside the essentiality directory. It took me a few tries to get it correct, and I had to make some small edits to the perl script tnseq_TAs to accomodate the new strains, but I ran the perlscript in the same fashion. (Caveat: The perl script needs to be able to cross reference the name of the fasta sequence against the bam sequence name. This tripped me up terribly.

Upon completion, I wrote a tiny shell script to run the python essentiality package for each output file.

cd essentiality/alabama/
./README.sh
## This prints out the appropriate command lines for the TNseq_TAs.pl and gumbelMH.py commands.

That completed in some hours, providing the raw outputs from essentiality, which I again edited slightly to include the timepoint values. Let us load them into the excel spreadsheet.

alabama_t0 = read.table("data/essentiality/old/Alab49_t0_essentiality.out", skip=7, header=TRUE)
alabama_t1 = read.table("data/essentiality/old/Alab49_t1_essentiality.out", skip=7, header=TRUE)
alabama_t2 = read.table("data/essentiality/old/Alab49_t2_essentiality.out", skip=7, header=TRUE)
alabama_t3 = read.table("data/essentiality/old/Alab49_t3_essentiality.out", skip=7, header=TRUE)
nz_t0 = read.table("data/essentiality/old/NZ131_t0_essentiality.out", skip=7, header=TRUE)
nz_t1 = read.table("data/essentiality/old/NZ131_t1_essentiality.out", skip=7, header=TRUE)
nz_t2 = read.table("data/essentiality/old/NZ131_t2_essentiality.out", skip=7, header=TRUE)
nz_t3 = read.table("data/essentiality/old/NZ131_t3_essentiality.out", skip=7, header=TRUE)
alabama_essentials = merge(alabama_t0, alabama_t1, by.x="Orf", by.y="Orf")
alabama_essentials = merge(alabama_essentials, alabama_t2, by.x="Orf", by.y="Orf")
alabama_essentials = merge(alabama_essentials, alabama_t3, by.x="Orf", by.y="Orf")
nz_essentials = merge(nz_t0, nz_t1, by.x="Orf", by.y="Orf")
nz_essentials = merge(nz_essentials, nz_t2, by.x="Orf", by.y="Orf")
nz_essentials = merge(nz_essentials, nz_t3, by.x="Orf", by.y="Orf")
head(nz_essentials)
##          Orf t0_k t0_n t0_r t0_s t0_zbar t1_k t1_n t1_r t1_s t1_zbar t2_k
## 1 Spy49_0001    1  105   56  657   1.000   14  105   13  150   0.001    3
## 2 Spy49_0002    2   99   97 1100   1.000    9   99   28  311   1.000    1
## 3 Spy49_0003    3   15    5   73   0.000   12   15    1    2   0.000   12
## 4 Spy49_0004   21   80   11   97   0.001   65   80    2    9   0.000   54
## 5 Spy49_0005    1   56   43  414   0.999   12   56   14  161   0.034    2
## 6 Spy49_0006   74  256   14  179   0.001  191  256    4   42   0.000  111
##   t2_n t2_r t2_s t2_zbar t3_k t3_n t3_r t3_s t3_zbar
## 1  105   39  537   0.998    4  105   39  537   0.102
## 2   99   78  910   1.000    6   99   24  255   0.002
## 3   15    1    2   0.000    4   15    6   51   0.001
## 4   80    2    9   0.000   28   80   11   97   0.001
## 5   56   28  240   0.189    5   56   22  252   0.003
## 6  256   10  141   0.000   61  256   21  311   0.024
XLConnect::createSheet(workbook, name="alabama_essentiality")
XLConnect::writeWorksheet(workbook, alabama_essentials, sheet="alabama_essentiality")
XLConnect::saveWorkbook(workbook)
XLConnect::createSheet(workbook, name="nz131_essentiality")
XLConnect::writeWorksheet(workbook, nz_essentials, sheet="nz131_essentiality")
XLConnect::saveWorkbook(workbook)

5 Return

last, go back, index, next

Ashton Trey Belew ()

---
title: "2015: The first TNSeq of S.pyogenes"
author: "atb abelew@gmail.com"
date: "`r Sys.Date()`"
output:
 html_document:
  code_download: true
  code_folding: show
  fig_caption: true
  fig_height: 7
  fig_width: 7
  highlight: default
  keep_md: false
  mode: selfcontained
  number_sections: true
  self_contained: true
  theme: readable
  toc: true
  toc_float:
    collapsed: false
    smooth_scroll: false
---

<style>
  body .main-container {
    max-width: 1600px;
  }
</style>

```{r options, include=FALSE}
if (!isTRUE(get0("skip_load"))) {
  library(hpgltools)
  tt <- devtools::load_all("~/hpgltools")
  knitr::opts_knit$set(progress=TRUE,
                       verbose=TRUE,
                       width=90,
                       echo=TRUE)
  knitr::opts_chunk$set(error=TRUE,
                        fig.width=8,
                        fig.height=8,
                        dpi=96)
  old_options <- options(digits=4,
                         stringsAsFactors=FALSE,
                         knitr.duplicate.label="allow")
  ggplot2::theme_set(ggplot2::theme_bw(base_size=10))
  ver <- "20171102"
  previous_file <- "index.Rmd"

  tmp <- try(sm(loadme(filename=paste0(gsub(pattern="\\.Rmd", replace="", x=previous_file), "-v", ver, ".rda.xz"))))
  rmd_file <- "essentiality.Rmd"
}
```

# Quick caveat:  This was one of my earliest analyses and it therefore is... bad.

I am therefore doing to bare minimum required to be able to once again glean
useful information from it.

```{r load, include=FALSE}
library(hpgltools)
please_install("VennDiagram")
load("RData")
workbook = XLConnect::loadWorkbook("excel/tnseq_workbook.xls", create=TRUE)
```

[last](cbcbSEQ.html),
<a href="#" onclick="history.go(-1)">go back</a>
[index](index.html), [next](essentiality.html)


# Essentiality

This is run with the following command line and prints out a table of z scores.
This requires a file TA_data.txt which in turn comes from a small perl script
which reads over the bam files and prints the # of insertions which fall exactly
in the correct orientation at each TA position in the genome.

```{r essentiality_running, engine='bash', eval=FALSE}
for dir in t0 t1 t2 t3; do
  perl tnseq_TAs.pl -i ../run3/libs/random_1mismatch/lib1_${dir}_sorted.bam 2>${dir}/tnseq_TAs.err 1>${dir}/tnseq_TAs.out
  python gumbelMH.py -f ${dir}/tnseq_TAs.out -m 1 -s 1000 2>${dir}/essentiality.err 1>${dir}/essentiality.out
done
```

Upon completion, I have 4 directories containing the essentiality.out files.
These are copied into [data/essentiality/](data/essentiality/).

# Load essentiality data

Note I edited the headers of the tables before loading them.  They now look like:
"Orf	t3_k	t3_n	t3_r	t3_s	t3_zbar"

```{r load_essentiality}
t0 = read.table("data/essentiality/old/t0_essentiality.txt", skip=7)
colnames(t0) = c("Orf","t0_k","t0_n","t0_r","t0_s","t0_zbar")
t1 = read.table("data/essentiality/old/t1_essentiality.txt", skip=7)
colnames(t1) = c("Orf","t1_k","t1_n","t1_r","t1_s","t1_zbar")
t2 = read.table("data/essentiality/old/t2_essentiality.txt")
colnames(t2) = c("Orf","t2_k","t2_n","t2_r","t2_s","t2_zbar")
t3 = read.table("data/essentiality/old/t3_essentiality.txt")
colnames(t3) = c("Orf","t3_k","t3_n","t3_r","t3_s","t3_zbar")
essentiality_data = merge(t0, t1, by.x="Orf", by.y="Orf")
essentiality_data = merge(essentiality_data, t2, by.x="Orf", by.y="Orf")
essentiality_data = merge(essentiality_data, t3, by.x="Orf", by.y="Orf")

XLConnect::createSheet(workbook, name="essentiality_all")
XLConnect::writeWorksheet(workbook, essentiality_data, sheet="essentiality_all")
XLConnect::saveWorkbook(workbook)
```

Yoann asked about a venn diagram of this data.  Here are 3 methods, one
including a size-weighted which is completely silly because most of the hits are
in the intersection of all 3.

```{r essentiality_venn}
t0_essential = t0
t1_essential = t1
t2_essential = t2
t0_essential$answer = ifelse(t0$t0_zbar > 0.99, 1, 0)
t1_essential$answer = ifelse(t1$t1_zbar > 0.99, 1, 0)
t2_essential$answer = ifelse(t2$t2_zbar > 0.99, 1, 0)
t0_essential = t0_essential[, c(1, 7)]
t1_essential = t1_essential[, c(1, 7)]
t2_essential = t2_essential[, c(1, 7)]
rownames(t0_essential) = t0_essential$Orf
##t0_essential = t0_essential[, -1]
rownames(t1_essential) = t1_essential$Orf
##t1_essential = t1_essential[, -1]
rownames(t2_essential) = t2_essential$Orf
##t2_essential = t2_essential[, -1]
essential_results = merge(t0_essential, t1_essential, by='row.names')
rownames(essential_results) = essential_results$Row.names
essential_results = essential_results[,-1]
essential_results = merge(essential_results, t2_essential, by='row.names')
rownames(essential_results) = essential_results$Row.names
essential_results = essential_results[, -1]
essential_results <- essential_results[, c("answer.x", "answer.y", "answer")]
## This is using limma's vennDiagram function
colnames(essential_results) = c("t0", "t1", "t2")
essential_counts = limma::vennCounts(essential_results)
limma::vennDiagram(essential_counts,
                   counts.col = c("blue", "red", "green"))
## I can use that to easily make the variables for the VennDiagram library...

i1 = 1
i2 = 8
i3 = 61
i12 = 1
i13 = 4
i23 = 85
i123 = 230

area1 = i1 + i12 + i13 + i123
area2 = i2 + i12 + i23 + i123
area3 = i3 + i13 + i23 + i123

n123 = i123
n12 = i12 + i123
n13 = i13 + i123
n23 = i23 + i123

test = Vennerable::Venn(SetNames = c("t0","t1","t2"), Weight =
    c('100' = i1, '010' = i2, '001' = i3, ## The loners
      '110' = i12, '011' = i23, '101' = i13,  ## The doubles
      '111' = i123))
Vennerable::plot(test, doWeights = TRUE, type = "circles")
Vennerable::plot(test, doWeights = FALSE, type = "circles")

draw.triple.venn(area1, area2, area3, n12, n23, n13, n123,
                 category = rep("", 3), rotation = 1, reverse = FALSE, euler.d = TRUE,
                 scaled = TRUE, lwd = rep(2, 3), lty = rep("solid", 3),
                 col = rep("black", 3), fill = NULL, alpha = rep(0.5, 3),
                 label.col = rep("black", 7), cex = rep(1, 7), fontface = rep("plain", 7),
                 fontfamily = rep("serif", 7), cat.pos = c(-40, 40, 180),
                 cat.dist = c(0.05, 0.05, 0.025), cat.col = rep("black", 3),
                 cat.cex = rep(1, 3), cat.fontface = rep("plain", 3),
                 cat.fontfamily = rep("serif", 3),
                 cat.just = list(c(0.5, 1), c(0.5, 1), c(0.5, 0)), cat.default.pos = "outer",
                 cat.prompts = FALSE, rotation.degree = 0, rotation.centre = c(0.5, 0.5),
                 ind = TRUE, sep.dist = 0.05, offset = 0)
```

# Repeat for the Alabama and NZ131 strains.

## Add annotation information for Alabama and NZ131 into the excel workbook

## Run essentiality and add the essentiality output to the excel workbook...

I created an alabama directory inside the essentiality directory.  It took me a few tries to get it correct, and I had to make some small edits to the perl script tnseq_TAs to accomodate the new strains, but I ran the perlscript in the same fashion.  (Caveat:  The perl script needs to be able to cross reference the name of the _fasta_ sequence against the bam sequence name.  This tripped me up terribly.

Upon completion, I wrote a tiny shell script to run the python essentiality package for each output file.

```{r alabama_essentiality, enging='bash', eval=FALSE}
cd essentiality/alabama/
./README.sh
## This prints out the appropriate command lines for the TNseq_TAs.pl and gumbelMH.py commands.
```

That completed in some hours, providing the raw outputs from essentiality, which I again edited slightly to include the timepoint values.  Let us load them into the excel spreadsheet.

```{r load_alabama_essentiality}
alabama_t0 = read.table("data/essentiality/old/Alab49_t0_essentiality.out", skip=7, header=TRUE)
alabama_t1 = read.table("data/essentiality/old/Alab49_t1_essentiality.out", skip=7, header=TRUE)
alabama_t2 = read.table("data/essentiality/old/Alab49_t2_essentiality.out", skip=7, header=TRUE)
alabama_t3 = read.table("data/essentiality/old/Alab49_t3_essentiality.out", skip=7, header=TRUE)
nz_t0 = read.table("data/essentiality/old/NZ131_t0_essentiality.out", skip=7, header=TRUE)
nz_t1 = read.table("data/essentiality/old/NZ131_t1_essentiality.out", skip=7, header=TRUE)
nz_t2 = read.table("data/essentiality/old/NZ131_t2_essentiality.out", skip=7, header=TRUE)
nz_t3 = read.table("data/essentiality/old/NZ131_t3_essentiality.out", skip=7, header=TRUE)
alabama_essentials = merge(alabama_t0, alabama_t1, by.x="Orf", by.y="Orf")
alabama_essentials = merge(alabama_essentials, alabama_t2, by.x="Orf", by.y="Orf")
alabama_essentials = merge(alabama_essentials, alabama_t3, by.x="Orf", by.y="Orf")
nz_essentials = merge(nz_t0, nz_t1, by.x="Orf", by.y="Orf")
nz_essentials = merge(nz_essentials, nz_t2, by.x="Orf", by.y="Orf")
nz_essentials = merge(nz_essentials, nz_t3, by.x="Orf", by.y="Orf")
head(nz_essentials)

XLConnect::createSheet(workbook, name="alabama_essentiality")
XLConnect::writeWorksheet(workbook, alabama_essentials, sheet="alabama_essentiality")
XLConnect::saveWorkbook(workbook)
XLConnect::createSheet(workbook, name="nz131_essentiality")
XLConnect::writeWorksheet(workbook, nz_essentials, sheet="nz131_essentiality")
XLConnect::saveWorkbook(workbook)
```

# Return

[last](cbcbSEQ.html),
<a href="#" onclick="history.go(-1)">go back</a>,
[index](index.html), [next](essentiality.html)

```{r run_date, results='asis', echo=FALSE}
email = "<a href='mailto:abelew@umd.edu'>Ashton Trey Belew</a>"
last_update = format(Sys.time(), "(<time>%Y-%m-%d</time>)")
cat(paste(email, last_update))
```

```{r save, include=FALSE}
save(list = ls(all=TRUE), file = "RData")
```
