These are rmarkdown documents which make heavy use of the hpgltools package. The following section demonstrates how to set that up in a clean R environment.
## Use R's install.packages to install devtools.
install.packages("devtools")
## Use devtools to install hpgltools.
devtools::install_github("abelew/hpgltools")
## Load hpgltools into the R environment.
library(hpgltools)
pander::pander(sessionInfo())
R version 3.5.1 (2018-07-02)
Platform: x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C
attached base packages: stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: hpgltools(v.2018.03)
loaded via a namespace (and not attached): Rcpp(v.0.12.17), compiler(v.3.5.1), pillar(v.1.3.0), plyr(v.1.8.4), bindr(v.0.1.1), base64enc(v.0.1-3), iterators(v.1.0.10), tools(v.3.5.1), digest(v.0.6.15), memoise(v.1.1.0), evaluate(v.0.10.1), tibble(v.1.4.2), gtable(v.0.2.0), pkgconfig(v.2.0.1), rlang(v.0.2.1), foreach(v.1.4.4), commonmark(v.1.5), yaml(v.2.1.19), parallel(v.3.5.1), bindrcpp(v.0.2.2), xml2(v.1.2.0), roxygen2(v.6.0.1), withr(v.2.1.2), stringr(v.1.3.1), dplyr(v.0.7.6), knitr(v.1.20), devtools(v.1.13.6), rprojroot(v.1.3-2), grid(v.3.5.1), tidyselect(v.0.2.4), glue(v.1.2.0), data.table(v.1.11.4), Biobase(v.2.40.0), R6(v.2.2.2), rmarkdown(v.1.10), pander(v.0.6.2), ggplot2(v.3.0.0), purrr(v.0.2.5), magrittr(v.1.5), backports(v.1.1.2), scales(v.0.5.0), codetools(v.0.2-15), htmltools(v.0.3.6), BiocGenerics(v.0.26.0), assertthat(v.0.2.0), colorspace(v.1.3-2), stringi(v.1.2.3), lazyeval(v.0.2.1), munsell(v.0.5.0) and crayon(v.1.3.4)
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
## Saving to index-v20170706.rda.xz
tt <- sm(saveme(filename=this_save))
The three others are: hpgl0906, hpgl0907, hpgl0908
cd preprocessing
mkdir hpgl0906 hpgl0907 hpgl0908
cd hpgl0906 && rsync -av ~/scratch/tnseq/spyogenes_5448v2/preprocessing/tnseq/hpgl0906/ ./ && cd ..
cd hpgl0907 && rsync -av ~/scratch/tnseq/spyogenes_5448v2/preprocessing/tnseq/hpgl0907/ ./ && cd ..
cd hpgl0908 && rsync -av ~/scratch/tnseq/spyogenes_5448v2/preprocessing/tnseq/hpgl0908/ ./ && cd ..
bamfiles="../hpgl0906/outputs/bowtie_mgas_5005/hpgl0906-trimmed_ca_ta-v0M1.bam \
../hpgl0907/outputs/bowtie_mgas_5005/hpgl0907-trimmed_ca_ta-v0M1.bam \
../hpgl0908/outputs/bowtie_mgas_5005/hpgl0908-trimmed_ca_ta-v0M1.bam"
samtools merge combined.bam ${bamfiles}
Done.
## The following should answer 1-5 above.
cd preprocessing/
bamtools stats < hpgl0906.bam
## bash: line 2: hpgl0906.bam: No such file or directory
cd preprocessing/
bamtools stats < hpgl0907.bam
## bash: line 1: hpgl0907.bam: No such file or directory
cd preprocessing/
bamtools stats < hpgl0908.bam
## bash: line 1: hpgl0908.bam: No such file or directory
cd preprocessing/
bamtools stats < combined.bam
## bash: line 1: combined.bam: No such file or directory
The answer for this is in the R function tnseq_saturation().
file <- "preprocessing/hpgl0906/outputs/essentiality/hpgl0906-v0M1.wig"
hpgl0906_saturation <- tnseq_saturation(data=file)
## Warning in max(data_list, na.rm = TRUE): no non-missing arguments to max;
## returning -Inf
## Error in FUN(X[[i]], ...): only defined on a data frame with all numeric variables
file <- "preprocessing/hpgl0907/outputs/essentiality/hpgl0907-v0M1.wig"
hpgl0907_saturation <- tnseq_saturation(data=file)
## Warning in max(data_list, na.rm = TRUE): no non-missing arguments to max;
## returning -Inf
## Error in FUN(X[[i]], ...): only defined on a data frame with all numeric variables
file <- "preprocessing/hpgl0908/outputs/essentiality/hpgl0908-v0M1.wig"
hpgl0908_saturation <- tnseq_saturation(data=file)
## Warning in max(data_list, na.rm = TRUE): no non-missing arguments to max;
## returning -Inf
## Error in FUN(X[[i]], ...): only defined on a data frame with all numeric variables
file <- "preprocessing/hpgl0909/outputs/essentiality/hpgl0909-v0M1.wig"
hpgl0909_saturation <- tnseq_saturation(data=file)
## Warning in max(data_list, na.rm = TRUE): no non-missing arguments to max;
## returning -Inf
## Error in FUN(X[[i]], ...): only defined on a data frame with all numeric variables
file <- "preprocessing/hpgl0910/outputs/essentiality/hpgl0910-v0M1.wig"
hpgl0910_saturation <- tnseq_saturation(data=file)
## Warning in max(data_list, na.rm = TRUE): no non-missing arguments to max;
## returning -Inf
## Error in FUN(X[[i]], ...): only defined on a data frame with all numeric variables
file <- "preprocessing/hpgl0911/outputs/essentiality/hpgl0911-v0M1.wig"
hpgl0911_saturation <- tnseq_saturation(data=file)
## Warning in max(data_list, na.rm = TRUE): no non-missing arguments to max;
## returning -Inf
## Error in FUN(X[[i]], ...): only defined on a data frame with all numeric variables
## Ok, now have stats for the individual libraries.
all_table <- merge(hpgl0906_saturation$hits_by_position,
hpgl0907_saturation$hits_by_position, by="Start")
## Error in merge(hpgl0906_saturation$hits_by_position, hpgl0907_saturation$hits_by_position, : object 'hpgl0906_saturation' not found
all_table <- merge(all_table,
hpgl0908_saturation$hits_by_position, by="Start")
## Error in merge(all_table, hpgl0908_saturation$hits_by_position, by = "Start"): object 'all_table' not found
all_table$sum <- 0
## Error in all_table$sum <- 0: object 'all_table' not found
for (r in 1:nrow(all_table)) {
all_table[r, "sum"] <- all_table[r, "Reads.x"] + all_table[r, "Reads.y"] + all_table[r, "Reads"]
}
## Error in nrow(all_table): object 'all_table' not found
all_table <- all_table[, c("Start", "sum")]
## Error in eval(expr, envir, enclos): object 'all_table' not found
combined_saturation <- tnseq_saturation(data=all_table, column="sum")
## Error in tnseq_saturation(data = all_table, column = "sum"): object 'all_table' not found
all_table2 <- merge(hpgl0909_saturation$hits_by_position,
hpgl0910_saturation$hits_by_position, by="Start")
## Error in merge(hpgl0909_saturation$hits_by_position, hpgl0910_saturation$hits_by_position, : object 'hpgl0909_saturation' not found
all_table2 <- merge(all_table2,
hpgl0911_saturation$hits_by_position, by="Start")
## Error in merge(all_table2, hpgl0911_saturation$hits_by_position, by = "Start"): object 'all_table2' not found
all_table2$sum <- 0
## Error in all_table2$sum <- 0: object 'all_table2' not found
for (r in 1:nrow(all_table2)) {
all_table2[r, "sum"] <- all_table2[r, "Reads.x"] + all_table2[r, "Reads.y"] + all_table2[r, "Reads"]
}
## Error in nrow(all_table2): object 'all_table2' not found
all_table2 <- all_table2[, c("Start", "sum")]
## Error in eval(expr, envir, enclos): object 'all_table2' not found
combined_saturation2 <- tnseq_saturation(data=all_table2, column="sum")
## Error in tnseq_saturation(data = all_table2, column = "sum"): object 'all_table2' not found
I presume but am not certain that this is the number of > singleton hits.
hpgl0906_saturation$eq_0
## Error in eval(expr, envir, enclos): object 'hpgl0906_saturation' not found
hpgl0906_saturation$gt_1
## Error in eval(expr, envir, enclos): object 'hpgl0906_saturation' not found
hpgl0907_saturation$eq_0
## Error in eval(expr, envir, enclos): object 'hpgl0907_saturation' not found
hpgl0907_saturation$gt_1
## Error in eval(expr, envir, enclos): object 'hpgl0907_saturation' not found
hpgl0908_saturation$eq_0
## Error in eval(expr, envir, enclos): object 'hpgl0908_saturation' not found
hpgl0908_saturation$gt_1
## Error in eval(expr, envir, enclos): object 'hpgl0908_saturation' not found
combined_saturation$eq_0
## Error in eval(expr, envir, enclos): object 'combined_saturation' not found
combined_saturation$gt_1
## Error in eval(expr, envir, enclos): object 'combined_saturation' not found
hpgl0909_saturation$eq_0
## Error in eval(expr, envir, enclos): object 'hpgl0909_saturation' not found
hpgl0909_saturation$gt_1
## Error in eval(expr, envir, enclos): object 'hpgl0909_saturation' not found
hpgl0910_saturation$eq_0
## Error in eval(expr, envir, enclos): object 'hpgl0910_saturation' not found
hpgl0910_saturation$gt_1
## Error in eval(expr, envir, enclos): object 'hpgl0910_saturation' not found
hpgl0911_saturation$eq_0
## Error in eval(expr, envir, enclos): object 'hpgl0911_saturation' not found
hpgl0911_saturation$gt_1
## Error in eval(expr, envir, enclos): object 'hpgl0911_saturation' not found
combined_saturation2$eq_0
## Error in eval(expr, envir, enclos): object 'combined_saturation2' not found
combined_saturation2$gt_1
## Error in eval(expr, envir, enclos): object 'combined_saturation2' not found
hpgl0906_saturation$ratios[1]
## Error in eval(expr, envir, enclos): object 'hpgl0906_saturation' not found
hpgl0906_saturation$ratios[4]
## Error in eval(expr, envir, enclos): object 'hpgl0906_saturation' not found
hpgl0906_saturation$ratios[6]
## Error in eval(expr, envir, enclos): object 'hpgl0906_saturation' not found
hpgl0906_saturation$plot
## Error in eval(expr, envir, enclos): object 'hpgl0906_saturation' not found
hpgl0907_saturation$ratios[1]
## Error in eval(expr, envir, enclos): object 'hpgl0907_saturation' not found
hpgl0907_saturation$ratios[4]
## Error in eval(expr, envir, enclos): object 'hpgl0907_saturation' not found
hpgl0907_saturation$ratios[6]
## Error in eval(expr, envir, enclos): object 'hpgl0907_saturation' not found
hpgl0907_saturation$plot
## Error in eval(expr, envir, enclos): object 'hpgl0907_saturation' not found
hpgl0908_saturation$ratios[1]
## Error in eval(expr, envir, enclos): object 'hpgl0908_saturation' not found
hpgl0908_saturation$ratios[4]
## Error in eval(expr, envir, enclos): object 'hpgl0908_saturation' not found
hpgl0908_saturation$ratios[6]
## Error in eval(expr, envir, enclos): object 'hpgl0908_saturation' not found
hpgl0908_saturation$plot
## Error in eval(expr, envir, enclos): object 'hpgl0908_saturation' not found
combined_saturation$ratios[1]
## Error in eval(expr, envir, enclos): object 'combined_saturation' not found
combined_saturation$ratios[4]
## Error in eval(expr, envir, enclos): object 'combined_saturation' not found
combined_saturation$ratios[6]
## Error in eval(expr, envir, enclos): object 'combined_saturation' not found
combined_saturation$plot
## Error in eval(expr, envir, enclos): object 'combined_saturation' not found
The answer to this question should be easily searchable in either the annotation data for the genome and/or the precursor files for essentiality (which collects hits on every TA).
The following counts the number of lines in the tas.txt file. The answer should be that -1, as the first line is a header.
cd preprocessing/hpgl0906/outputs/essentiality/
wc hpgl0906-trimmed_ca_ta-v0M1_tas.txt
## wc: hpgl0906-trimmed_ca_ta-v0M1_tas.txt: No such file or directory