The three others are: hpgl0837, hpgl0838, hpgl0839
# Copy the three remaining libraries into preprocessing/ and merge their
# bowtie alignments into a single combined.bam.
cd preprocessing
bamfiles=()
for sample in hpgl0837 hpgl0838 hpgl0839; do
  # -p keeps a re-run from failing when the directory already exists.
  mkdir -p "${sample}"
  # Sync straight into the sample directory instead of cd-ing in and out, so a
  # failed rsync cannot leave the shell stranded inside a sample directory.
  rsync -av ~/scratch/tnseq/spyogenes_5448v2/preprocessing/tnseq/"${sample}"/ "${sample}"/
  # Paths are relative to preprocessing/, where the data was just synced; the
  # previous "../" prefix pointed outside of preprocessing/.
  bamfiles+=("${sample}/outputs/bowtie_mgas_5005/${sample}-trimmed_ca_ta-v0M1.bam")
done
# Quoted array expansion passes each BAM path as exactly one argument.
samtools merge combined.bam "${bamfiles[@]}"
Done.
## The following should answer 1-5 above.
# Alignment summary (total, mapped, strand split) for library hpgl0837.
cd preprocessing/
bamtools stats -in hpgl0837.bam
##
## **********************************************
## Stats for BAM file(s):
## **********************************************
##
## Total reads: 4166286
## Mapped reads: 2172707 (52.1497%)
## Forward strand: 3061735 (73.4884%)
## Reverse strand: 1104551 (26.5116%)
## Failed QC: 0 (0%)
## Duplicates: 0 (0%)
## Paired-end reads: 0 (0%)
# Alignment summary (total, mapped, strand split) for library hpgl0838.
cd preprocessing/
bamtools stats -in hpgl0838.bam
##
## **********************************************
## Stats for BAM file(s):
## **********************************************
##
## Total reads: 3248362
## Mapped reads: 1536284 (47.2941%)
## Forward strand: 2573976 (79.2392%)
## Reverse strand: 674386 (20.7608%)
## Failed QC: 0 (0%)
## Duplicates: 0 (0%)
## Paired-end reads: 0 (0%)
# Alignment summary (total, mapped, strand split) for library hpgl0839.
cd preprocessing/
bamtools stats -in hpgl0839.bam
##
## **********************************************
## Stats for BAM file(s):
## **********************************************
##
## Total reads: 3925737
## Mapped reads: 2527090 (64.3724%)
## Forward strand: 2730038 (69.5421%)
## Reverse strand: 1195699 (30.458%)
## Failed QC: 0 (0%)
## Duplicates: 0 (0%)
## Paired-end reads: 0 (0%)
# Alignment summary (total, mapped, strand split) for the merged BAM.
cd preprocessing/
bamtools stats -in combined.bam
##
## **********************************************
## Stats for BAM file(s):
## **********************************************
##
## Total reads: 11340385
## Mapped reads: 6236081 (54.99%)
## Forward strand: 8365749 (73.7695%)
## Reverse strand: 2974636 (26.2305%)
## Failed QC: 0 (0%)
## Duplicates: 0 (0%)
## Paired-end reads: 0 (0%)
The answer for this is in the R function tnseq_saturation().
# Run tnseq_saturation() on the wig file of each individual library.
hpgl0837_wig <- "preprocessing/hpgl0837/outputs/essentiality/hpgl0837-trimmed_ca_ta-v0M1.wig"
hpgl0838_wig <- "preprocessing/hpgl0838/outputs/essentiality/hpgl0838-trimmed_ca_ta-v0M1.wig"
hpgl0839_wig <- "preprocessing/hpgl0839/outputs/essentiality/hpgl0839-trimmed_ca_ta-v0M1.wig"

hpgl0837_saturation <- tnseq_saturation(data = hpgl0837_wig)
hpgl0838_saturation <- tnseq_saturation(data = hpgl0838_wig)
hpgl0839_saturation <- tnseq_saturation(data = hpgl0839_wig)
## Ok, now have stats for the individual libraries.
# Pool the per-position read counts of the three libraries and compute the
# saturation statistics on the pooled counts.
#
# Both joins are keyed on "Start". merge() suffixes the clashing "Reads"
# columns of the first join, so after the second join the three libraries
# appear as Reads.x (hpgl0837), Reads.y (hpgl0838) and Reads (hpgl0839).
all_table <- merge(hpgl0837_saturation$hits_by_position,
                   hpgl0838_saturation$hits_by_position, by = "Start")
all_table <- merge(all_table,
                   hpgl0839_saturation$hits_by_position, by = "Start")
# Vectorized sum instead of a row-by-row loop: it is far faster on a table
# with one row per position and it is safe when the merge yields zero rows
# (1:nrow() would iterate over c(1, 0)). as.numeric() keeps the column a
# double, as it was when it was initialised with 0.
all_table$sum <- as.numeric(all_table[["Reads.x"]]) +
  all_table[["Reads.y"]] +
  all_table[["Reads"]]
all_table <- all_table[, c("Start", "sum")]
combined_saturation <- tnseq_saturation(data = all_table, column = "sum")
I presume, but am not certain, that this asks for the number of positions with more than a single hit (i.e. greater than singleton).
# Print the eq_0 and gt_1 elements of each saturation result (three individual
# libraries, then the pooled table). Presumably eq_0 is the number of positions
# with zero reads and gt_1 the number of positions with reads -- confirm
# against tnseq_saturation().
# NOTE(review): eq_0 + gt_1 is 132711 for every result below, which equals the
# line count of the tas.txt file (132712) minus its header. gt_1 therefore
# appears to include positions with exactly one read, not only those with
# more than one -- confirm.
hpgl0837_saturation$eq_0
## 0
## 124098
hpgl0837_saturation$gt_1
## [1] 8613
hpgl0838_saturation$eq_0
## 0
## 126356
hpgl0838_saturation$gt_1
## [1] 6355
hpgl0839_saturation$eq_0
## 0
## 118634
hpgl0839_saturation$gt_1
## [1] 14077
combined_saturation$eq_0
## 0
## 107917
combined_saturation$gt_1
## [1] 24794
# Print the 1st, 4th and 6th saturation ratios and the plot of each result
# (three individual libraries, then the pooled table).
# The printed ratios are named "1", "8" and "32" and are character strings.
# NOTE(review): ratios[1] matches gt_1 / eq_0 for every result (for example
# 8613 / 124098 = 0.0694 for hpgl0837); the "8" and "32" entries are presumably
# the same ratio at higher read-count thresholds -- confirm against
# tnseq_saturation().
hpgl0837_saturation$ratios[1]
## 1
## "0.06940483"
hpgl0837_saturation$ratios[4]
## 8
## "0.03237764"
hpgl0837_saturation$ratios[6]
## 32
## "0.02089478"
hpgl0837_saturation$plot
## Warning: Removed 2020 rows containing non-finite values (stat_bin).
## Warning: Removed 2020 rows containing non-finite values (stat_density).
## Warning: Removed 1 rows containing missing values (geom_bar).
hpgl0838_saturation$ratios[1]
## 1
## "0.05029441"
hpgl0838_saturation$ratios[4]
## 8
## "0.02428852"
hpgl0838_saturation$ratios[6]
## 32
## "0.01438792"
hpgl0838_saturation$plot
## Warning: Removed 1322 rows containing non-finite values (stat_bin).
## Warning: Removed 1322 rows containing non-finite values (stat_density).
## Warning: Removed 2 rows containing missing values (geom_bar).
hpgl0839_saturation$ratios[1]
## 1
## "0.11865907"
hpgl0839_saturation$ratios[4]
## 8
## "0.07349495"
hpgl0839_saturation$ratios[6]
## 32
## "0.05283477"
hpgl0839_saturation$plot
## Warning: Removed 4902 rows containing non-finite values (stat_bin).
## Warning: Removed 4902 rows containing non-finite values (stat_density).
## Warning: Removed 1 rows containing missing values (geom_bar).
combined_saturation$ratios[1]
## 1
## "0.22975064"
combined_saturation$ratios[4]
## 8
## "0.13490924"
combined_saturation$ratios[6]
## 32
## "0.09394257"
combined_saturation$plot
## Warning: Removed 7961 rows containing non-finite values (stat_bin).
## Warning: Removed 7961 rows containing non-finite values (stat_density).
## Warning: Removed 2 rows containing missing values (geom_bar).
The answer to this question should be easy to find in the annotation data for the genome and/or in the precursor files for essentiality (which collect the hits on every TA).
The following counts the number of lines in the tas.txt file. The answer is that line count minus 1, because the first line is a header.
# Count the entries of the per-TA table for hpgl0837.
# wc prints lines, words and bytes; the first number is the line count, which
# includes the header line.
cd preprocessing/hpgl0837/outputs/essentiality/
wc hpgl0837-trimmed_ca_ta-v0M1_tas.txt
## 132712 530848 1783514 hpgl0837-trimmed_ca_ta-v0M1_tas.txt