The following section loads the MicrobesOnline and GenBank annotations for Mycobacterium tuberculosis H37Rv.
## Looks like it is taxon ID 83332
# Load the MicrobesOnline annotations for M. tuberculosis H37Rv and coerce
# the result to a plain data frame.
mtb_annotations <- as.data.frame(
  load_microbesonline_annotations(species = "Mycobacterium tuberculosis H37Rv")
)
## Found 1 entry.
## Genome Phylum Paper Loaded Complete
## 2178 Mycobacterium tuberculosis H37Rv Actinobacteria yes 2007-05-08 yes
## #Chr. #Plasmids #Genes tax_id
## 2178 1 0 4047 83332
## The species being downloaded is: Mycobacterium tuberculosis H37Rv
## Downloading: http://www.microbesonline.org/cgi-bin/genomeInfo.cgi?tId=83332;export=tab
# Render the first six annotation rows as a table in the report.
knitr::kable(head(mtb_annotations, n = 6L))
locusId | accession | GI | scaffoldId | start | stop | strand | sysName | name | desc | COG | COGFun | COGDesc | TIGRFam | TIGRRoles | GO | EC | ECDesc |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
31772 | NP_214515.1 | 15607143 | 7022 | 1 | 1524 | + | Rv0001 | dnaA | chromosomal replication initiation protein (NCBI) | COG593 | L | ATPase involved in DNA replication initiation | TIGR00362 chromosomal replication initiator protein DnaA [dnaA] | DNA metabolism:DNA replication, recombination, and repair | GO:0006270,GO:0006275,GO:0003688,GO:0017111,GO:0005524 | NA | NA |
31773 | NP_214516.1 | 15607144 | 7022 | 2052 | 3260 | + | Rv0002 | dnaN | DNA polymerase III subunit beta (NCBI) | COG592 | L | DNA polymerase sliding clamp subunit (PCNA homolog) | TIGR00663 DNA polymerase III, beta subunit [dnaN] | DNA metabolism:DNA replication, recombination, and repair | GO:0006260,GO:0003677,GO:0003893,GO:0008408,GO:0016449,GO:0019984,GO:0003889,GO:0003894,GO:0015999,GO:0016450,GO:0003890,GO:0003895,GO:0016000,GO:0016451,GO:0003891,GO:0016448,GO:0016452 | 2.7.7.7 | DNA-directed DNA polymerase. |
31774 | NP_214517.1 | 15607145 | 7022 | 3280 | 4437 | + | Rv0003 | recF | recombination protein F (NCBI) | COG1195 | L | Recombinational DNA repair ATPase (RecF pathway) | TIGR00611 DNA replication and repair protein RecF [recF] | DNA metabolism:DNA replication, recombination, and repair | GO:0006281,GO:0005694,GO:0003697,GO:0005524 | NA | NA |
31775 | NP_214518.1 | 15607146 | 7022 | 4434 | 4997 | + | Rv0004 | Rv0004 | hypothetical protein (NCBI) | COG5512 | R | Zn-ribbon-containing, possibly RNA-binding protein and truncated derivatives | NA | NA | NA | NA | NA |
31776 | NP_214519.1 | 15607147 | 7022 | 5123 | 7267 | + | Rv0005 | gyrB | DNA topoisomerase IV subunit B (NCBI) | COG187 | L | Type IIA topoisomerase (DNA gyrase/topo II, topoisomerase IV), B subunit | TIGR01059 DNA gyrase, B subunit [gyrB] | DNA metabolism:DNA replication, recombination, and repair | GO:0006304,GO:0006265,GO:0005694,GO:0003918,GO:0005524 | 5.99.1.3 | DNA topoisomerase (ATP-hydrolyzing). |
31777 | NP_214520.1 | 15607148 | 7022 | 7302 | 9818 | + | Rv0006 | gyrA | DNA gyrase subunit A (NCBI) | COG188 | L | Type IIA topoisomerase (DNA gyrase/topo II, topoisomerase IV), A subunit | TIGR01063 DNA gyrase, A subunit [gyrA] | DNA metabolism:DNA replication, recombination, and repair | GO:0006265,GO:0006268,GO:0005694,GO:0003918,GO:0005509,GO:0005524 | 5.99.1.3 | DNA topoisomerase (ATP-hydrolyzing). |
# Load the MicrobesOnline GO table for the same strain, passing "sysName"
# as the ID column.
mtb_go <- load_microbesonline_go(
  species = "Mycobacterium tuberculosis H37Rv",
  id_column = "sysName"
)
## Found 1 entry.
## Genome Phylum Paper Loaded Complete
## 2178 Mycobacterium tuberculosis H37Rv Actinobacteria yes 2007-05-08 yes
## #Chr. #Plasmids #Genes tax_id
## 2178 1 0 4047 83332
## The species being downloaded is: Mycobacterium tuberculosis H37Rv and is being downloaded as 83332.tab.
# Rename the GO table columns to ID and GO
# (assumes mtb_go has exactly two columns -- TODO confirm).
colnames(mtb_go) <- c("ID", "GO")
# Import the H37Rv GFF annotations from the local genome library.
mtb_gff <- load_gff_annotations(
  gff = "~/scratch/libraries/genome/mtuberculosis_h37rv.gff"
)
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=TRUE)
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=FALSE)
## Had a successful gff import with rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=FALSE)
## Returning a df with 15 columns and 4008 rows.
# Key the GFF annotations by locus tag so they can be joined on row names.
rownames(mtb_gff) <- mtb_gff[["locus_tag"]]
# Left join: keep every GFF row and add the MicrobesOnline columns wherever
# sysName matches the GFF row name.
mtb_annot <- merge(
  x = mtb_gff,
  y = mtb_annotations,
  by.x = "row.names",
  by.y = "sysName",
  all.x = TRUE
)
# merge() returns the join key as a "Row.names" column; restore it as the
# row names and then drop the now-redundant column.
rownames(mtb_annot) <- mtb_annot[["Row.names"]]
mtb_annot[["Row.names"]] <- NULL
There is now a sample sheet on Google Docs that contains Volker’s samples along with a growing set of downloaded samples.
# Build the experiment from the versioned sample sheet; the count file for
# each sample is taken from the "mtbh37rvhisat2file" metadata column.
# `ver` is expected to be defined earlier in the document.
# NOTE(review): the merged annotations in mtb_annot are not passed to this
# call -- confirm whether they are attached elsewhere.
all_expt <- create_expt(
  metadata = glue::glue("sample_sheets/Mtb_RNAseq_data_sources_{ver}.xlsx"),
  file_column = "mtbh37rvhisat2file"
)
## Reading the sample metadata.
## The sample definitions comprises: 227 rows(samples) and 27 columns(metadata fields).
## Reading count tables.
## Reading count files with read.table().
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0021/outputs/hisat2_mtuberculosis_h37rv/HPGL0021.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0022/outputs/hisat2_mtuberculosis_h37rv/HPGL0022.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0023/outputs/hisat2_mtuberculosis_h37rv/HPGL0023.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0024/outputs/hisat2_mtuberculosis_h37rv/HPGL0024.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0025/outputs/hisat2_mtuberculosis_h37rv/HPGL0025.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0026/outputs/hisat2_mtuberculosis_h37rv/HPGL0026.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0083/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0084/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0085/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0087/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0088/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0089/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0091/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0092/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0093/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0095/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0130/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0131/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0132/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0133/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0134/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0135/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0136/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0137/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0138/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0139/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0140/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0141/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0330/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0331/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0332/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0333/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0334/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0335/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0336/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0511/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0512/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0513/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0514/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0515/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0516/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0517/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0518/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0519/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0520/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/hpgl0521/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214125/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214126/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214127/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214128/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214129/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214130/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214131/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214132/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214133/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214134/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214135/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214136/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214137/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214138/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214139/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214140/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214141/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214142/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214143/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1022493/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1022494/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1022495/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1022496/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1022497/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1022499/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1022501/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1022502/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1022507/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1022513/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198589/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198590/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198591/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198592/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198593/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198594/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198595/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198596/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198597/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198598/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198599/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198600/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198601/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198602/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198603/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198604/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198605/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198606/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198607/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198608/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198609/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198610/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198611/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198612/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198613/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198614/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198615/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198616/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198617/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198618/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198619/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198620/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198621/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198622/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198623/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198624/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198625/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198626/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198627/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198628/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198636/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198637/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198638/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198642/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198643/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198644/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198645/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198647/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198648/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198649/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198650/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198651/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198652/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198653/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198654/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198655/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198656/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198657/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198658/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198659/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198660/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198661/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198662/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198663/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198664/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198665/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198666/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198667/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198668/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198669/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198670/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198671/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198672/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198673/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198674/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198675/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198676/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198677/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR10198678/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR11215146/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR11215147/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR11215148/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR11215149/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR11215150/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR11215151/outputs/hisat2_mtuberculosis_h37rv/r1.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140762/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140763/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140764/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140765/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140766/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140767/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140768/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140769/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140770/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140771/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140772/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140773/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140774/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140775/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140776/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140777/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140778/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140779/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140780/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140781/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140782/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140783/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140784/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140785/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140786/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140787/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140788/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140789/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140790/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140791/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140792/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140793/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140794/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140795/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140796/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140797/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140798/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/ERR1140799/outputs/hisat2_mtuberculosis_h37rv/r1_trimmed.count_mtuberculosis_h37rv_sno_gene_locus_tag.count.xz contains 4013 rows and merges to 4013 rows.
## Finished reading count data.
## Warning in create_expt(metadata = glue::glue("sample_sheets/
## Mtb_RNAseq_data_sources_{ver}.xlsx"), : Some samples were removed when cross
## referencing the samples against the count data.
## Matched 4008 annotations and counts.
## Bringing together the count matrix and gene information.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 4008 rows and 198 columns.
# Use the "experimentname" metadata column as the batch factor and the
# "vitrovivo" column as the condition factor.
all_expt <- set_expt_batches(all_expt, fact = "experimentname")
all_expt <- set_expt_conditions(all_expt, fact = "vitrovivo")
# Arguments are listed in the order normalize_expt() reports applying them:
# simple low-count filter, quantile normalization, cpm conversion, log2.
all_norm <- normalize_expt(
  all_expt,
  filter = "simple",
  norm = "quant",
  convert = "cpm",
  transform = "log2")
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(simple(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: simple
## Removing 9 low-count genes (3999 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
plot_nonzero(all_norm)$plot
pp(file="first_pca_test.png", image=plot_pca(all_norm)$plot)
## plot labels was not set and there are more than 100 samples, disabling it.
## Not putting labels on the PC plot.
## Writing the image to: first_pca_test.png and calling dev.off().
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
# Simple filter, cpm and log2 without quantile normalization, followed by
# limma batch adjustment with a single surrogate variable requested.
all_nb <- normalize_expt(
  all_expt,
  filter = "simple",
  convert = "cpm",
  transform = "log2",
  batch = "limma",
  surrogates = 1)
## This function will replace the expt$expressionset slot with:
## log2(limma(cpm(simple(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: simple
## Removing 9 low-count genes (3999 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 86553 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with limma.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 702129 entries are x>1: 89%.
## batch_counts: Before batch/surrogate estimation, 86553 entries are x==0: 11%.
## batch_counts: Before batch/surrogate estimation, 3120 entries are 0<x<1: 0%.
## A specific number of surrogate variables was chosen: 1.
## batch_counts: Using limma's removeBatchEffect to remove batch effect.
## If you receive a warning: 'NANs produced', one potential reason is that the data was quantile normalized.
## There are 38385 (5%) elements which are < 0 after batch correction.
## Setting low elements to zero.
nb_pca <- plot_pca(all_nb)
## plot labels was not set and there are more than 100 samples, disabling it.
## Not putting labels on the PC plot.
# Write the limma-corrected PCA to its own file.  The svaseq PCA written later
# in this document uses "first_pca_batch_test.png" and would otherwise
# overwrite this image, leaving no copy of the limma result on disk.
pp(file = "first_pca_limma_test.png", image = nb_pca$plot)
## Writing the image to: first_pca_batch_test.png and calling dev.off().
# Quantile-normalized variant using svaseq for surrogate estimation; this
# replaces the limma-adjusted all_nb.  normalize_expt() warns that quantile
# normalization and sva do not always play well together.
all_nb <- normalize_expt(
  all_expt,
  filter = "simple",
  norm = "quant",
  convert = "cpm",
  transform = "log2",
  batch = "svaseq")
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(quant(simple(data)))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Warning in normalize_expt(all_expt, transform = "log2", filter = "simple", :
## Quantile normalization and sva do not always play well together.
## Step 1: performing count filter with option: simple
## Removing 9 low-count genes (3999 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with svaseq.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 788630 entries are x>1: 100%.
## batch_counts: Before batch/surrogate estimation, 2 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 3170 entries are 0<x<1: 0%.
## The be method chose 16 surrogate variables.
## Attempting svaseq estimation with 16 surrogates.
## There are 76 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
nb_pca <- plot_pca(all_nb)
## plot labels was not set and there are more than 100 samples, disabling it.
## Not putting labels on the PC plot.
pp(file="first_pca_batch_test.png", image=nb_pca$plot)
## Writing the image to: first_pca_batch_test.png and calling dev.off().
testing <- plot_3d_pca(nb_pca)
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
These are the samples which were collected by the Briken lab and previously analyzed by members of the El-Sayed lab.
# Build an expt from the 20200618 sample sheet, taking the count-table paths
# from its "mtbfile" column and the gene annotations from mtb_annot.
# sm() presumably silences the messages create_expt() prints -- TODO confirm.
# NOTE(review): this call currently fails with "I could not find your count
# tables by sample nor type", so local_expt and every few_* object derived from
# it are never created.  The sheet date is hard-coded here while the all-sample
# sheet name is built from `ver`; confirm that this sheet exists and that its
# "mtbfile" column is populated.
local_expt <- sm(create_expt(metadata="sample_sheets/Mtb_RNAseq_data_sources_20200618.xlsx",
file_column="mtbfile",
gene_info=mtb_annot))
## Error in create_expt(metadata = "sample_sheets/Mtb_RNAseq_data_sources_20200618.xlsx", : I could not find your count tables by sample nor type, uppercase nor lowercase.
Najib and Volker would like to focus, for the moment, only on hpgl IDs 130-132 and 330-332.
few_expt <- subset_expt(local_expt, subset="condition=='Rv'")
## Error in sampleNames(expt): object 'local_expt' not found
new_column <- paste0(pData(few_expt)[["condition"]], "_", pData(few_expt)[["vitrovivo"]])
## Error in pData(few_expt): object 'few_expt' not found
few <- set_expt_conditions(few_expt, fact=new_column)
## Error in pData(expt): object 'few_expt' not found
few_norm <- normalize_expt(few, filter=TRUE, convert="cpm", transform="log2", norm="quant")
## Error in normalize_expt(few, filter = TRUE, convert = "cpm", transform = "log2", : object 'few' not found
plot_pca(few_norm)$plot
## Error in plot_pca(few_norm): object 'few_norm' not found
# Filter `few`, whose conditions were reset to <condition>_<vitrovivo>, rather
# than `few_expt`: the latter was subset to condition == 'Rv', which leaves a
# single condition level and therefore nothing for all_pairwise() to contrast.
few_filt <- normalize_expt(few, filter = TRUE)
## Error in normalize_expt(few_expt, filter = TRUE): object 'few_expt' not found
few_write <- write_expt(few_expt, excel="excel/few_written.xlsx")
## Error in exprs(expt): object 'few_expt' not found
few_de <- all_pairwise(few_filt)
## Error in normalize_expt(input, filter = TRUE, batch = FALSE, transform = "log2", : object 'few_filt' not found
few_table <- combine_de_tables(few_de, excel="excel/few_samples_table.xlsx")
## Error in combine_de_tables(few_de, excel = "excel/few_samples_table.xlsx"): object 'few_de' not found
few_sig <- extract_significant_genes(few_table,
excel="excel/few_samples_sig.xlsx")
## Error in extract_significant_genes(few_table, excel = "excel/few_samples_sig.xlsx"): object 'few_table' not found
# Gene-length table passed to simple_goseq() as length_db: the annotation row
# names become the "ID" column and the "width" column becomes "length".
mtb_lengths <- setNames(mtb_annot[, c("seqnames", "width")], c("ID", "length"))
mtb_lengths[["ID"]] <- rownames(mtb_annot)
up_genes <- few_sig[["deseq"]][["ups"]][[1]]
## Error in eval(expr, envir, enclos): object 'few_sig' not found
up_go <- simple_goseq(sig_genes=up_genes, go_db=mtb_go, length_db=mtb_lengths,
excel="excel/up_goseq.xlsx")
## Error in simple_goseq(sig_genes = up_genes, go_db = mtb_go, length_db = mtb_lengths, : object 'up_genes' not found
down_genes <- few_sig[["deseq"]][["downs"]][[1]]
## Error in eval(expr, envir, enclos): object 'few_sig' not found
down <- rownames(down_genes)
## Error in rownames(down_genes): object 'down_genes' not found
down_go <- simple_goseq(sig_genes=down, go_db=mtb_go, length_db=mtb_lengths)
## Error in simple_goseq(sig_genes = down, go_db = mtb_go, length_db = mtb_lengths): object 'down' not found
few_write[["norm_pca"]]
## Error in eval(expr, envir, enclos): object 'few_write' not found
few_table[["plots"]][[1]][["deseq_ma_plots"]][["plot"]]
## Error in eval(expr, envir, enclos): object 'few_table' not found
up_go$pvalue_plots[[1]]
## Error in eval(expr, envir, enclos): object 'up_go' not found
down_go$pvalue_plots[[1]]
## Error in eval(expr, envir, enclos): object 'down_go' not found
In this context, "exogenous" just means samples which were not created here, e.g. samples I downloaded from the SRA.
# Re-key a copy of the annotations by their "db_xref" values (presumably the
# gene identifiers used in these count tables -- TODO confirm), then build the
# expt for the exogenous samples from their own sample sheet.
exo_annot <- mtb_annot
row.names(exo_annot) <- mtb_annot[["db_xref"]]
exo_expt <- create_expt(
  metadata = "sample_sheets/exo_samples.xlsx",
  file_column = "mtbfile",
  gene_info = exo_annot)
## Reading the sample metadata.
## Did not find the batch column in the sample sheet.
## Filling it in as undefined.
## The sample definitions comprises: 19 rows(samples) and 12 columns(metadata fields).
## Reading count tables.
## Reading count files with read.table().
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/mycobacterium_tuberculosis_2020/preprocessing/SRR9214125/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows.
## preprocessing/SRR9214126/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214127/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214128/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214129/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214130/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214131/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214132/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214133/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214134/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214135/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214136/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214137/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214138/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214139/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214140/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214141/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214142/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## preprocessing/SRR9214143/outputs/hisat2_mtuberculosis_h37rv/r1.count.xz contains 4013 rows and merges to 4013 rows.
## Finished reading count data.
## Matched 4008 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 4008 rows and 19 columns.
The following blocks will plot and print a few common metrics of the new data.
# Collect the metric plots for the exogenous data as loaded, and for a
# filtered, quantile-normalized, log2-transformed copy of it.
exo_plots <- sm(graph_metrics(exo_expt))
exo_norm <- sm(normalize_expt(exo_expt, filter = TRUE, norm = "quant",
                              transform = "log2"))
exon_plots <- sm(graph_metrics(exo_norm))
# Show the libsize and density plots of the data as loaded.
exo_plots$libsize
exo_plots$density
tn <- normalize_expt(exo_expt, transform="log2")
## This function will replace the expt$expressionset slot with:
## log2(data)
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Filter is false, this should likely be set to something, good
## choices include cbcb, kofa, pofa (anything but FALSE). If you want this to
## stay FALSE, keep in mind that if other normalizations are performed, then the
## resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: not doing count filtering.
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: transforming the data with log2.
## transform_counts: Found 1020 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
# Pass the table element returned by plot_density() for the log2-transformed
# data to ggbetweenstats, with one group per sample.
tnp <- plot_density(tn)
tmp_ggstats <- ggstatsplot::ggbetweenstats(
  data = tnp$table,
  x = sample,
  y = counts,
  notch = TRUE,
  mean.ci = TRUE,
  k = 3,
  pairwise.comparisons = FALSE)
## Registered S3 method overwritten by 'broom.mixed':
## method from
## tidy.gamlss broom
## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
## Registered S3 method overwritten by 'DescTools':
## method from
## reorder.factor gdata
## Warning: Number of labels is greater than default palette color count.
## Try using another color `palette` (and/or `package`).
##
tmp_ggstats
# Same per-sample comparison of counts, split into one panel per condition.
tmp_ggstats <- ggstatsplot::grouped_ggbetweenstats(
  data = tnp$table,
  x = sample,
  y = counts,
  grouping.var = condition,
  notch = TRUE,
  mean.ci = TRUE,
  k = 3,
  pairwise.comparisons = FALSE)
tmp_ggstats
## Quick PCA
# Default low-count filter (reported as cbcb), quantile normalization, cpm,
# log2 and svaseq surrogate adjustment ahead of the PCA plot.
exon_pc_expt <- normalize_expt(
  exo_expt,
  filter = TRUE,
  norm = "quant",
  convert = "cpm",
  transform = "log2",
  batch = "svaseq")
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(quant(cbcb(data)))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Warning in normalize_expt(exo_expt, transform = "log2", filter = TRUE, convert =
## "cpm", : Quantile normalization and sva do not always play well together.
## Step 1: performing count filter with option: cbcb
## Removing 20 low-count genes (3988 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 11 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with svaseq.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 75205 entries are x>1: 99%.
## batch_counts: Before batch/surrogate estimation, 11 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 556 entries are 0<x<1: 1%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## There are 13 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
pp(file="images/exo_pc.png", image=plot_pca(exon_pc_expt)$plot)
## Writing the image to: images/exo_pc.png and calling dev.off().
pander::pander(sessionInfo())
R version 4.0.0 (2020-04-24)
Platform: x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.UTF-8, LC_NUMERIC=C, LC_TIME=en_US.UTF-8, LC_COLLATE=en_US.UTF-8, LC_MONETARY=en_US.UTF-8, LC_MESSAGES=en_US.UTF-8, LC_PAPER=en_US.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.UTF-8 and LC_IDENTIFICATION=C
attached base packages: parallel, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: ruv(v.0.9.7.1), hpgltools(v.1.0), testthat(v.2.3.2), Biobase(v.2.48.0) and BiocGenerics(v.0.34.0)
loaded via a namespace (and not attached): corpcor(v.1.6.9), ps(v.1.3.3), Rsamtools(v.2.4.0), lmtest(v.0.9-37), V8(v.3.2.0), foreach(v.1.5.0), rprojroot(v.1.3-2), crayon(v.1.3.4), MASS(v.7.3-51.6), PMCMRplus(v.1.4.4), nlme(v.3.1-148), backports(v.1.1.8), metafor(v.2.4-0), ggcorrplot(v.0.1.3), sva(v.3.36.0), GOSemSim(v.2.14.0), rlang(v.0.4.7), readxl(v.1.3.1), XVector(v.0.28.0), performance(v.0.4.7), nloptr(v.1.2.2.2), callr(v.3.4.3), limma(v.3.44.3), BiocParallel(v.1.22.0), bit64(v.0.9-7.1), loo(v.2.3.1), glue(v.1.4.1), pbkrtest(v.0.4-8.6), rstan(v.2.21.1), processx(v.3.4.3), AnnotationDbi(v.1.50.1), ggstatsplot(v.0.5.0), DOSE(v.3.14.0), haven(v.2.3.1), tidyselect(v.1.1.0), SummarizedExperiment(v.1.18.2), usethis(v.1.6.1), rio(v.0.5.16), variancePartition(v.1.18.2), XML(v.3.99-0.4), tidyr(v.1.1.0), zoo(v.1.8-8), SuppDists(v.1.1-9.5), GenomicAlignments(v.1.24.0), mc2d(v.0.1-18), xtable(v.1.8-4), MatrixModels(v.0.4-1), magrittr(v.1.5), evaluate(v.0.14), ggplot2(v.3.3.2), cli(v.2.0.2), zlibbioc(v.1.34.0), rstudioapi(v.0.11), miniUI(v.0.1.1.1), fastmatch(v.1.1-0), shiny(v.1.5.0), xfun(v.0.15), askpass(v.1.1), parameters(v.0.8.0), groupedstats(v.1.0.1), inline(v.0.3.15), pkgbuild(v.1.1.0), bridgesampling(v.1.0-0), caTools(v.1.18.0), tidygraph(v.1.2.0), WRS2(v.1.1-0), expm(v.0.999-4), tibble(v.3.0.3), Brobdingnag(v.1.2-6), ggrepel(v.0.8.2), Biostrings(v.2.56.0), reshape(v.0.8.8), rcompanion(v.2.3.25), ez(v.4.4-0), zeallot(v.0.1.0), withr(v.2.2.0), bitops(v.1.0-6), ggforce(v.0.3.2), cellranger(v.1.1.0), plyr(v.1.8.6), coda(v.0.19-3), RcppParallel(v.5.0.2), pillar(v.1.4.6), gplots(v.3.0.4), GenomicFeatures(v.1.40.1), multcomp(v.1.4-13), Rmpfr(v.0.8-1), fs(v.1.4.2), europepmc(v.0.4), paletteer(v.1.2.0), clusterProfiler(v.3.16.0), vctrs(v.0.3.2), ellipsis(v.0.3.1), generics(v.0.0.2), nortest(v.1.0-4), urltools(v.1.7.3), devtools(v.2.3.0), tools(v.4.0.0), foreign(v.0.8-80), munsell(v.0.5.0), tweenr(v.1.0.1), fgsea(v.1.14.0), DelayedArray(v.0.14.1), abind(v.1.4-5), 
fastmap(v.1.0.1), compiler(v.4.0.0), pkgload(v.1.1.0), httpuv(v.1.5.4), rtracklayer(v.1.48.0), sessioninfo(v.1.1.1), DescTools(v.0.99.37), plotly(v.4.9.2.1), ggExtra(v.0.9), GenomeInfoDbData(v.1.2.3), gridExtra(v.2.3), edgeR(v.3.30.3), lattice(v.0.20-41), later(v.1.1.0.1), dplyr(v.1.0.0), prismatic(v.0.2.0), BiocFileCache(v.1.12.0), jsonlite(v.1.7.0), scales(v.1.1.1), carData(v.3.0-4), pbapply(v.1.4-2), genefilter(v.1.70.0), lazyeval(v.0.2.2), promises(v.1.1.1), car(v.3.0-8), BWStest(v.0.2.2), tidyBF(v.0.2.1), doParallel(v.1.0.15), metaBMA(v.0.6.3), effectsize(v.0.3.1), pairwiseComparisons(v.1.1.2), sandwich(v.2.5-1), rmarkdown(v.2.3), openxlsx(v.4.1.5), cowplot(v.1.0.0), statmod(v.1.4.34), Rtsne(v.0.15), ipmisc(v.3.1.0), forcats(v.0.5.0), pander(v.0.6.3), downloader(v.0.4), selectr(v.0.4-2), logspline(v.2.1.16), igraph(v.1.2.5), numDeriv(v.2016.8-1.1), survival(v.3.2-3), yaml(v.2.2.1), metaplus(v.0.7-11), rstantools(v.2.1.1), htmltools(v.0.5.0), memoise(v.1.1.0), fastGHQuad(v.1.0), modeltools(v.0.2-23), locfit(v.1.5-9.4), graphlayouts(v.0.7.0), IRanges(v.2.22.2), quadprog(v.1.5-8), dunn.test(v.1.3.5), viridisLite(v.0.3.0), gmp(v.0.6-0), digest(v.0.6.25), assertthat(v.0.2.1), mime(v.0.9), rappdirs(v.0.3.1), repr(v.1.1.0), bayestestR(v.0.7.0), RSQLite(v.2.2.0), Exact(v.2.0), LaplacesDemon(v.16.1.4), remotes(v.2.1.1), data.table(v.1.12.8), blob(v.1.2.1), S4Vectors(v.0.26.1), preprocessCore(v.1.50.0), splines(v.4.0.0), labeling(v.0.3), rematch2(v.2.1.2), RCurl(v.1.98-1.2), broom(v.0.7.0), hms(v.0.5.3), colorspace(v.1.4-1), base64enc(v.0.1-3), BiocManager(v.1.30.10), GenomicRanges(v.1.40.0), libcoin(v.1.0-5), broom.mixed(v.0.2.6), coin(v.1.3-1), Rcpp(v.1.0.5), mvtnorm(v.1.1-1), enrichplot(v.1.8.1), multcompView(v.0.1-8), fansi(v.0.4.1), R6(v.2.4.1), grid(v.4.0.0), ggridges(v.0.5.2), lifecycle(v.0.2.0), EMT(v.1.1), statsExpressions(v.0.4.2), StanHeaders(v.2.21.0-5), zip(v.2.0.4), BayesFactor(v.0.9.12-4.2), curl(v.4.3), ggsignif(v.0.6.0), minqa(v.1.2.4), gdata(v.2.18.0), 
broomExtra(v.4.0.3), fastcluster(v.1.1.25), DO.db(v.2.9), PROPER(v.1.20.0), Matrix(v.1.2-18), skimr(v.2.1.2), qvalue(v.2.20.0), TH.data(v.1.0-10), desc(v.1.2.0), RColorBrewer(v.1.1-2), iterators(v.1.0.12), TMB(v.1.7.16), stringr(v.1.4.0), directlabels(v.2020.6.17), htmlwidgets(v.1.5.1), polyclip(v.1.10-0), triebeard(v.0.3.0), biomaRt(v.2.44.1), purrr(v.0.3.4), crosstalk(v.1.1.0.1), gridGraphics(v.0.5-0), rvest(v.0.3.5), mgcv(v.1.8-31), openssl(v.1.4.2), insight(v.0.8.5), bdsmatrix(v.1.3-4), codetools(v.0.2-16), matrixStats(v.0.56.0), GO.db(v.3.11.4), gtools(v.3.8.2), prettyunits(v.1.1.1), dbplyr(v.1.4.4), GenomeInfoDb(v.1.24.2), correlation(v.0.3.0), gtable(v.0.3.0), DBI(v.1.1.0), stats4(v.4.0.0), httr(v.1.4.1), highr(v.0.8), KernSmooth(v.2.23-17), stringi(v.1.4.6), kSamples(v.1.2-9), progress(v.1.2.2), reshape2(v.1.4.4), farver(v.2.0.3), annotate(v.1.66.0), viridis(v.0.5.1), xml2(v.1.3.2), bbmle(v.1.0.23.1), colorRamps(v.2.3), rvcheck(v.0.1.8), boot(v.1.3-25), lme4(v.1.1-23), readr(v.1.3.1), ggplotify(v.0.0.5), bit(v.1.1-15.2), scatterpie(v.0.1.4), ggraph(v.2.0.3), pkgconfig(v.2.0.3) and knitr(v.1.29)
message(paste0("This is hpgltools commit: ", get_git_commit()))
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset b8af113ae7a9b8582c5d1a9e23febb5a2b2adb58
## This is hpgltools commit: Sun Jul 19 17:08:52 2020 -0400: b8af113ae7a9b8582c5d1a9e23febb5a2b2adb58
# Derive the save-file name from the Rmd file name and the document version.
# The anchored pattern strips only a trailing ".Rmd" extension, and the
# `replacement` argument is spelled out instead of relying on partial
# argument matching of `replace`.
this_save <- paste0(sub(pattern = "\\.Rmd$", replacement = "", x = rmd_file),
                    "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
## Saving to 01_mtb_analyses_$20200716-v20200716.rda.xz
tmp <- sm(saveme(filename=this_save))