1 Annotation version: 20180718

The following section loads the microbesonline and genbank annotations for Pseudomonas aeruginosa.

## Looks like it is taxon ID 208963
id <- 208963
## Pass the taxon ID via `id` so the value is defined in exactly one place.
paeruginosa_annotations <- load_microbesonline_annotations(id)
## The species being downloaded is: Pseudomonas aeruginosa UCBPP-PA14
knitr::kable(head(paeruginosa_annotations))
locusId accession GI scaffoldId start stop strand sysName name desc COG COGFun COGDesc TIGRFam TIGRRoles GO EC ECDesc
2194572 YP_788156.1 116053721 4582 483 2027 + PA14_00010 dnaA chromosomal replication initiator protein DnaA (NCBI) COG593 L ATPase involved in DNA replication initiation TIGR00362 chromosomal replication initiator protein DnaA [dnaA] DNA metabolism:DNA replication, recombination, and repair GO:0006270,GO:0006275,GO:0003688,GO:0017111,GO:0005524
2194573 YP_788157.1 116053722 4582 2056 3159 + PA14_00020 dnaN DNA polymerase III, beta chain (NCBI) COG592 L DNA polymerase sliding clamp subunit (PCNA homolog) TIGR00663 DNA polymerase III, beta subunit [dnaN] DNA metabolism:DNA replication, recombination, and repair GO:0006260,GO:0003677,GO:0003893,GO:0008408,GO:0016449,GO:0019984,GO:0003889,GO:0003894,GO:0015999,GO:0016450,GO:0003890,GO:0003895,GO:0016000,GO:0016451,GO:0003891,GO:0016448,GO:0016452 2.7.7.7 DNA-directed DNA polymerase.
2194574 YP_788158.1 116053723 4582 3169 4278 + PA14_00030 recF DNA replication and repair protein RecF (NCBI) COG1195 L Recombinational DNA repair ATPase (RecF pathway) TIGR00611 DNA replication and repair protein RecF [recF] DNA metabolism:DNA replication, recombination, and repair GO:0006281,GO:0005694,GO:0005524,GO:0017111,GO:0003697
2194575 YP_788159.1 116053724 4582 4275 6695 + PA14_00050 gyrB DNA gyrase subunit B (NCBI) COG187 L Type IIA topoisomerase (DNA gyrase/topo II, topoisomerase IV), B subunit TIGR01059 DNA gyrase, B subunit [gyrB] DNA metabolism:DNA replication, recombination, and repair GO:0006304,GO:0006265,GO:0005694,GO:0003918,GO:0005524 5.99.1.3 DNA topoisomerase (ATP-hydrolyzing).
2194576 YP_788160.1 116053725 4582 7791 7018 - PA14_00060 PA14_00060 putative acyltransferase (NCBI) COG204 I 1-acyl-sn-glycerol-3-phosphate acyltransferase GO:0008152,GO:0003841 2.3.1.51 1-acylglycerol-3-phosphate O-acyltransferase.
2194577 YP_788161.1 116053726 4582 8339 7803 - PA14_00070 PA14_00070 putative histidinol-phosphatase (NCBI) COG241 E Histidinol phosphatase and related phosphatases TIGR01656 histidinol-phosphate phosphatase domain,TIGR01662 HAD hydrolase, family IIIA Unknown function:Enzymes of unknown specificity GO:0000105,GO:0004401 3.1.3.-
## While waiting for that to download, maybe grab a genbank genome for it, too.
## Looks like the genome is accession NC_002516.2, downloading it to reference/
## crap in a hat, that is strain PAO1, dirty trick; NC_008463.1 is PA14 I think.
## Keep an on-disk copy of the full microbesonline table.
write.csv(x=paeruginosa_annotations, file="paeruginosa_microbes_annotations.csv")
## oh yeah I wrote something which should download and parse genbank files...
## Try to reuse a cached copy of the genbank annotations from pa14.rda; if it
## cannot be loaded, download them and write the cache for the next run.
## The cache is loaded into a scratch environment so that nothing else stored
## in the file can overwrite objects in the workspace.
pa <- new.env()
test <- try(load("pa14.rda", envir=pa))
pa14 <- NULL
if (inherits(test, "try-error")) {
    pa14 <- load_genbank_annotations(accession="NC_008463.1")
    ## save a copy of this data structure to avoid having to redownload it
    save(list=c("pa14"), file="pa14.rda")
} else {
    pa14 <- pa$pa14
}
## Drop the scratch environment on both paths so it is never left behind.
rm(pa)

## Reduce the microbesonline table to the locus name, gene name, and the two
## description columns.
pa14_annot <- paeruginosa_annotations[
  , c("sysName", "name", "desc", "COGDesc")]
## Some rows carry a PA14_ locus tag in the gene name column (see PA14_00060
## in the table above); blank those so only names that do not start with
## PA14_ remain.
pa_names <- grepl("^PA14_", pa14_annot[["name"]])
pa14_annot$name[pa_names] <- ""
write.csv(
    x=pa14_annot,
    file="pa14_annotations.csv")

1.1 gff annotations

Unfortunately, it appears that the IDs in the Pseudomonas gff do not match the ones I get from microbesonline. So I will load the annotations from the gff file and see if I can cross-reference the two sets.

## Read the PA14 gff annotations into a data frame.
gff_annot <- load_gff_annotations(gff="reference/paeruginosa_pa14.gff")
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=TRUE)
## Had a successful gff import with rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=TRUE)
## Returning a df with 16 columns and 11946 rows.
## Index the gff rows by their Parent attribute; make.names() makes the
## values syntactically valid and unique.
row.names(gff_annot) <- make.names(gff_annot[["Parent"]], unique=TRUE)
## Make sure the join key is a plain character vector before merging.
gff_annot[["Alias"]] <- as.character(gff_annot[["Alias"]])
## Join gff Alias against microbesonline sysName.  all.y=TRUE keeps every
## microbesonline row, so rows with no gff match have NA in the gff columns
## (including ID).
all_annot <- merge(
    x=gff_annot,
    y=pa14_annot,
    by.x="Alias",
    by.y="sysName",
    all.y=TRUE)
row.names(all_annot) <- make.names(all_annot[["ID"]], unique=TRUE)
## Record the R version, platform, and loaded packages in the report.
pander::pander(utils::sessionInfo())

R version 3.5.1 (2018-07-02)

Platform: x86_64-pc-linux-gnu (64-bit)

locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C

attached base packages: stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: hpgltools(v.2018.03)

loaded via a namespace (and not attached): Biobase(v.2.40.0), httr(v.1.3.1), bit64(v.0.9-7), foreach(v.1.4.4), assertthat(v.0.2.0), highr(v.0.7), stats4(v.3.5.1), pander(v.0.6.2), selectr(v.0.4-1), blob(v.1.1.1), GenomeInfoDbData(v.1.1.0), Rsamtools(v.1.32.2), yaml(v.2.1.19), progress(v.1.2.0), lattice(v.0.20-35), pillar(v.1.3.0), RSQLite(v.2.1.1), backports(v.1.1.2), glue(v.1.3.0), digest(v.0.6.15), GenomicRanges(v.1.32.4), XVector(v.0.20.0), rvest(v.0.3.2), colorspace(v.1.3-2), htmltools(v.0.3.6), Matrix(v.1.2-14), plyr(v.1.8.4), XML(v.3.98-1.12), pkgconfig(v.2.0.1), devtools(v.1.13.6), biomaRt(v.2.36.1), zlibbioc(v.1.26.0), purrr(v.0.2.5), scales(v.0.5.0), BiocParallel(v.1.14.2), tibble(v.1.4.2), IRanges(v.2.14.10), ggplot2(v.3.0.0), withr(v.2.1.2), SummarizedExperiment(v.1.10.1), GenomicFeatures(v.1.32.0), BiocGenerics(v.0.26.0), lazyeval(v.0.2.1), magrittr(v.1.5), crayon(v.1.3.4), memoise(v.1.1.0), evaluate(v.0.11), xml2(v.1.2.0), tools(v.3.5.1), data.table(v.1.11.4), prettyunits(v.1.0.2), hms(v.0.4.2), matrixStats(v.0.53.1), stringr(v.1.3.1), S4Vectors(v.0.18.3), munsell(v.0.5.0), DelayedArray(v.0.6.1), AnnotationDbi(v.1.42.1), bindrcpp(v.0.2.2), Biostrings(v.2.48.0), compiler(v.3.5.1), GenomeInfoDb(v.1.16.0), rlang(v.0.2.1), grid(v.3.5.1), RCurl(v.1.95-4.11), iterators(v.1.0.10), bitops(v.1.0-6), base64enc(v.0.1-3), rmarkdown(v.1.10), gtable(v.0.2.0), codetools(v.0.2-15), DBI(v.1.0.0), roxygen2(v.6.0.1), curl(v.3.2), R6(v.2.2.2), GenomicAlignments(v.1.16.0), knitr(v.1.20), dplyr(v.0.7.6), rtracklayer(v.1.40.3), bit(v.1.1-14), bindr(v.0.1.1), commonmark(v.1.5), rprojroot(v.1.3-2), stringi(v.1.2.3), parallel(v.3.5.1), Rcpp(v.0.12.17) and tidyselect(v.0.2.4)

## Report which hpgltools commit produced this document.
message(paste0("This is hpgltools commit: ", get_git_commit()))
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset c730ef178f8e57bbf3819e21cf5e6cfe879e6328
## R> packrat::restore()
## This is hpgltools commit: Fri Jul 13 17:21:39 2018 -0400: c730ef178f8e57bbf3819e21cf5e6cfe879e6328
## Build the save file name from this document's name: strip only a trailing
## ".Rmd" (anchored with $), then append the version and the .rda.xz suffix.
## `replacement=` is spelled out rather than relying on partial matching.
this_save <- paste0(sub(pattern="\\.Rmd$", replacement="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
## Saving to 01_annotation_20180718-v20180718.rda.xz
tmp <- sm(saveme(filename=this_save))
LS0tCnRpdGxlOiAiMjAxNzA3MTggUC5hZXJ1Z2lub3NhOiBhbm5vdGF0aW9uIGRhdGEiCmF1dGhvcjogImF0YiBhYmVsZXdAZ21haWwuY29tIgpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiCm91dHB1dDoKIGh0bWxfZG9jdW1lbnQ6CiAgY29kZV9kb3dubG9hZDogdHJ1ZQogIGNvZGVfZm9sZGluZzogc2hvdwogIGZpZ19jYXB0aW9uOiB0cnVlCiAgZmlnX2hlaWdodDogNwogIGZpZ193aWR0aDogNwogIGhpZ2hsaWdodDogZGVmYXVsdAogIGtlZXBfbWQ6IGZhbHNlCiAgbW9kZTogc2VsZmNvbnRhaW5lZAogIG51bWJlcl9zZWN0aW9uczogdHJ1ZQogIHNlbGZfY29udGFpbmVkOiB0cnVlCiAgdGhlbWU6IHJlYWRhYmxlCiAgdG9jOiB0cnVlCiAgdG9jX2Zsb2F0OgogICAgY29sbGFwc2VkOiBmYWxzZQogICAgc21vb3RoX3Njcm9sbDogZmFsc2UKLS0tCgo8c3R5bGU+CiAgYm9keSAubWFpbi1jb250YWluZXIgewogICAgbWF4LXdpZHRoOiAxNjAwcHg7CiAgfQo8L3N0eWxlPgoKYGBge3Igb3B0aW9ucywgaW5jbHVkZT1GQUxTRX0KbGlicmFyeShocGdsdG9vbHMpCnR0IDwtIGRldnRvb2xzOjpsb2FkX2FsbCgifi9ocGdsdG9vbHMiKQprbml0cjo6b3B0c19rbml0JHNldCgKICAgICAgICAgICAgICAgICAgIHByb2dyZXNzPVRSVUUsCiAgICAgICAgICAgICAgICAgICB2ZXJib3NlPVRSVUUsCiAgICAgICAgICAgICAgICAgICB3aWR0aD05MCwKICAgICAgICAgICAgICAgICAgIGVjaG89VFJVRSkKa25pdHI6Om9wdHNfY2h1bmskc2V0KAogICAgICAgICAgICAgICAgICAgIGVycm9yPVRSVUUsCiAgICAgICAgICAgICAgICAgICAgZmlnLndpZHRoPTgsCiAgICAgICAgICAgICAgICAgICAgZmlnLmhlaWdodD04LAogICAgICAgICAgICAgICAgICAgIGRwaT05NikKb2xkX29wdGlvbnMgPC0gb3B0aW9ucygKICBkaWdpdHM9NCwKICBzdHJpbmdzQXNGYWN0b3JzPUZBTFNFLAogIGtuaXRyLmR1cGxpY2F0ZS5sYWJlbD0iYWxsb3ciKQpnZ3Bsb3QyOjp0aGVtZV9zZXQoZ2dwbG90Mjo6dGhlbWVfYncoYmFzZV9zaXplPTEwKSkKdmVyIDwtICIyMDE4MDcxOCIKcHJldmlvdXNfZmlsZSA8LSAiaW5kZXguUm1kIgoKdG1wIDwtIHRyeShzbShsb2FkbWUoZmlsZW5hbWU9cGFzdGUwKGdzdWIocGF0dGVybj0iXFwuUm1kIiwgcmVwbGFjZT0iIiwgeD1wcmV2aW91c19maWxlKSwgIi12IiwgdmVyLCAiLnJkYS54eiIpKSkpCnJtZF9maWxlIDwtICIwMV9hbm5vdGF0aW9uXzIwMTgwNzE4LlJtZCIKYGBgCgojIEFubm90YXRpb24gdmVyc2lvbjogYHIgdmVyYAoKVGhlIGZvbGxvd2luZyBzZWN0aW9uIGxvYWRzIHRoZSBtaWNyb2Jlc29ubGluZSBhbmQgZ2VuYmFuayBhbm5vdGF0aW9ucyBmb3IgUHNldWRvbW9uYXMgYWVydWdpbm9zYS4KCmBgYHtyIGFubm90YXRpb259CiMjIExvb2tzIGxpa2UgaXQgaXMgdGF4b24gSUQgMjA4OTYzCmlkIDwtIDIwODk2MwpwYWVydWdpbm9zYV9hbm5vdGF0aW9ucyA8LSBsb2FkX21pY3JvYmVzb25saW5lX2Fubm90YXRpb25zKDIwODk2
MykKa25pdHI6OmthYmxlKGhlYWQocGFlcnVnaW5vc2FfYW5ub3RhdGlvbnMpKQojIyBXaGlsZSB3YWl0aW5nIGZvciB0aGF0IHRvIGRvd25sb2FkLCBtYXliZSBncmFiIGEgZ2VuYmFuayBnZW5vbWUgZm9yIGl0LCB0b28uCiMjIExvb2tzIGxpa2UgdGhlIGdlbm9tZSBpcyBhY2Nlc3Npb24gTkNfMDAyNTE2LjIsIGRvd25sb2FkaW5nIGl0IHRvIHJlZmVyZW5jZS8KIyMgY3JhcCBpbiBhIGhhdCwgdGhhdCBpcyBzdHJhaW4gcGEwMSwgZGlydHkgdHJpY2s7IE5DXzAwODQ2My4xIGlzIHBhMTQgSSB0aGluay4Kd3JpdGUuY3N2KHg9cGFlcnVnaW5vc2FfYW5ub3RhdGlvbnMsIGZpbGU9InBhZXJ1Z2lub3NhX21pY3JvYmVzX2Fubm90YXRpb25zLmNzdiIpCiMjIG9oIHllYWggSSB3cm90ZSBzb21ldGhpbmcgd2hpY2ggc2hvdWxkIGRvd25sb2FkIGFuZCBwYXJzZSBnZW5iYW5rIGZpbGVzLi4uCnBhIDwtIG5ldy5lbnYoKQp0ZXN0IDwtIHRyeShsb2FkKCJwYTE0LnJkYSIsIGVudmlyPXBhKSkKcGExNCA8LSBOVUxMCmlmIChjbGFzcyh0ZXN0KSA9PSAidHJ5LWVycm9yIikgewogICAgcGExNCA8LSBsb2FkX2dlbmJhbmtfYW5ub3RhdGlvbnMoYWNjZXNzaW9uPSJOQ18wMDg0NjMuMSIpCiAgICAjIyBzYXZlIGEgY29weSBvZiB0aGlzIGRhdGEgc3RydWN0dXJlIHRvIGF2b2lkIGhhdmluZyB0byByZWRvd25sb2FkIGl0CiAgICBzYXZlKGxpc3Q9YygicGExNCIpLCBmaWxlPSJwYTE0LnJkYSIpCn0gZWxzZSB7CiAgICBwYTE0IDwtIHBhJHBhMTQKICAgIHJtKHBhKQp9CgpwYTE0X2Fubm90IDwtIHBhZXJ1Z2lub3NhX2Fubm90YXRpb25zWywgYygic3lzTmFtZSIsICJuYW1lIiwgImRlc2MiLCAiQ09HRGVzYyIpXQpwYV9uYW1lcyA8LSBncmVwbChwYXR0ZXJuPSJeUEExNF8iLCB4PXBhMTRfYW5ub3QkbmFtZSkKcGExNF9hbm5vdFtwYV9uYW1lcywgIm5hbWUiXSA8LSAiIgp3cml0ZS5jc3YoeD1wYTE0X2Fubm90LCBmaWxlPSJwYTE0X2Fubm90YXRpb25zLmNzdiIpCmBgYAoKIyMgZ2ZmIGFubm90YXRpb25zCgpVbmZvcnR1bmF0ZWx5LCBpdCBhcHBlYXJzIHRoYXQgSURzIGZyb20gdGhlIFBzZXVkb21vbmFzIGdmZiBkbyBub3QgbWF0Y2ggd2hhdCBJCmdldCBmcm9tIG1pY3JvYmVzb25saW5lLiAgU28gbGV0IHVzIGdldCB0aGVtIGZyb20gdGhlIGdmZiBmaWxlIGFuZCBzZWUgaWYgSSBjYW4KeHJlZiB0aGVtLgoKYGBge3IgZ2ZmX2Fubm90YXRpb25zfQpnZmZfYW5ub3QgPC0gbG9hZF9nZmZfYW5ub3RhdGlvbnMoZ2ZmPSJyZWZlcmVuY2UvcGFlcnVnaW5vc2FfcGExNC5nZmYiKQpyb3duYW1lcyhnZmZfYW5ub3QpIDwtIG1ha2UubmFtZXMoZ2ZmX2Fubm90JFBhcmVudCwgdW5pcXVlPVRSVUUpCmdmZl9hbm5vdCRBbGlhcyA8LSBhcy5jaGFyYWN0ZXIoZ2ZmX2Fubm90JEFsaWFzKQpgYGAKCmBgYHtyIG1lcmdlZF9hbm5vdGF0aW9uc30KYWxsX2Fubm90IDwtIG1lcmdlKGdmZl9hbm5vdCwgcGExNF9hbm5vdCwgYnkueD0iQWxpYXMiLCBieS55PSJzeXNO
YW1lIiwgYWxsLnk9VFJVRSkKcm93bmFtZXMoYWxsX2Fubm90KSA8LSBtYWtlLm5hbWVzKGFsbF9hbm5vdCRJRCwgdW5pcXVlPVRSVUUpCmBgYAoKYGBge3Igc2F2ZW1lfQpwYW5kZXI6OnBhbmRlcihzZXNzaW9uSW5mbygpKQptZXNzYWdlKHBhc3RlMCgiVGhpcyBpcyBocGdsdG9vbHMgY29tbWl0OiAiLCBnZXRfZ2l0X2NvbW1pdCgpKSkKdGhpc19zYXZlIDwtIHBhc3RlMChnc3ViKHBhdHRlcm49IlxcLlJtZCIsIHJlcGxhY2U9IiIsIHg9cm1kX2ZpbGUpLCAiLXYiLCB2ZXIsICIucmRhLnh6IikKbWVzc2FnZShwYXN0ZTAoIlNhdmluZyB0byAiLCB0aGlzX3NhdmUpKQp0bXAgPC0gc20oc2F2ZW1lKGZpbGVuYW1lPXRoaXNfc2F2ZSkpCmBgYAo=