1 Annotation version: 20180215

1.1 Genome annotation input

1.1.1 Read a gff file

Most annotations of interest can be loaded directly from the gff files used in the alignments. More in-depth information, e.g. for the human transcriptome, may be extracted from biomart.

## The older, file-based route to the genome/annotation data: record where
## the reference files live.
mtb_genome <- "reference/mtuberculosis_h37rv_genbank.fasta"
mtb_cds <- "reference/mtb_cds.fasta"
mtb_gff <- "reference/mycobacterium_tuberculosis_h37rv_2.gff.gz"

## Load the gff with type "gene" (the call is wrapped in sm()), then use the
## ID column of the result as its row names.
mtb_annotations <- sm(
  load_gff_annotations(mtb_gff, type = "gene")
)
rownames(mtb_annotations) <- mtb_annotations[["ID"]]

1.1.2 Download from microbesonline

## Start by finding the microbesonline ID of the Mtb genome; the query
## string is "37".
ids <- get_microbesonline_ids("37")
head(ids)
## The first entry is Mycobacterium tuberculosis H37Rv, which has id 83332.
mtb_microbes <- load_microbesonline_annotations(ids = 83332)

## load_uniprotws_annotations() is a helper written for this sort of lookup.
## Be warned that it is slow.
mtb_uniprot_annot <- load_uniprotws_annotations()

1.2 Getting ontology data

## Fetch the ontology data from microbesonline for id 83332.
## NOTE(review): load_microbesonline_annotations() is called with `ids=`
## while this call uses `id=`; confirm both match the hpgltools signatures.
mtb_go <- load_microbesonline_go(id = 83332)
## Final bookkeeping. This is skipped only when a variable named skip_load
## exists and is TRUE; get0() returns NULL for a missing variable, which
## isTRUE() treats as not TRUE.
if (!isTRUE(get0("skip_load"))) {
  ## Record the session details and the hpgltools commit in the report.
  pander::pander(sessionInfo())
  message(paste0("This is hpgltools commit: ", get_git_commit()))
  ## Build the output name from the Rmd file name (".Rmd" removed) and the
  ## version string. `replacement` is spelled out in full instead of relying
  ## on partial argument matching of `replace`.
  this_save <- paste0(
    gsub(pattern = "\\.Rmd", replacement = "", x = rmd_file),
    "-v", ver, ".rda.xz"
  )
  message(paste0("Saving to ", this_save))
  ## Hand the file name to saveme(); assigning the result keeps it from
  ## being printed.
  tmp <- sm(saveme(filename = this_save))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 7de4503f6bb5724c28cce24af5dbee22bb1c0cae
## R> packrat::restore()
## This is hpgltools commit: Thu Apr 12 22:08:53 2018 -0400: 7de4503f6bb5724c28cce24af5dbee22bb1c0cae
## Saving to 01_annotation-v20180215.rda.xz
LS0tCnRpdGxlOiAiTS50dWJlcmN1bG9zaXMgMjAxOCBwcm90ZW9taWNzOiBDb2xsZWN0aW5nIGFubm90YXRpb24gZGF0YS4iCmF1dGhvcjogImF0YiBhYmVsZXdAZ21haWwuY29tIgpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiCm91dHB1dDoKIGh0bWxfZG9jdW1lbnQ6CiAgY29kZV9kb3dubG9hZDogdHJ1ZQogIGNvZGVfZm9sZGluZzogc2hvdwogIGZpZ19jYXB0aW9uOiB0cnVlCiAgZmlnX2hlaWdodDogNwogIGZpZ193aWR0aDogNwogIGhpZ2hsaWdodDogZGVmYXVsdAogIGtlZXBfbWQ6IGZhbHNlCiAgbW9kZTogc2VsZmNvbnRhaW5lZAogIG51bWJlcl9zZWN0aW9uczogdHJ1ZQogIHNlbGZfY29udGFpbmVkOiB0cnVlCiAgdGhlbWU6IHJlYWRhYmxlCiAgdG9jOiB0cnVlCiAgdG9jX2Zsb2F0OgogICAgY29sbGFwc2VkOiBmYWxzZQogICAgc21vb3RoX3Njcm9sbDogZmFsc2UKLS0tCgo8c3R5bGU+CiAgYm9keSAubWFpbi1jb250YWluZXIgewogICAgbWF4LXdpZHRoOiAxNjAwcHg7CiAgfQo8L3N0eWxlPgoKYGBge3Igb3B0aW9ucywgaW5jbHVkZT1GQUxTRX0KaWYgKCFpc1RSVUUoZ2V0MCgic2tpcF9sb2FkIikpKSB7CiAgbGlicmFyeShocGdsdG9vbHMpCiAgdHQgPC0gZGV2dG9vbHM6OmxvYWRfYWxsKCJ+L2hwZ2x0b29scyIpCiAga25pdHI6Om9wdHNfa25pdCRzZXQocHJvZ3Jlc3M9VFJVRSwKICAgICAgICAgICAgICAgICAgICAgICB2ZXJib3NlPVRSVUUsCiAgICAgICAgICAgICAgICAgICAgICAgd2lkdGg9OTAsCiAgICAgICAgICAgICAgICAgICAgICAgZWNobz1UUlVFKQogIGtuaXRyOjpvcHRzX2NodW5rJHNldChlcnJvcj1UUlVFLAogICAgICAgICAgICAgICAgICAgICAgICBmaWcud2lkdGg9OCwKICAgICAgICAgICAgICAgICAgICAgICAgZmlnLmhlaWdodD04LAogICAgICAgICAgICAgICAgICAgICAgICBkcGk9OTYpCiAgb2xkX29wdGlvbnMgPC0gb3B0aW9ucyhkaWdpdHM9NCwKICAgICAgICAgICAgICAgICAgICAgICAgIHN0cmluZ3NBc0ZhY3RvcnM9RkFMU0UsCiAgICAgICAgICAgICAgICAgICAgICAgICBrbml0ci5kdXBsaWNhdGUubGFiZWw9ImFsbG93IikKICBnZ3Bsb3QyOjp0aGVtZV9zZXQoZ2dwbG90Mjo6dGhlbWVfYncoYmFzZV9zaXplPTEwKSkKICB2ZXIgPC0gIjIwMTgwMjE1IgogIHByZXZpb3VzX2ZpbGUgPC0gImluZGV4LlJtZCIKCiAgdG1wIDwtIHRyeShzbShsb2FkbWUoZmlsZW5hbWU9cGFzdGUwKGdzdWIocGF0dGVybj0iXFwuUm1kIiwgcmVwbGFjZT0iIiwgeD1wcmV2aW91c19maWxlKSwgIi12IiwgdmVyLCAiLnJkYS54eiIpKSkpCiAgcm1kX2ZpbGUgPC0gIjAxX2Fubm90YXRpb24uUm1kIgp9CmBgYAoKIyBBbm5vdGF0aW9uIHZlcnNpb246IGByIHZlcmAKCiMjIEdlbm9tZSBhbm5vdGF0aW9uIGlucHV0CgojIyMgUmVhZCBhIGdmZiBmaWxlCgpJbiBjb250cmFzdCwgaXQgaXMgcG9zc2libGUgdG8gbG9hZCBtb3N0IGFubm90YXRpb25zIG9mIGludGVyZXN0IGRpcmVjdGx5IGZyb20gdGhlIGdmZiBm
aWxlcyB1c2VkIGluCnRoZSBhbGlnbm1lbnRzLiAgTW9yZSBpbi1kZXB0aCBpbmZvcm1hdGlvbiBmb3IgdGhlIGh1bWFuIHRyYW5zY3JpcHRvbWUgbWF5IGJlIGV4dHJhY3RlZCBmcm9tIGJpb21hcnQuCgpgYGB7ciBnZW5vbWVfaW5wdXQsIGNhY2hlPVRSVUV9CiMjIFRoZSBvbGQgd2F5IG9mIGdldHRpbmcgZ2Vub21lL2Fubm90YXRpb24gZGF0YQptdGJfZ2ZmIDwtICJyZWZlcmVuY2UvbXljb2JhY3Rlcml1bV90dWJlcmN1bG9zaXNfaDM3cnZfMi5nZmYuZ3oiCgptdGJfZ2Vub21lIDwtICJyZWZlcmVuY2UvbXR1YmVyY3Vsb3Npc19oMzdydl9nZW5iYW5rLmZhc3RhIgptdGJfY2RzIDwtICJyZWZlcmVuY2UvbXRiX2Nkcy5mYXN0YSIKCm10Yl9hbm5vdGF0aW9ucyA8LSBzbShsb2FkX2dmZl9hbm5vdGF0aW9ucyhtdGJfZ2ZmLCB0eXBlPSJnZW5lIikpCnJvd25hbWVzKG10Yl9hbm5vdGF0aW9ucykgPC0gbXRiX2Fubm90YXRpb25zW1siSUQiXV0KYGBgCgojIyMgRG93bmxvYWQgZnJvbSBtaWNyb2Jlc29ubGluZQoKYGBge3IgbWljcm9iZXNvbmxpbmUsIGV2YWw9RkFMU0V9CiMjIEZpcnN0IGZpZ3VyZSBvdXQgdGhlIElEIGZvciB0aGUgTXRiIGdlbm9tZToKaWRzIDwtIGdldF9taWNyb2Jlc29ubGluZV9pZHMoIjM3IikKaGVhZChpZHMpCiMjIE15Y29iYWN0ZXJpdW0gdHViZXJjdWxvc2lzIEgzN1J2IGlzIHRoZSBmaXJzdCBlbnRyeSBhbmQgaGFzIGlkOiA4MzMzMgptdGJfbWljcm9iZXMgPC0gbG9hZF9taWNyb2Jlc29ubGluZV9hbm5vdGF0aW9ucyhpZHM9ODMzMzIpCmBgYAoKYGBge3IgZ2VuYmFuaywgZXZhbD1GQUxTRX0KIyMgSSBtYWRlIGEgbmlmdHkgZnVuY3Rpb24gdG8gZG8gdGhpcyBzdHVmZjogbG9hZF91bmlwcm90d3NfYW5ub3RhdGlvbnMoKS4KIyMgSXQgaXMgc2xvdywgdGhvdWdoLgptdGJfdW5pcHJvdF9hbm5vdCA8LSBsb2FkX3VuaXByb3R3c19hbm5vdGF0aW9ucygpCmBgYAoKIyMgR2V0dGluZyBvbnRvbG9neSBkYXRhCgpgYGB7ciBvbnRvbG9neSwgZXZhbD1GQUxTRX0KbXRiX2dvIDwtIGxvYWRfbWljcm9iZXNvbmxpbmVfZ28oaWQ9ODMzMzIpCmBgYAoKYGBge3Igc2F2ZW1lfQppZiAoIWlzVFJVRShnZXQwKCJza2lwX2xvYWQiKSkpIHsKICBwYW5kZXI6OnBhbmRlcihzZXNzaW9uSW5mbygpKQogIG1lc3NhZ2UocGFzdGUwKCJUaGlzIGlzIGhwZ2x0b29scyBjb21taXQ6ICIsIGdldF9naXRfY29tbWl0KCkpKQogIHRoaXNfc2F2ZSA8LSBwYXN0ZTAoZ3N1YihwYXR0ZXJuPSJcXC5SbWQiLCByZXBsYWNlPSIiLCB4PXJtZF9maWxlKSwgIi12IiwgdmVyLCAiLnJkYS54eiIpCiAgbWVzc2FnZShwYXN0ZTAoIlNhdmluZyB0byAiLCB0aGlzX3NhdmUpKQogIHRtcCA8LSBzbShzYXZlbWUoZmlsZW5hbWU9dGhpc19zYXZlKSkKfQpgYGAK