1 Annotation version: 20180501

1.1 Genome annotation with OrgDb/TxDb/OrganismDbi

TriTrypDB just released a new version. Let us build new annotation data from it.

## Build the annotation package(s) for Leishmania major.
## These functions take _forever_ the first time around.
## NOTE(review): reinstall=TRUE presumably forces a rebuild/reinstall even when
## the package is already present -- confirm against make_eupath_organismdbi().
## NOTE(review): lm_annot is assigned again further down by
## load_orgdb_annotations(), so this return value only lasts until then.
lm_annot <- make_eupath_organismdbi(species="Leishmania major", reinstall=TRUE)

In order to load these new packages, I need to know their names… Happily, I have a function for that.

## Look up the package names associated with Leishmania major.
lm_name <- get_eupath_pkgnames("Leishmania major")
## Display the 'organismdbi' element of the result; the 'orgdb' element is the
## one used later to fetch the OrgDb object.
lm_name$organismdbi

For those packages I have generated/installed, use this to generate an annotation table. Note that I prefixed the column names with ‘annot_’ in order to make sure that nothing is duplicated with the GO tables, ortholog tables, etc. As a result, the field names below are wrong until the new annotations are loaded.

## Resolve the OrgDb object from its stored name instead of typing the name out.
## get0() yields NULL when nothing by that name is visible from here (for
## example if the providing package was never attached), so printing the result
## doubles as a quick sanity check.
lm_db <- get0(lm_name$orgdb)
lm_db

## Fields to request from the OrgDb; every name carries the 'annot_' prefix.
wanted_fields <- c(
  "annot_gene_location_text", "annot_cds_length",
  "annot_gene_name", "annot_gene_product",
  "annot_gene_type", "annot_strand",
  "annot_gene_entrez_id", "annot_gene_orthomcl_name")

## Request those fields using "gid" as the key type, then pass the 'genes'
## element of the result to extract_gene_locations(), which replaces lm_annot.
lm_annot <- load_orgdb_annotations(
  lm_db, keytype="gid", fields=wanted_fields)
lm_annot <- extract_gene_locations(lm_annot$genes)

1.2 Extract data from the gff files

## Relative path to the L. major Friedlin GFF file; the file name indicates
## TriTrypDB release 37.
lm_gff_file <- "reference/TriTrypDB-37_LmajorFriedlin.gff"
## Read annotations from the GFF, requesting type="exon".
## NOTE(review): sm() presumably silences the loader's messages -- confirm.
lm_gff_annotations <- sm(load_gff_annotations(lm_gff_file, type="exon"))
## Report session details and save, unless 'skip_load' is TRUE.
## get0() returns NULL when 'skip_load' does not exist and isTRUE(NULL) is
## FALSE, so an undefined 'skip_load' means this block runs.
if (!isTRUE(get0("skip_load"))) {
  ## Print the R session information (R version, loaded packages).
  pander::pander(sessionInfo())
  message(paste0("This is hpgltools commit: ", get_git_commit()))
  ## 'savefile' is assigned outside the code visible here and must already
  ## exist when this block runs.
  message(paste0("Saving to ", savefile))
  ## NOTE(review): saveme() presumably writes the session to 'savefile'; the
  ## result is kept in 'tmp', which also keeps it from being printed -- confirm.
  tmp <- sm(saveme(filename=savefile))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset cab8817432ec3a0a81e7922677332f023341b9dd
## R> packrat::restore()
## This is hpgltools commit: Thu Jul 12 11:17:53 2018 -0400: cab8817432ec3a0a81e7922677332f023341b9dd
## Saving to 01_annotation_v20180501.rda.xz
LS0tCnRpdGxlOiAiTGVpc2htYW5pYSBzdHJhaW5zIDIwMTgwNTogQ29sbGVjdGluZyBhbm5vdGF0aW9uIGluZm9ybWF0aW9uLiIKYXV0aG9yOiAiYXRiIGFiZWxld0BnbWFpbC5jb20iCmRhdGU6ICJgciBTeXMuRGF0ZSgpYCIKb3V0cHV0OgogaHRtbF9kb2N1bWVudDoKICBjb2RlX2Rvd25sb2FkOiB0cnVlCiAgY29kZV9mb2xkaW5nOiBzaG93CiAgZmlnX2NhcHRpb246IHRydWUKICBmaWdfaGVpZ2h0OiA3CiAgZmlnX3dpZHRoOiA3CiAgaGlnaGxpZ2h0OiBkZWZhdWx0CiAga2VlcF9tZDogZmFsc2UKICBtb2RlOiBzZWxmY29udGFpbmVkCiAgbnVtYmVyX3NlY3Rpb25zOiB0cnVlCiAgc2VsZl9jb250YWluZWQ6IHRydWUKICB0aGVtZTogcmVhZGFibGUKICB0b2M6IHRydWUKICB0b2NfZmxvYXQ6CiAgIGNvbGxhcHNlZDogZmFsc2UKICAgc21vb3RoX3Njcm9sbDogZmFsc2UKLS0tCgo8c3R5bGU+CiAgYm9keSAubWFpbi1jb250YWluZXIgewogICAgbWF4LXdpZHRoOiAxNjAwcHg7CiAgfQo8L3N0eWxlPgoKYGBge3Igb3B0aW9ucywgaW5jbHVkZT1GQUxTRX0KaWYgKCFpc1RSVUUoZ2V0MCgic2tpcF9sb2FkIikpKSB7CiAgbGlicmFyeShocGdsdG9vbHMpCiAgdHQgPC0gc20oZGV2dG9vbHM6OmxvYWRfYWxsKCJ+L2hwZ2x0b29scyIpKQogIGtuaXRyOjpvcHRzX2tuaXQkc2V0KHByb2dyZXNzPVRSVUUsCiAgICAgICAgICAgICAgICAgICAgICAgdmVyYm9zZT1UUlVFLAogICAgICAgICAgICAgICAgICAgICAgIHdpZHRoPTkwLAogICAgICAgICAgICAgICAgICAgICAgIGVjaG89VFJVRSkKICBrbml0cjo6b3B0c19jaHVuayRzZXQoZXJyb3I9VFJVRSwKICAgICAgICAgICAgICAgICAgICAgICAgZmlnLndpZHRoPTgsCiAgICAgICAgICAgICAgICAgICAgICAgIGZpZy5oZWlnaHQ9OCwKICAgICAgICAgICAgICAgICAgICAgICAgZHBpPTk2KQogIG9sZF9vcHRpb25zIDwtIG9wdGlvbnMoZGlnaXRzPTQsCiAgICAgICAgICAgICAgICAgICAgICAgICBzdHJpbmdzQXNGYWN0b3JzPUZBTFNFLAogICAgICAgICAgICAgICAgICAgICAgICAga25pdHIuZHVwbGljYXRlLmxhYmVsPSJhbGxvdyIpCiAgZ2dwbG90Mjo6dGhlbWVfc2V0KGdncGxvdDI6OnRoZW1lX2J3KGJhc2Vfc2l6ZT0xMikpCiAgdmVyIDwtICIyMDE4MDUwMSIKICBwcmV2aW91c19maWxlIDwtIHBhc3RlMCgiaW5kZXhfdiIsIHZlciwgIi5SbWQiKQoKICB0bXAgPC0gdHJ5KHNtKGxvYWRtZShmaWxlbmFtZT1nc3ViKHBhdHRlcm49IlxcLlJtZCIsIHJlcGxhY2U9IlxcLnJkYVxcLnh6IiwgeD1wcmV2aW91c19maWxlKSkpKQogIHJtZF9maWxlIDwtIHBhc3RlMCgiMDFfYW5ub3RhdGlvbl92IiwgdmVyLCAiLlJtZCIpCiAgc2F2ZWZpbGUgPC0gZ3N1YihwYXR0ZXJuPSJcXC5SbWQiLCByZXBsYWNlPSJcXC5yZGFcXC54eiIsIHg9cm1kX2ZpbGUpCn0KYGBgCgojIEFubm90YXRpb24gdmVyc2lvbjogYHIgdmVyYAoKIyMgR2Vub21lIGFubm90YXRpb24gd2l0aCBPcmdEYi9UeERiL09yZ2Fu
aXNtRGJpCgpUaGUgdHJpdHJ5cGRiIGp1c3QgcmVsZWFzZWQgYSBuZXcgdmVyc2lvbi4gIExldCB1cyBtYWtlIG5ldyBhbm5vdGF0aW9uIGRhdGEgZnJvbSBpdC4KCmBgYHtyIGNyZWF0ZV9vcmdhbmlzbXMsIGV2YWw9RkFMU0V9CiMjIFRoZXNlIGZ1bmN0aW9ucyB0YWtlIF9mb3JldmVyXyB0aGUgZmlyc3QgdGltZSBhcm91bmQuCmxtX2Fubm90IDwtIG1ha2VfZXVwYXRoX29yZ2FuaXNtZGJpKHNwZWNpZXM9IkxlaXNobWFuaWEgbWFqb3IiLCByZWluc3RhbGw9VFJVRSkKYGBgCgpJbiBvcmRlciB0byBsb2FkIHRoZXNlIG5ldyBwYWNrYWdlcywgSSByYXRoZXIgbmVlZCB0byByZW1lbWJlciB0aGVpciBuYW1lcy4uLgpIYXBwaWx5IEkgaGF2ZSBhIGZ1bmN0aW9uIGZvciB0aGF0LgoKYGBge3IgbG9hZF9hbm5vdGF0aW9ucywgZXZhbD1GQUxTRX0KbG1fbmFtZSA8LSBnZXRfZXVwYXRoX3BrZ25hbWVzKCJMZWlzaG1hbmlhIG1ham9yIikKbG1fbmFtZSRvcmdhbmlzbWRiaQpgYGAKCkZvciB0aG9zZSBwYWNrYWdlcyBJIGhhdmUgZ2VuZXJhdGVkL2luc3RhbGxlZCwgdXNlIHRoaXMgdG8gZ2VuZXJhdGUgYW4KYW5ub3RhdGlvbiB0YWJsZS4gT2gsIGJ1dCBJIHByZWZpeGVkIHRoZSBjb2x1bW4gbmFtZXMgd2l0aCAnYW5ub3RfJyBpbiBvcmRlciB0bwptYWtlIHN1cmUgdGhhdCBub3RoaW5nIGlzIGR1cGxpY2F0ZWQgd2l0aCB0aGUgR08gdGFibGVzLCBvcnRob2xvZyB0YWJsZXMsIGV0Yy4KQXMgYSByZXN1bHQsIHRoZXNlIGFyZSB3cm9uZyB1bnRpbCB0aGUgbmV3IGFubm90YXRpb25zIGFyZSBsb2FkZWQuCgpgYGB7ciBsb2FkX29yZ2RiLCBldmFsPUZBTFNFfQojIyBKdXN0IHRvIHNhdmUgb24gdHlwaW5nCmxtX2RiIDwtIGdldDAobG1fbmFtZSRvcmdkYikKbG1fZGIKCndhbnRlZF9maWVsZHMgPC0gYygiYW5ub3RfZ2VuZV9sb2NhdGlvbl90ZXh0IiwKICAgICAgICAgICAgICAgICAgICJhbm5vdF9jZHNfbGVuZ3RoIiwKICAgICAgICAgICAgICAgICAgICJhbm5vdF9nZW5lX25hbWUiLAogICAgICAgICAgICAgICAgICAgImFubm90X2dlbmVfcHJvZHVjdCIsCiAgICAgICAgICAgICAgICAgICAiYW5ub3RfZ2VuZV90eXBlIiwKICAgICAgICAgICAgICAgICAgICJhbm5vdF9zdHJhbmQiLAogICAgICAgICAgICAgICAgICAgImFubm90X2dlbmVfZW50cmV6X2lkIiwKICAgICAgICAgICAgICAgICAgICJhbm5vdF9nZW5lX29ydGhvbWNsX25hbWUiKQpsbV9hbm5vdCA8LSBsb2FkX29yZ2RiX2Fubm90YXRpb25zKGxtX2RiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGtleXR5cGU9ImdpZCIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZmllbGRzPXdhbnRlZF9maWVsZHMpCmxtX2Fubm90IDwtIGV4dHJhY3RfZ2VuZV9sb2NhdGlvbnMobG1fYW5ub3QkZ2VuZXMpCmBgYAoKIyMgRXh0cmFjdCBkYXRhIGZyb20gdGhlIGdmZiBmaWxlcwoKYGBge3IgZ2V0X2Zyb21fZ2ZmfQpsbV9nZmZfZmlsZSA8LSAicmVmZXJl
bmNlL1RyaVRyeXBEQi0zN19MbWFqb3JGcmllZGxpbi5nZmYiCmxtX2dmZl9hbm5vdGF0aW9ucyA8LSBzbShsb2FkX2dmZl9hbm5vdGF0aW9ucyhsbV9nZmZfZmlsZSwgdHlwZT0iZXhvbiIpKQpgYGAKCmBgYHtyIHNhdmVtZX0KaWYgKCFpc1RSVUUoZ2V0MCgic2tpcF9sb2FkIikpKSB7CiAgcGFuZGVyOjpwYW5kZXIoc2Vzc2lvbkluZm8oKSkKICBtZXNzYWdlKHBhc3RlMCgiVGhpcyBpcyBocGdsdG9vbHMgY29tbWl0OiAiLCBnZXRfZ2l0X2NvbW1pdCgpKSkKICBtZXNzYWdlKHBhc3RlMCgiU2F2aW5nIHRvICIsIHNhdmVmaWxlKSkKICB0bXAgPC0gc20oc2F2ZW1lKGZpbGVuYW1lPXNhdmVmaWxlKSkKfQpgYGAK