TriTrypDB just released a new version, so let us build new annotation data from it.
## This call is very slow the first time it is run.
lm_annot <- make_eupath_organismdbi(
  species = "Leishmania major",
  reinstall = TRUE
)
In order to load these new packages, I need to know their names. Happily, I have a function for that.
## Look up the package names for each species at version 37, then show the
## organismdbi package name that was chosen.
lm_name <- get_eupath_pkgnames("Leishmania major", version = "37")
## Starting metadata download.
## Finished metadata download.
## Found the following hits: Leishmania major strain Friedlin, Leishmania major strain LV39c5, Leishmania major strain SD 75.1, choosing the first.
lm_name$organismdbi
## [1] "eupathdb.Leishmania.major.Friedlin.v37"
lmx_name <- get_eupath_pkgnames("Leishmania mexicana", version = "37")
## Starting metadata download.
## Finished metadata download.
## Found the following hits: Leishmania mexicana MHOM/GT/2001/U1103, choosing the first.
lmx_name$organismdbi
## [1] "eupathdb.Leishmania.mexicana.MHOMGT2001U1103.v37"
For the packages I have generated and installed, the following generates an annotation table. Note that I prefixed the column names with ‘annot_’ in order to make sure that nothing is duplicated with the GO tables, ortholog tables, etc. As a result, the column names used here are wrong until the new annotations are loaded.
## Just to save on typing: attach the L. major OrgDb package by the name stored
## in lm_name$orgdb.  The package name is supplied as a character string, so
## library() needs character.only = TRUE; the argument is spelled out in full
## rather than relying on partial matching of 'character'.
library(lm_name$orgdb, character.only = TRUE)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind,
## colMeans, colnames, colSums, dirname, do.call, duplicated,
## eval, evalq, Filter, Find, get, grep, grepl, intersect,
## is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
## paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
## Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which, which.max,
## which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
##
## Fetch the object named by lm_name$orgdb (NULL when no such object exists)
## and print it.
lm_db <- get0(lm_name$orgdb, ifnotfound = NULL)
lm_db
## OrgDb object:
## | DBSCHEMAVERSION: 2.1
## | DBSCHEMA: NOSCHEMA_DB
## | ORGANISM: Leishmania major strain Friedlin
## | SPECIES: Leishmania major strain Friedlin
## | CENTRALID: GID
## | Taxonomy ID: 347515
## | Db type: OrgDb
## | Supporting package: AnnotationDbi
##
## Please see: help('select') for usage information
## Attach the L. mexicana OrgDb package by the name stored in lmx_name$orgdb.
## character.only is spelled out in full rather than relying on partial
## matching of 'character'.
library(lmx_name$orgdb, character.only = TRUE)
##
## Fetch the object named by lmx_name$orgdb (NULL when no such object exists)
## and print it.
lmx_db <- get0(lmx_name$orgdb, ifnotfound = NULL)
lmx_db
## OrgDb object:
## | DBSCHEMAVERSION: 2.1
## | DBSCHEMA: NOSCHEMA_DB
## | ORGANISM: Leishmania mexicana MHOM/GT/2001/U1103
## | SPECIES: Leishmania mexicana MHOM/GT/2001/U1103
## | CENTRALID: GID
## | Taxonomy ID: 929439
## | Db type: OrgDb
## | Supporting package: AnnotationDbi
##
## Please see: help('select') for usage information
## Annotation columns to request; every column name carries the 'annot_' prefix.
wanted_fields <- paste0(
  "annot_",
  c("gene_location_text", "cds_length", "gene_name", "gene_product",
    "gene_type", "strand", "gene_entrez_id", "gene_orthomcl_name")
)
## Request those columns from the L. major OrgDb, keyed by "gid".
lm_annot <- load_orgdb_annotations(
  lm_db,
  keytype = "gid",
  fields = wanted_fields
)
## Unable to find CDSNAME, setting it to ANNOT_GENE_NAME.
## Unable to find CDSCHROM in the db, removing it.
## Unable to find CDSSTRAND in the db, removing it.
## Unable to find CDSSTART in the db, removing it.
## Unable to find CDSEND in the db, removing it.
## Extracted all gene ids.
## Attempting to select: ANNOT_GENE_NAME, GENE_TYPE, ANNOT_GENE_LOCATION_TEXT, ANNOT_CDS_LENGTH, ANNOT_GENE_NAME, ANNOT_GENE_PRODUCT, ANNOT_GENE_TYPE, ANNOT_STRAND, ANNOT_GENE_ENTREZ_ID, ANNOT_GENE_ORTHOMCL_NAME
## 'select()' returned 1:many mapping between keys and columns
## Keep only the $genes element of the L. major result, passed through
## extract_gene_locations(), and append ".1" to every row name.
lm_annot <- extract_gene_locations(lm_annot$genes)
rownames(lm_annot) <- paste0(rownames(lm_annot), ".1")
## Request the same columns from the L. mexicana OrgDb, keyed by "gid".
lmx_annot <- load_orgdb_annotations(
  lmx_db,
  keytype = "gid",
  fields = wanted_fields
)
## Unable to find CDSNAME, setting it to ANNOT_GENE_NAME.
## Unable to find CDSCHROM in the db, removing it.
## Unable to find CDSSTRAND in the db, removing it.
## Unable to find CDSSTART in the db, removing it.
## Unable to find CDSEND in the db, removing it.
## Extracted all gene ids.
## Attempting to select: ANNOT_GENE_NAME, GENE_TYPE, ANNOT_GENE_LOCATION_TEXT, ANNOT_CDS_LENGTH, ANNOT_GENE_NAME, ANNOT_GENE_PRODUCT, ANNOT_GENE_TYPE, ANNOT_STRAND, ANNOT_GENE_ENTREZ_ID, ANNOT_GENE_ORTHOMCL_NAME
## 'select()' returned 1:1 mapping between keys and columns
## Keep only the $genes element of the L. mexicana result, passed through
## extract_gene_locations(), and append ".1" to every row name.
lmx_annot <- extract_gene_locations(lmx_annot$genes)
rownames(lmx_annot) <- paste0(rownames(lmx_annot), ".1")
## Read the exon entries from the L. major Friedlin GFF file.
lm_gff_file <- "reference/TriTrypDB-37_LmajorFriedlin.gff"
lm_gff_annotations <- sm(
  load_gff_annotations(lm_gff_file, type = "exon")
)
## Unless a variable named skip_load exists and is TRUE, report the session
## details and call saveme() with the target file name.
if (!isTRUE(get0("skip_load"))) {
## Print the R session information.
pander::pander(sessionInfo())
## Report the value returned by get_git_commit().
message(paste0("This is hpgltools commit: ", get_git_commit()))
## NOTE(review): savefile is not defined in this section -- presumably it is
## set earlier in the document; confirm.
message(paste0("Saving to ", savefile))
## NOTE(review): sm() looks like an output-silencing wrapper and saveme()
## appears to write the session to savefile -- confirm against their
## definitions.
tmp <- sm(saveme(filename=savefile))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 0b63ce6abcbb822832fe4631b4916f94931d8648
## R> packrat::restore()
## This is hpgltools commit: Wed Sep 5 12:04:45 2018 -0400: 0b63ce6abcbb822832fe4631b4916f94931d8648
## Saving to 01_annotation_v20180828.rda.xz