TriTrypDB just released a new version, so let us make new annotation data from it.
## These functions take _forever_ the first time around.
## Load the development copy of EuPathDB from a local git checkout instead of
## an installed release; the path is relative to the current user's home.
devtools::load_all("~/scratch/git/EuPathDB")
## Loading EuPathDB
## Loading required package: GenomeInfoDbData
## Adding files missing in collate: load_ah_annotations.R
##
## This is EuPathDB version 1.6.0
## Read 'EuPathDB()' to get started.
## Look up the TriTrypDB entry matching "panamensis".  The query is ambiguous:
## it hits two L. panamensis strains and the first one (MHOM/COL/81/L13) is
## the entry that gets used.
pan_entry <- get_eupath_entry("panamensis", webservice="tritrypdb")
## Found the following hits: Leishmania panamensis MHOM/COL/81/L13, Leishmania panamensis strain MHOM/PA/94/PSC-1, choosing the first.
## Using: Leishmania panamensis MHOM/COL/81/L13.
## Build and install the OrgDb package for this entry only when it is not
## already installed.
installedp <- get_eupath_pkgnames(pan_entry)$orgdb_installed
if (!isTRUE(installedp)) {
  pan_annot <- EuPathDB::make_eupath_orgdb(pan_entry, reinstall = TRUE,
                                           overwrite = TRUE)
}
## Query the package names again now that the package should be installed.
pan_names <- get_eupath_pkgnames(pan_entry)
## The package name is held in a variable, so library() must be told to treat
## its argument as a character string.  The argument is spelled out in full
## (character.only) rather than relying on partial matching of 'character'.
library(pan_names$orgdb, character.only = TRUE)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:EuPathDB':
##
## first, rename
## The following object is masked from 'package:base':
##
## expand.grid
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:EuPathDB':
##
## collapse, desc, slice
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:EuPathDB':
##
## select
##
## Fetch the object that carries the same name as the OrgDb package.  get0()
## returns NULL instead of raising an error when no such object exists.
pan_db <- get0(pan_names$orgdb)
## Print a summary of the database.
pan_db
## OrgDb object:
## | DBSCHEMAVERSION: 2.1
## | DBSCHEMA: NOSCHEMA_DB
## | ORGANISM: Leishmania panamensis MHOM/COL/81/L13
## | SPECIES: Leishmania panamensis MHOM/COL/81/L13
## | CENTRALID: GID
## | Taxonomy ID: 1295824
## | Db type: OrgDb
## | Supporting package: AnnotationDbi
##
## Please see: help('select') for usage information
For those packages I have generated/installed, load_orgdb_annotations() (used below) generates an annotation table. Note that I prefixed the annotation column names with ‘annot_’ (shown upper-cased as ‘ANNOT_’ in the column listing) in order to make sure that nothing is duplicated with the GO tables, ortholog tables, etc.
But first, let's see which columns are available in the annotation package.
## List every column that can be requested from the OrgDb, then print the list.
all_fields <- columns(pan_db)
all_fields
## [1] "ANNOT_BFD3_CDS"
## [2] "ANNOT_BFD3_MODEL"
## [3] "ANNOT_BFD6_CDS"
## [4] "ANNOT_BFD6_MODEL"
## [5] "ANNOT_CDS"
## [6] "ANNOT_CDS_LENGTH"
## [7] "ANNOT_CHROMOSOME"
## [8] "ANNOT_DIF_CDS"
## [9] "ANNOT_DIF_MODEL"
## [10] "ANNOT_EC_NUMBERS"
## [11] "ANNOT_EC_NUMBERS_DERIVED"
## [12] "ANNOT_EXON_COUNT"
## [13] "ANNOT_FC_BFD3_CDS"
## [14] "ANNOT_FC_BFD3_MODEL"
## [15] "ANNOT_FC_BFD6_CDS"
## [16] "ANNOT_FC_BFD6_MODEL"
## [17] "ANNOT_FC_DIF_CDS"
## [18] "ANNOT_FC_DIF_MODEL"
## [19] "ANNOT_FC_PF_CDS"
## [20] "ANNOT_FC_PF_MODEL"
## [21] "ANNOT_FIVE_PRIME_UTR_LENGTH"
## [22] "ANNOT_GENE_ENTREZ_ID"
## [23] "ANNOT_GENE_EXON_COUNT"
## [24] "ANNOT_GENE_HTS_NONCODING_SNPS"
## [25] "ANNOT_GENE_HTS_NONSYN_SYN_RATIO"
## [26] "ANNOT_GENE_HTS_NONSYNONYMOUS_SNPS"
## [27] "ANNOT_GENE_HTS_STOP_CODON_SNPS"
## [28] "ANNOT_GENE_HTS_SYNONYMOUS_SNPS"
## [29] "ANNOT_GENE_LOCATION_TEXT"
## [30] "ANNOT_GENE_NAME"
## [31] "ANNOT_GENE_ORTHOLOG_NUMBER"
## [32] "ANNOT_GENE_ORTHOMCL_NAME"
## [33] "ANNOT_GENE_PARALOG_NUMBER"
## [34] "ANNOT_GENE_PREVIOUS_IDS"
## [35] "ANNOT_GENE_PRODUCT"
## [36] "ANNOT_GENE_SOURCE_ID"
## [37] "ANNOT_GENE_TOTAL_HTS_SNPS"
## [38] "ANNOT_GENE_TRANSCRIPT_COUNT"
## [39] "ANNOT_GENE_TYPE"
## [40] "ANNOT_GO_COMPONENT"
## [41] "ANNOT_GO_FUNCTION"
## [42] "ANNOT_GO_ID_COMPONENT"
## [43] "ANNOT_GO_ID_FUNCTION"
## [44] "ANNOT_GO_ID_PROCESS"
## [45] "ANNOT_GO_PROCESS"
## [46] "ANNOT_HAS_MISSING_TRANSCRIPTS"
## [47] "ANNOT_INTERPRO_DESCRIPTION"
## [48] "ANNOT_INTERPRO_ID"
## [49] "ANNOT_IS_PSEUDO"
## [50] "ANNOT_ISOELECTRIC_POINT"
## [51] "ANNOT_LOCATION_TEXT"
## [52] "ANNOT_MATCHED_RESULT"
## [53] "ANNOT_MOLECULAR_WEIGHT"
## [54] "ANNOT_NO_TET_CDS"
## [55] "ANNOT_NO_TET_MODEL"
## [56] "ANNOT_ORGANISM"
## [57] "ANNOT_PF_CDS"
## [58] "ANNOT_PF_MODEL"
## [59] "ANNOT_PFAM_DESCRIPTION"
## [60] "ANNOT_PFAM_ID"
## [61] "ANNOT_PIRSF_DESCRIPTION"
## [62] "ANNOT_PIRSF_ID"
## [63] "ANNOT_PREDICTED_GO_COMPONENT"
## [64] "ANNOT_PREDICTED_GO_FUNCTION"
## [65] "ANNOT_PREDICTED_GO_ID_COMPONENT"
## [66] "ANNOT_PREDICTED_GO_ID_FUNCTION"
## [67] "ANNOT_PREDICTED_GO_ID_PROCESS"
## [68] "ANNOT_PREDICTED_GO_PROCESS"
## [69] "ANNOT_PROJECT_ID"
## [70] "ANNOT_PROSITEPROFILES_DESCRIPTION"
## [71] "ANNOT_PROSITEPROFILES_ID"
## [72] "ANNOT_PROTEIN_LENGTH"
## [73] "ANNOT_PROTEIN_SEQUENCE"
## [74] "ANNOT_SEQUENCE_ID"
## [75] "ANNOT_SIGNALP_PEPTIDE"
## [76] "ANNOT_SIGNALP_SCORES"
## [77] "ANNOT_SMART_DESCRIPTION"
## [78] "ANNOT_SMART_ID"
## [79] "ANNOT_SOURCE_ID"
## [80] "ANNOT_STRAND"
## [81] "ANNOT_SUPERFAMILY_DESCRIPTION"
## [82] "ANNOT_SUPERFAMILY_ID"
## [83] "ANNOT_THREE_PRIME_UTR_LENGTH"
## [84] "ANNOT_TIGRFAM_DESCRIPTION"
## [85] "ANNOT_TIGRFAM_ID"
## [86] "ANNOT_TM_COUNT"
## [87] "ANNOT_TRANS_FOUND_PER_GENE_INTERNAL"
## [88] "ANNOT_TRANSCRIPT_INDEX_PER_GENE"
## [89] "ANNOT_TRANSCRIPT_LENGTH"
## [90] "ANNOT_TRANSCRIPT_LINK"
## [91] "ANNOT_TRANSCRIPT_PRODUCT"
## [92] "ANNOT_TRANSCRIPT_SEQUENCE"
## [93] "ANNOT_TRANSCRIPTS_FOUND_PER_GENE"
## [94] "ANNOT_UNIPROT_ID"
## [95] "ANNOT_URI"
## [96] "ANNOT_WDK_WEIGHT"
## [97] "CHR_ID"
## [98] "EVIDENCE"
## [99] "GENE_TYPE"
## [100] "GID"
## [101] "GO"
## [102] "GO_EVIDENCE_CODE"
## [103] "GO_ID"
## [104] "GO_IS_NOT"
## [105] "GO_ONTOLOGY"
## [106] "GO_REFERENCE"
## [107] "GO_SORT_KEY"
## [108] "GO_SOURCE"
## [109] "GO_SUPPORT_FOR_EVIDENCE_CODE_ASSIGNMENT"
## [110] "GO_TERM_NAME"
## [111] "GO_TRANSCRIPT_ID_S"
## [112] "GOSLIM_EVIDENCE_CODE"
## [113] "GOSLIM_GO_ID"
## [114] "GOSLIM_GO_TERM_NAME"
## [115] "GOSLIM_IS_NOT"
## [116] "GOSLIM_ONTOLOGY"
## [117] "GOSLIM_REFERENCE"
## [118] "GOSLIM_SORT_KEY"
## [119] "GOSLIM_SOURCE"
## [120] "GOSLIM_SUPPORT_FOR_EVIDENCE_CODE_ASSIGNMENT"
## [121] "GOSLIM_TRANSCRIPT_ID_S"
## [122] "INTERPRO_DESCRIPTION"
## [123] "INTERPRO_E_VALUE"
## [124] "INTERPRO_END_MIN"
## [125] "INTERPRO_ID"
## [126] "INTERPRO_NAME"
## [127] "INTERPRO_PRIMARY_ID"
## [128] "INTERPRO_SECONDARY_ID"
## [129] "INTERPRO_START_MIN"
## [130] "INTERPRO_TRANSCRIPT_ID_S"
## [131] "LINKOUT_DATABASE"
## [132] "LINKOUT_EXT_ID"
## [133] "LINKOUT_LINK_URL"
## [134] "LINKOUT_SOURCE_ID"
## [135] "ORTHOLOGS_GID"
## [136] "ORTHOLOGS_ORGANISM"
## [137] "ORTHOLOGS_PRODUCT"
## [138] "ORTHOLOGS_SYNTENIC"
## [139] "PATHWAY_EC_NUMBER_MATCHED_IN_PATHWAY"
## [140] "PATHWAY_EXACT_EC_NUMBER_MATCH"
## [141] "PATHWAY_EXPASY_URL"
## [142] "PATHWAY_ID"
## [143] "PATHWAY_REACTIONS_MATCHING_EC_NUMBER"
## [144] "PATHWAY_SOURCE"
## [145] "PATHWAY_SOURCE_ID"
## Build the annotation table for every gene id.  With fields = "all" the
## output below shows that the ANNOT_* columns are the ones selected.
all_annot <- load_orgdb_annotations(pan_db, keytype = "gid", fields = "all")
## Selecting the following fields, this might be too many:
## ANNOT_BFD3_CDS, ANNOT_BFD3_MODEL, ANNOT_BFD6_CDS, ANNOT_BFD6_MODEL, ANNOT_CDS, ANNOT_CDS_LENGTH, ANNOT_CHROMOSOME, ANNOT_DIF_CDS, ANNOT_DIF_MODEL, ANNOT_EC_NUMBERS, ANNOT_EC_NUMBERS_DERIVED, ANNOT_EXON_COUNT, ANNOT_FC_BFD3_CDS, ANNOT_FC_BFD3_MODEL, ANNOT_FC_BFD6_CDS, ANNOT_FC_BFD6_MODEL, ANNOT_FC_DIF_CDS, ANNOT_FC_DIF_MODEL, ANNOT_FC_PF_CDS, ANNOT_FC_PF_MODEL, ANNOT_FIVE_PRIME_UTR_LENGTH, ANNOT_GENE_ENTREZ_ID, ANNOT_GENE_EXON_COUNT, ANNOT_GENE_HTS_NONCODING_SNPS, ANNOT_GENE_HTS_NONSYN_SYN_RATIO, ANNOT_GENE_HTS_NONSYNONYMOUS_SNPS, ANNOT_GENE_HTS_STOP_CODON_SNPS, ANNOT_GENE_HTS_SYNONYMOUS_SNPS, ANNOT_GENE_LOCATION_TEXT, ANNOT_GENE_NAME, ANNOT_GENE_ORTHOLOG_NUMBER, ANNOT_GENE_ORTHOMCL_NAME, ANNOT_GENE_PARALOG_NUMBER, ANNOT_GENE_PREVIOUS_IDS, ANNOT_GENE_PRODUCT, ANNOT_GENE_SOURCE_ID, ANNOT_GENE_TOTAL_HTS_SNPS, ANNOT_GENE_TRANSCRIPT_COUNT, ANNOT_GENE_TYPE, ANNOT_GO_COMPONENT, ANNOT_GO_FUNCTION, ANNOT_GO_ID_COMPONENT, ANNOT_GO_ID_FUNCTION, ANNOT_GO_ID_PROCESS, ANNOT_GO_PROCESS, ANNOT_HAS_MISSING_TRANSCRIPTS, ANNOT_INTERPRO_DESCRIPTION, ANNOT_INTERPRO_ID, ANNOT_IS_PSEUDO, ANNOT_ISOELECTRIC_POINT, ANNOT_LOCATION_TEXT, ANNOT_MATCHED_RESULT, ANNOT_MOLECULAR_WEIGHT, ANNOT_NO_TET_CDS, ANNOT_NO_TET_MODEL, ANNOT_ORGANISM, ANNOT_PF_CDS, ANNOT_PF_MODEL, ANNOT_PFAM_DESCRIPTION, ANNOT_PFAM_ID, ANNOT_PIRSF_DESCRIPTION, ANNOT_PIRSF_ID, ANNOT_PREDICTED_GO_COMPONENT, ANNOT_PREDICTED_GO_FUNCTION, ANNOT_PREDICTED_GO_ID_COMPONENT, ANNOT_PREDICTED_GO_ID_FUNCTION, ANNOT_PREDICTED_GO_ID_PROCESS, ANNOT_PREDICTED_GO_PROCESS, ANNOT_PROJECT_ID, ANNOT_PROSITEPROFILES_DESCRIPTION, ANNOT_PROSITEPROFILES_ID, ANNOT_PROTEIN_LENGTH, ANNOT_PROTEIN_SEQUENCE, ANNOT_SEQUENCE_ID, ANNOT_SIGNALP_PEPTIDE, ANNOT_SIGNALP_SCORES, ANNOT_SMART_DESCRIPTION, ANNOT_SMART_ID, ANNOT_SOURCE_ID, ANNOT_STRAND, ANNOT_SUPERFAMILY_DESCRIPTION, ANNOT_SUPERFAMILY_ID, ANNOT_THREE_PRIME_UTR_LENGTH, ANNOT_TIGRFAM_DESCRIPTION, ANNOT_TIGRFAM_ID, ANNOT_TM_COUNT, ANNOT_TRANS_FOUND_PER_GENE_INTERNAL, ANNOT_TRANSCRIPT_INDEX_PER_GENE, 
ANNOT_TRANSCRIPT_LENGTH, ANNOT_TRANSCRIPT_LINK, ANNOT_TRANSCRIPT_PRODUCT, ANNOT_TRANSCRIPT_SEQUENCE, ANNOT_TRANSCRIPTS_FOUND_PER_GENE, ANNOT_UNIPROT_ID, ANNOT_URI, ANNOT_WDK_WEIGHT
## Extracted all gene ids.
## Attempting to select: ANNOT_BFD3_CDS, ANNOT_BFD3_MODEL, ANNOT_BFD6_CDS, ANNOT_BFD6_MODEL, ANNOT_CDS, ANNOT_CDS_LENGTH, ANNOT_CHROMOSOME, ANNOT_DIF_CDS, ANNOT_DIF_MODEL, ANNOT_EC_NUMBERS, ANNOT_EC_NUMBERS_DERIVED, ANNOT_EXON_COUNT, ANNOT_FC_BFD3_CDS, ANNOT_FC_BFD3_MODEL, ANNOT_FC_BFD6_CDS, ANNOT_FC_BFD6_MODEL, ANNOT_FC_DIF_CDS, ANNOT_FC_DIF_MODEL, ANNOT_FC_PF_CDS, ANNOT_FC_PF_MODEL, ANNOT_FIVE_PRIME_UTR_LENGTH, ANNOT_GENE_ENTREZ_ID, ANNOT_GENE_EXON_COUNT, ANNOT_GENE_HTS_NONCODING_SNPS, ANNOT_GENE_HTS_NONSYN_SYN_RATIO, ANNOT_GENE_HTS_NONSYNONYMOUS_SNPS, ANNOT_GENE_HTS_STOP_CODON_SNPS, ANNOT_GENE_HTS_SYNONYMOUS_SNPS, ANNOT_GENE_LOCATION_TEXT, ANNOT_GENE_NAME, ANNOT_GENE_ORTHOLOG_NUMBER, ANNOT_GENE_ORTHOMCL_NAME, ANNOT_GENE_PARALOG_NUMBER, ANNOT_GENE_PREVIOUS_IDS, ANNOT_GENE_PRODUCT, ANNOT_GENE_SOURCE_ID, ANNOT_GENE_TOTAL_HTS_SNPS, ANNOT_GENE_TRANSCRIPT_COUNT, ANNOT_GENE_TYPE, ANNOT_GO_COMPONENT, ANNOT_GO_FUNCTION, ANNOT_GO_ID_COMPONENT, ANNOT_GO_ID_FUNCTION, ANNOT_GO_ID_PROCESS, ANNOT_GO_PROCESS, ANNOT_HAS_MISSING_TRANSCRIPTS, ANNOT_INTERPRO_DESCRIPTION, ANNOT_INTERPRO_ID, ANNOT_IS_PSEUDO, ANNOT_ISOELECTRIC_POINT, ANNOT_LOCATION_TEXT, ANNOT_MATCHED_RESULT, ANNOT_MOLECULAR_WEIGHT, ANNOT_NO_TET_CDS, ANNOT_NO_TET_MODEL, ANNOT_ORGANISM, ANNOT_PF_CDS, ANNOT_PF_MODEL, ANNOT_PFAM_DESCRIPTION, ANNOT_PFAM_ID, ANNOT_PIRSF_DESCRIPTION, ANNOT_PIRSF_ID, ANNOT_PREDICTED_GO_COMPONENT, ANNOT_PREDICTED_GO_FUNCTION, ANNOT_PREDICTED_GO_ID_COMPONENT, ANNOT_PREDICTED_GO_ID_FUNCTION, ANNOT_PREDICTED_GO_ID_PROCESS, ANNOT_PREDICTED_GO_PROCESS, ANNOT_PROJECT_ID, ANNOT_PROSITEPROFILES_DESCRIPTION, ANNOT_PROSITEPROFILES_ID, ANNOT_PROTEIN_LENGTH, ANNOT_PROTEIN_SEQUENCE, ANNOT_SEQUENCE_ID, ANNOT_SIGNALP_PEPTIDE, ANNOT_SIGNALP_SCORES, ANNOT_SMART_DESCRIPTION, ANNOT_SMART_ID, ANNOT_SOURCE_ID, ANNOT_STRAND, ANNOT_SUPERFAMILY_DESCRIPTION, ANNOT_SUPERFAMILY_ID, ANNOT_THREE_PRIME_UTR_LENGTH, ANNOT_TIGRFAM_DESCRIPTION, ANNOT_TIGRFAM_ID, ANNOT_TM_COUNT, ANNOT_TRANS_FOUND_PER_GENE_INTERNAL, 
ANNOT_TRANSCRIPT_INDEX_PER_GENE, ANNOT_TRANSCRIPT_LENGTH, ANNOT_TRANSCRIPT_LINK, ANNOT_TRANSCRIPT_PRODUCT, ANNOT_TRANSCRIPT_SEQUENCE, ANNOT_TRANSCRIPTS_FOUND_PER_GENE, ANNOT_UNIPROT_ID, ANNOT_URI, ANNOT_WDK_WEIGHT
## 'select()' returned 1:1 mapping between keys and columns
For the moment I think I will just ask for Esmeraldo -> Non-Esmeraldo (esmer->nonesmer) orthologs.
## Extract the ortholog table from the OrgDb.  The output below reports that
## the ORTHOLOGS_ORTHOLOG and ORTHOLOGS_COUNT columns are absent and dropped.
orthos <- EuPathDB::extract_eupath_orthologs(db = pan_db)
## Some columns were missing: ORTHOLOGS_ORTHOLOG, ORTHOLOGS_COUNT
## Removing them, which may end badly.
## 'select()' returned 1:many mapping between keys and columns
## There are 48 possible species in this group.
## Found species: Blechomonas ayalai B08-376
## Found species: Bodo saltans strain Lake Konstanz
## Found species: Crithidia fasciculata strain Cf-Cl
## Found species: Endotrypanum monterogeii strain LV88
## Found species: Leishmania aethiopica L147
## Found species: Leishmania amazonensis MHOM/BR/71973/M2269
## Found species: Leishmania arabica strain LEM1108
## Found species: Leishmania braziliensis MHOM/BR/75/M2903
## Found species: Leishmania braziliensis MHOM/BR/75/M2904
## Found species: Leishmania donovani BPK282A1
## Found species: Leishmania donovani CL-SL
## Found species: Leishmania donovani strain LV9
## Found species: Leishmania enriettii strain LEM3045
## Found species: Leishmania gerbilli strain LEM452
## Found species: Leishmania infantum JPCM5
## Found species: Leishmania major strain Friedlin
## Found species: Leishmania major strain LV39c5
## Found species: Leishmania major strain SD 75.1
## Found species: Leishmania mexicana MHOM/GT/2001/U1103
## Found species: Leishmania panamensis MHOM/COL/81/L13
## Found species: Leishmania panamensis strain MHOM/PA/94/PSC-1
## Found species: Leishmania sp. MAR LEM2494
## Found species: Leishmania tarentolae Parrot-TarII
## Found species: Leishmania tropica L590
## Found species: Leishmania turanica strain LEM423
## Found species: Leptomonas pyrrhocoris H10
## Found species: Leptomonas seymouri ATCC 30220
## Found species: Paratrypanosoma confusum CUL13
## Found species: Trypanosoma brucei brucei TREU927
## Found species: Trypanosoma brucei gambiense DAL972
## Found species: Trypanosoma brucei Lister strain 427
## Found species: Trypanosoma brucei Lister strain 427 2018
## Found species: Trypanosoma congolense IL3000
## Found species: Trypanosoma cruzi CL Brener Esmeraldo-like
## Found species: Trypanosoma cruzi CL Brener Non-Esmeraldo-like
## Found species: Trypanosoma cruzi Dm28c 2014
## Found species: Trypanosoma cruzi Dm28c 2017
## Found species: Trypanosoma cruzi Dm28c 2018
## Found species: Trypanosoma cruzi marinkellei strain B7
## Found species: Trypanosoma cruzi strain CL Brener
## Found species: Trypanosoma cruzi Sylvio X10/1
## Found species: Trypanosoma cruzi Sylvio X10/1-2012
## Found species: Trypanosoma cruzi TCC
## Found species: Trypanosoma evansi strain STIB 805
## Found species: Trypanosoma grayi ANR4
## Found species: Trypanosoma rangeli SC58
## Found species: Trypanosoma theileri isolate Edinburgh
## Found species: Trypanosoma vivax Y486
## Unless skip_load exists and is TRUE, report the session information and the
## hpgltools commit in use, then save to 'savefile'.
## NOTE(review): savefile is not defined in the visible part of this
## worksheet, and sm() presumably silences the output of saveme() -- confirm.
if (!isTRUE(get0("skip_load"))) {
  pander::pander(sessionInfo())
  message(paste0("This is hpgltools commit: ", get_git_commit()))
  message(paste0("Saving to ", savefile))
  tmp <- sm(saveme(filename = savefile))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset f3c1e03852c87dc60c7e72e726bb640572e695ff
## This is hpgltools commit: Thu Aug 22 15:32:44 2019 -0400: f3c1e03852c87dc60c7e72e726bb640572e695ff
## Saving to 01_annotation_v20191001.rda.xz