TriTrypDB just released a new version. Let us build new annotation data from it.
## These functions take _forever_ the first time around.
devtools::load_all("~/scratch/git/EuPathDB")
## Look up the TriTrypDB metadata entry for the Esmeraldo-like haplotype.
## The bare pattern "Esmeraldo-like" is also a substring of the
## "... CL Brener Non-Esmeraldo-like" taxon name, so the lookup reported two
## hits and silently took the first one.  Including "CL Brener " makes the
## pattern match only the intended entry, independent of hit order.
esmer_entry <- get_eupath_entry("CL Brener Esmeraldo-like",
                                webservice = "tritrypdb")
## Only build the OrgDb package when orgdb_installed is not TRUE;
## esmer_annot is left unassigned here otherwise.
installedp <- get_eupath_pkgnames(esmer_entry)$orgdb_installed
if (!isTRUE(installedp)) {
  esmer_annot <- EuPathDB::make_eupath_orgdb(esmer_entry, reinstall = TRUE,
                                             overwrite = TRUE)
}
## Fetch the Non-Esmeraldo-like entry, then build its OrgDb package unless
## get_eupath_pkgnames() already reports orgdb_installed as TRUE.
nonesmer_entry <- get_eupath_entry("Non-Esmeraldo-like",
                                   webservice = "tritrypdb")
installedp <- get_eupath_pkgnames(nonesmer_entry)[["orgdb_installed"]]
if (!isTRUE(installedp)) {
  nonesmer_annot <- EuPathDB::make_eupath_orgdb(
    nonesmer_entry,
    reinstall = TRUE,
    overwrite = TRUE
  )
}
## Same procedure for the "strain CL Brener" entry (the unas_* objects):
## look it up, then build the OrgDb package only if it is not installed.
unas_entry <- get_eupath_entry("strain CL Brener",
                               webservice = "tritrypdb")
installedp <- get_eupath_pkgnames(unas_entry)[["orgdb_installed"]]
if (!isTRUE(installedp)) {
  unas_annot <- EuPathDB::make_eupath_orgdb(
    unas_entry,
    reinstall = TRUE,
    overwrite = TRUE
  )
  ## unas_annot <- EuPathDB::make_eupath_orgdb(unas_entry, reinstall=TRUE)
}
## Build, attach and inspect the OrgDb package for the entry matching
## "Friedlin".
maj_entry <- get_eupath_entry("Friedlin", webservice = "tritrypdb")
## NOTE(review): unlike the T. cruzi entries, this rebuild is not guarded by
## an orgdb_installed check -- confirm that forcing it every run is intended.
maj_annot <- EuPathDB::make_eupath_orgdb(maj_entry, reinstall = TRUE,
                                         overwrite = TRUE)
maj_names <- get_eupath_pkgnames(maj_entry)
## The package name is held in a variable, so library() needs character.only.
## It is spelled out in full: `character=` only worked through partial
## argument matching.
library(maj_names$orgdb, character.only = TRUE)
## Fetch the object named after the package; get0() yields NULL if absent.
maj_db <- get0(maj_names$orgdb)
maj_db
## Peek at the last few keys as a quick sanity check of the new package.
tail(keys(maj_db))
## Look up the entry matching "anamensis" and (re)build its OrgDb package
## unconditionally.
pan_entry <- get_eupath_entry("anamensis", webservice = "tritrypdb")
pan_annot <- EuPathDB::make_eupath_orgdb(
  pan_entry,
  reinstall = TRUE,
  overwrite = TRUE
)
To load these new packages, I need to remember their names… Happily, I have a function for that.
##library(EuPathDB)
devtools::load_all("~/scratch/git/EuPathDB")
## Loading EuPathDB
## Loading required package: GenomeInfoDbData
## Adding files missing in collate: load_ah_annotations.R
##
## This is EuPathDB version 1.6.0
## Read 'EuPathDB()' to get started.
## Look up the three T. cruzi CL Brener entries on TriTrypDB.
## The Esmeraldo-like pattern includes "CL Brener " on purpose: the bare
## pattern "Esmeraldo-like" also hit the Non-Esmeraldo-like entry, and the
## lookup then fell back to 'choosing the first' of the two hits.
esmer_entry <- EuPathDB::get_eupath_entry(species = "CL Brener Esmeraldo-like",
                                          webservice = "tritrypdb")
nonesmer_entry <- EuPathDB::get_eupath_entry(species = "Brener Non",
                                             webservice = "tritrypdb")
## Found: Trypanosoma cruzi CL Brener Non-Esmeraldo-like
## The trailing "$" anchors the pattern at the end of the name.
unas_entry <- EuPathDB::get_eupath_entry(species = "CL Brener$",
                                         webservice = "tritrypdb")
## Found: Trypanosoma cruzi strain CL Brener
## Resolve the package names belonging to each entry and print the OrgDb
## package name of each one.
esmer_names <- get_eupath_pkgnames(esmer_entry)
esmer_names[["orgdb"]]
## org.Tcruzi.CL.Brener.Esmeraldo.like.v45.eg.db
nonesmer_names <- get_eupath_pkgnames(nonesmer_entry)
nonesmer_names[["orgdb"]]
## org.Tcruzi.CL.Brener.Non.Esmeraldo.like.v45.eg.db
unas_names <- get_eupath_pkgnames(unas_entry)
unas_names[["orgdb"]]
## org.Tcruzi.CL.Brener.v45.eg.db
For the packages I have generated and installed, the following produces an annotation table. Note that I prefixed the column names with ‘annot_’ to make sure that nothing is duplicated with the GO tables, ortholog tables, etc.
## Just to save on typing: attach the Esmeraldo-like OrgDb package by the
## name computed above.  character.only is spelled out in full, because
## `character=` only worked through partial argument matching.
library(esmer_names$orgdb, character.only = TRUE)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:EuPathDB':
##
## first, rename
## The following object is masked from 'package:base':
##
## expand.grid
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:EuPathDB':
##
## collapse, desc, slice
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:EuPathDB':
##
## select
##
## Attach the remaining two OrgDb packages by their computed names.
## character.only is written out in full rather than relying on partial
## matching of `character=`.
library(nonesmer_names$orgdb, character.only = TRUE)
##
library(unas_names$orgdb, character.only = TRUE)
##
## Fetch the object that carries the same name as the attached package
## (get0() gives NULL when no such object exists) and display it.
esmer_db <- get0(esmer_names[["orgdb"]])
esmer_db
## OrgDb object:
## | DBSCHEMAVERSION: 2.1
## | DBSCHEMA: NOSCHEMA_DB
## | ORGANISM: Trypanosoma cruzi
## | SPECIES: Trypanosoma cruzi
## | CENTRALID: GID
## | Taxonomy ID: 5693
## | Db type: OrgDb
## | Supporting package: AnnotationDbi
##
## Please see: help('select') for usage information
## Fetch the Non-Esmeraldo-like database object by its package name and
## display it.
nonesmer_db <- get0(nonesmer_names[["orgdb"]])
nonesmer_db
## OrgDb object:
## | DBSCHEMAVERSION: 2.1
## | DBSCHEMA: NOSCHEMA_DB
## | ORGANISM: Trypanosoma cruzi
## | SPECIES: Trypanosoma cruzi
## | CENTRALID: GID
## | Taxonomy ID: 5693
## | Db type: OrgDb
## | Supporting package: AnnotationDbi
##
## Please see: help('select') for usage information
## Fetch the "strain CL Brener" database object by its package name and
## display it.
unas_db <- get0(unas_names[["orgdb"]])
unas_db
## OrgDb object:
## | DBSCHEMAVERSION: 2.1
## | DBSCHEMA: NOSCHEMA_DB
## | ORGANISM: Trypanosoma cruzi strain CL Brener
## | SPECIES: Trypanosoma cruzi strain CL Brener
## | CENTRALID: GID
## | Taxonomy ID: 353153
## | Db type: OrgDb
## | Supporting package: AnnotationDbi
##
## Please see: help('select') for usage information
Let's see what columns are available in the annotation packages.
## Collect the column names the Esmeraldo-like database can return, then
## print them.
all_fields <- AnnotationDbi::columns(esmer_db)
all_fields
## [1] "ANNOT_BFD3_CDS"
## [2] "ANNOT_BFD3_MODEL"
## [3] "ANNOT_BFD6_CDS"
## [4] "ANNOT_BFD6_MODEL"
## [5] "ANNOT_CDS"
## [6] "ANNOT_CDS_LENGTH"
## [7] "ANNOT_CHROMOSOME"
## [8] "ANNOT_DIF_CDS"
## [9] "ANNOT_DIF_MODEL"
## [10] "ANNOT_EC_NUMBERS"
## [11] "ANNOT_EC_NUMBERS_DERIVED"
## [12] "ANNOT_EXON_COUNT"
## [13] "ANNOT_FC_BFD3_CDS"
## [14] "ANNOT_FC_BFD3_MODEL"
## [15] "ANNOT_FC_BFD6_CDS"
## [16] "ANNOT_FC_BFD6_MODEL"
## [17] "ANNOT_FC_DIF_CDS"
## [18] "ANNOT_FC_DIF_MODEL"
## [19] "ANNOT_FC_PF_CDS"
## [20] "ANNOT_FC_PF_MODEL"
## [21] "ANNOT_FIVE_PRIME_UTR_LENGTH"
## [22] "ANNOT_GENE_ENTREZ_ID"
## [23] "ANNOT_GENE_EXON_COUNT"
## [24] "ANNOT_GENE_HTS_NONCODING_SNPS"
## [25] "ANNOT_GENE_HTS_NONSYN_SYN_RATIO"
## [26] "ANNOT_GENE_HTS_NONSYNONYMOUS_SNPS"
## [27] "ANNOT_GENE_HTS_STOP_CODON_SNPS"
## [28] "ANNOT_GENE_HTS_SYNONYMOUS_SNPS"
## [29] "ANNOT_GENE_LOCATION_TEXT"
## [30] "ANNOT_GENE_NAME"
## [31] "ANNOT_GENE_ORTHOLOG_NUMBER"
## [32] "ANNOT_GENE_ORTHOMCL_NAME"
## [33] "ANNOT_GENE_PARALOG_NUMBER"
## [34] "ANNOT_GENE_PREVIOUS_IDS"
## [35] "ANNOT_GENE_PRODUCT"
## [36] "ANNOT_GENE_SOURCE_ID"
## [37] "ANNOT_GENE_TOTAL_HTS_SNPS"
## [38] "ANNOT_GENE_TRANSCRIPT_COUNT"
## [39] "ANNOT_GENE_TYPE"
## [40] "ANNOT_GO_COMPONENT"
## [41] "ANNOT_GO_FUNCTION"
## [42] "ANNOT_GO_ID_COMPONENT"
## [43] "ANNOT_GO_ID_FUNCTION"
## [44] "ANNOT_GO_ID_PROCESS"
## [45] "ANNOT_GO_PROCESS"
## [46] "ANNOT_HAS_MISSING_TRANSCRIPTS"
## [47] "ANNOT_INTERPRO_DESCRIPTION"
## [48] "ANNOT_INTERPRO_ID"
## [49] "ANNOT_IS_PSEUDO"
## [50] "ANNOT_ISOELECTRIC_POINT"
## [51] "ANNOT_LOCATION_TEXT"
## [52] "ANNOT_MATCHED_RESULT"
## [53] "ANNOT_MOLECULAR_WEIGHT"
## [54] "ANNOT_NO_TET_CDS"
## [55] "ANNOT_NO_TET_MODEL"
## [56] "ANNOT_ORGANISM"
## [57] "ANNOT_PF_CDS"
## [58] "ANNOT_PF_MODEL"
## [59] "ANNOT_PFAM_DESCRIPTION"
## [60] "ANNOT_PFAM_ID"
## [61] "ANNOT_PIRSF_DESCRIPTION"
## [62] "ANNOT_PIRSF_ID"
## [63] "ANNOT_PREDICTED_GO_COMPONENT"
## [64] "ANNOT_PREDICTED_GO_FUNCTION"
## [65] "ANNOT_PREDICTED_GO_ID_COMPONENT"
## [66] "ANNOT_PREDICTED_GO_ID_FUNCTION"
## [67] "ANNOT_PREDICTED_GO_ID_PROCESS"
## [68] "ANNOT_PREDICTED_GO_PROCESS"
## [69] "ANNOT_PROJECT_ID"
## [70] "ANNOT_PROSITEPROFILES_DESCRIPTION"
## [71] "ANNOT_PROSITEPROFILES_ID"
## [72] "ANNOT_PROTEIN_LENGTH"
## [73] "ANNOT_PROTEIN_SEQUENCE"
## [74] "ANNOT_SEQUENCE_ID"
## [75] "ANNOT_SIGNALP_PEPTIDE"
## [76] "ANNOT_SIGNALP_SCORES"
## [77] "ANNOT_SMART_DESCRIPTION"
## [78] "ANNOT_SMART_ID"
## [79] "ANNOT_SOURCE_ID"
## [80] "ANNOT_STRAND"
## [81] "ANNOT_SUPERFAMILY_DESCRIPTION"
## [82] "ANNOT_SUPERFAMILY_ID"
## [83] "ANNOT_THREE_PRIME_UTR_LENGTH"
## [84] "ANNOT_TIGRFAM_DESCRIPTION"
## [85] "ANNOT_TIGRFAM_ID"
## [86] "ANNOT_TM_COUNT"
## [87] "ANNOT_TRANS_FOUND_PER_GENE_INTERNAL"
## [88] "ANNOT_TRANSCRIPT_INDEX_PER_GENE"
## [89] "ANNOT_TRANSCRIPT_LENGTH"
## [90] "ANNOT_TRANSCRIPT_LINK"
## [91] "ANNOT_TRANSCRIPT_PRODUCT"
## [92] "ANNOT_TRANSCRIPT_SEQUENCE"
## [93] "ANNOT_TRANSCRIPTS_FOUND_PER_GENE"
## [94] "ANNOT_UNIPROT_ID"
## [95] "ANNOT_URI"
## [96] "ANNOT_WDK_WEIGHT"
## [97] "CHR_ID"
## [98] "EVIDENCE"
## [99] "GENE_TYPE"
## [100] "GID"
## [101] "GO"
## [102] "GO_EVIDENCE_CODE"
## [103] "GO_ID"
## [104] "GO_IS_NOT"
## [105] "GO_ONTOLOGY"
## [106] "GO_REFERENCE"
## [107] "GO_SORT_KEY"
## [108] "GO_SOURCE"
## [109] "GO_SUPPORT_FOR_EVIDENCE_CODE_ASSIGNMENT"
## [110] "GO_TERM_NAME"
## [111] "GO_TRANSCRIPT_ID_S"
## [112] "GOSLIM_EVIDENCE_CODE"
## [113] "GOSLIM_GO_ID"
## [114] "GOSLIM_GO_TERM_NAME"
## [115] "GOSLIM_IS_NOT"
## [116] "GOSLIM_ONTOLOGY"
## [117] "GOSLIM_REFERENCE"
## [118] "GOSLIM_SORT_KEY"
## [119] "GOSLIM_SOURCE"
## [120] "GOSLIM_SUPPORT_FOR_EVIDENCE_CODE_ASSIGNMENT"
## [121] "GOSLIM_TRANSCRIPT_ID_S"
## [122] "INTERPRO_DESCRIPTION"
## [123] "INTERPRO_E_VALUE"
## [124] "INTERPRO_END_MIN"
## [125] "INTERPRO_ID"
## [126] "INTERPRO_NAME"
## [127] "INTERPRO_PRIMARY_ID"
## [128] "INTERPRO_SECONDARY_ID"
## [129] "INTERPRO_START_MIN"
## [130] "INTERPRO_TRANSCRIPT_ID_S"
## [131] "KEGGREST_KEGG_GENEID"
## [132] "KEGGREST_NCBI_GENEID"
## [133] "KEGGREST_NCBI_PROTEINID"
## [134] "KEGGREST_PATHWAYS"
## [135] "KEGGREST_UNIPROTID"
## [136] "LINKOUT_DATABASE"
## [137] "LINKOUT_EXT_ID"
## [138] "LINKOUT_LINK_URL"
## [139] "LINKOUT_SOURCE_ID"
## [140] "ORTHOLOGS_GID"
## [141] "ORTHOLOGS_ORGANISM"
## [142] "ORTHOLOGS_PRODUCT"
## [143] "ORTHOLOGS_SYNTENIC"
## [144] "PATHWAY_EC_NUMBER_MATCHED_IN_PATHWAY"
## [145] "PATHWAY_EXACT_EC_NUMBER_MATCH"
## [146] "PATHWAY_EXPASY_URL"
## [147] "PATHWAY_ID"
## [148] "PATHWAY_REACTIONS_MATCHING_EC_NUMBER"
## [149] "PATHWAY_SOURCE"
## [150] "PATHWAY_SOURCE_ID"
## [151] "PUBMED_AUTHORS"
## [152] "PUBMED_DOI"
## [153] "PUBMED_ID"
## [154] "PUBMED_TITLE"
## Exploratory pull: request fields = "all" for the Esmeraldo-like database,
## keyed by "gid".
tt <- load_orgdb_annotations(esmer_db, keytype = "gid", fields = "all")
## Selecting the following fields, this might be too many:
## ANNOT_BFD3_CDS, ANNOT_BFD3_MODEL, ANNOT_BFD6_CDS, ANNOT_BFD6_MODEL, ANNOT_CDS, ANNOT_CDS_LENGTH, ANNOT_CHROMOSOME, ANNOT_DIF_CDS, ANNOT_DIF_MODEL, ANNOT_EC_NUMBERS, ANNOT_EC_NUMBERS_DERIVED, ANNOT_EXON_COUNT, ANNOT_FC_BFD3_CDS, ANNOT_FC_BFD3_MODEL, ANNOT_FC_BFD6_CDS, ANNOT_FC_BFD6_MODEL, ANNOT_FC_DIF_CDS, ANNOT_FC_DIF_MODEL, ANNOT_FC_PF_CDS, ANNOT_FC_PF_MODEL, ANNOT_FIVE_PRIME_UTR_LENGTH, ANNOT_GENE_ENTREZ_ID, ANNOT_GENE_EXON_COUNT, ANNOT_GENE_HTS_NONCODING_SNPS, ANNOT_GENE_HTS_NONSYN_SYN_RATIO, ANNOT_GENE_HTS_NONSYNONYMOUS_SNPS, ANNOT_GENE_HTS_STOP_CODON_SNPS, ANNOT_GENE_HTS_SYNONYMOUS_SNPS, ANNOT_GENE_LOCATION_TEXT, ANNOT_GENE_NAME, ANNOT_GENE_ORTHOLOG_NUMBER, ANNOT_GENE_ORTHOMCL_NAME, ANNOT_GENE_PARALOG_NUMBER, ANNOT_GENE_PREVIOUS_IDS, ANNOT_GENE_PRODUCT, ANNOT_GENE_SOURCE_ID, ANNOT_GENE_TOTAL_HTS_SNPS, ANNOT_GENE_TRANSCRIPT_COUNT, ANNOT_GENE_TYPE, ANNOT_GO_COMPONENT, ANNOT_GO_FUNCTION, ANNOT_GO_ID_COMPONENT, ANNOT_GO_ID_FUNCTION, ANNOT_GO_ID_PROCESS, ANNOT_GO_PROCESS, ANNOT_HAS_MISSING_TRANSCRIPTS, ANNOT_INTERPRO_DESCRIPTION, ANNOT_INTERPRO_ID, ANNOT_IS_PSEUDO, ANNOT_ISOELECTRIC_POINT, ANNOT_LOCATION_TEXT, ANNOT_MATCHED_RESULT, ANNOT_MOLECULAR_WEIGHT, ANNOT_NO_TET_CDS, ANNOT_NO_TET_MODEL, ANNOT_ORGANISM, ANNOT_PF_CDS, ANNOT_PF_MODEL, ANNOT_PFAM_DESCRIPTION, ANNOT_PFAM_ID, ANNOT_PIRSF_DESCRIPTION, ANNOT_PIRSF_ID, ANNOT_PREDICTED_GO_COMPONENT, ANNOT_PREDICTED_GO_FUNCTION, ANNOT_PREDICTED_GO_ID_COMPONENT, ANNOT_PREDICTED_GO_ID_FUNCTION, ANNOT_PREDICTED_GO_ID_PROCESS, ANNOT_PREDICTED_GO_PROCESS, ANNOT_PROJECT_ID, ANNOT_PROSITEPROFILES_DESCRIPTION, ANNOT_PROSITEPROFILES_ID, ANNOT_PROTEIN_LENGTH, ANNOT_PROTEIN_SEQUENCE, ANNOT_SEQUENCE_ID, ANNOT_SIGNALP_PEPTIDE, ANNOT_SIGNALP_SCORES, ANNOT_SMART_DESCRIPTION, ANNOT_SMART_ID, ANNOT_SOURCE_ID, ANNOT_STRAND, ANNOT_SUPERFAMILY_DESCRIPTION, ANNOT_SUPERFAMILY_ID, ANNOT_THREE_PRIME_UTR_LENGTH, ANNOT_TIGRFAM_DESCRIPTION, ANNOT_TIGRFAM_ID, ANNOT_TM_COUNT, ANNOT_TRANS_FOUND_PER_GENE_INTERNAL, ANNOT_TRANSCRIPT_INDEX_PER_GENE, 
ANNOT_TRANSCRIPT_LENGTH, ANNOT_TRANSCRIPT_LINK, ANNOT_TRANSCRIPT_PRODUCT, ANNOT_TRANSCRIPT_SEQUENCE, ANNOT_TRANSCRIPTS_FOUND_PER_GENE, ANNOT_UNIPROT_ID, ANNOT_URI, ANNOT_WDK_WEIGHT
## Extracted all gene ids.
## Attempting to select: ANNOT_BFD3_CDS, ANNOT_BFD3_MODEL, ANNOT_BFD6_CDS, ANNOT_BFD6_MODEL, ANNOT_CDS, ANNOT_CDS_LENGTH, ANNOT_CHROMOSOME, ANNOT_DIF_CDS, ANNOT_DIF_MODEL, ANNOT_EC_NUMBERS, ANNOT_EC_NUMBERS_DERIVED, ANNOT_EXON_COUNT, ANNOT_FC_BFD3_CDS, ANNOT_FC_BFD3_MODEL, ANNOT_FC_BFD6_CDS, ANNOT_FC_BFD6_MODEL, ANNOT_FC_DIF_CDS, ANNOT_FC_DIF_MODEL, ANNOT_FC_PF_CDS, ANNOT_FC_PF_MODEL, ANNOT_FIVE_PRIME_UTR_LENGTH, ANNOT_GENE_ENTREZ_ID, ANNOT_GENE_EXON_COUNT, ANNOT_GENE_HTS_NONCODING_SNPS, ANNOT_GENE_HTS_NONSYN_SYN_RATIO, ANNOT_GENE_HTS_NONSYNONYMOUS_SNPS, ANNOT_GENE_HTS_STOP_CODON_SNPS, ANNOT_GENE_HTS_SYNONYMOUS_SNPS, ANNOT_GENE_LOCATION_TEXT, ANNOT_GENE_NAME, ANNOT_GENE_ORTHOLOG_NUMBER, ANNOT_GENE_ORTHOMCL_NAME, ANNOT_GENE_PARALOG_NUMBER, ANNOT_GENE_PREVIOUS_IDS, ANNOT_GENE_PRODUCT, ANNOT_GENE_SOURCE_ID, ANNOT_GENE_TOTAL_HTS_SNPS, ANNOT_GENE_TRANSCRIPT_COUNT, ANNOT_GENE_TYPE, ANNOT_GO_COMPONENT, ANNOT_GO_FUNCTION, ANNOT_GO_ID_COMPONENT, ANNOT_GO_ID_FUNCTION, ANNOT_GO_ID_PROCESS, ANNOT_GO_PROCESS, ANNOT_HAS_MISSING_TRANSCRIPTS, ANNOT_INTERPRO_DESCRIPTION, ANNOT_INTERPRO_ID, ANNOT_IS_PSEUDO, ANNOT_ISOELECTRIC_POINT, ANNOT_LOCATION_TEXT, ANNOT_MATCHED_RESULT, ANNOT_MOLECULAR_WEIGHT, ANNOT_NO_TET_CDS, ANNOT_NO_TET_MODEL, ANNOT_ORGANISM, ANNOT_PF_CDS, ANNOT_PF_MODEL, ANNOT_PFAM_DESCRIPTION, ANNOT_PFAM_ID, ANNOT_PIRSF_DESCRIPTION, ANNOT_PIRSF_ID, ANNOT_PREDICTED_GO_COMPONENT, ANNOT_PREDICTED_GO_FUNCTION, ANNOT_PREDICTED_GO_ID_COMPONENT, ANNOT_PREDICTED_GO_ID_FUNCTION, ANNOT_PREDICTED_GO_ID_PROCESS, ANNOT_PREDICTED_GO_PROCESS, ANNOT_PROJECT_ID, ANNOT_PROSITEPROFILES_DESCRIPTION, ANNOT_PROSITEPROFILES_ID, ANNOT_PROTEIN_LENGTH, ANNOT_PROTEIN_SEQUENCE, ANNOT_SEQUENCE_ID, ANNOT_SIGNALP_PEPTIDE, ANNOT_SIGNALP_SCORES, ANNOT_SMART_DESCRIPTION, ANNOT_SMART_ID, ANNOT_SOURCE_ID, ANNOT_STRAND, ANNOT_SUPERFAMILY_DESCRIPTION, ANNOT_SUPERFAMILY_ID, ANNOT_THREE_PRIME_UTR_LENGTH, ANNOT_TIGRFAM_DESCRIPTION, ANNOT_TIGRFAM_ID, ANNOT_TM_COUNT, ANNOT_TRANS_FOUND_PER_GENE_INTERNAL, 
ANNOT_TRANSCRIPT_INDEX_PER_GENE, ANNOT_TRANSCRIPT_LENGTH, ANNOT_TRANSCRIPT_LINK, ANNOT_TRANSCRIPT_PRODUCT, ANNOT_TRANSCRIPT_SEQUENCE, ANNOT_TRANSCRIPTS_FOUND_PER_GENE, ANNOT_UNIPROT_ID, ANNOT_URI, ANNOT_WDK_WEIGHT
## 'select()' returned 1:1 mapping between keys and columns
## The subset of annotation columns to extract; every name carries the
## "annot_" prefix, so it is attached once here.
wanted_fields <- paste0(
  "annot_",
  c("gene_location_text", "cds_length", "gene_name", "gene_product",
    "gene_type", "strand", "gene_entrez_id", "gene_orthomcl_name")
)
## Extract those columns for the Esmeraldo-like genes, keyed by "gid".
esmer_annot <- load_orgdb_annotations(esmer_db, keytype = "gid",
                                      fields = wanted_fields)
## Extracted all gene ids.
## Attempting to select: ANNOT_GENE_PRODUCT, ANNOT_GENE_TYPE, ANNOT_LOCATION_TEXT, ANNOT_GENE_LOCATION_TEXT, ANNOT_CDS_LENGTH, ANNOT_GENE_NAME, ANNOT_GENE_PRODUCT, ANNOT_GENE_TYPE, ANNOT_STRAND, ANNOT_GENE_ENTREZ_ID, ANNOT_GENE_ORTHOMCL_NAME
## 'select()' returned 1:1 mapping between keys and columns
## Extract the same wanted_fields for the Non-Esmeraldo-like genes.
nonesmer_annot <- load_orgdb_annotations(nonesmer_db, keytype = "gid",
                                         fields = wanted_fields)
## Extracted all gene ids.
## Attempting to select: ANNOT_GENE_PRODUCT, ANNOT_GENE_TYPE, ANNOT_LOCATION_TEXT, ANNOT_GENE_LOCATION_TEXT, ANNOT_CDS_LENGTH, ANNOT_GENE_NAME, ANNOT_GENE_PRODUCT, ANNOT_GENE_TYPE, ANNOT_STRAND, ANNOT_GENE_ENTREZ_ID, ANNOT_GENE_ORTHOMCL_NAME
## 'select()' returned 1:1 mapping between keys and columns
## Extract the same wanted_fields from the "strain CL Brener" database.
unas_annot <- load_orgdb_annotations(unas_db, keytype = "gid",
                                     fields = wanted_fields)
## Extracted all gene ids.
## Attempting to select: ANNOT_GENE_PRODUCT, ANNOT_GENE_TYPE, ANNOT_LOCATION_TEXT, ANNOT_GENE_LOCATION_TEXT, ANNOT_CDS_LENGTH, ANNOT_GENE_NAME, ANNOT_GENE_PRODUCT, ANNOT_GENE_TYPE, ANNOT_STRAND, ANNOT_GENE_ENTREZ_ID, ANNOT_GENE_ORTHOMCL_NAME
## 'select()' returned 1:1 mapping between keys and columns
Now combine the esmer, nonesmer, and unassigned annotations.
## Stack the three per-database gene tables in the order
## Esmeraldo, Non-Esmeraldo, unassigned.
clbr_annot <- rbind(
  esmer_annot[["genes"]],
  nonesmer_annot[["genes"]],
  unas_annot[["genes"]]
)
## Keep only the rows whose gene type is known.
nona_idx <- !is.na(clbr_annot[["annot_gene_type"]])
clbr_annot <- clbr_annot[nona_idx, ]
##mrna_idx <- clbr_annot[["annot_gene_type"]] == "protein coding"
##clbr_annot <- clbr_annot[mrna_idx, ]
## Wait, did they finally combine esmer_annot/nonesmer_annot and the whole clbr?
## Take the first few unassigned gene IDs and check whether any of them also
## appear in the Non-Esmeraldo-like or Esmeraldo-like tables.
testers <- head(rownames(unas_annot[["genes"]]))
testers %in% rownames(nonesmer_annot[["genes"]])
## [1] FALSE FALSE FALSE FALSE FALSE FALSE
testers %in% rownames(esmer_annot[["genes"]])
## [1] FALSE FALSE FALSE FALSE FALSE FALSE
## No, I guess not.
## Run the combined table through extract_gene_locations().
clbr_annot <- extract_gene_locations(clbr_annot)
## Make two copies that differ only in their row names:
##   hisat_*  -> "exon_<id>.1"
##   salmon_* -> "<id>.mRNA"
## NOTE(review): presumably these mirror the feature IDs found in hisat- and
## salmon-derived count tables -- confirm against the downstream usage.
hisat_clbr_annot <- clbr_annot
salmon_clbr_annot <- clbr_annot
rownames(hisat_clbr_annot) <- paste0("exon_", rownames(clbr_annot), ".1")
rownames(salmon_clbr_annot) <- paste0(rownames(clbr_annot), ".mRNA")
For the moment, I think I will just ask for the Esmeraldo → Non-Esmeraldo orthologs.
## Query the Esmeraldo-like database for orthologs in the Non-Esmeraldo-like
## haplotype.
orthos <- EuPathDB::extract_eupath_orthologs(
  db = esmer_db,
  query_species = "Trypanosoma cruzi CL Brener Non-Esmeraldo-like",
  id_column = "ORTHOLOGS_GID"
)
## Some columns were missing: ORTHOLOGS_COUNT
## Removing them, which may end badly.
## 'select()' returned 1:many mapping between keys and columns
## There are 48 possible species in this group.
## Found species: Trypanosoma cruzi CL Brener Non-Esmeraldo-like
## Reduce to the two ID columns and give them haplotype names.
orthos <- orthos[, c("GID", "ORTHOLOGS_GID")]
colnames(orthos) <- c("Esmeraldo", "NonEsmeraldo")
## Unless a variable named skip_load exists and is TRUE, report the session
## details and the hpgltools commit, then save via saveme() to `savefile`.
if (!isTRUE(get0("skip_load"))) {
  pander::pander(sessionInfo())
  message("This is hpgltools commit: ", get_git_commit())
  message("Saving to ", savefile)
  ## sm() wraps the save call; its return value is kept in tmp.
  tmp <- sm(saveme(filename = savefile))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset f3c1e03852c87dc60c7e72e726bb640572e695ff
## This is hpgltools commit: Thu Aug 22 15:32:44 2019 -0400: f3c1e03852c87dc60c7e72e726bb640572e695ff
## Saving to 01_annotation_v20190813.rda.xz