In contrast, it is possible to load most annotations of interest directly from the gff files used in the alignments. More in-depth information for the human transcriptome may be extracted from biomart.
## The old way of getting genome/annotation data
mtb_gff <- "reference/mycobacterium_tuberculosis_h37rv_2.gff.gz"
mtb_genome <- "reference/mtuberculosis_h37rv_genbank.fasta"
mtb_cds <- "reference/mtb_cds.fasta"
mtb_annotations <- sm(load_gff_annotations(mtb_gff, type="gene"))
rownames(mtb_annotations) <- mtb_annotations[["ID"]]
## First figure out the ID for the Mtb genome:
ids <- get_microbesonline_ids("37")
head(ids)
## taxonomyId shortName
## 1 83332 Mycobacterium tuberculosis H37Rv
## 2 243273 Mycoplasma genitalium G37
## 3 316274 Herpetosiphon aurantiacus ATCC 23779
## 4 331111 Escherichia coli E24377A
## 5 338966 Pelobacter propionicus DSM 2379
## 6 350704 Pseudomonas aeruginosa C3719
## Mycobacterium tuberculosis H37Rv is the first entry and has id: 83332
mtb_microbes <- load_microbesonline_annotations(ids=83332)
## Querying microbesonline for: Mycobacterium tuberculosis H37Rv.
## I made a nifty function to do this stuff: load_uniprot_annotations().
library(UniProt.ws)
colnames(availableUniprotSpecies())
found <- availableUniprotSpecies(pattern="Mycobacterium tuberculosis")
info
mtb_uniprot <- UniProt.ws(13120)
mtb_keys <- keys(x=mtb_uniprot, keytype="UCSC")
mtb_keys
columns <- c("UNIGENE", "ENSEMBL")
result <- select(mtb_uniprot, mtb_keys, columns, "ENTREZ_GENE")
mtb_go <- load_microbesonline_go(id=83332)
## Collecting go data for: Mycobacterium tuberculosis H37Rv.
I want to be able to cross reference some work from Keith. His gene IDs are MTBxxxx
all_de <- read.table("limma_result.csv", header=TRUE, sep="\t")
summary(mtb_microbes[[1]])
## locusId accession GI scaffoldId
## Min. : 31772 Length:4611 Min. :1.56e+07 Min. :7022
## 1st Qu.: 33068 Class :character 1st Qu.:1.56e+07 1st Qu.:7022
## Median : 34351 Mode :character Median :1.56e+07 Median :7022
## Mean : 1665113 Mean :2.11e+07 Mean :7022
## 3rd Qu.: 35642 3rd Qu.:1.56e+07 3rd Qu.:7022
## Max. :11685601 Max. :1.61e+08 Max. :7022
## NA's :622
## start stop strand sysName
## Min. : 1 Min. : 1524 Length:4611 Length:4611
## 1st Qu.:1135016 1st Qu.:1134353 Class :character Class :character
## Median :2367711 Median :2368442 Mode :character Mode :character
## Mean :2273832 Mean :2273812
## 3rd Qu.:3333271 3rd Qu.:3332429
## Max. :4410929 Max. :4410786
##
## name desc COG COGFun
## Length:4611 Length:4611 Length:4611 Length:4611
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## COGDesc TIGRFam TIGRRoles GO
## Length:4611 Length:4611 Length:4611 Length:4611
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## EC ECDesc
## Length:4611 Length:4611
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
mtb_microbes <- mtb_microbes[[1]]
if (!isTRUE(get0("skip_load"))) {
pander::pander(sessionInfo())
message(paste0("This is hpgltools commit: ", get_git_commit()))
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
tmp <- sm(saveme(filename=this_save))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 08c9274d41bd6db1189f9330baf0b7ec57d22dc9> git reset yesterday
## R> packrat::restore()
## This is hpgltools commit: Wed Apr 4 11:03:17 2018 -0400: 08c9274d41bd6db1189f9330baf0b7ec57d22dc9This is hpgltools commit: Wed Apr 4 11:03:17 2018 -0400: yesterday
## Saving to 01_annotation-v20171205.rda.xz