There are a few methods of importing annotation data into R. The following are two attempts; the second is currently being used in these analyses.
AnnotationHub is the new and fancier version of what OrganismDb does. Keith already made these for the parasites though, so let’s try to use one of those.
The OrganismDb packages are installable via Keith’s builder: https://github.com/elsayed-lab/eupathdb-organismdb
I did a git pull of it, changed a couple of small things, and ran ‘make lpanamensis’. After 5 or so minutes, a brand new package ‘Leishmania.panamensis.MHOMCOL81L13’ appeared in my R environment.
# First attempt: presumably builds an OrganismDbi package for L. mexicana
# (NOTE(review): confirm what make_organismdbi() returns; tt is reassigned later).
tt <- make_organismdbi(id = "lmexicana")
# Second attempt: look up the EuPathDB-derived package names for L. major.
orgdb_name <- hpgltools:::get_eupath_pkgnames(species = "Leishmania major")
## Starting metadata download.
## Finished metadata download.
## Found the following hits: Leishmania major strain Friedlin, Leishmania major strain LV39c5, Leishmania major strain SD 75.1, choosing the first.
# Keep the OrgDb package name chosen by the lookup.
tt <- orgdb_name$orgdb
# Attach the L. major OrgDb and TxDb packages.
# NOTE(review): sm() presumably silences the startup messages -- confirm.
tmp <- sm(library("org.Lmajor.Friedlin.v36.eg.db"))
tmp <- sm(library("TxDb.Leishmania.major.Friedlin.TriTrypDB.v36"))
# Repeat the package-name lookup, this time for L. mexicana.
orgdb_name <- hpgltools:::get_eupath_pkgnames(species = "Leishmania mexicana")
## Starting metadata download.
## Finished metadata download.
## Found the following hits: Leishmania mexicana MHOM/GT/2001/U1103, choosing the first.
# Print the OrgDb package name found for L. mexicana.
orgdb_name$orgdb
## [1] "org.Lmexicana.MHOMGT2001U1103.v36.eg.db"
# Attach that package (NOTE(review): sm() presumably silences startup messages).
tmp <- sm(library("org.Lmexicana.MHOMGT2001U1103.v36.eg.db"))
# Load the L. major OrgDb annotations keyed by GID and keep the gene table.
lmaj <- load_orgdb_annotations(
  "org.Lmajor.Friedlin.v36.eg.db",
  keytype = "GID"
)$genes
## Unable to find GENENAME, setting it to GENE_NAME_OR_SYMBOL.
## Unable to find TYPE in the db, removing it.
## Unable to find CHR in the db, removing it.
## Unable to find TXSTRAND in the db, removing it.
## Unable to find TXSTART in the db, removing it.
## Unable to find TXEND in the db, removing it.
## Extracted all gene ids.
## 'select()' returned 1:1 mapping between keys and columns
# Rename the L. major annotation rows to exon_<gid>.1; make.names() keeps the
# IDs syntactically valid and unique.
# NOTE(review): presumably this mirrors the IDs in the htseq count tables -- confirm.
rownames(lmaj) <- paste0(
  "exon_",
  make.names(lmaj$gid, unique = TRUE),
  ".1"
)
# Load the L. mexicana OrgDb annotations keyed by GID and keep the gene table.
lama <- load_orgdb_annotations(
  "org.Lmexicana.MHOMGT2001U1103.v36.eg.db",
  keytype = "GID"
)$genes
## Unable to find GENENAME, setting it to GENE_NAME_OR_SYMBOL.
## Unable to find TYPE in the db, removing it.
## Unable to find CHR in the db, removing it.
## Unable to find TXSTRAND in the db, removing it.
## Unable to find TXSTART in the db, removing it.
## Unable to find TXEND in the db, removing it.
## Extracted all gene ids.
## 'select()' returned 1:1 mapping between keys and columns
# Apply the same exon_<gid>.1 row naming to the L. mexicana annotations.
rownames(lama) <- paste0(
  "exon_",
  make.names(lama$gid, unique = TRUE),
  ".1"
)
# Build the L. major experiment from its sample sheet, reading the count
# tables named in the fileparasite column and attaching lmaj as gene info.
lmaj_expt <- create_expt(
  "sample_sheets/lmajor_samples.xlsx",
  gene_info = lmaj,
  file_column = "fileparasite"
)
## Reading the sample metadata.
## The sample definitions comprises: 24, 11 rows, columns.
## Reading count tables.
## Reading count tables with read.table().
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0364_Lmajor60_20140329.txt.xz contains 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0366_Lmajor60_20140329.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0368_Lmajor60_20140329.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0370_Lmajor60_20140329.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0372_Lmajor60_20140329.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0374_Lmajor60_20140329.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0376_Lmajor60_20140329.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0378_Lmajor60_20140329.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0380_Lmajor60_20140329.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0382_Lmajor60_20140329.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0397_Lmajor60_20140712.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0399_Lmajor60_20140712.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0401_Lmajor60_20140712.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0403_Lmajor60_20140712.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0405_Lmajor60_20140712.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0456_Lmajor60_20141001.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0459_Lmajor60_20141001.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0463_Lmajor60_20141001.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0467_Lmajor60_20141001.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0494_Lmajor60_20141211.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0497_Lmajor60_20141211.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0501_Lmajor60_20141211.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0505_Lmajor60_20141211.txt.xz contains 9469 rows and merges to 9469 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0509_Lmajor60_20141211.txt.xz contains 9469 rows and merges to 9469 rows.
## Finished reading count tables.
## Matched 8398 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
# Show the metadata columns of the L. major experimental design.
colnames(lmaj_expt$design)
## [1] "sampleid" "condition" "batch" "genus"
## [5] "species" "strain" "stage" "hpi"
## [9] "selection" "fileparasite" "filehost" "file"
# Build the L. amazonensis experiment from its sample sheet, reading the count
# tables named in the fileparasite column.
# Note: the gene annotations passed here (lama) come from the L. mexicana OrgDb.
lama_expt <- create_expt(
  "sample_sheets/lamazonensis_samples.xlsx",
  gene_info = lama,
  file_column = "fileparasite"
)
## Reading the sample metadata.
## The sample definitions comprises: 15, 10 rows, columns.
## Reading count tables.
## Reading count tables with read.table().
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0435_Lmexicana81_20141001.txt.xz contains 9154 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0437_Lmexicana81_20141001.txt.xz contains 9154 rows and merges to 9154 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0440_Lmexicana81_20141001.txt.xz contains 9154 rows and merges to 9154 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0443_Lmexicana81_20141001.txt.xz contains 9154 rows and merges to 9154 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0446_Lmexicana81_20141001.txt.xz contains 9154 rows and merges to 9154 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0454_Lmexicana81_20141001.txt.xz contains 9154 rows and merges to 9154 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0458_Lmexicana81_20141001.txt.xz contains 9154 rows and merges to 9154 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0462_Lmexicana81_20141001.txt.xz contains 9154 rows and merges to 9154 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0466_Lmexicana81_20141001.txt.xz contains 9154 rows and merges to 9154 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0470_Lmexicana81_20141001.txt.xz contains 9154 rows and merges to 9154 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0492_Lmexicana81_20141211.txt.xz contains 9154 rows and merges to 9154 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0496_Lmexicana81_20141211.txt.xz contains 9154 rows and merges to 9154 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0500_Lmexicana81_20141211.txt.xz contains 9154 rows and merges to 9154 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0504_Lmexicana81_20141211.txt.xz contains 9154 rows and merges to 9154 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lminfectome_2016/preprocessing/dillonl/htseq_HPGL0508_Lmexicana81_20141211.txt.xz contains 9154 rows and merges to 9154 rows.
## Finished reading count tables.
## Matched 8246 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
# Show the metadata columns of the L. amazonensis experiment created above.
# Unlike the L. major design, it has no "selection" column.
colnames(lama_expt$design)
## [1] "sampleid" "condition" "batch" "genus"
## [5] "species" "strain" "stage" "hpi"
## [9] "fileparasite" "filehost" "file"
At this point, we should have everything necessary to perform the various analyses of the 4 sub-experiments, so we save the current data for reuse elsewhere.
# Render the L. major experimental design as a table.
knitr::kable(lmaj_expt$design)
sampleid | condition | batch | genus | species | strain | stage | hpi | selection | fileparasite | filehost | file | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
HPGL0364 | HPGL0364 | metac_pna | a | Leishmania | major | undef | metacyclic | undef | pna | preprocessing/dillonl/htseq_HPGL0364_Lmajor60_20140329.txt.xz | none | null |
HPGL0366 | HPGL0366 | amastLM4 | a | Leishmania | major | undef | amastigote | 4 | none | preprocessing/dillonl/htseq_HPGL0366_Lmajor60_20140329.txt.xz | preprocessing/dillonl/htseq_HPGL0366_hg19_20140329.txt.xz | null |
HPGL0368 | HPGL0368 | amastLM24 | a | Leishmania | major | undef | amastigote | 24 | none | preprocessing/dillonl/htseq_HPGL0368_Lmajor60_20140329.txt.xz | preprocessing/dillonl/htseq_HPGL0366_hg19_20140329.txt.xz | null |
HPGL0370 | HPGL0370 | amastLM48 | a | Leishmania | major | undef | amastigote | 48 | none | preprocessing/dillonl/htseq_HPGL0370_Lmajor60_20140329.txt.xz | preprocessing/dillonl/htseq_HPGL0370_hg19_20140329.txt.xz | null |
HPGL0372 | HPGL0372 | amastLM72 | a | Leishmania | major | undef | amastigote | 72 | none | preprocessing/dillonl/htseq_HPGL0372_Lmajor60_20140329.txt.xz | preprocessing/dillonl/htseq_HPGL0372_hg19_20140329.txt.xz | null |
HPGL0374 | HPGL0374 | metac_pna | b | Leishmania | major | undef | metacyclic | undef | pna | preprocessing/dillonl/htseq_HPGL0374_Lmajor60_20140329.txt.xz | none | null |
HPGL0376 | HPGL0376 | amastLM4 | b | Leishmania | major | undef | amastigote | 4 | none | preprocessing/dillonl/htseq_HPGL0376_Lmajor60_20140329.txt.xz | preprocessing/dillonl/htseq_HPGL0376_hg19_20140329.txt.xz | null |
HPGL0378 | HPGL0378 | amastLM24 | b | Leishmania | major | undef | amastigote | 24 | none | preprocessing/dillonl/htseq_HPGL0378_Lmajor60_20140329.txt.xz | preprocessing/dillonl/htseq_HPGL0378_hg19_20140329.txt.xz | null |
HPGL0380 | HPGL0380 | amastLM48 | b | Leishmania | major | undef | amastigote | 48 | none | preprocessing/dillonl/htseq_HPGL0380_Lmajor60_20140329.txt.xz | preprocessing/dillonl/htseq_HPGL0380_hg19_20140329.txt.xz | null |
HPGL0382 | HPGL0382 | amastLM72 | b | Leishmania | major | undef | amastigote | 72 | none | preprocessing/dillonl/htseq_HPGL0382_Lmajor60_20140329.txt.xz | preprocessing/dillonl/htseq_HPGL0382_hg19_20140329.txt.xz | null |
HPGL0397 | HPGL0397 | metac_pna | c | Leishmania | major | undef | metacyclic | undef | pna | preprocessing/dillonl/htseq_HPGL0397_Lmajor60_20140712.txt.xz | none | null |
HPGL0399 | HPGL0399 | amastLM4 | c | Leishmania | major | undef | amastigote | 4 | none | preprocessing/dillonl/htseq_HPGL0399_Lmajor60_20140712.txt.xz | preprocessing/dillonl/htseq_HPGL0399_hg19_20140329.txt.xz | null |
HPGL0401 | HPGL0401 | amastLM24 | c | Leishmania | major | undef | amastigote | 24 | none | preprocessing/dillonl/htseq_HPGL0401_Lmajor60_20140712.txt.xz | preprocessing/dillonl/htseq_HPGL0401_hg19_20140329.txt.xz | null |
HPGL0403 | HPGL0403 | amastLM48 | c | Leishmania | major | undef | amastigote | 48 | none | preprocessing/dillonl/htseq_HPGL0403_Lmajor60_20140712.txt.xz | preprocessing/dillonl/htseq_HPGL0403_hg19_20140329.txt.xz | null |
HPGL0405 | HPGL0405 | amastLM72 | c | Leishmania | major | undef | amastigote | 72 | none | preprocessing/dillonl/htseq_HPGL0405_Lmajor60_20140712.txt.xz | preprocessing/dillonl/htseq_HPGL0405_hg19_20140329.txt.xz | null |
HPGL0456 | HPGL0456 | metac_pna | e | Leishmania | major | undef | metacyclic | undef | pna | preprocessing/dillonl/htseq_HPGL0456_Lmajor60_20141001.txt.xz | none | null |
HPGL0459 | HPGL0459 | amastLM4 | e | Leishmania | major | undef | amastigote | 4 | none | preprocessing/dillonl/htseq_HPGL0459_Lmajor60_20141001.txt.xz | preprocessing/dillonl/htseq_HPGL0459_hg19_20140329.txt.xz | null |
HPGL0463 | HPGL0463 | amastLM24 | e | Leishmania | major | undef | amastigote | 24 | none | preprocessing/dillonl/htseq_HPGL0463_Lmajor60_20141001.txt.xz | preprocessing/dillonl/htseq_HPGL0463_hg19_20140329.txt.xz | null |
HPGL0467 | HPGL0467 | amastLM48 | e | Leishmania | major | undef | amastigote | 48 | none | preprocessing/dillonl/htseq_HPGL0467_Lmajor60_20141001.txt.xz | preprocessing/dillonl/htseq_HPGL0467_hg19_20140329.txt.xz | null |
HPGL0494 | HPGL0494 | metac_pna | f | Leishmania | major | undef | metacyclic | undef | pna | preprocessing/dillonl/htseq_HPGL0494_Lmajor60_20141211.txt.xz | none | null |
HPGL0497 | HPGL0497 | amastLM4 | f | Leishmania | major | undef | amastigote | 4 | none | preprocessing/dillonl/htseq_HPGL0497_Lmajor60_20141211.txt.xz | preprocessing/dillonl/htseq_HPGL0497_hg19_20140329.txt.xz | null |
HPGL0501 | HPGL0501 | amastLM24 | f | Leishmania | major | undef | amastigote | 24 | none | preprocessing/dillonl/htseq_HPGL0501_Lmajor60_20141211.txt.xz | preprocessing/dillonl/htseq_HPGL0501_hg19_20140329.txt.xz | null |
HPGL0505 | HPGL0505 | amastLM48 | f | Leishmania | major | undef | amastigote | 48 | none | preprocessing/dillonl/htseq_HPGL0505_Lmajor60_20141211.txt.xz | preprocessing/dillonl/htseq_HPGL0505_hg19_20140329.txt.xz | null |
HPGL0509 | HPGL0509 | amastLM72 | f | Leishmania | major | undef | amastigote | 72 | none | preprocessing/dillonl/htseq_HPGL0509_Lmajor60_20141211.txt.xz | preprocessing/dillonl/htseq_HPGL0509_hg19_20140329.txt.xz | null |
# Render the L. amazonensis experimental design as a table.
knitr::kable(lama_expt$design)
sampleid | condition | batch | genus | species | strain | stage | hpi | fileparasite | filehost | file | |
---|---|---|---|---|---|---|---|---|---|---|---|
HPGL0435 | HPGL0435 | metac | D | Leishmania | amazonensis | undef | metacyclic | undef | preprocessing/dillonl/htseq_HPGL0435_Lmexicana81_20141001.txt.xz | none | null |
HPGL0437 | HPGL0437 | amastLA4 | D | Leishmania | amazonensis | undef | amastigote | 4 | preprocessing/dillonl/htseq_HPGL0437_Lmexicana81_20141001.txt.xz | preprocessing/dillonl/htseq_HPGL0437_hg19_20141001.txt.xz | null |
HPGL0440 | HPGL0440 | amastLA24 | D | Leishmania | amazonensis | undef | amastigote | 24 | preprocessing/dillonl/htseq_HPGL0440_Lmexicana81_20141001.txt.xz | preprocessing/dillonl/htseq_HPGL0440_hg19_20141001.txt.xz | null |
HPGL0443 | HPGL0443 | amastLA48 | D | Leishmania | amazonensis | undef | amastigote | 48 | preprocessing/dillonl/htseq_HPGL0443_Lmexicana81_20141001.txt.xz | preprocessing/dillonl/htseq_HPGL0443_hg19_20141001.txt.xz | null |
HPGL0446 | HPGL0446 | amastLA72 | D | Leishmania | amazonensis | undef | amastigote | 72 | preprocessing/dillonl/htseq_HPGL0446_Lmexicana81_20141001.txt.xz | preprocessing/dillonl/htseq_HPGL0446_hg19_20141001.txt.xz | null |
HPGL0454 | HPGL0454 | metac | E | Leishmania | amazonensis | undef | metacyclic | undef | preprocessing/dillonl/htseq_HPGL0454_Lmexicana81_20141001.txt.xz | none | null |
HPGL0458 | HPGL0458 | amastLA4 | E | Leishmania | amazonensis | undef | amastigote | 4 | preprocessing/dillonl/htseq_HPGL0458_Lmexicana81_20141001.txt.xz | preprocessing/dillonl/htseq_HPGL0458_hg19_20141001.txt.xz | null |
HPGL0462 | HPGL0462 | amastLA24 | E | Leishmania | amazonensis | undef | amastigote | 24 | preprocessing/dillonl/htseq_HPGL0462_Lmexicana81_20141001.txt.xz | preprocessing/dillonl/htseq_HPGL0462_hg19_20141001.txt.xz | null |
HPGL0466 | HPGL0466 | amastLA48 | E | Leishmania | amazonensis | undef | amastigote | 48 | preprocessing/dillonl/htseq_HPGL0466_Lmexicana81_20141001.txt.xz | preprocessing/dillonl/htseq_HPGL0466_hg19_20141001.txt.xz | null |
HPGL0470 | HPGL0470 | amastLA72 | E | Leishmania | amazonensis | undef | amastigote | 72 | preprocessing/dillonl/htseq_HPGL0470_Lmexicana81_20141001.txt.xz | preprocessing/dillonl/htseq_HPGL0470_hg19_20141001.txt.xz | null |
HPGL0492 | HPGL0492 | metac | F | Leishmania | amazonensis | undef | metacyclic | undef | preprocessing/dillonl/htseq_HPGL0492_Lmexicana81_20141211.txt.xz | none | null |
HPGL0496 | HPGL0496 | amastLA4 | F | Leishmania | amazonensis | undef | amastigote | 4 | preprocessing/dillonl/htseq_HPGL0496_Lmexicana81_20141211.txt.xz | preprocessing/dillonl/htseq_HPGL0496_hg19_20141001.txt.xz | null |
HPGL0500 | HPGL0500 | amastLA24 | F | Leishmania | amazonensis | undef | amastigote | 24 | preprocessing/dillonl/htseq_HPGL0500_Lmexicana81_20141211.txt.xz | preprocessing/dillonl/htseq_HPGL0500_hg19_20141001.txt.xz | null |
HPGL0504 | HPGL0504 | amastLA48 | F | Leishmania | amazonensis | undef | amastigote | 48 | preprocessing/dillonl/htseq_HPGL0504_Lmexicana81_20141211.txt.xz | preprocessing/dillonl/htseq_HPGL0504_hg19_20141001.txt.xz | null |
HPGL0508 | HPGL0508 | amastLA72 | F | Leishmania | amazonensis | undef | amastigote | 72 | preprocessing/dillonl/htseq_HPGL0508_Lmexicana81_20141211.txt.xz | preprocessing/dillonl/htseq_HPGL0508_hg19_20141001.txt.xz | null |