1 Annotation version: 20171012

1.1 Genome annotation input

For this analysis, I will simply use the OrganismDbi objects I generated from EuPathDB.

The data files for these are found in $SCRATCH/rnaseq/eupathdb and should be installed in the R environment.

1.2 OrganismDb

AnnotationHub is the newer and fancier version of what OrganismDb does. Keith already made these for the parasites, though, so let's try to use one of those.

The OrganismDb packages are installable via Keith’s builder: https://github.com/elsayed-lab/eupathdb-organismdb

I did a git pull of it, changed a couple small things and ran ‘make lpanamensis’. After 5 or so minutes a brand new package ‘Leishmania.panamensis.MHOMCOL81L13’ appeared in my R environment.

## Attach the three v34 L. mexicana MHOM/GT/2001/U1103 annotation packages, in the
## same order as before.  Each call is wrapped in sm() (hpgltools helper; presumably
## it silences the attach-time messages -- confirm) and its result is parked in tmp.
tmp <- sm(library(package="org.Lmexicana.MHOMGT2001U1103.v34.eg.db",
                  character.only=TRUE))
tmp <- sm(library(package="TxDb.Leishmania.mexicana.MHOMGT2001U1103.TriTrypDB.v34",
                  character.only=TRUE))
tmp <- sm(library(package="Leishmania.mexicana.MHOMGT2001U1103.v34",
                  character.only=TRUE))

## Earlier, CDS-oriented field selection, kept for reference only:
##wanted_fields <- c("genedescription", "gid", "cdschrom", "cdsstart", "cdsend", "cdsstrand", "cdsname", "txstart", "stxend")
## Columns requested from the annotation object: the gene description plus the
## transcript-level (tx*) fields and the feature type.
wanted_fields <- c(
  "genedescription",
  "txid", "txchrom", "txstart", "txend",
  "txname", "txstrand", "txtype",
  "type")
## Query the attached Leishmania.mexicana object, keyed by gene ID, for those columns.
lmex_org <- load_orgdb_annotations(
  Leishmania.mexicana.MHOMGT2001U1103.v34,
  keytype="geneid",
  fields=wanted_fields)
## Unable to find GENENAME, setting it to CDSNAME.
## Extracted all gene ids.
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## Take the "genes" element out of the load_orgdb_annotations() result.
lmex_genes <- lmex_org[["genes"]]
## Rewrite every row name as exon_<id>.1 and make the result syntactically valid and
## unique in a single step.  Presumably this mirrors the feature IDs used in the
## count tables -- confirm against the count files.
rownames(lmex_genes) <- make.names(paste0("exon_", rownames(lmex_genes), ".1"),
                                   unique=TRUE)
## Transcript coordinates are 1-based and inclusive at both ends, so the number of
## bases covered is end - start + 1 (plain end - start is one base short).
lmex_genes[["length"]] <- lmex_genes[["txend"]] - lmex_genes[["txstart"]] + 1

1.3 Create an expt

## Build the experiment from the sample sheet, attaching lmex_genes as the gene
## annotations.  The ID and file_column values name sample-sheet columns
## (presumably the sample identifier and the per-sample count-table path -- confirm
## against the spreadsheet).
lmex_expt <- create_expt(
  "sample_sheets/all_samples.xlsx",
  gene_info=lmex_genes,
  ID="sampleid",
  file_column="filenamelmexcounts")
## Reading the sample metadata.
## The sample definitions comprises: 18, 13 rows, columns.
## Reading count tables.
## Reading count tables with read.table().
## /cbcb/nelsayed-scratch/atb/rnaseq/lmexicana_2017/count_tables/ERR789803_1-trimmed.count.xz contains 9154 rows.
## count_tables/ERR789795_1-trimmed.count.xz contains 9154 rows and merges to 9154 rows.
## count_tables/ERR789792_1-trimmed.count.xz contains 9154 rows and merges to 9154 rows.
## count_tables/ERR789786_1-trimmed.count.xz contains 9154 rows and merges to 9154 rows.
## count_tables/ERR789794_1-trimmed.count.xz contains 9154 rows and merges to 9154 rows.
## count_tables/ERR789788_1-trimmed.count.xz contains 9154 rows and merges to 9154 rows.
## count_tables/ERR789799_1-trimmed.count.xz contains 9154 rows and merges to 9154 rows.
## count_tables/ERR789790_1-trimmed.count.xz contains 9154 rows and merges to 9154 rows.
## count_tables/ERR789797_1-trimmed.count.xz contains 9154 rows and merges to 9154 rows.
## count_tables/ERR789791_1-trimmed.count.xz contains 9154 rows and merges to 9154 rows.
## count_tables/ERR789798_1-trimmed.count.xz contains 9154 rows and merges to 9154 rows.
## count_tables/ERR789800_1-trimmed.count.xz contains 9154 rows and merges to 9154 rows.
## count_tables/ERR789801_1-trimmed.count.xz contains 9154 rows and merges to 9154 rows.
## count_tables/ERR789796_1-trimmed.count.xz contains 9154 rows and merges to 9154 rows.
## count_tables/ERR789787_1-trimmed.count.xz contains 9154 rows and merges to 9154 rows.
## Finished reading count tables.
## Matched 8246 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Restrict the experiment using the expression batch=='a' (given to subset_expt()
## as a string).
lmex_nine_samples <- subset_expt(
  lmex_expt,
  subset="batch=='a'")
## There were 15, now there are 9 samples.

## Report the hpgltools commit in the run log.
message(paste0("This is hpgltools commit: ", get_git_commit()))
## Name the archive after the Rmd file (extension removed) and the annotation
## version: <basename>-v<ver>.rda.xz
this_save <- paste0(gsub(pattern="\\.Rmd", replacement="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
## Write the archive via hpgltools' saveme(); sm() wraps the call as elsewhere in
## this document.
tmp <- sm(saveme(filename=this_save))

LS0tCnRpdGxlOiAiTC5tZXhpY2FuYSAyMDE3OiBBbm5vdGF0aW9uIGRhdGEuIgphdXRob3I6ICJhdGIgYWJlbGV3QGdtYWlsLmNvbSIKZGF0ZTogImByIFN5cy5EYXRlKClgIgpvdXRwdXQ6CiBodG1sX2RvY3VtZW50OgogIGNvZGVfZG93bmxvYWQ6IHRydWUKICBjb2RlX2ZvbGRpbmc6IHNob3cKICBmaWdfY2FwdGlvbjogdHJ1ZQogIGZpZ19oZWlnaHQ6IDcKICBmaWdfd2lkdGg6IDcKICBoaWdobGlnaHQ6IGRlZmF1bHQKICBrZWVwX21kOiBmYWxzZQogIG1vZGU6IHNlbGZjb250YWluZWQKICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICBzZWxmX2NvbnRhaW5lZDogdHJ1ZQogIHRoZW1lOiByZWFkYWJsZQogIHRvYzogdHJ1ZQogIHRvY19mbG9hdDoKICAgIGNvbGxhcHNlZDogZmFsc2UKICAgIHNtb290aF9zY3JvbGw6IGZhbHNlCi0tLQoKPHN0eWxlPgogIGJvZHkgLm1haW4tY29udGFpbmVyIHsKICAgIG1heC13aWR0aDogMTYwMHB4OwogIH0KPC9zdHlsZT4KCmBgYHtyIG9wdGlvbnMsIGluY2x1ZGU9RkFMU0V9CmlmICghaXNUUlVFKGdldDAoInNraXBfbG9hZCIpKSkgewogIGxpYnJhcnkoaHBnbHRvb2xzKQogIHR0IDwtIGRldnRvb2xzOjpsb2FkX2FsbCgifi9ocGdsdG9vbHMiKQogIGtuaXRyOjpvcHRzX2tuaXQkc2V0KHByb2dyZXNzPVRSVUUsCiAgICAgICAgICAgICAgICAgICAgICAgdmVyYm9zZT1UUlVFLAogICAgICAgICAgICAgICAgICAgICAgIHdpZHRoPTkwLAogICAgICAgICAgICAgICAgICAgICAgIGVjaG89VFJVRSkKICBrbml0cjo6b3B0c19jaHVuayRzZXQoZXJyb3I9VFJVRSwKICAgICAgICAgICAgICAgICAgICAgICAgZmlnLndpZHRoPTgsCiAgICAgICAgICAgICAgICAgICAgICAgIGZpZy5oZWlnaHQ9OCwKICAgICAgICAgICAgICAgICAgICAgICAgZHBpPTk2KQogIG9sZF9vcHRpb25zIDwtIG9wdGlvbnMoZGlnaXRzPTQsCiAgICAgICAgICAgICAgICAgICAgICAgICBzdHJpbmdzQXNGYWN0b3JzPUZBTFNFLAogICAgICAgICAgICAgICAgICAgICAgICAga25pdHIuZHVwbGljYXRlLmxhYmVsPSJhbGxvdyIpCiAgZ2dwbG90Mjo6dGhlbWVfc2V0KGdncGxvdDI6OnRoZW1lX2J3KGJhc2Vfc2l6ZT0xMCkpCiAgdmVyIDwtICIyMDE3MTAxMiIKICBwcmV2aW91c19maWxlIDwtICJpbmRleC5SbWQiCgogIHRtcCA8LSB0cnkoc20obG9hZG1lKGZpbGVuYW1lPXBhc3RlMChnc3ViKHBhdHRlcm49IlxcLlJtZCIsIHJlcGxhY2U9IiIsIHg9cHJldmlvdXNfZmlsZSksICItdiIsIHZlciwgIi5yZGEueHoiKSkpKQogIHJtZF9maWxlIDwtICIwMV9hbm5vdGF0aW9uLlJtZCIKfQpgYGAKCiMgQW5ub3RhdGlvbiB2ZXJzaW9uOiBgciB2ZXJgCgojIyBHZW5vbWUgYW5ub3RhdGlvbiBpbnB1dAoKRm9yIHRoaXMgYW5hbHlzaXMsIEkgd2lsbCB1c2Ugc2ltcGx5IHRoZSBPcmdhbmlzbURiaSBvYmplY3RzIEkgZ2VuZXJhdGVkIGZyb20gdGhlIEV1cGF0aGRiLgoKVGhlIGRhdGEgZmlsZXMgZm9yIHRoZXNlIGFyZSBmb3VuZCBpbiAkU0NS
QVRDSC9ybmFzZXEvZXVwYXRoZGIgYW5kIHNob3VsZCBiZQppbnN0YWxsZWQgaW4gdGhlIFIgZW52aXJvbm1lbnQuCgojIyBPcmdhbmlzbURiCgpBbm5vdGF0aW9uSHViIGlzIHRoZSBuZXcgYW5kIGZhbmNpZXIgdmVyc2lvbiBvZiB3aGF0IE9yZ2FuaXNtRGIgZG9lcy4gIEtlaXRoIGFscmVhZHkKbWFkZSB0aGVzZSBmb3IgdGhlIHBhcmFzaXRlcyB0aG91Z2gsIGxldHMgdHJ5IGFuZCB1c2Ugb25lIG9mIHRob3NlLgoKVGhlIE9yZ2FuaXNtRGIgcGFja2FnZXMgYXJlIGluc3RhbGxhYmxlIHZpYSBLZWl0aCdzIGJ1aWxkZXI6Cmh0dHBzOi8vZ2l0aHViLmNvbS9lbHNheWVkLWxhYi9ldXBhdGhkYi1vcmdhbmlzbWRiCgpJIGRpZCBhIGdpdCBwdWxsIG9mIGl0LCBjaGFuZ2VkIGEgY291cGxlIHNtYWxsIHRoaW5ncyBhbmQgcmFuICdtYWtlIGxwYW5hbWVuc2lzJy4KQWZ0ZXIgNSBvciBzbyBtaW51dGVzIGEgYnJhbmQgbmV3IHBhY2thZ2UgJ0xlaXNobWFuaWEucGFuYW1lbnNpcy5NSE9NQ09MODFMMTMnCmFwcGVhcmVkIGluIG15IFIgZW52aXJvbm1lbnQuCgpgYGB7ciBsbWV4X29yZ2RifQp0bXAgPC0gc20obGlicmFyeSgib3JnLkxtZXhpY2FuYS5NSE9NR1QyMDAxVTExMDMudjM0LmVnLmRiIikpCnRtcCA8LSBzbShsaWJyYXJ5KCJUeERiLkxlaXNobWFuaWEubWV4aWNhbmEuTUhPTUdUMjAwMVUxMTAzLlRyaVRyeXBEQi52MzQiKSkKdG1wIDwtIHNtKGxpYnJhcnkoIkxlaXNobWFuaWEubWV4aWNhbmEuTUhPTUdUMjAwMVUxMTAzLnYzNCIpKQoKIyN3YW50ZWRfZmllbGRzIDwtIGMoImdlbmVkZXNjcmlwdGlvbiIsICJnaWQiLCAiY2RzY2hyb20iLCAiY2Rzc3RhcnQiLCAiY2RzZW5kIiwgImNkc3N0cmFuZCIsICJjZHNuYW1lIiwgInR4c3RhcnQiLCAic3R4ZW5kIikKd2FudGVkX2ZpZWxkcyA8LSBjKCJnZW5lZGVzY3JpcHRpb24iLCAidHhpZCIsICJ0eGNocm9tIiwgInR4c3RhcnQiLCAidHhlbmQiLCAidHhuYW1lIiwgInR4c3RyYW5kIiwgInR4dHlwZSIsICJ0eXBlIikKbG1leF9vcmcgPC0gbG9hZF9vcmdkYl9hbm5vdGF0aW9ucyhMZWlzaG1hbmlhLm1leGljYW5hLk1IT01HVDIwMDFVMTEwMy52MzQsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAga2V5dHlwZT0iZ2VuZWlkIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBmaWVsZHMgPSB3YW50ZWRfZmllbGRzKQpsbWV4X2dlbmVzIDwtIGxtZXhfb3JnW1siZ2VuZXMiXV0Kcm93bmFtZXMobG1leF9nZW5lcykgPC0gcGFzdGUwKCJleG9uXyIsIHJvd25hbWVzKGxtZXhfZ2VuZXMpLCAiLjEiKQpyb3duYW1lcyhsbWV4X2dlbmVzKSA8LSBtYWtlLm5hbWVzKHJvd25hbWVzKGxtZXhfZ2VuZXMpLCB1bmlxdWU9VFJVRSkKbG1leF9nZW5lc1tbImxlbmd0aCJdXSA8LSBsbWV4X2dlbmVzW1sidHhlbmQiXV0gLSBsbWV4X2dlbmVzW1sidHhzdGFydCJdXQpgYGAKCiMjIENyZWF0ZSBhbiBleHB0CgpgYGB7ciBjcmVhdGVfZXhwdH0KbG1leF9leHB0IDwtIGNyZWF0
ZV9leHB0KCJzYW1wbGVfc2hlZXRzL2FsbF9zYW1wbGVzLnhsc3giLCBnZW5lX2luZm89bG1leF9nZW5lcywKICAgICAgICAgICAgICAgICAgICAgICAgIElEPSJzYW1wbGVpZCIsIGZpbGVfY29sdW1uPSJmaWxlbmFtZWxtZXhjb3VudHMiKQpsbWV4X25pbmVfc2FtcGxlcyA8LSBzdWJzZXRfZXhwdChsbWV4X2V4cHQsIHN1YnNldD0iYmF0Y2g9PSdhJyIpCmBgYAoKCm1lc3NhZ2UocGFzdGUwKCJUaGlzIGlzIGhwZ2x0b29scyBjb21taXQ6ICIsIGdldF9naXRfY29tbWl0KCkpKQp0aGlzX3NhdmUgPC0gcGFzdGUwKGdzdWIocGF0dGVybj0iXFwuUm1kIiwgcmVwbGFjZT0iIiwgeD1ybWRfZmlsZSksICItdiIsIHZlciwgIi5yZGEueHoiKQptZXNzYWdlKHBhc3RlMCgiU2F2aW5nIHRvICIsIHRoaXNfc2F2ZSkpCnRtcCA8LSBzbShzYXZlbWUoZmlsZW5hbWU9dGhpc19zYXZlKSkKYGBgCg==