1 Annotation version: 20180119

1.1 Biomart

Biomart is pretty reliable for getting annotation data; here it is queried for the tick I. scapularis through the Ensembl Metazoa host.

# Import the exon GFF and tidy its identifier columns.
annotation_file <- "reference/ixodes_exons.gff"
annot_df <- load_gff_annotations(annotation_file)
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=TRUE)
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=FALSE)
## Had a successful gff import with rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=FALSE)
## Returning a df with 13 columns and 89845 rows.
# Every "-" in the ID column becomes ".".
annot_df[["ID"]] <- gsub("\\-", "\\.", annot_df[["ID"]], perl=TRUE)
# Remove the text "-RA" from the Parent column.
# NOTE(review): the pattern is unanchored, so "-RA" is removed wherever it
# occurs in the value, not only as a suffix -- confirm that is intended.
annot_df[["Parent"]] <- gsub("\\-RA","", annot_df[["Parent"]])
# Index the rows by the rewritten IDs.
row.names(annot_df) <- annot_df[["ID"]]

# Import the mRNA GFF and attach its description column to the exon table.
description_file <- "reference/ixodes_mRNA.gff"
description_df <- load_gff_annotations(description_file)
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=TRUE)
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=FALSE)
## Had a successful gff import with rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=FALSE)
## Returning a df with 13 columns and 20486 rows.
# Join the two tables on Parent and keep only the exon ID, the exon width
# and the description coming from the mRNA table (.x = exons, .y = mRNA).
descriptions <- merge(x = annot_df, y = description_df, by = "Parent")[
  , c("ID.x", "width.x", "description.y")
]
# Rows are keyed by exon ID; the merge suffixes are then dropped.
row.names(descriptions) <- descriptions[["ID.x"]]
names(descriptions) <- c("ID", "width", "description")
# Query biomart (Ensembl Metazoa host) for I. scapularis, asking for lengths
# as well, and keep only the `annotation` element of the result.
isc_biomart <- load_biomart_annotations(
  species = "iscapularis",
  host = "metazoa.ensembl.org",
  include_lengths = TRUE
)$annotation
## The biomart annotations file already exists, loading from it.

# Build the experiment from the gut sample sheet, passing the biomart table
# as the gene information.
ixo_expt <- create_expt(
  metadata = "sample_sheets/gut_samples.xlsx",
  gene_info = isc_biomart
)
## Reading the sample metadata.
## The sample definitions comprises: 10, 22 rows, columns.
## Reading count tables.
## Reading count tables with read.table().
## /cbcb/nelsayed-scratch/atb/rnaseq/iscapularis_2017/preprocessing/hpgl0612/outputs/tophat_iscapularis/accepted_hits.count.xz contains 20491 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/iscapularis_2017/preprocessing/hpgl0613/outputs/tophat_iscapularis/accepted_hits.count.xz contains 20491 rows and merges to 20491 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/iscapularis_2017/preprocessing/hpgl0614/outputs/tophat_iscapularis/accepted_hits.count.xz contains 20491 rows and merges to 20491 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/iscapularis_2017/preprocessing/hpgl0615/outputs/tophat_iscapularis/accepted_hits.count.xz contains 20491 rows and merges to 20491 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/iscapularis_2017/preprocessing/hpgl0616/outputs/tophat_iscapularis/accepted_hits.count.xz contains 20491 rows and merges to 20491 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/iscapularis_2017/preprocessing/hpgl0617/outputs/tophat_iscapularis/accepted_hits.count.xz contains 20491 rows and merges to 20491 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/iscapularis_2017/preprocessing/hpgl0689/outputs/tophat_iscapularis/accepted_hits.count.xz contains 20491 rows and merges to 20491 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/iscapularis_2017/preprocessing/hpgl0690/outputs/tophat_iscapularis/accepted_hits.count.xz contains 20491 rows and merges to 20491 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/iscapularis_2017/preprocessing/hpgl0691/outputs/tophat_iscapularis/accepted_hits.count.xz contains 20491 rows and merges to 20491 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/iscapularis_2017/preprocessing/hpgl0692/outputs/tophat_iscapularis/accepted_hits.count.xz contains 20491 rows and merges to 20491 rows.
## Finished reading count tables.
## Matched 20486 annotations and counts.
## Bringing together the count matrix and gene information.
# Print the R session details (R version, platform, locale, attached and
# loaded packages) through pander so they are recorded in the report.
pander::pander(sessionInfo())

R version 3.4.4 (2018-03-15)

**Platform:** x86_64-pc-linux-gnu (64-bit)

locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C

attached base packages: stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: hpgltools(v.2018.03)

loaded via a namespace (and not attached): Rcpp(v.0.12.16), RColorBrewer(v.1.1-2), GenomeInfoDb(v.1.14.0), XVector(v.0.18.0), compiler(v.3.4.4), pillar(v.1.2.1), plyr(v.1.8.4), base64enc(v.0.1-3), bitops(v.1.0-6), iterators(v.1.0.9), tools(v.3.4.4), zlibbioc(v.1.24.0), digest(v.0.6.15), lattice(v.0.20-35), evaluate(v.0.10.1), memoise(v.1.1.0), tibble(v.1.4.2), gtable(v.0.2.0), rlang(v.0.2.0.9001), openxlsx(v.4.0.17), Matrix(v.1.2-14), foreach(v.1.4.4), DelayedArray(v.0.4.1), commonmark(v.1.4), yaml(v.2.1.18), parallel(v.3.4.4), GenomeInfoDbData(v.1.0.0), rtracklayer(v.1.38.3), withr(v.2.1.2), stringr(v.1.3.0), roxygen2(v.6.0.1), xml2(v.1.2.0), knitr(v.1.20), Biostrings(v.2.46.0), IRanges(v.2.12.0), S4Vectors(v.0.16.0), devtools(v.1.13.5), stats4(v.3.4.4), rprojroot(v.1.3-2), grid(v.3.4.4), data.table(v.1.10.4-3), Biobase(v.2.38.0), R6(v.2.2.2), BiocParallel(v.1.12.0), XML(v.3.98-1.11), rmarkdown(v.1.9), pander(v.0.6.1), ggplot2(v.2.2.1), magrittr(v.1.5), matrixStats(v.0.53.1), GenomicAlignments(v.1.14.2), Rsamtools(v.1.30.0), GenomicRanges(v.1.30.3), backports(v.1.1.2), scales(v.0.5.0.9000), codetools(v.0.2-15), htmltools(v.0.3.6), BiocGenerics(v.0.24.0), SummarizedExperiment(v.1.8.1), colorspace(v.1.3-2), stringi(v.1.1.7), RCurl(v.1.95-4.10), lazyeval(v.0.2.1) and munsell(v.0.4.3)

# Report which hpgltools commit produced this document.
message("This is hpgltools commit: ", get_git_commit())
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 1b009834267dea125ee94934203413fbd606e783
## R> packrat::restore()
## This is hpgltools commit: Mon Apr 23 14:59:56 2018 -0400: 1b009834267dea125ee94934203413fbd606e783
# Derive the save-file name: <Rmd name without extension>-v<ver>.rda.xz.
# `rmd_file` and `ver` are defined outside the code shown here.
# The pattern is anchored with `$` so only a trailing ".Rmd" is removed, and
# `replacement` is spelled out instead of relying on partial matching of
# `replace`.
this_save <- paste0(
  sub(pattern = "\\.Rmd$", replacement = "", x = rmd_file),
  "-v", ver, ".rda.xz"
)
message("Saving to ", this_save)
## Saving to 01_annotation_iscapularis-v20180119.rda.xz
# Save under that file name; sm() and saveme() come from outside this file
# (presumably hpgltools -- sm() looks like it silences saveme()'s output;
# confirm).
tmp <- sm(saveme(filename = this_save))
LS0tCnRpdGxlOiAiSS5zY2FwdWxhcmlzIDIwMTc6IFRpY2sgQW5ub3RhdGlvbiBJbmZvcm1hdGlvbi4iCmF1dGhvcjogImF0YiBhYmVsZXdAZ21haWwuY29tIgpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiCm91dHB1dDoKIGh0bWxfZG9jdW1lbnQ6CiAgY29kZV9kb3dubG9hZDogdHJ1ZQogIGNvZGVfZm9sZGluZzogc2hvdwogIGZpZ19jYXB0aW9uOiB0cnVlCiAgZmlnX2hlaWdodDogNwogIGZpZ193aWR0aDogNwogIGhpZ2hsaWdodDogZGVmYXVsdAogIGtlZXBfbWQ6IGZhbHNlCiAgbW9kZTogc2VsZmNvbnRhaW5lZAogIG51bWJlcl9zZWN0aW9uczogdHJ1ZQogIHNlbGZfY29udGFpbmVkOiB0cnVlCiAgdGhlbWU6IHJlYWRhYmxlCiAgdG9jOiB0cnVlCiAgdG9jX2Zsb2F0OgogICAgY29sbGFwc2VkOiBmYWxzZQogICAgc21vb3RoX3Njcm9sbDogZmFsc2UKLS0tCgo8c3R5bGU+CiAgYm9keSAubWFpbi1jb250YWluZXIgewogICAgbWF4LXdpZHRoOiAxNjAwcHg7Cn0KPC9zdHlsZT4KCmBgYHtyIG9wdGlvbnMsIGluY2x1ZGU9RkFMU0V9CnR0IDwtIGRldnRvb2xzOjpsb2FkX2FsbCgifi9ocGdsdG9vbHMiKQprbml0cjo6b3B0c19rbml0JHNldChwcm9ncmVzcz1UUlVFLAogICAgICAgICAgICAgICAgICAgICB2ZXJib3NlPVRSVUUsCiAgICAgICAgICAgICAgICAgICAgIHdpZHRoPTkwLAogICAgICAgICAgICAgICAgICAgICBlY2hvPVRSVUUpCmtuaXRyOjpvcHRzX2NodW5rJHNldChlcnJvcj1UUlVFLAogICAgICAgICAgICAgICAgICAgICAgZmlnLndpZHRoPTgsCiAgICAgICAgICAgICAgICAgICAgICBmaWcuaGVpZ2h0PTgsCiAgICAgICAgICAgICAgICAgICAgICBkcGk9OTYpCm9sZF9vcHRpb25zIDwtIG9wdGlvbnMoZGlnaXRzPTQsCiAgICAgICAgc3RyaW5nc0FzRmFjdG9ycz1GQUxTRSwKICAgICAgICBrbml0ci5kdXBsaWNhdGUubGFiZWw9ImFsbG93IikKZ2dwbG90Mjo6dGhlbWVfc2V0KGdncGxvdDI6OnRoZW1lX2J3KGJhc2Vfc2l6ZT0xMCkpCnNldC5zZWVkKDEpCnZlciA8LSAiMjAxODAxMTkiCnByZXZpb3VzX2ZpbGUgPC0gImluZGV4LlJtZCIKCnRtcCA8LSB0cnkoc20obG9hZG1lKGZpbGVuYW1lPXBhc3RlMChnc3ViKHBhdHRlcm49IlxcLlJtZCIsIHJlcGxhY2U9IiIsIHg9cHJldmlvdXNfZmlsZSksICItdiIsIHZlciwgIi5yZGEueHoiKSkpKQoKcm1kX2ZpbGUgPC0gIjAxX2Fubm90YXRpb25faXNjYXB1bGFyaXMuUm1kIgpgYGAKCiMgQW5ub3RhdGlvbiB2ZXJzaW9uOiBgciB2ZXJgCgojIyBCaW9tYXJ0CgpiaW9tYXJ0IGlzIHByZXR0eSByZWxpYWJsZSBmb3IgZ2V0dGluZyBtb3VzZSBhbm5vdGF0aW9uIGRhdGEuCgpgYGB7ciBwcmV2aW91c19hbm5vdH0KYW5ub3RhdGlvbl9maWxlIDwtICJyZWZlcmVuY2UvaXhvZGVzX2V4b25zLmdmZiIKYW5ub3RfZGYgPC0gbG9hZF9nZmZfYW5ub3RhdGlvbnMoYW5ub3RhdGlvbl9maWxlKQphbm5vdF9kZiRJRCA8LSBnc3ViKCJcXC0iLCAiXFwuIiwgYW5ub3RfZGYkSUQsIHBlcmw9
VFJVRSkKYW5ub3RfZGYkUGFyZW50IDwtIGdzdWIoIlxcLVJBIiwiIiwgYW5ub3RfZGYkUGFyZW50KQpyb3duYW1lcyhhbm5vdF9kZikgPC0gYW5ub3RfZGYkSUQKCmRlc2NyaXB0aW9uX2ZpbGUgPC0gInJlZmVyZW5jZS9peG9kZXNfbVJOQS5nZmYiCmRlc2NyaXB0aW9uX2RmIDwtIGxvYWRfZ2ZmX2Fubm90YXRpb25zKGRlc2NyaXB0aW9uX2ZpbGUpCgpkZXNjcmlwdGlvbnMgPC0gbWVyZ2UoYW5ub3RfZGYsIGRlc2NyaXB0aW9uX2RmLCBieT0iUGFyZW50IikKZGVzY3JpcHRpb25zIDwtIGRlc2NyaXB0aW9uc1ssYygiSUQueCIsIndpZHRoLngiLCJkZXNjcmlwdGlvbi55IildCnJvd25hbWVzKGRlc2NyaXB0aW9ucykgPC0gZGVzY3JpcHRpb25zJElELngKY29sbmFtZXMoZGVzY3JpcHRpb25zKSA8LSBjKCJJRCIsIndpZHRoIiwiZGVzY3JpcHRpb24iKQpgYGAKCmBgYHtyIGRhdGFfaW1wb3J0fQppc2NfYmlvbWFydCA8LSBsb2FkX2Jpb21hcnRfYW5ub3RhdGlvbnMoc3BlY2llcz0iaXNjYXB1bGFyaXMiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgaG9zdD0ibWV0YXpvYS5lbnNlbWJsLm9yZyIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBpbmNsdWRlX2xlbmd0aHM9VFJVRSkKaXNjX2Jpb21hcnQgPC0gaXNjX2Jpb21hcnQkYW5ub3RhdGlvbgoKaXhvX2V4cHQgPC0gY3JlYXRlX2V4cHQobWV0YWRhdGE9InNhbXBsZV9zaGVldHMvZ3V0X3NhbXBsZXMueGxzeCIsIGdlbmVfaW5mbz1pc2NfYmlvbWFydCkKYGBgCgpgYGB7ciBzYXZlbWV9CnBhbmRlcjo6cGFuZGVyKHNlc3Npb25JbmZvKCkpCm1lc3NhZ2UocGFzdGUwKCJUaGlzIGlzIGhwZ2x0b29scyBjb21taXQ6ICIsIGdldF9naXRfY29tbWl0KCkpKQp0aGlzX3NhdmUgPC0gcGFzdGUwKGdzdWIocGF0dGVybj0iXFwuUm1kIiwgcmVwbGFjZT0iIiwgeD1ybWRfZmlsZSksICItdiIsIHZlciwgIi5yZGEueHoiKQptZXNzYWdlKHBhc3RlMCgiU2F2aW5nIHRvICIsIHRoaXNfc2F2ZSkpCnRtcCA8LSBzbShzYXZlbWUoZmlsZW5hbWU9dGhpc19zYXZlKSkKYGBgCg==