1 Annotation version: 20180402

2 Leishmania major annotation data

There are a few methods of importing annotation data into R. I will attempt some of them in preparation for attaching the annotations to the L. major RNASeq data.

3 AnnotationHub: loading OrgDb

AnnotationHub is a newer service which promises to be an excellent top-level resource for gathering annotation data.

## Attach AnnotationHub; sm() presumably silences the startup messages
## (it is defined outside this document -- confirm).
tmp <- sm(library(AnnotationHub))
## Connect to the hub.  Assignment uses `<-` like every other line here.
ah <- sm(AnnotationHub())
## Every OrgDb record the hub knows about.
orgdbs <- sm(query(ah, "OrgDb"))
## Narrow the search to the Leishmania OrgDb records; the search term is
## spelled "OrgDb" to agree with the query above.
lm_orgdb <- sm(query(ah, c("OrgDb", "Leishmania")))
lm_orgdb
## AnnotationHub with 4 records
## # snapshotDate(): 2017-10-27 
## # $dataprovider: ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/
## # $species: Leishmania donovani, Leishmania major_strain_Friedlin, Leis...
## # $rdataclass: OrgDb
## # additional mcols(): taxonomyid, genome, description,
## #   coordinate_1_based, maintainer, rdatadateadded, preparerclass,
## #   tags, rdatapath, sourceurl, sourcetype 
## # retrieve records with, e.g., 'object[["AH59612"]]' 
## 
##             title                                               
##   AH59612 | org.Leishmania_major_strain_Friedlin.eg.sqlite      
##   AH59675 | org.Leishmania_mexicana_MHOM|GT|2001|U1103.eg.sqlite
##   AH59676 | org.Leishmania_donovani.eg.sqlite                   
##   AH59684 | org.Leishmania_panamensis.eg.sqlite
## Fetch the L. major Friedlin record by the hub ID shown in the listing above,
## replacing the query result with the OrgDb object itself.
lm_orgdb <- ah[["AH59612"]]
## loading from cache '/home/trey//.AnnotationHub/66358'
lm_orgdb
## OrgDb object:
## | DBSCHEMAVERSION: 2.1
## | DBSCHEMA: NOSCHEMA_DB
## | ORGANISM: Leishmania major_strain_Friedlin
## | SPECIES: Leishmania major_strain_Friedlin
## | CENTRALID: GID
## | Taxonomy ID: 347515
## | Db type: OrgDb
## | Supporting package: AnnotationDbi
## 
## Please see: help('select') for usage information
## The AnnotationHub retrieval succeeded; now extract annotations from the
## OrgDb, keyed by Entrez ID.
lm_annotv1 <- load_orgdb_annotations(
  lm_orgdb,
  keytype = "entrezid",
  fields = c("entrezid", "alias", "genename", "refseq", "symbol")
)
## Unable to find TYPE in the db, removing it.
## Unable to find TXSTRAND in the db, removing it.
## Unable to find TXSTART in the db, removing it.
## Unable to find TXEND in the db, removing it.
## Extracted all gene ids.
## 'select()' returned 1:many mapping between keys and columns
## Inspect the returned list, then keep only its "genes" data frame
## (the "transcripts" element is NULL for this OrgDb, per the summary below).
summary(lm_annotv1)
##             Length Class      Mode
## genes       6      data.frame list
## transcripts 0      -none-     NULL
lm_annotv1 <- lm_annotv1[["genes"]]
head(lm_annotv1)
##             entrezid genename chr              alias         refseq
## X12980396   12980396    ncRNA  20 LMJF_20_snoRNA0211 XR_002460236.1
## X12980396.1 12980396    ncRNA  20 LMJF_20_snoRNA0211 XR_002460237.1
## X12980396.2 12980396    ncRNA  20        LM20Cs1C1.1 XR_002460236.1
## X12980396.3 12980396    ncRNA  20        LM20Cs1C1.1 XR_002460237.1
## X12980397   12980397    ncRNA  20 LMJF_20_snoRNA0220 XR_002460308.1
## X12980397.1 12980397    ncRNA  20 LMJF_20_snoRNA0220 XR_002460309.1
##                   symbol
## X12980396    LM20Cs1C1.1
## X12980396.1  LM20Cs1C1.1
## X12980396.2  LM20Cs1C1.1
## X12980396.3  LM20Cs1C1.1
## X12980397   LM20Cs1C1.10
## X12980397.1 LM20Cs1C1.10

4 Loading from biomart

A completely separate and competing annotation source is biomart.

## Query the Ensembl Protists biomart for L. major and keep the annotation
## element of the result.
lm_annotv2 <- sm(
  load_biomart_annotations(species = "lmajor", host = "protists.ensembl.org")
)$annotation
head(lm_annotv2)
##                        transcriptID       geneID
## LmjF.01.0010.mRNA LmjF.01.0010:mRNA LmjF.01.0010
## LmjF.01.0020.mRNA LmjF.01.0020:mRNA LmjF.01.0020
## LmjF.01.0030.mRNA LmjF.01.0030:mRNA LmjF.01.0030
## LmjF.01.0040.mRNA LmjF.01.0040:mRNA LmjF.01.0040
## LmjF.01.0050.mRNA LmjF.01.0050:mRNA LmjF.01.0050
## LmjF.01.0060.mRNA LmjF.01.0060:mRNA LmjF.01.0060
##                                                                               Description
## LmjF.01.0010.mRNA hypothetical protein, unknown function.[Source:GeneDB;Acc:LmjF.01.0010]
## LmjF.01.0020.mRNA        hypothetical protein, conserved.[Source:GeneDB;Acc:LmjF.01.0020]
## LmjF.01.0030.mRNA            MCAK-like kinesin, putative.[Source:GeneDB;Acc:LmjF.01.0030]
## LmjF.01.0040.mRNA hypothetical protein, unknown function.[Source:GeneDB;Acc:LmjF.01.0040]
## LmjF.01.0050.mRNA                  carboxylase, putative.[Source:GeneDB;Acc:LmjF.01.0050]
## LmjF.01.0060.mRNA        hypothetical protein, conserved.[Source:GeneDB;Acc:LmjF.01.0060]
##                             Type length chromosome strand start   end
## LmjF.01.0010.mRNA protein_coding    999          1     -1  3704  4702
## LmjF.01.0020.mRNA protein_coding   1650          1     -1  5790  7439
## LmjF.01.0030.mRNA protein_coding   2007          1     -1  9061 11067
## LmjF.01.0040.mRNA protein_coding    570          1     -1 12073 12642
## LmjF.01.0050.mRNA protein_coding   1998          1     -1 15025 17022
## LmjF.01.0060.mRNA protein_coding    750          1     -1 18137 18886
## Query the same biomart host via load_biomart_go(); presumably this returns
## the gene ontology assignments for L. major -- confirm against hpgltools.
lm_ontology <- sm(load_biomart_go("lmajor", host="protists.ensembl.org"))

5 Load from the tritrypdb

The hpgltools package has some improved methods for collecting annotation information directly from the EuPathDB web services API. It does this by first downloading all the available data for a given species and then creating a SQLite OrgDb instance from it.

5.1 Example orgdb creation

The creation of orgdbs takes a long time, so here is an example invocation.

## Example invocation only: creating the orgdb takes a long time.
testing_major <- make_eupath_organismdbi("major")

Assuming the above packages got created, we may load them and extract the annotation data.

## Look up the package names for the species matching "major"; when several
## strains match, the first hit is chosen (see the message below).
major_names <- get_eupath_pkgnames("major")
## Starting metadata download.
## Finished metadata download.
## Found the following hits: Leishmania major strain Friedlin, Leishmania major strain LV39c5, Leishmania major strain SD 75.1, choosing the first.
major_names$orgdb
## [1] "org.Lmajor.Friedlin.v36.eg.db"
## Columns to extract from the TriTrypDB-derived OrgDb.
wanted_fields <- c("cds_length", "chromosome", "entrez_gene_id", "gene_name_or_symbol",
                   "gene_strand", "gid", "go_go_id", "go_go_term_name", "go_ontology",
                   "interpro_description", "interpro_e_value", "type_gene_type")
## Use the package name looked up by get_eupath_pkgnames() rather than a
## hard-coded copy of it, so the two cannot drift apart when the database
## version changes.  Extract the "genes" element with [[ ]] as is done for the
## AnnotationHub OrgDb earlier in this document.
lm_org <- load_orgdb_annotations(major_names$orgdb, keytype="gid", fields=wanted_fields)[["genes"]]
## Unable to find GENENAME, setting it to GENE_NAME_OR_SYMBOL.
## Unable to find TYPE in the db, removing it.
## Unable to find CHR in the db, removing it.
## Unable to find TXSTRAND in the db, removing it.
## Unable to find TXSTART in the db, removing it.
## Unable to find TXEND in the db, removing it.
## Extracted all gene ids.
## 'select()' returned 1:many mapping between keys and columns
## Render the first rows of the annotation table.
knitr::kable(head(lm_org))
gid gene_name_or_symbol cds_length chromosome entrez_gene_id gene_strand go_go_id go_go_term_name go_ontology interpro_description interpro_e_value type_gene_type
LmjF.01.0010 LmjF.01.0010 999.0 1 reverse NA NA NA Protein of unknown function (DUF2946) 7.7e-18 protein coding
LmjF.01.0010.1 LmjF.01.0010 999.0 1 reverse NA NA NA Prokaryotic membrane lipoprotein lipid attachment site profile 5.0 protein coding
LmjF.01.0020 LmjF.01.0020 1650.0 1 reverse NA NA NA Endonuclease/Exonuclease/phosphatase family 1.3e-10 protein coding
LmjF.01.0020.1 LmjF.01.0020 1650.0 1 reverse NA NA NA DNase I-like 7.9e-21 protein coding
LmjF.01.0030 LmjF.01.0030 KIN13-1 2007.0 1 reverse GO:0007018 microtubule-based movement Biological Process Kinesin motor domain 1.2e-94 protein coding
LmjF.01.0030.1 LmjF.01.0030 KIN13-1 2007.0 1 reverse GO:0007018 microtubule-based movement Biological Process Kinesin motor domain profile 43.0 protein coding

6 Read a gff file

In contrast, it is possible to load most annotations of interest directly from the gff files used in the alignments.

## The old way of getting genome/annotation data: read the gene features
## directly from the gff file.
lm_gff <- "reference/lmajor.gff.gz"
lm_gff_annotations <- load_gff_annotations(lm_gff, type = "gene")
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=TRUE)
## Had a successful gff import with rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=TRUE)
## Returning a df with 24 columns and 9379 rows.
## Key the table by gene name, made syntactically valid and unique.
rownames(lm_gff_annotations) <- make.names(
  lm_gff_annotations[["Name"]],
  unique = TRUE
)
head(lm_gff_annotations)
##              seqnames start   end width strand    source type score phase
## LmjF.01.0010  LmjF.01  3704  4702   999      - TriTrypDB gene    NA    NA
## LmjF.01.0020  LmjF.01  5790  7439  1650      - TriTrypDB gene    NA    NA
## LmjF.01.0030  LmjF.01  9061 11067  2007      - TriTrypDB gene    NA    NA
## LmjF.01.0040  LmjF.01 12073 12642   570      - TriTrypDB gene    NA    NA
## LmjF.01.0050  LmjF.01 15025 17022  1998      - TriTrypDB gene    NA    NA
## LmjF.01.0060  LmjF.01 18137 18886   750      - TriTrypDB gene    NA    NA
##                        ID         Name
## LmjF.01.0010 LmjF.01.0010 LmjF.01.0010
## LmjF.01.0020 LmjF.01.0020 LmjF.01.0020
## LmjF.01.0030 LmjF.01.0030 LmjF.01.0030
## LmjF.01.0040 LmjF.01.0040 LmjF.01.0040
## LmjF.01.0050 LmjF.01.0050 LmjF.01.0050
## LmjF.01.0060 LmjF.01.0060 LmjF.01.0060
##                                         description size       web_id
## LmjF.01.0010 hypothetical+protein,+unknown+function  999 LmjF.01.0010
## LmjF.01.0020        hypothetical+protein,+conserved 1650 LmjF.01.0020
## LmjF.01.0030       Kinesin-13+1,+putative+(KIN13-1) 2007 LmjF.01.0030
## LmjF.01.0040 hypothetical+protein,+unknown+function  570 LmjF.01.0040
## LmjF.01.0050                  carboxylase,+putative 1998 LmjF.01.0050
## LmjF.01.0060        hypothetical+protein,+conserved  750 LmjF.01.0060
##              molecule_type organism_name translation_table topology
## LmjF.01.0010          <NA>          <NA>              <NA>     <NA>
## LmjF.01.0020          <NA>          <NA>              <NA>     <NA>
## LmjF.01.0030          <NA>          <NA>              <NA>     <NA>
## LmjF.01.0040          <NA>          <NA>              <NA>     <NA>
## LmjF.01.0050          <NA>          <NA>              <NA>     <NA>
## LmjF.01.0060          <NA>          <NA>              <NA>     <NA>
##              localization Dbxref    locus_tag
## LmjF.01.0010         <NA>        LmjF.01.0010
## LmjF.01.0020         <NA>        LmjF.01.0020
## LmjF.01.0030         <NA>        LmjF.01.0030
## LmjF.01.0040         <NA>        LmjF.01.0040
## LmjF.01.0050         <NA>        LmjF.01.0050
## LmjF.01.0060         <NA>        LmjF.01.0060
##                                                                                                                          Alias
## LmjF.01.0010                    321438052, 389592307, LmjF1.0010, LmjF01.0010, LmjF.01.0010, LmjF01.0010:pep, LmjF01.0010:mRNA
## LmjF.01.0020                    321438053, 389592309, LmjF1.0020, LmjF01.0020, LmjF.01.0020, LmjF01.0020:pep, LmjF01.0020:mRNA
## LmjF.01.0030 KIN13-1, Kif-13-1, 321438054, 389592311, LmjF1.0030, LmjF01.0030, LmjF.01.0030, LmjF01.0030:pep, LmjF01.0030:mRNA
## LmjF.01.0040                    321438055, 389592313, LmjF1.0040, LmjF01.0040, LmjF.01.0040, LmjF01.0040:pep, LmjF01.0040:mRNA
## LmjF.01.0050                    321438056, 389592315, LmjF1.0050, LmjF01.0050, LmjF.01.0050, LmjF01.0050:pep, LmjF01.0050:mRNA
## LmjF.01.0060                    321438057, 389592317, LmjF1.0060, LmjF01.0060, LmjF.01.0060, LmjF01.0060:pep, LmjF01.0060:mRNA
##              Parent Ontology_term
## LmjF.01.0010                     
## LmjF.01.0020                     
## LmjF.01.0030                     
## LmjF.01.0040                     
## LmjF.01.0050                     
## LmjF.01.0060

7 Putting the pieces together

In the following block we create an expressionset using the sample sheet and the annotations.

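Annoyingly, the count tables are keyed in a peculiar fashion (e.g. exon_LmjF.01.0010.1), whereas the biomart annotations are keyed by transcript (e.g. LmjF.01.0010.mRNA). Therefore I need to do a little work to merge them.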

## Rewrite the biomart row names (e.g. LmjF.01.0010.mRNA) into the form used by
## the count tables (e.g. exon_LmjF.01.0010.1): prefix "exon_", append "1",
## then delete the text "mRNA" so that ".mRNA1" collapses to ".1".
lm_annotations <- lm_annotv2
rownames(lm_annotations) <- gsub(
  pattern = "mRNA",
  replacement = "",
  x = paste0("exon_", rownames(lm_annotations), "1")
)
## Combine the sample sheet, the count tables named in its "lmajorfile" column,
## and the re-keyed annotations.
lm_expt <- create_expt(
  metadata = "sample_sheets/all_samples.xlsx",
  gene_info = lm_annotations,
  file_column = "lmajorfile"
)
## Reading the sample metadata.
## The sample definitions comprises: 24, 25 rows, columns.
## Reading count tables.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0840/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0841/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0842/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0843/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0844/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0845/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0846/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0847/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0848/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0849/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0850/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0851/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0852/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0853/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0854/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0855/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0856/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0857/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0858/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0859/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0860/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0861/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0862/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0863/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## Finished reading count tables.
## Matched 8324 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Column guide labelling the six color slots by condition, followed by the
## superseded color assignment (kept for reference).
## NOTE(review): confirm the condition order in the guide still matches the
## active set_expt_colors() call below.
##                                                amast       inf12   inf14d     uninf12hr  uninf14d  pro
##lm_expt <- set_expt_colors(expt=lm_expt, colors=c("darkgray", "gray", "darkred", "pink", "darkblue", "blue"))
lm_expt <- set_expt_colors(expt=lm_expt, colors=c("gray", "darkred", "darkblue", "pink", "blue", "darkgreen"))
## Biobase provides the exprs()/fData()/pData() accessors used from here on.
library(Biobase)
## First rows of the count matrix.
head(exprs(lm_expt$expressionset))
##                     HPGL0840 HPGL0841 HPGL0842 HPGL0843 HPGL0844 HPGL0845
## exon_LmjF.01.0010.1       30       22       10       21        2        3
## exon_LmjF.01.0020.1       10        2       12       16        9        1
## exon_LmjF.01.0030.1       34       47       26       19       15       26
## exon_LmjF.01.0040.1        1        0        8        0        0        0
## exon_LmjF.01.0050.1       50       63       61       45       25       11
## exon_LmjF.01.0060.1       19       15        6       13        3       12
##                     HPGL0846 HPGL0847 HPGL0848 HPGL0849 HPGL0850 HPGL0851
## exon_LmjF.01.0010.1       26       10       12       28        0        0
## exon_LmjF.01.0020.1        0       18       10       57        1        4
## exon_LmjF.01.0030.1       21       58       40      124        0        0
## exon_LmjF.01.0040.1        1        9        5        7        0        0
## exon_LmjF.01.0050.1       16       41       62       85       78        0
## exon_LmjF.01.0060.1       27       27       34       44        0        0
##                     HPGL0852 HPGL0853 HPGL0854 HPGL0855 HPGL0856 HPGL0857
## exon_LmjF.01.0010.1        0        0        0        0       13       38
## exon_LmjF.01.0020.1        0        0        8        2       30       17
## exon_LmjF.01.0030.1        7        0        3       10       52       51
## exon_LmjF.01.0040.1        0        0        0        0        4        6
## exon_LmjF.01.0050.1        0        0        0        0       20       48
## exon_LmjF.01.0060.1        0        0        0        0       10       17
##                     HPGL0858 HPGL0859 HPGL0860 HPGL0861 HPGL0862 HPGL0863
## exon_LmjF.01.0010.1       18       18        0        0       11        0
## exon_LmjF.01.0020.1       19        9       40        8       69        9
## exon_LmjF.01.0030.1       83       58       19       41        8      184
## exon_LmjF.01.0040.1        1        6       14       12        6       38
## exon_LmjF.01.0050.1       58       38       10       30       34        4
## exon_LmjF.01.0060.1       29       34       22       12       29        0
## First rows of the feature (gene annotation) data attached to the expressionset.
head(fData(lm_expt$expressionset))
##                          transcriptID       geneID
## exon_LmjF.01.0010.1 LmjF.01.0010:mRNA LmjF.01.0010
## exon_LmjF.01.0020.1 LmjF.01.0020:mRNA LmjF.01.0020
## exon_LmjF.01.0030.1 LmjF.01.0030:mRNA LmjF.01.0030
## exon_LmjF.01.0040.1 LmjF.01.0040:mRNA LmjF.01.0040
## exon_LmjF.01.0050.1 LmjF.01.0050:mRNA LmjF.01.0050
## exon_LmjF.01.0060.1 LmjF.01.0060:mRNA LmjF.01.0060
##                                                                                 Description
## exon_LmjF.01.0010.1 hypothetical protein, unknown function.[Source:GeneDB;Acc:LmjF.01.0010]
## exon_LmjF.01.0020.1        hypothetical protein, conserved.[Source:GeneDB;Acc:LmjF.01.0020]
## exon_LmjF.01.0030.1            MCAK-like kinesin, putative.[Source:GeneDB;Acc:LmjF.01.0030]
## exon_LmjF.01.0040.1 hypothetical protein, unknown function.[Source:GeneDB;Acc:LmjF.01.0040]
## exon_LmjF.01.0050.1                  carboxylase, putative.[Source:GeneDB;Acc:LmjF.01.0050]
## exon_LmjF.01.0060.1        hypothetical protein, conserved.[Source:GeneDB;Acc:LmjF.01.0060]
##                               Type length chromosome strand start   end
## exon_LmjF.01.0010.1 protein_coding    999          1     -1  3704  4702
## exon_LmjF.01.0020.1 protein_coding   1650          1     -1  5790  7439
## exon_LmjF.01.0030.1 protein_coding   2007          1     -1  9061 11067
## exon_LmjF.01.0040.1 protein_coding    570          1     -1 12073 12642
## exon_LmjF.01.0050.1 protein_coding   1998          1     -1 15025 17022
## exon_LmjF.01.0060.1 protein_coding    750          1     -1 18137 18886
## First rows of the sample metadata attached to the expressionset.
head(pData(lm_expt$expressionset))
##          dnarnanumber projectid rmlid inst        organism
## HPGL0840     RNA10165    HS_157 10845   HS Mouse/Leshmania
## HPGL0841     RNA10166    HS_157 10851   HS Mouse/Leshmania
## HPGL0842     RNA10167    HS_157 10857   HS Mouse/Leshmania
## HPGL0843     RNA10168    HS_157 10863   HS Mouse/Leshmania
## HPGL0844     RNA10169    HS_157 10846   HS Mouse/Leshmania
## HPGL0845     RNA10170    HS_157 10852   HS Mouse/Leshmania
##                        investigator batch    condition sampleid
## HPGL0840 SangLee/David Sacks/A.Sher     a promastigote HPGL0840
## HPGL0841 SangLee/David Sacks/A.Sher     b promastigote HPGL0841
## HPGL0842 SangLee/David Sacks/A.Sher     c promastigote HPGL0842
## HPGL0843 SangLee/David Sacks/A.Sher     d promastigote HPGL0843
## HPGL0844 SangLee/David Sacks/A.Sher     a   amastigote HPGL0844
## HPGL0845 SangLee/David Sacks/A.Sher     b   amastigote HPGL0845
##          investigatorsample librarystartdate preppedby
## HPGL0840      Promastigote1            42830       KB 
## HPGL0841      Promastigote2            42830       KB 
## HPGL0842      Promastigote3            42830       KB 
## HPGL0843      Promastigote4            42830       KB 
## HPGL0844        Amastigote1            42830       KB 
## HPGL0845        Amastigote2            42830       KB 
##                                                         librarytype
## HPGL0840 SMARTer Stranded Total RNA - Pico Mammalian_300pg-starting
## HPGL0841 SMARTer Stranded Total RNA - Pico Mammalian_300pg-starting
## HPGL0842 SMARTer Stranded Total RNA - Pico Mammalian_300pg-starting
## HPGL0843 SMARTer Stranded Total RNA - Pico Mammalian_300pg-starting
## HPGL0844 SMARTer Stranded Total RNA - Pico Mammalian_300pg-starting
## HPGL0845 SMARTer Stranded Total RNA - Pico Mammalian_300pg-starting
##          averagesize titrationamt sequencelength lane  indexmid
## HPGL0840         340           10    100/8/8/100  1,2 D501-D701
## HPGL0841         310           10    100/8/8/100  1,2 D501-D707
## HPGL0842         340           10    100/8/8/100  1,2 D502-D701
## HPGL0843         320           10    100/8/8/100  1,2 D502-D707
## HPGL0844         310           10    100/8/8/100  1,2 D501-D702
## HPGL0845         355           10    100/8/8/100  1,2 D501-D708
##                 startrundate
## HPGL0840 4/7/2017; 4/11/2017
## HPGL0841 4/7/2017; 4/11/2017
## HPGL0842 4/7/2017; 4/11/2017
## HPGL0843 4/7/2017; 4/11/2017
## HPGL0844 4/7/2017; 4/11/2017
## HPGL0845 4/7/2017; 4/11/2017
##                                                             runname
## HPGL0840 170407_SN406_0395_Ahjncnbcxy ;170411_SN406_0396_BHJNCCBCXY
## HPGL0841 170407_SN406_0395_Ahjncnbcxy ;170411_SN406_0396_BHJNCCBCXY
## HPGL0842 170407_SN406_0395_Ahjncnbcxy ;170411_SN406_0396_BHJNCCBCXY
## HPGL0843 170407_SN406_0395_Ahjncnbcxy ;170411_SN406_0396_BHJNCCBCXY
## HPGL0844 170407_SN406_0395_Ahjncnbcxy ;170411_SN406_0396_BHJNCCBCXY
## HPGL0845 170407_SN406_0395_Ahjncnbcxy ;170411_SN406_0396_BHJNCCBCXY
##          samplenumber mousefile
## HPGL0840          840      <NA>
## HPGL0841          841      <NA>
## HPGL0842          842      <NA>
## HPGL0843          843      <NA>
## HPGL0844          844      <NA>
## HPGL0845          845      <NA>
##                                                                     lmajorfile
## HPGL0840 preprocessing/hpgl0840/outputs/tophat_lmajor/accepted_paired.count.xz
## HPGL0841 preprocessing/hpgl0841/outputs/tophat_lmajor/accepted_paired.count.xz
## HPGL0842 preprocessing/hpgl0842/outputs/tophat_lmajor/accepted_paired.count.xz
## HPGL0843 preprocessing/hpgl0843/outputs/tophat_lmajor/accepted_paired.count.xz
## HPGL0844 preprocessing/hpgl0844/outputs/tophat_lmajor/accepted_paired.count.xz
## HPGL0845 preprocessing/hpgl0845/outputs/tophat_lmajor/accepted_paired.count.xz
##                                                                                lmajor395
## HPGL0840 preprocessing/run0395/hpgl0840/outputs/bowtie2_lmajor/hpgl0840-trimmed.count.xz
## HPGL0841                                                                            <NA>
## HPGL0842 preprocessing/run0395/hpgl0842/outputs/bowtie2_lmajor/hpgl0842-trimmed.count.xz
## HPGL0843 preprocessing/run0395/hpgl0843/outputs/bowtie2_lmajor/hpgl0843-trimmed.count.xz
## HPGL0844 preprocessing/run0395/hpgl0844/outputs/bowtie2_lmajor/hpgl0844-trimmed.count.xz
## HPGL0845 preprocessing/run0395/hpgl0845/outputs/bowtie2_lmajor/hpgl0845-trimmed.count.xz
##                                                                              mouse395
## HPGL0840 preprocessing/run0395/hpgl0840/outputs/kallisto_mmusculus/abundance.count.gz
## HPGL0841 preprocessing/run0395/hpgl0841/outputs/kallisto_mmusculus/abundance.count.gz
## HPGL0842 preprocessing/run0395/hpgl0842/outputs/kallisto_mmusculus/abundance.count.gz
## HPGL0843 preprocessing/run0395/hpgl0843/outputs/kallisto_mmusculus/abundance.count.gz
## HPGL0844 preprocessing/run0395/hpgl0844/outputs/kallisto_mmusculus/abundance.count.gz
## HPGL0845 preprocessing/run0395/hpgl0845/outputs/kallisto_mmusculus/abundance.count.gz
##          file
## HPGL0840 null
## HPGL0841 null
## HPGL0842 null
## HPGL0843 null
## HPGL0844 null
## HPGL0845 null
## Unless skip_load has been set to TRUE, print the session information, report
## the hpgltools commit in use, and hand saveme() a file name built from
## rmd_file and ver (both are defined outside the visible code).
if (!isTRUE(get0("skip_load"))) {
  pander::pander(sessionInfo())
  message(paste0("This is hpgltools commit: ", get_git_commit()))
  ## Strip only a trailing ".Rmd" extension, and spell out `replacement`
  ## instead of relying on partial argument matching of `replace`.
  this_save <- paste0(sub(pattern="\\.Rmd$", replacement="", x=rmd_file), "-v", ver, ".rda.xz")
  message(paste0("Saving to ", this_save))
  tmp <- sm(saveme(filename=this_save))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 2a0661d6e37f8a3d8831eb3bbd6347c0d9c4b3b7
## R> packrat::restore()
## This is hpgltools commit: Thu Mar 29 16:59:07 2018 -0400: 2a0661d6e37f8a3d8831eb3bbd6347c0d9c4b3b7
## Saving to 01_annotation_lmajor-v20180402.rda.xz
LS0tCnRpdGxlOiAiTC5tYWpvciAyMDE4OiBNb3VzZSBQTU4gaW5mZWN0aW9uIGFubm90YXRpb25zLiIKYXV0aG9yOiAiYXRiIGFiZWxld0BnbWFpbC5jb20iCmRhdGU6ICJgciBTeXMuRGF0ZSgpYCIKb3V0cHV0OgogaHRtbF9kb2N1bWVudDoKICBjb2RlX2Rvd25sb2FkOiB0cnVlCiAgY29kZV9mb2xkaW5nOiBzaG93CiAgZmlnX2NhcHRpb246IHRydWUKICBmaWdfaGVpZ2h0OiA3CiAgZmlnX3dpZHRoOiA3CiAgaGlnaGxpZ2h0OiBkZWZhdWx0CiAga2VlcF9tZDogZmFsc2UKICBtb2RlOiBzZWxmY29udGFpbmVkCiAgbnVtYmVyX3NlY3Rpb25zOiB0cnVlCiAgc2VsZl9jb250YWluZWQ6IHRydWUKICB0aGVtZTogcmVhZGFibGUKICB0b2M6IHRydWUKICB0b2NfZmxvYXQ6CiAgICBjb2xsYXBzZWQ6IGZhbHNlCiAgICBzbW9vdGhfc2Nyb2xsOiBmYWxzZQotLS0KCjxzdHlsZT4KICBib2R5IC5tYWluLWNvbnRhaW5lciB7CiAgICBtYXgtd2lkdGg6IDE2MDBweDsKICB9Cjwvc3R5bGU+CgpgYGB7ciBvcHRpb25zLCBpbmNsdWRlPUZBTFNFfQppZiAoIWlzVFJVRShnZXQwKCJza2lwX2xvYWQiKSkpIHsKICBsaWJyYXJ5KGhwZ2x0b29scykKICB0dCA8LSBkZXZ0b29sczo6bG9hZF9hbGwoIn4vaHBnbHRvb2xzIikKICBrbml0cjo6b3B0c19rbml0JHNldChwcm9ncmVzcz1UUlVFLAogICAgICAgICAgICAgICAgICAgICAgIHZlcmJvc2U9VFJVRSwKICAgICAgICAgICAgICAgICAgICAgICB3aWR0aD05MCwKICAgICAgICAgICAgICAgICAgICAgICBlY2hvPVRSVUUpCiAga25pdHI6Om9wdHNfY2h1bmskc2V0KGVycm9yPVRSVUUsCiAgICAgICAgICAgICAgICAgICAgICAgIGZpZy53aWR0aD04LAogICAgICAgICAgICAgICAgICAgICAgICBmaWcuaGVpZ2h0PTgsCiAgICAgICAgICAgICAgICAgICAgICAgIGRwaT05NikKICBvbGRfb3B0aW9ucyA8LSBvcHRpb25zKGRpZ2l0cz00LAogICAgICAgICAgICAgICAgICAgICAgICAgc3RyaW5nc0FzRmFjdG9ycz1GQUxTRSwKICAgICAgICAgICAgICAgICAgICAgICAgIGtuaXRyLmR1cGxpY2F0ZS5sYWJlbD0iYWxsb3ciKQogIGdncGxvdDI6OnRoZW1lX3NldChnZ3Bsb3QyOjp0aGVtZV9idyhiYXNlX3NpemU9MTApKQogIHZlciA8LSAiMjAxODA0MDIiCiAgcHJldmlvdXNfZmlsZSA8LSAiaW5kZXguUm1kIgoKICB0bXAgPC0gdHJ5KHNtKGxvYWRtZShmaWxlbmFtZT1wYXN0ZTAoZ3N1YihwYXR0ZXJuPSJcXC5SbWQiLCByZXBsYWNlPSIiLCB4PXByZXZpb3VzX2ZpbGUpLCAiLXYiLCB2ZXIsICIucmRhLnh6IikpKSkKICBybWRfZmlsZSA8LSAiMDFfYW5ub3RhdGlvbl9sbWFqb3IuUm1kIgp9CmBgYAoKIyBBbm5vdGF0aW9uIHZlcnNpb246IGByIHZlcmAKCkxlaXNobWFuaWEgbWFqb3IgYW5ub3RhdGlvbiBkYXRhCj09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09CgpUaGVyZSBhcmUgYSBmZXcgbWV0aG9kcyBvZiBpbXBvcnRpbmcgYW5ub3RhdGlvbiBkYXRhIGludG8gUi4gIEkgd2lsbCBhdHRlbXB0
CnNvbWUgb2YgdGhlbSBpbiBwcmVwYXJhdGlvbiBmb3IgbG9hZGluZyB0aGVtIGludG8gdGhlIEwubWFqb3IgUk5BU2VxIGRhdGEuCgojIEFubm90YXRpb25IdWI6IGxvYWRpbmcgT3JnRGIKCkFubm90YXRpb25IdWIgaXMgYSBuZXdlciBzZXJ2aWNlIGFuZCBoYXMgcHJvbWlzZSB0byBiZSBhbiBleGNlbGxlbnQgdG9wLWxldmVsIHJlc291cmNlIGZvciBnYXRoZXJpbmcKYW5ub3RhdGlvbiBkYXRhLgoKYGBge3IgZGF0YV9pbnB1dF9nZW5vbWV9CnRtcCA8LSBzbShsaWJyYXJ5KEFubm90YXRpb25IdWIpKQphaCA9IHNtKEFubm90YXRpb25IdWIoKSkKb3JnZGJzIDwtIHNtKHF1ZXJ5KGFoLCAiT3JnRGIiKSkKbG1fb3JnZGIgPC0gc20ocXVlcnkoYWgsIGMoIk9yZ0RCIiwgIkxlaXNobWFuaWEiKSkpCmxtX29yZ2RiCmxtX29yZ2RiIDwtIGFoW1siQUg1OTYxMiJdXQoKbG1fb3JnZGIKIyMgSG9seSBjcmFwIGl0IHdvcmtlZCEKbG1fYW5ub3R2MSA8LSBsb2FkX29yZ2RiX2Fubm90YXRpb25zKGxtX29yZ2RiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAga2V5dHlwZT0iZW50cmV6aWQiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZmllbGRzPWMoImVudHJlemlkIiwgImFsaWFzIiwgImdlbmVuYW1lIiwgInJlZnNlcSIsICJzeW1ib2wiKSkKc3VtbWFyeShsbV9hbm5vdHYxKQpsbV9hbm5vdHYxIDwtIGxtX2Fubm90djFbWyJnZW5lcyJdXQpoZWFkKGxtX2Fubm90djEpCmBgYAoKIyBMb2FkaW5nIGZyb20gYmlvbWFydAoKQSBjb21wbGV0ZWx5IHNlcGFyYXRlIGFuZCBjb21wZXRpbmcgYW5ub3RhdGlvbiBzb3VyY2UgaXMgYmlvbWFydC4KCmBgYHtyIGxtYWpvcl9iaW9tYXJ0fQpsbV9hbm5vdHYyIDwtIHNtKGxvYWRfYmlvbWFydF9hbm5vdGF0aW9ucyhzcGVjaWVzPSJsbWFqb3IiLCBob3N0PSJwcm90aXN0cy5lbnNlbWJsLm9yZyIpKSRhbm5vdGF0aW9uCmhlYWQobG1fYW5ub3R2MikKbG1fb250b2xvZ3kgPC0gc20obG9hZF9iaW9tYXJ0X2dvKCJsbWFqb3IiLCBob3N0PSJwcm90aXN0cy5lbnNlbWJsLm9yZyIpKQpgYGAKCiMgTG9hZCBmcm9tIHRoZSB0cml0cnlwZGIKClRoZSBocGdsdG9vbHMgcGFja2FnZSBoYXMgc29tZSBpbXByb3ZlZCBtZXRob2RzIGZvciBjb2xsZWN0aW5nIGFubm90YXRpb24KaW5mb3JtYXRpb24gZGlyZWN0bHkgZnJvbSB0aGUgZXVwYXRoZGIgd2Vic2VydmljZXMgYXBpLiAgSXQgZG9lcyB0aGlzIGJ5IGZpcnN0CmRvd25sb2FkaW5nIGFsbCB0aGUgYXZhaWxhYmxlIGRhdGEgZm9yIGEgZ2l2ZW4gc3BlY2llcyBhbmQgdGhlbiBjcmVhdGluZyBhCnNxbGl0ZSBvcmdkYiBpbnN0YW5jZSBmcm9tIHRoZW0uCgojIyBFeGFtcGxlIG9yZ2RiIGNyZWF0aW9uCgpUaGUgY3JlYXRpb24gb2Ygb3JnZGJzIHRha2VzIGEgbG9uZyB0aW1lLCBzbyBoZXJlIGlzIGFuIGV4YW1wbGUgaW52b2NhdGlvbi4KCmBgYHtyIHRyaXRyeXBkYiwgZXZhbD1GQUxTRX0KdGVzdGluZ19tYWpvciA8LSBt
YWtlX2V1cGF0aF9vcmdhbmlzbWRiaSgibWFqb3IiKQpgYGAKCkFzc3VtaW5nIHRoZSBhYm92ZSBwYWNrYWdlcyBnb3QgY3JlYXRlZCwgd2UgbWF5IGxvYWQgdGhlbSBhbmQgZXh0cmFjdCB0aGUgYW5ub3RhdGlvbiBkYXRhLgoKYGBge3IgbG1ham9yX29yZ2RifQptYWpvcl9uYW1lcyA8LSBnZXRfZXVwYXRoX3BrZ25hbWVzKCJtYWpvciIpCm1ham9yX25hbWVzJG9yZ2RiCgp3YW50ZWRfZmllbGRzIDwtIGMoImNkc19sZW5ndGgiLCAiY2hyb21vc29tZSIsICJlbnRyZXpfZ2VuZV9pZCIgLCAiZ2VuZV9uYW1lX29yX3N5bWJvbCIsCiAgICAgICAgICAgICAgICAgICAiZ2VuZV9zdHJhbmQiLCAiZ2lkIiwgImdvX2dvX2lkIiwgImdvX2dvX3Rlcm1fbmFtZSIsICJnb19vbnRvbG9neSIsCiAgICAgICAgICAgICAgICAgICAiaW50ZXJwcm9fZGVzY3JpcHRpb24iICwiaW50ZXJwcm9fZV92YWx1ZSIsICJ0eXBlX2dlbmVfdHlwZSIpCmxtX29yZyA8LSBsb2FkX29yZ2RiX2Fubm90YXRpb25zKCJvcmcuTG1ham9yLkZyaWVkbGluLnYzNi5lZy5kYiIsIGtleXR5cGU9ImdpZCIsIGZpZWxkcz13YW50ZWRfZmllbGRzKSRnZW5lcwprbml0cjo6a2FibGUoaGVhZChsbV9vcmcpKQpgYGAKCiMgUmVhZCBhIGdmZiBmaWxlCgpJbiBjb250cmFzdCwgaXQgaXMgcG9zc2libGUgdG8gbG9hZCBtb3N0IGFubm90YXRpb25zIG9mIGludGVyZXN0IGRpcmVjdGx5IGZyb20gdGhlIGdmZiBmaWxlcyB1c2VkIGluCnRoZSBhbGlnbm1lbnRzLgoKYGBge3IgZ2Vub21lX2lucHV0fQojIyBUaGUgb2xkIHdheSBvZiBnZXR0aW5nIGdlbm9tZS9hbm5vdGF0aW9uIGRhdGEKbG1fZ2ZmIDwtICJyZWZlcmVuY2UvbG1ham9yLmdmZi5neiIKbG1fZ2ZmX2Fubm90YXRpb25zIDwtIGxvYWRfZ2ZmX2Fubm90YXRpb25zKGxtX2dmZiwgdHlwZT0iZ2VuZSIpCnJvd25hbWVzKGxtX2dmZl9hbm5vdGF0aW9ucykgPC0gbWFrZS5uYW1lcyhsbV9nZmZfYW5ub3RhdGlvbnMkTmFtZSwgdW5pcXVlPVRSVUUpCmhlYWQobG1fZ2ZmX2Fubm90YXRpb25zKQpgYGAKCiMgUHV0dGluZyB0aGUgcGllY2VzIHRvZ2V0aGVyCgpJbiB0aGUgZm9sbG93aW5nIGJsb2NrIHdlIGNyZWF0ZSBhbiBleHByZXNzaW9uc2V0IHVzaW5nIHRoZSBzYW1wbGUgc2hlZXQgYW5kIHRoZQphbm5vdGF0aW9ucy4KCkFubm95aW5nbHksIHRoZSBnZmYgYW5ub3RhdGlvbnMgYXJlIGtleWVkIGluIGEgcGVjdWxpYXIgZmFzaGlvbi4gIFRoZXJlZm9yZSBJCm5lZWQgdG8gZG8gYSBsaXR0bGUgd29yayB0byBtZXJnZSB0aGVtLgoKYGBge3IgY3JlYXRlX2V4cHR9CmxtX2Fubm90YXRpb25zIDwtIGxtX2Fubm90djIKcm93bmFtZXMobG1fYW5ub3RhdGlvbnMpIDwtIHBhc3RlMCgiZXhvbl8iLCByb3duYW1lcyhsbV9hbm5vdGF0aW9ucyksICIxIikKcm93bmFtZXMobG1fYW5ub3RhdGlvbnMpIDwtIGdzdWIocGF0dGVybj0ibVJOQSIsIHJlcGxhY2VtZW50PSIiLCB4PXJvd25hbWVzKGxtX2Fubm90YXRpb25zKSkKbG1fZXhwdCA8LSBj
cmVhdGVfZXhwdChtZXRhZGF0YT0ic2FtcGxlX3NoZWV0cy9hbGxfc2FtcGxlcy54bHN4IiwKICAgICAgICAgICAgICAgICAgICAgICBnZW5lX2luZm89bG1fYW5ub3RhdGlvbnMsCiAgICAgICAgICAgICAgICAgICAgICAgZmlsZV9jb2x1bW49ImxtYWpvcmZpbGUiKQojIyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGFtYXN0ICAgICAgIGluZjEyICAgaW5mMTRkICAgICB1bmluZjEyaHIgIHVuaW5mMTRkICBwcm8KIyNsbV9leHB0IDwtIHNldF9leHB0X2NvbG9ycyhleHB0PWxtX2V4cHQsIGNvbG9ycz1jKCJkYXJrZ3JheSIsICJncmF5IiwgImRhcmtyZWQiLCAicGluayIsICJkYXJrYmx1ZSIsICJibHVlIikpCmxtX2V4cHQgPC0gc2V0X2V4cHRfY29sb3JzKGV4cHQ9bG1fZXhwdCwgY29sb3JzPWMoImdyYXkiLCAiZGFya3JlZCIsICJkYXJrYmx1ZSIsICJwaW5rIiwgImJsdWUiLCAiZGFya2dyZWVuIikpCmxpYnJhcnkoQmlvYmFzZSkKaGVhZChleHBycyhsbV9leHB0JGV4cHJlc3Npb25zZXQpKQpoZWFkKGZEYXRhKGxtX2V4cHQkZXhwcmVzc2lvbnNldCkpCmhlYWQocERhdGEobG1fZXhwdCRleHByZXNzaW9uc2V0KSkKYGBgCgpgYGB7ciBzYXZlbWV9CmlmICghaXNUUlVFKGdldDAoInNraXBfbG9hZCIpKSkgewogIHBhbmRlcjo6cGFuZGVyKHNlc3Npb25JbmZvKCkpCiAgbWVzc2FnZShwYXN0ZTAoIlRoaXMgaXMgaHBnbHRvb2xzIGNvbW1pdDogIiwgZ2V0X2dpdF9jb21taXQoKSkpCiAgdGhpc19zYXZlIDwtIHBhc3RlMChnc3ViKHBhdHRlcm49IlxcLlJtZCIsIHJlcGxhY2U9IiIsIHg9cm1kX2ZpbGUpLCAiLXYiLCB2ZXIsICIucmRhLnh6IikKICBtZXNzYWdlKHBhc3RlMCgiU2F2aW5nIHRvICIsIHRoaXNfc2F2ZSkpCiAgdG1wIDwtIHNtKHNhdmVtZShmaWxlbmFtZT10aGlzX3NhdmUpKQp9CmBgYAo=