There are a few methods of importing annotation data into R. I will attempt some of them in preparation for merging the resulting annotations with the L. major RNASeq data.
AnnotationHub is a newer service and has promise to be an excellent top-level resource for gathering annotation data.
## Attach AnnotationHub; sm() is presumably the hpgltools wrapper which
## silences messages -- confirm against hpgltools.
tmp <- sm(library(AnnotationHub))
## Connect to the hub.  Use `<-` for assignment, as everywhere else in this
## document.
ah <- sm(AnnotationHub())
## Records matching "OrgDb", then those matching both "OrgDB" and "Leishmania".
orgdbs <- sm(query(ah, "OrgDb"))
lm_orgdb <- sm(query(ah, c("OrgDB", "Leishmania")))
lm_orgdb
## AnnotationHub with 4 records
## # snapshotDate(): 2017-10-27
## # $dataprovider: ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/
## # $species: Leishmania donovani, Leishmania major_strain_Friedlin, Leis...
## # $rdataclass: OrgDb
## # additional mcols(): taxonomyid, genome, description,
## # coordinate_1_based, maintainer, rdatadateadded, preparerclass,
## # tags, rdatapath, sourceurl, sourcetype
## # retrieve records with, e.g., 'object[["AH59612"]]'
##
## title
## AH59612 | org.Leishmania_major_strain_Friedlin.eg.sqlite
## AH59675 | org.Leishmania_mexicana_MHOM|GT|2001|U1103.eg.sqlite
## AH59676 | org.Leishmania_donovani.eg.sqlite
## AH59684 | org.Leishmania_panamensis.eg.sqlite
## Retrieve record AH59612 (the L. major strain Friedlin entry in the listing
## printed above); lm_orgdb now holds the OrgDb object rather than the query hits.
lm_orgdb <- ah[["AH59612"]]
## loading from cache '/home/trey//.AnnotationHub/66358'
lm_orgdb
## OrgDb object:
## | DBSCHEMAVERSION: 2.1
## | DBSCHEMA: NOSCHEMA_DB
## | ORGANISM: Leishmania major_strain_Friedlin
## | SPECIES: Leishmania major_strain_Friedlin
## | CENTRALID: GID
## | Taxonomy ID: 347515
## | Db type: OrgDb
## | Supporting package: AnnotationDbi
##
## Please see: help('select') for usage information
## Holy crap it worked!
## Extract the requested columns from the AnnotationHub OrgDb, keyed by entrezid.
lm_annotv1 <- load_orgdb_annotations(
  lm_orgdb,
  keytype = "entrezid",
  fields = c("entrezid", "alias", "genename", "refseq", "symbol")
)
## Unable to find TYPE in the db, removing it.
## Unable to find TXSTRAND in the db, removing it.
## Unable to find TXSTART in the db, removing it.
## Unable to find TXEND in the db, removing it.
## Extracted all gene ids.
## 'select()' returned 1:many mapping between keys and columns
## The result is a list with 'genes' and 'transcripts' elements; here only
## 'genes' is populated.
summary(lm_annotv1)
## Length Class Mode
## genes 6 data.frame list
## transcripts 0 -none- NULL
## Keep just the gene-level data frame.
lm_annotv1 <- lm_annotv1[["genes"]]
head(lm_annotv1)
## entrezid genename chr alias refseq
## X12980396 12980396 ncRNA 20 LMJF_20_snoRNA0211 XR_002460236.1
## X12980396.1 12980396 ncRNA 20 LMJF_20_snoRNA0211 XR_002460237.1
## X12980396.2 12980396 ncRNA 20 LM20Cs1C1.1 XR_002460236.1
## X12980396.3 12980396 ncRNA 20 LM20Cs1C1.1 XR_002460237.1
## X12980397 12980397 ncRNA 20 LMJF_20_snoRNA0220 XR_002460308.1
## X12980397.1 12980397 ncRNA 20 LMJF_20_snoRNA0220 XR_002460309.1
## symbol
## X12980396 LM20Cs1C1.1
## X12980396.1 LM20Cs1C1.1
## X12980396.2 LM20Cs1C1.1
## X12980396.3 LM20Cs1C1.1
## X12980397 LM20Cs1C1.10
## X12980397.1 LM20Cs1C1.10
A completely separate and competing annotation source is biomart.
## Query the biomart at protists.ensembl.org for "lmajor" and keep only the
## 'annotation' element of what comes back.
lm_annotv2 <- sm(
  load_biomart_annotations(species = "lmajor", host = "protists.ensembl.org")
)$annotation
head(lm_annotv2)
## transcriptID geneID
## LmjF.01.0010.mRNA LmjF.01.0010:mRNA LmjF.01.0010
## LmjF.01.0020.mRNA LmjF.01.0020:mRNA LmjF.01.0020
## LmjF.01.0030.mRNA LmjF.01.0030:mRNA LmjF.01.0030
## LmjF.01.0040.mRNA LmjF.01.0040:mRNA LmjF.01.0040
## LmjF.01.0050.mRNA LmjF.01.0050:mRNA LmjF.01.0050
## LmjF.01.0060.mRNA LmjF.01.0060:mRNA LmjF.01.0060
## Description
## LmjF.01.0010.mRNA hypothetical protein, unknown function.[Source:GeneDB;Acc:LmjF.01.0010]
## LmjF.01.0020.mRNA hypothetical protein, conserved.[Source:GeneDB;Acc:LmjF.01.0020]
## LmjF.01.0030.mRNA MCAK-like kinesin, putative.[Source:GeneDB;Acc:LmjF.01.0030]
## LmjF.01.0040.mRNA hypothetical protein, unknown function.[Source:GeneDB;Acc:LmjF.01.0040]
## LmjF.01.0050.mRNA carboxylase, putative.[Source:GeneDB;Acc:LmjF.01.0050]
## LmjF.01.0060.mRNA hypothetical protein, conserved.[Source:GeneDB;Acc:LmjF.01.0060]
## Type length chromosome strand start end
## LmjF.01.0010.mRNA protein_coding 999 1 -1 3704 4702
## LmjF.01.0020.mRNA protein_coding 1650 1 -1 5790 7439
## LmjF.01.0030.mRNA protein_coding 2007 1 -1 9061 11067
## LmjF.01.0040.mRNA protein_coding 570 1 -1 12073 12642
## LmjF.01.0050.mRNA protein_coding 1998 1 -1 15025 17022
## LmjF.01.0060.mRNA protein_coding 750 1 -1 18137 18886
## Collect the ontology data via load_biomart_go() from the same biomart host.
lm_ontology <- sm(
  load_biomart_go("lmajor", host = "protists.ensembl.org")
)
The hpgltools package has some improved methods for collecting annotation information directly from the EuPathDB web services API. It does this by first downloading all the available data for a given species and then creating a SQLite OrgDb instance from it.
The creation of orgdbs takes a long time, so here is an example invocation.
## Example invocation only; as noted in the text, creating orgdbs is slow.
## NOTE(review): presumably this builds the eupathdb-derived packages for the
## species matching "major" -- confirm against hpgltools.
testing_major <- make_eupath_organismdbi("major")
Assuming the above packages got created, we may load them and extract the annotation data.
## Look up the package names for the "major" query; when several strains
## match, the first hit is chosen (see the message below).
major_names <- get_eupath_pkgnames("major")
## Starting metadata download.
## Finished metadata download.
## Found the following hits: Leishmania major strain Friedlin, Leishmania major strain LV39c5, Leishmania major strain SD 75.1, choosing the first.
## Name of the OrgDb package for the chosen strain.
major_names$orgdb
## [1] "org.Lmajor.Friedlin.v36.eg.db"
## Columns to extract from the eupathdb-derived OrgDb.
wanted_fields <- c(
  "cds_length", "chromosome", "entrez_gene_id", "gene_name_or_symbol",
  "gene_strand", "gid", "go_go_id", "go_go_term_name", "go_ontology",
  "interpro_description", "interpro_e_value", "type_gene_type"
)
## Use the package name reported by get_eupath_pkgnames() instead of a
## hard-coded copy, so the two cannot drift apart when the eupathdb version
## changes.  In this build it is "org.Lmajor.Friedlin.v36.eg.db".
## Only the 'genes' element of the result is kept.
lm_org <- load_orgdb_annotations(
  major_names$orgdb,
  keytype = "gid",
  fields = wanted_fields
)$genes
## Unable to find GENENAME, setting it to GENE_NAME_OR_SYMBOL.
## Unable to find TYPE in the db, removing it.
## Unable to find CHR in the db, removing it.
## Unable to find TXSTRAND in the db, removing it.
## Unable to find TXSTART in the db, removing it.
## Unable to find TXEND in the db, removing it.
## Extracted all gene ids.
## 'select()' returned 1:many mapping between keys and columns
## Render the first rows of the eupathdb annotations as a table.
knitr::kable(head(lm_org))
gid | gene_name_or_symbol | cds_length | chromosome | entrez_gene_id | gene_strand | go_go_id | go_go_term_name | go_ontology | interpro_description | interpro_e_value | type_gene_type | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
LmjF.01.0010 | LmjF.01.0010 | 999.0 | 1 | reverse | NA | NA | NA | Protein of unknown function (DUF2946) | 7.7e-18 | protein coding | ||
LmjF.01.0010.1 | LmjF.01.0010 | 999.0 | 1 | reverse | NA | NA | NA | Prokaryotic membrane lipoprotein lipid attachment site profile | 5.0 | protein coding | ||
LmjF.01.0020 | LmjF.01.0020 | 1650.0 | 1 | reverse | NA | NA | NA | Endonuclease/Exonuclease/phosphatase family | 1.3e-10 | protein coding | ||
LmjF.01.0020.1 | LmjF.01.0020 | 1650.0 | 1 | reverse | NA | NA | NA | DNase I-like | 7.9e-21 | protein coding | ||
LmjF.01.0030 | LmjF.01.0030 | KIN13-1 | 2007.0 | 1 | reverse | GO:0007018 | microtubule-based movement | Biological Process | Kinesin motor domain | 1.2e-94 | protein coding | |
LmjF.01.0030.1 | LmjF.01.0030 | KIN13-1 | 2007.0 | 1 | reverse | GO:0007018 | microtubule-based movement | Biological Process | Kinesin motor domain profile | 43.0 | protein coding |
In contrast, it is possible to load most annotations of interest directly from the gff files used in the alignments.
## The old way of getting genome/annotation data: read the entries of
## type "gene" directly from the reference gff.
lm_gff <- "reference/lmajor.gff.gz"
lm_gff_annotations <- load_gff_annotations(lm_gff, type = "gene")
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=TRUE)
## Had a successful gff import with rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=TRUE)
## Returning a df with 24 columns and 9379 rows.
## Key the rows by the Name column, made syntactically valid and unique.
rownames(lm_gff_annotations) <- make.names(
  lm_gff_annotations[["Name"]],
  unique = TRUE
)
head(lm_gff_annotations)
## seqnames start end width strand source type score phase
## LmjF.01.0010 LmjF.01 3704 4702 999 - TriTrypDB gene NA NA
## LmjF.01.0020 LmjF.01 5790 7439 1650 - TriTrypDB gene NA NA
## LmjF.01.0030 LmjF.01 9061 11067 2007 - TriTrypDB gene NA NA
## LmjF.01.0040 LmjF.01 12073 12642 570 - TriTrypDB gene NA NA
## LmjF.01.0050 LmjF.01 15025 17022 1998 - TriTrypDB gene NA NA
## LmjF.01.0060 LmjF.01 18137 18886 750 - TriTrypDB gene NA NA
## ID Name
## LmjF.01.0010 LmjF.01.0010 LmjF.01.0010
## LmjF.01.0020 LmjF.01.0020 LmjF.01.0020
## LmjF.01.0030 LmjF.01.0030 LmjF.01.0030
## LmjF.01.0040 LmjF.01.0040 LmjF.01.0040
## LmjF.01.0050 LmjF.01.0050 LmjF.01.0050
## LmjF.01.0060 LmjF.01.0060 LmjF.01.0060
## description size web_id
## LmjF.01.0010 hypothetical+protein,+unknown+function 999 LmjF.01.0010
## LmjF.01.0020 hypothetical+protein,+conserved 1650 LmjF.01.0020
## LmjF.01.0030 Kinesin-13+1,+putative+(KIN13-1) 2007 LmjF.01.0030
## LmjF.01.0040 hypothetical+protein,+unknown+function 570 LmjF.01.0040
## LmjF.01.0050 carboxylase,+putative 1998 LmjF.01.0050
## LmjF.01.0060 hypothetical+protein,+conserved 750 LmjF.01.0060
## molecule_type organism_name translation_table topology
## LmjF.01.0010 <NA> <NA> <NA> <NA>
## LmjF.01.0020 <NA> <NA> <NA> <NA>
## LmjF.01.0030 <NA> <NA> <NA> <NA>
## LmjF.01.0040 <NA> <NA> <NA> <NA>
## LmjF.01.0050 <NA> <NA> <NA> <NA>
## LmjF.01.0060 <NA> <NA> <NA> <NA>
## localization Dbxref locus_tag
## LmjF.01.0010 <NA> LmjF.01.0010
## LmjF.01.0020 <NA> LmjF.01.0020
## LmjF.01.0030 <NA> LmjF.01.0030
## LmjF.01.0040 <NA> LmjF.01.0040
## LmjF.01.0050 <NA> LmjF.01.0050
## LmjF.01.0060 <NA> LmjF.01.0060
## Alias
## LmjF.01.0010 321438052, 389592307, LmjF1.0010, LmjF01.0010, LmjF.01.0010, LmjF01.0010:pep, LmjF01.0010:mRNA
## LmjF.01.0020 321438053, 389592309, LmjF1.0020, LmjF01.0020, LmjF.01.0020, LmjF01.0020:pep, LmjF01.0020:mRNA
## LmjF.01.0030 KIN13-1, Kif-13-1, 321438054, 389592311, LmjF1.0030, LmjF01.0030, LmjF.01.0030, LmjF01.0030:pep, LmjF01.0030:mRNA
## LmjF.01.0040 321438055, 389592313, LmjF1.0040, LmjF01.0040, LmjF.01.0040, LmjF01.0040:pep, LmjF01.0040:mRNA
## LmjF.01.0050 321438056, 389592315, LmjF1.0050, LmjF01.0050, LmjF.01.0050, LmjF01.0050:pep, LmjF01.0050:mRNA
## LmjF.01.0060 321438057, 389592317, LmjF1.0060, LmjF01.0060, LmjF.01.0060, LmjF01.0060:pep, LmjF01.0060:mRNA
## Parent Ontology_term
## LmjF.01.0010
## LmjF.01.0020
## LmjF.01.0030
## LmjF.01.0040
## LmjF.01.0050
## LmjF.01.0060
In the following block we create an expressionset using the sample sheet and the annotations.
Annoyingly, the gff annotations are keyed in a peculiar fashion: the count tables derived from them use IDs such as exon_LmjF.01.0010.1. Therefore I need to do a little work on the annotation rownames to merge them.
## Start from the biomart annotations and rewrite their rownames so that
## "LmjF.01.0010.mRNA" becomes "exon_LmjF.01.0010.1", the form used by the
## count tables.
lm_annotations <- lm_annotv2
rownames(lm_annotations) <- gsub(
  pattern = "mRNA",
  replacement = "",
  x = paste0("exon_", rownames(lm_annotations), "1")
)
## Bring the sample sheet, the count tables it points to (via the lmajorfile
## column), and the annotations together.
lm_expt <- create_expt(
  metadata = "sample_sheets/all_samples.xlsx",
  gene_info = lm_annotations,
  file_column = "lmajorfile"
)
## Reading the sample metadata.
## The sample definitions comprises: 24, 25 rows, columns.
## Reading count tables.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0840/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0841/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0842/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0843/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0844/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0845/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0846/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0847/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0848/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0849/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0850/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0851/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0852/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0853/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0854/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0855/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0856/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0857/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0858/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0859/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0860/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0861/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0862/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/lmajor_sacks_2017/preprocessing/hpgl0863/outputs/tophat_lmajor/accepted_paired.count.xz contains 9470 rows and merges to 9470 rows.
## Finished reading count tables.
## Matched 8324 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## amast inf12 inf14d uninf12hr uninf14d pro
##lm_expt <- set_expt_colors(expt=lm_expt, colors=c("darkgray", "gray", "darkred", "pink", "darkblue", "blue"))
## Six colors are supplied -- presumably one per condition listed in the
## output just above; confirm the ordering against set_expt_colors().
lm_expt <- set_expt_colors(
  expt = lm_expt,
  colors = c("gray", "darkred", "darkblue", "pink", "blue", "darkgreen")
)
## Biobase is attached here for exprs(), fData() and pData().
library(Biobase)
head(exprs(lm_expt$expressionset))
## HPGL0840 HPGL0841 HPGL0842 HPGL0843 HPGL0844 HPGL0845
## exon_LmjF.01.0010.1 30 22 10 21 2 3
## exon_LmjF.01.0020.1 10 2 12 16 9 1
## exon_LmjF.01.0030.1 34 47 26 19 15 26
## exon_LmjF.01.0040.1 1 0 8 0 0 0
## exon_LmjF.01.0050.1 50 63 61 45 25 11
## exon_LmjF.01.0060.1 19 15 6 13 3 12
## HPGL0846 HPGL0847 HPGL0848 HPGL0849 HPGL0850 HPGL0851
## exon_LmjF.01.0010.1 26 10 12 28 0 0
## exon_LmjF.01.0020.1 0 18 10 57 1 4
## exon_LmjF.01.0030.1 21 58 40 124 0 0
## exon_LmjF.01.0040.1 1 9 5 7 0 0
## exon_LmjF.01.0050.1 16 41 62 85 78 0
## exon_LmjF.01.0060.1 27 27 34 44 0 0
## HPGL0852 HPGL0853 HPGL0854 HPGL0855 HPGL0856 HPGL0857
## exon_LmjF.01.0010.1 0 0 0 0 13 38
## exon_LmjF.01.0020.1 0 0 8 2 30 17
## exon_LmjF.01.0030.1 7 0 3 10 52 51
## exon_LmjF.01.0040.1 0 0 0 0 4 6
## exon_LmjF.01.0050.1 0 0 0 0 20 48
## exon_LmjF.01.0060.1 0 0 0 0 10 17
## HPGL0858 HPGL0859 HPGL0860 HPGL0861 HPGL0862 HPGL0863
## exon_LmjF.01.0010.1 18 18 0 0 11 0
## exon_LmjF.01.0020.1 19 9 40 8 69 9
## exon_LmjF.01.0030.1 83 58 19 41 8 184
## exon_LmjF.01.0040.1 1 6 14 12 6 38
## exon_LmjF.01.0050.1 58 38 10 30 34 4
## exon_LmjF.01.0060.1 29 34 22 12 29 0
## Show the per-gene annotations now attached to the expressionset.
head(fData(lm_expt$expressionset))
## transcriptID geneID
## exon_LmjF.01.0010.1 LmjF.01.0010:mRNA LmjF.01.0010
## exon_LmjF.01.0020.1 LmjF.01.0020:mRNA LmjF.01.0020
## exon_LmjF.01.0030.1 LmjF.01.0030:mRNA LmjF.01.0030
## exon_LmjF.01.0040.1 LmjF.01.0040:mRNA LmjF.01.0040
## exon_LmjF.01.0050.1 LmjF.01.0050:mRNA LmjF.01.0050
## exon_LmjF.01.0060.1 LmjF.01.0060:mRNA LmjF.01.0060
## Description
## exon_LmjF.01.0010.1 hypothetical protein, unknown function.[Source:GeneDB;Acc:LmjF.01.0010]
## exon_LmjF.01.0020.1 hypothetical protein, conserved.[Source:GeneDB;Acc:LmjF.01.0020]
## exon_LmjF.01.0030.1 MCAK-like kinesin, putative.[Source:GeneDB;Acc:LmjF.01.0030]
## exon_LmjF.01.0040.1 hypothetical protein, unknown function.[Source:GeneDB;Acc:LmjF.01.0040]
## exon_LmjF.01.0050.1 carboxylase, putative.[Source:GeneDB;Acc:LmjF.01.0050]
## exon_LmjF.01.0060.1 hypothetical protein, conserved.[Source:GeneDB;Acc:LmjF.01.0060]
## Type length chromosome strand start end
## exon_LmjF.01.0010.1 protein_coding 999 1 -1 3704 4702
## exon_LmjF.01.0020.1 protein_coding 1650 1 -1 5790 7439
## exon_LmjF.01.0030.1 protein_coding 2007 1 -1 9061 11067
## exon_LmjF.01.0040.1 protein_coding 570 1 -1 12073 12642
## exon_LmjF.01.0050.1 protein_coding 1998 1 -1 15025 17022
## exon_LmjF.01.0060.1 protein_coding 750 1 -1 18137 18886
## Show the per-sample metadata attached to the expressionset.
head(pData(lm_expt$expressionset))
## dnarnanumber projectid rmlid inst organism
## HPGL0840 RNA10165 HS_157 10845 HS Mouse/Leshmania
## HPGL0841 RNA10166 HS_157 10851 HS Mouse/Leshmania
## HPGL0842 RNA10167 HS_157 10857 HS Mouse/Leshmania
## HPGL0843 RNA10168 HS_157 10863 HS Mouse/Leshmania
## HPGL0844 RNA10169 HS_157 10846 HS Mouse/Leshmania
## HPGL0845 RNA10170 HS_157 10852 HS Mouse/Leshmania
## investigator batch condition sampleid
## HPGL0840 SangLee/David Sacks/A.Sher a promastigote HPGL0840
## HPGL0841 SangLee/David Sacks/A.Sher b promastigote HPGL0841
## HPGL0842 SangLee/David Sacks/A.Sher c promastigote HPGL0842
## HPGL0843 SangLee/David Sacks/A.Sher d promastigote HPGL0843
## HPGL0844 SangLee/David Sacks/A.Sher a amastigote HPGL0844
## HPGL0845 SangLee/David Sacks/A.Sher b amastigote HPGL0845
## investigatorsample librarystartdate preppedby
## HPGL0840 Promastigote1 42830 KB
## HPGL0841 Promastigote2 42830 KB
## HPGL0842 Promastigote3 42830 KB
## HPGL0843 Promastigote4 42830 KB
## HPGL0844 Amastigote1 42830 KB
## HPGL0845 Amastigote2 42830 KB
## librarytype
## HPGL0840 SMARTer Stranded Total RNA - Pico Mammalian_300pg-starting
## HPGL0841 SMARTer Stranded Total RNA - Pico Mammalian_300pg-starting
## HPGL0842 SMARTer Stranded Total RNA - Pico Mammalian_300pg-starting
## HPGL0843 SMARTer Stranded Total RNA - Pico Mammalian_300pg-starting
## HPGL0844 SMARTer Stranded Total RNA - Pico Mammalian_300pg-starting
## HPGL0845 SMARTer Stranded Total RNA - Pico Mammalian_300pg-starting
## averagesize titrationamt sequencelength lane indexmid
## HPGL0840 340 10 100/8/8/100 1,2 D501-D701
## HPGL0841 310 10 100/8/8/100 1,2 D501-D707
## HPGL0842 340 10 100/8/8/100 1,2 D502-D701
## HPGL0843 320 10 100/8/8/100 1,2 D502-D707
## HPGL0844 310 10 100/8/8/100 1,2 D501-D702
## HPGL0845 355 10 100/8/8/100 1,2 D501-D708
## startrundate
## HPGL0840 4/7/2017; 4/11/2017
## HPGL0841 4/7/2017; 4/11/2017
## HPGL0842 4/7/2017; 4/11/2017
## HPGL0843 4/7/2017; 4/11/2017
## HPGL0844 4/7/2017; 4/11/2017
## HPGL0845 4/7/2017; 4/11/2017
## runname
## HPGL0840 170407_SN406_0395_Ahjncnbcxy ;170411_SN406_0396_BHJNCCBCXY
## HPGL0841 170407_SN406_0395_Ahjncnbcxy ;170411_SN406_0396_BHJNCCBCXY
## HPGL0842 170407_SN406_0395_Ahjncnbcxy ;170411_SN406_0396_BHJNCCBCXY
## HPGL0843 170407_SN406_0395_Ahjncnbcxy ;170411_SN406_0396_BHJNCCBCXY
## HPGL0844 170407_SN406_0395_Ahjncnbcxy ;170411_SN406_0396_BHJNCCBCXY
## HPGL0845 170407_SN406_0395_Ahjncnbcxy ;170411_SN406_0396_BHJNCCBCXY
## samplenumber mousefile
## HPGL0840 840 <NA>
## HPGL0841 841 <NA>
## HPGL0842 842 <NA>
## HPGL0843 843 <NA>
## HPGL0844 844 <NA>
## HPGL0845 845 <NA>
## lmajorfile
## HPGL0840 preprocessing/hpgl0840/outputs/tophat_lmajor/accepted_paired.count.xz
## HPGL0841 preprocessing/hpgl0841/outputs/tophat_lmajor/accepted_paired.count.xz
## HPGL0842 preprocessing/hpgl0842/outputs/tophat_lmajor/accepted_paired.count.xz
## HPGL0843 preprocessing/hpgl0843/outputs/tophat_lmajor/accepted_paired.count.xz
## HPGL0844 preprocessing/hpgl0844/outputs/tophat_lmajor/accepted_paired.count.xz
## HPGL0845 preprocessing/hpgl0845/outputs/tophat_lmajor/accepted_paired.count.xz
## lmajor395
## HPGL0840 preprocessing/run0395/hpgl0840/outputs/bowtie2_lmajor/hpgl0840-trimmed.count.xz
## HPGL0841 <NA>
## HPGL0842 preprocessing/run0395/hpgl0842/outputs/bowtie2_lmajor/hpgl0842-trimmed.count.xz
## HPGL0843 preprocessing/run0395/hpgl0843/outputs/bowtie2_lmajor/hpgl0843-trimmed.count.xz
## HPGL0844 preprocessing/run0395/hpgl0844/outputs/bowtie2_lmajor/hpgl0844-trimmed.count.xz
## HPGL0845 preprocessing/run0395/hpgl0845/outputs/bowtie2_lmajor/hpgl0845-trimmed.count.xz
## mouse395
## HPGL0840 preprocessing/run0395/hpgl0840/outputs/kallisto_mmusculus/abundance.count.gz
## HPGL0841 preprocessing/run0395/hpgl0841/outputs/kallisto_mmusculus/abundance.count.gz
## HPGL0842 preprocessing/run0395/hpgl0842/outputs/kallisto_mmusculus/abundance.count.gz
## HPGL0843 preprocessing/run0395/hpgl0843/outputs/kallisto_mmusculus/abundance.count.gz
## HPGL0844 preprocessing/run0395/hpgl0844/outputs/kallisto_mmusculus/abundance.count.gz
## HPGL0845 preprocessing/run0395/hpgl0845/outputs/kallisto_mmusculus/abundance.count.gz
## file
## HPGL0840 null
## HPGL0841 null
## HPGL0842 null
## HPGL0843 null
## HPGL0844 null
## HPGL0845 null
## Unless skip_load is TRUE, print the session information, report the
## hpgltools commit, and save under a name derived from the Rmd file name and
## version.  get0() returns NULL when skip_load does not exist, so the block
## runs by default.  rmd_file and ver are expected to be defined earlier in the
## document.
if (!isTRUE(get0("skip_load"))) {
  pander::pander(sessionInfo())
  message(paste0("This is hpgltools commit: ", get_git_commit()))
  ## Spell out 'replacement' rather than relying on partial argument matching.
  this_save <- paste0(gsub(pattern="\\.Rmd", replacement="", x=rmd_file), "-v", ver, ".rda.xz")
  message(paste0("Saving to ", this_save))
  ## NOTE(review): saveme() presumably writes the current environment to
  ## this_save -- confirm against hpgltools.
  tmp <- sm(saveme(filename=this_save))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 2a0661d6e37f8a3d8831eb3bbd6347c0d9c4b3b7
## R> packrat::restore()
## This is hpgltools commit: Thu Mar 29 16:59:07 2018 -0400: 2a0661d6e37f8a3d8831eb3bbd6347c0d9c4b3b7
## Saving to 01_annotation_lmajor-v20180402.rda.xz