There are a few methods of importing annotation data into R. I will try several of them in preparation for attaching the annotations to the S. cerevisiae RNASeq data.
AnnotationHub is a newer service and promises to be an excellent top-level resource for gathering annotation data.
## Attach AnnotationHub and open a connection to the hub.
tmp <- sm(library(AnnotationHub))
ah <- sm(AnnotationHub())
## All OrgDb records, then only those which also match "Saccharomyces".
orgdbs <- sm(query(ah, "OrgDb"))
## In the 2017-10-27 snapshot the S. cerevisiae record is
## AH57980 | org.Sc.sgd.db.sqlite; hub ids differ between snapshots.
sc_orgdb <- sm(query(ah, c("OrgDB", "Saccharomyces")))
sc_orgdb
## AnnotationHub with 7 records
## # snapshotDate(): 2017-10-27
## # $dataprovider: ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/
## # $species: Saccharomyces cerevisiae, Saccharomyces eubayanus, Schizosaccharomyces cry...
## # $rdataclass: OrgDb
## # additional mcols(): taxonomyid, genome, description, coordinate_1_based,
## # maintainer, rdatadateadded, preparerclass, tags, rdatapath, sourceurl,
## # sourcetype
## # retrieve records with, e.g., 'object[["AH57980"]]'
##
## title
## AH57980 | org.Sc.sgd.db.sqlite
## AH59735 | org.Schizosaccharomyces_pombe.eg.sqlite
## AH59859 | org.Saccharomyces_eubayanus.eg.sqlite
## AH59874 | org.Schizosaccharomyces_cryophilus_OY26.eg.sqlite
## AH59893 | org.Schizosaccharomyces_octosporus_yFS286.eg.sqlite
## AH59899 | org.Zygosaccharomyces_rouxii.eg.sqlite
## AH59913 | org.Schizosaccharomyces_japonicus_yFS275.eg.sqlite
## Replace the query result with the single record AH57980, which the listing
## shows as org.Sc.sgd.db.sqlite.
sc_orgdb <- ah[["AH57980"]]
## loading from cache '/home/trey//.AnnotationHub/64726'
sc_orgdb
## OrgDb object:
## | DBSCHEMAVERSION: 2.1
## | Db type: OrgDb
## | Supporting package: AnnotationDbi
## | DBSCHEMA: YEAST_DB
## | ORGANISM: Saccharomyces cerevisiae
## | SPECIES: Yeast
## | YGSOURCENAME: Yeast Genome
## | YGSOURCEURL: http://downloads.yeastgenome.org/
## | YGSOURCEDATE: 14-Jan-2017
## | CENTRALID: ORF
## | TAXID: 559292
## | KEGGSOURCENAME: KEGG GENOME
## | KEGGSOURCEURL: ftp://ftp.genome.jp/pub/kegg/genomes
## | KEGGSOURCEDATE: 2011-Mar15
## | GOSOURCENAME: Gene Ontology
## | GOSOURCEURL: ftp://ftp.geneontology.org/pub/go/godatabase/archive/latest-lite/
## | GOSOURCEDATE: 2017-Nov01
## | EGSOURCEDATE: 2017-Nov6
## | EGSOURCENAME: Entrez Gene
## | EGSOURCEURL: ftp://ftp.ncbi.nlm.nih.gov/gene/DATA
## | ENSOURCEDATE: 2017-Aug23
## | ENSOURCENAME: Ensembl
## | ENSOURCEURL: ftp://ftp.ensembl.org/pub/current_fasta
## | UPSOURCENAME: Uniprot
## | UPSOURCEURL: http://www.UniProt.org/
## | UPSOURCEDATE: Tue Nov 7 21:11:11 2017
##
## Please see: help('select') for usage information
## Holy crap it worked!
## Ask the OrgDb for a handful of per-gene fields.
sc_annotv1 <- load_orgdb_annotations(sc_orgdb,
                                     fields=c("alias", "description", "entrezid",
                                              "genename", "sgd"))
## Unable to find TYPE in the db, removing it.
## Unable to find CHR in the db, removing it.
## Unable to find TXSTRAND in the db, removing it.
## Unable to find TXSTART in the db, removing it.
## Unable to find TXEND in the db, removing it.
## Extracted all gene ids.
## 'select()' returned 1:many mapping between keys and columns
## Keep only the "genes" element of the returned list.
sc_annotv1 <- sc_annotv1[["genes"]]
head(sc_annotv1)
## ensembl genename alias
## YAL068C YAL068C PAU8 seripauperin PAU8
## YAL068C.1 YAL068C PAU9 seripauperin PAU9
## YAL068C.2 YAL068C PAU11 seripauperin PAU11
## YGL261C YGL261C PAU8 seripauperin PAU8
## YGL261C.1 YGL261C PAU9 seripauperin PAU9
## YGL261C.2 YGL261C PAU11 seripauperin PAU11
## description
## YAL068C Protein of unknown function; member of the seripauperin multigene family encoded mainly in subtelomeric regions
## YAL068C.1 Protein of unknown function; member of the seripauperin multigene family encoded mainly in subtelomeric regions; SWAT-GFP and mCherry fusion proteins localize to the endoplasmic reticulum and vacuole respectively
## YAL068C.2 Putative protein of unknown function; member of the seripauperin multigene family encoded mainly in subtelomeric regions; mRNA expression appears to be regulated by SUT1 and UPC2
## YGL261C Protein of unknown function; member of the seripauperin multigene family encoded mainly in subtelomeric regions
## YGL261C.1 Protein of unknown function; member of the seripauperin multigene family encoded mainly in subtelomeric regions; SWAT-GFP and mCherry fusion proteins localize to the endoplasmic reticulum and vacuole respectively
## YGL261C.2 Putative protein of unknown function; member of the seripauperin multigene family encoded mainly in subtelomeric regions; mRNA expression appears to be regulated by SUT1 and UPC2
## entrezid sgd
## YAL068C 851229 S000002142
## YAL068C.1 852163 S000007592
## YAL068C.2 852630 S000003230
## YGL261C 851229 S000002142
## YGL261C.1 852163 S000007592
## YGL261C.2 852630 S000003230
## Make the sacCer3 TxDb package available, attach it, and keep a short handle
## to the TxDb object it provides.
## NOTE(review): require.auto() presumably installs the package when it is
## missing, and sm() presumably silences messages -- confirm against hpgltools.
require.auto("TxDb.Scerevisiae.UCSC.sacCer3.sgdGene")
## [1] 0
tmp <- sm(library(TxDb.Scerevisiae.UCSC.sacCer3.sgdGene))
sc_txdb <- TxDb.Scerevisiae.UCSC.sacCer3.sgdGene
There is a non-zero chance we will want to use the actual genome sequence along with these annotations. The BSgenome packages provide that functionality.
## Make the sacCer3 BSgenome (genome sequence) package available.
## NOTE(review): whether require.auto() also attaches the package is not
## visible here -- confirm against hpgltools.
tt <- sm(require.auto("BSgenome.Scerevisiae.UCSC.sacCer3"))
A completely separate and competing annotation source is biomart.
## Fetch the biomart annotations and keep only the "annotation" element.
sc_annotv2 <- sm(load_biomart_annotations("scerevisiae"))[["annotation"]]
head(sc_annotv2)
## transcriptID geneID
## X15S_rRNA 15S_rRNA 15S_rRNA
## X21S_rRNA 21S_rRNA 21S_rRNA
## HRA1 HRA1 HRA1
## ICR1 ICR1 ICR1
## LSR1 LSR1 LSR1
## NME1 NME1 NME1
## Description
## X15S_rRNA Ribosomal RNA of the small mitochondrial ribosomal subunit; MSU1 allele suppresses ochre stop mutations in mitochondrial protein-coding genes [Source:SGD;Acc:S000007287]
## X21S_rRNA Mitochondrial 21S rRNA; intron encodes the I-SceI DNA endonuclease [Source:SGD;Acc:S000007288]
## HRA1 Non-protein-coding RNA; substrate of RNase P, possibly involved in rRNA processing, specifically maturation of 20S precursor into the mature 18S rRNA [Source:SGD;Acc:S000119380]
## ICR1 Long intergenic regulatory ncRNA; has a key role in regulating transcription of the nearby protein-coding ORF FLO11; initiated far upstream from FLO11 and transcribed across much of the large promoter of FLO11, repressing FLO11 transcription in cis [Source:SGD;Acc:S000132612]
## LSR1 U2 spliceosomal RNA (U2 snRNA), component of the spliceosome; pairs with the branchpoint sequence; functionally equivalent to mammalian U2 snRNA; stress-induced pseudouridylations at positions 56 and 93 may contribute to regulation of splicing [Source:SGD;Acc:S000006478]
## NME1 RNA component of RNase MRP; RNase MRP cleaves pre-rRNA and has a role in cell cycle-regulated degradation of daughter cell-specific mRNAs; human ortholog is implicated in cartilage-hair hypoplasia (CHH) [Source:SGD;Acc:S000007436]
## Type length chromosome strand start end
## X15S_rRNA rRNA NA Mito 1 6546 8194
## X21S_rRNA rRNA NA Mito 1 58009 62447
## HRA1 ncRNA NA I 1 99305 99868
## ICR1 ncRNA NA IX -1 393884 397082
## LSR1 snRNA NA II -1 680688 681862
## NME1 snoRNA NA XIV 1 585587 585926
## Fetch the biomart gene ontology data and keep only the "go" element.
sc_ontology <- sm(load_biomart_go("scerevisiae"))[["go"]]
head(sc_ontology)
## ID GO
## 1 YHR055C GO:0046872
## 2 YHR055C GO:0005829
## 3 YHR055C GO:0016209
## 4 YHR055C GO:0004784
## 5 YHR055C GO:0019430
## 6 YHR055C GO:0005507
In contrast, it is possible to load most annotations of interest directly from the gff files used in the alignments.
## The old way of getting genome/annotation data: read the entries of
## type "gene" directly from the gff file.
sc_gff <- "reference/scerevisiae.gff.gz"
sc_gff_annotations <- load_gff_annotations(sc_gff, type="gene")
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=TRUE)
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo=FALSE)
## Trying attempt: rtracklayer::import.gff2(gff, sequenceRegionsAsSeqinfo=TRUE)
## Had a successful gff import with rtracklayer::import.gff2(gff, sequenceRegionsAsSeqinfo=TRUE)
## Returning a df with 18 columns and 7050 rows.
## Key the gff rows by transcript name, made syntactically valid and unique.
rownames(sc_gff_annotations) <- make.names(
  sc_gff_annotations[["transcript_name"]], unique=TRUE)
head(sc_gff_annotations)
## seqnames start end width strand source type score phase exon_number
## YAL069W I 335 646 312 + protein_coding gene NA 0 1
## YAL068W.A I 538 789 252 + protein_coding gene NA 0 1
## PAU8 I 1810 2169 360 - protein_coding gene NA 0 1
## YAL067W.A I 2480 2704 225 + protein_coding gene NA 0 1
## SEO1 I 7238 9016 1779 - protein_coding gene NA 0 1
## YAL066W I 10091 10396 306 + protein_coding gene NA 0 1
## gene_id ID p_id protein_id transcript_id transcript_name tss_id
## YAL069W YAL069W YAL069W P3633 YAL069W YAL069W YAL069W TSS1128
## YAL068W.A YAL068W-A YAL068W-A P5377 YAL068W-A YAL068W-A YAL068W-A TSS5439
## PAU8 YAL068C PAU8 P6023 YAL068C YAL068C PAU8 TSS249
## YAL067W.A YAL067W-A YAL067W-A P4547 YAL067W-A YAL067W-A YAL067W-A TSS1248
## SEO1 YAL067C SEO1 P5747 YAL067C YAL067C SEO1 TSS5464
## YAL066W YAL066W YAL066W P1766 YAL066W YAL066W YAL066W TSS2674
## seqedit
## YAL069W <NA>
## YAL068W.A <NA>
## PAU8 <NA>
## YAL067W.A <NA>
## SEO1 <NA>
## YAL066W <NA>
In the following block we create an expressionset using the sample sheet and the annotations.
Annoyingly, the gff annotations are keyed in a peculiar fashion. Therefore I need to do a little work to merge them.
## Build "<chromosome>_<coordinate>" keys on the biomart table: one from the
## start coordinate (fwd_location) and one from the end (rev_location).
sc_annotv2[["fwd_location"]] <- paste0(sc_annotv2[["chromosome"]], "_",
                                       sc_annotv2[["start"]])
sc_annotv2[["rev_location"]] <- paste0(sc_annotv2[["chromosome"]], "_",
                                       sc_annotv2[["end"]])
## Build the same keys on the gff table, and copy its rownames into a column
## because merge() does not carry rownames into its result.
sc_gff_annotations[["fwd_location"]] <- paste0(sc_gff_annotations[["seqnames"]], "_",
                                               sc_gff_annotations[["start"]])
sc_gff_annotations[["rev_location"]] <- paste0(sc_gff_annotations[["seqnames"]], "_",
                                               sc_gff_annotations[["end"]])
sc_gff_annotations[["gff_rowname"]] <- rownames(sc_gff_annotations)
## Join the two tables once on each key.
sc_fwd_annotations <- merge(sc_annotv2, sc_gff_annotations, by="fwd_location")
sc_rev_annotations <- merge(sc_annotv2, sc_gff_annotations, by="rev_location")
## Give both results the same column names so they can be stacked: the join key
## becomes "location" and the unused key from each input becomes
## "location.x" / "location.y".  Renaming by pattern instead of assigning a
## positional vector of names keeps every other column correctly labelled even
## if one of the input tables gains, loses or reorders a column.
colnames(sc_fwd_annotations) <- sub("^(fwd|rev)_location", "location",
                                    colnames(sc_fwd_annotations))
colnames(sc_rev_annotations) <- sub("^(fwd|rev)_location", "location",
                                    colnames(sc_rev_annotations))
sc_all_annotations <- rbind(sc_fwd_annotations, sc_rev_annotations)
## NOTE(review): a gff row matched by both joins is present twice after the
## rbind(); make.names(unique=TRUE) leaves the first copy's name alone and
## appends a numeric suffix to later copies.  Confirm whether the suffixed
## copies should be dropped.
rownames(sc_all_annotations) <- make.names(sc_all_annotations[["gff_rowname"]], unique=TRUE)
## Keep the biomart-side columns plus the gff tss_id, then strip the ".x"
## suffixes which merge() added to the colliding column names.
sc_all_annotations <- sc_all_annotations[, c("transcriptID", "geneID", "Description", "Type",
                                             "length", "chromosome", "strand.x", "start.x", "end.x",
                                             "tss_id")]
colnames(sc_all_annotations) <- c("transcriptID", "geneID", "Description", "Type", "length",
                                  "chromosome", "strand", "start", "end", "tss_id")
## A single "<chromosome>_<start>_<end>" label for each row.
sc_all_annotations[["location"]] <- paste0(sc_all_annotations[["chromosome"]], "_",
                                           sc_all_annotations[["start"]], "_",
                                           sc_all_annotations[["end"]])
## Build the experiment from the sample sheet and the merged annotations;
## file_column names the sample sheet column to use ("bt2file").
sc2_expt <- create_expt(metadata="sample_sheets/all_samples.xlsx",
                        gene_info=sc_all_annotations,
                        file_column="bt2file")
## Reading the sample metadata.
## Reading count tables.
## TESTME: FALSE THERE?
## The header is false
## pre: /cbcb/nelsayed-scratch/atb/rnaseq/scerevisiae_cbf5_2017/preprocessing/v2/hpgl0774/outputs/bowtie2_scerevisiae/hpgl0774_forward-trimmed.count.xz FALSE
## first
## second
## /cbcb/nelsayed-scratch/atb/rnaseq/scerevisiae_cbf5_2017/preprocessing/v2/hpgl0774/outputs/bowtie2_scerevisiae/hpgl0774_forward-trimmed.count.xz contains 7131 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/scerevisiae_cbf5_2017/preprocessing/v2/hpgl0775/outputs/bowtie2_scerevisiae/hpgl0775_forward-trimmed.count.xz contains 7131 rows and merges to 7131 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/scerevisiae_cbf5_2017/preprocessing/v2/hpgl0776/outputs/bowtie2_scerevisiae/hpgl0776_forward-trimmed.count.xz contains 7131 rows and merges to 7131 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/scerevisiae_cbf5_2017/preprocessing/v2/hpgl0777/outputs/bowtie2_scerevisiae/hpgl0777_forward-trimmed.count.xz contains 7131 rows and merges to 7131 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/scerevisiae_cbf5_2017/preprocessing/v2/hpgl0778/outputs/bowtie2_scerevisiae/hpgl0778_forward-trimmed.count.xz contains 7131 rows and merges to 7131 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/scerevisiae_cbf5_2017/preprocessing/v2/hpgl0779/outputs/bowtie2_scerevisiae/hpgl0779_forward-trimmed.count.xz contains 7131 rows and merges to 7131 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/scerevisiae_cbf5_2017/preprocessing/v2/hpgl0780/outputs/bowtie2_scerevisiae/hpgl0780_forward-trimmed.count.xz contains 7131 rows and merges to 7131 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/scerevisiae_cbf5_2017/preprocessing/v2/hpgl0781/outputs/bowtie2_scerevisiae/hpgl0781_forward-trimmed.count.xz contains 7131 rows and merges to 7131 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/scerevisiae_cbf5_2017/preprocessing/v2/hpgl0782/outputs/bowtie2_scerevisiae/hpgl0782_forward-trimmed.count.xz contains 7131 rows and merges to 7131 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/scerevisiae_cbf5_2017/preprocessing/v2/hpgl0783/outputs/bowtie2_scerevisiae/hpgl0783_forward-trimmed.count.xz contains 7131 rows and merges to 7131 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/scerevisiae_cbf5_2017/preprocessing/v2/hpgl0784/outputs/bowtie2_scerevisiae/hpgl0784_forward-trimmed.count.xz contains 7131 rows and merges to 7131 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/scerevisiae_cbf5_2017/preprocessing/v2/hpgl0785/outputs/bowtie2_scerevisiae/hpgl0785_forward-trimmed.count.xz contains 7131 rows and merges to 7131 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/scerevisiae_cbf5_2017/preprocessing/v2/hpgl0786/outputs/bowtie2_scerevisiae/hpgl0786_forward-trimmed.count.xz contains 7131 rows and merges to 7131 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/scerevisiae_cbf5_2017/preprocessing/v2/hpgl0787/outputs/bowtie2_scerevisiae/hpgl0787_forward-trimmed.count.xz contains 7131 rows and merges to 7131 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/scerevisiae_cbf5_2017/preprocessing/v2/hpgl0788/outputs/bowtie2_scerevisiae/hpgl0788_forward-trimmed.count.xz contains 7131 rows and merges to 7131 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/scerevisiae_cbf5_2017/preprocessing/v2/hpgl0789/outputs/bowtie2_scerevisiae/hpgl0789_forward-trimmed.count.xz contains 7131 rows and merges to 7131 rows.
## Matched 6540 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
head(exprs(sc2_expt$expressionset))
## hpgl0774 hpgl0775 hpgl0776 hpgl0777 hpgl0778 hpgl0779 hpgl0780 hpgl0781
## X15S_rRNA 0 0 0 0 0 0 0 0
## X21S_rRNA 0 0 0 0 0 0 0 0
## AAC1 536 477 743 443 634 188 763 414
## AAC3 126 216 93 765 152 154 102 738
## AAD10 1784 1928 2327 3869 2172 994 2472 3551
## AAD14 1054 901 1222 1863 1106 836 1307 1588
## hpgl0782 hpgl0783 hpgl0784 hpgl0785 hpgl0786 hpgl0787 hpgl0788 hpgl0789
## X15S_rRNA 0 0 0 0 0 0 0 0
## X21S_rRNA 0 0 0 0 0 0 0 0
## AAC1 175 145 140 237 124 142 141 181
## AAC3 295 119 341 542 210 118 438 1071
## AAD10 365 589 1476 1593 352 542 1782 2082
## AAD14 542 766 1580 1814 439 795 1924 2333
head(fData(sc2_expt$expressionset))
## transcriptID geneID
## X15S_rRNA undefined undefined
## X21S_rRNA undefined undefined
## AAC1 YMR056C YMR056C
## AAC3 YBR085W YBR085W
## AAD10 YJR155W YJR155W
## AAD14 YNL331C YNL331C
## Description
## X15S_rRNA undefined
## X21S_rRNA undefined
## AAC1 Mitochondrial inner membrane ADP/ATP translocator; exchanges cytosolic ADP for mitochondrially synthesized ATP; phosphorylated; Aac1p is a minor isoform while Pet9p is the major ADP/ATP translocator; relocalizes from mitochondrion to cytoplasm upon DNA replication stress [Source:SGD;Acc:S000004660]
## AAC3 Mitochondrial inner membrane ADP/ATP translocator; exchanges cytosolic ADP for mitochondrially synthesized ATP; expressed under anaerobic conditions; similar to Aac1p; has roles in maintenance of viability and in respiration; AAC3 has a paralog, PET9, that arose from the whole genome duplication [Source:SGD;Acc:S000000289]
## AAD10 Putative aryl-alcohol dehydrogenase; similar to P. chrysosporium aryl-alcohol dehydrogenase; mutational analysis has not yet revealed a physiological role; members of the AAD gene family comprise three pairs (AAD3 + AAD15, AAD6/AAD16 + AAD4, AAD10 + AAD14) whose two genes are more related to one another than to other members of the family [Source:SGD;Acc:S000003916]
## AAD14 Putative aryl-alcohol dehydrogenase; similar to P. chrysosporium aryl-alcohol dehydrogenase; mutational analysis has not yet revealed a physiological role; members of the AAD gene family comprise three pairs (AAD3 + AAD15, AAD6/AAD16 + AAD4, AAD10 + AAD14) whose two genes are more related to one another than to other members of the family [Source:SGD;Acc:S000005275]
## Type length chromosome strand start end tss_id
## X15S_rRNA undefined undefined undefined undefined undefined undefined undefined
## X21S_rRNA undefined undefined undefined undefined undefined undefined undefined
## AAC1 protein_coding 930 XIII -1 387315 388244 TSS5132
## AAC3 protein_coding 924 II 1 415983 416906 TSS1609
## AAD10 protein_coding 867 X 1 727405 728271 TSS5024
## AAD14 protein_coding 1131 XIV -1 16118 17248 TSS6941
## location
## X15S_rRNA undefined
## X21S_rRNA undefined
## AAC1 XIII_387315_388244
## AAC3 II_415983_416906
## AAD10 X_727405_728271
## AAD14 XIV_16118_17248
head(pData(sc2_expt$expressionset))
## sampleid strain condition batch originalbatch tube cbf5igv upf1igv
## hpgl0774 hpgl0774 yJD1524 wtc_wtu r a f wt wt
## hpgl0775 hpgl0775 yJD1525 mtc_wtu r a f mut wt
## hpgl0776 hpgl0776 yJD1745 wtc_mtu r a f wt mut
## hpgl0777 hpgl0777 yJD1746 mtc_mtu r a f mut mut
## hpgl0778 hpgl0778 yJD1524 wtc_wtu r b g wt wt
## hpgl0779 hpgl0779 yJD1525 mtc_wtu r b g mut wt
## incubationtime
## hpgl0774 18h
## hpgl0775 18h
## hpgl0776 18h
## hpgl0777 18h
## hpgl0778 18h
## hpgl0779 18h
## genotype
## hpgl0774 wt ade2-1 can1-100 his3-11 leu2-3, 112 trp1-1 ura3-1 cbf5::TRP1 + CBF5 on pRS313
## hpgl0775 d95a ade2-1 can1-100 his3-11 leu2-3, 112 trp1-1 ura3-1 cbf5::TRP1 + CBF5 D95A on pRS313
## hpgl0776 wt ade2-1 can1-100 his3-11 leu2-3, 112 trp1-1 ura3-1 cbf5::TRP1 upf1::LEU2 + CBF5 on pRS313 (yJD1524 upf1Δ)
## hpgl0777 d95a ade2-1 can1-100 his3-11 leu2-3, 112 trp1-1 ura3-1 cbf5::TRP1 upf1::LEU2 + CBF5 D95A on pRS313 (yJD1525 upf1Δ)
## hpgl0778 wt ade2-1 can1-100 his3-11 leu2-3, 112 trp1-1 ura3-1 cbf5::TRP1 + CBF5 on pRS313
## hpgl0779 d95a ade2-1 can1-100 his3-11 leu2-3, 112 trp1-1 ura3-1 cbf5::TRP1 + CBF5 D95A on pRS313
## conc bttotalreads bttotalmapped btleftmapped btrightmapped bowtiefile
## hpgl0774 NA NA NA NA NA <NA>
## hpgl0775 NA NA NA NA NA <NA>
## hpgl0776 NA NA NA NA NA <NA>
## hpgl0777 NA NA NA NA NA <NA>
## hpgl0778 NA NA NA NA NA <NA>
## hpgl0779 NA NA NA NA NA <NA>
## bt2file
## hpgl0774 preprocessing/v2/hpgl0774/outputs/bowtie2_scerevisiae/hpgl0774_forward-trimmed.count.xz
## hpgl0775 preprocessing/v2/hpgl0775/outputs/bowtie2_scerevisiae/hpgl0775_forward-trimmed.count.xz
## hpgl0776 preprocessing/v2/hpgl0776/outputs/bowtie2_scerevisiae/hpgl0776_forward-trimmed.count.xz
## hpgl0777 preprocessing/v2/hpgl0777/outputs/bowtie2_scerevisiae/hpgl0777_forward-trimmed.count.xz
## hpgl0778 preprocessing/v2/hpgl0778/outputs/bowtie2_scerevisiae/hpgl0778_forward-trimmed.count.xz
## hpgl0779 preprocessing/v2/hpgl0779/outputs/bowtie2_scerevisiae/hpgl0779_forward-trimmed.count.xz
## intronfile
## hpgl0774 preprocessing/v2/hpgl0774/outputs/bowtie2_scerevisiae/introns.count.xz
## hpgl0775 preprocessing/v2/hpgl0775/outputs/bowtie2_scerevisiae/introns.count.xz
## hpgl0776 preprocessing/v2/hpgl0776/outputs/bowtie2_scerevisiae/introns.count.xz
## hpgl0777 preprocessing/v2/hpgl0777/outputs/bowtie2_scerevisiae/introns.count.xz
## hpgl0778 preprocessing/v2/hpgl0778/outputs/bowtie2_scerevisiae/introns.count.xz
## hpgl0779 preprocessing/v2/hpgl0779/outputs/bowtie2_scerevisiae/introns.count.xz
## allfile
## hpgl0774 preprocessing/v2/hpgl0774/outputs/bowtie2_scerevisiae/hpgl0774_forward-trimmed.count.xz
## hpgl0775 preprocessing/v2/hpgl0775/outputs/bowtie2_scerevisiae/hpgl0775_forward-trimmed.count.xz
## hpgl0776 preprocessing/v2/hpgl0776/outputs/bowtie2_scerevisiae/hpgl0776_forward-trimmed.count.xz
## hpgl0777 preprocessing/v2/hpgl0777/outputs/bowtie2_scerevisiae/hpgl0777_forward-trimmed.count.xz
## hpgl0778 preprocessing/v2/hpgl0778/outputs/bowtie2_scerevisiae/hpgl0778_forward-trimmed.count.xz
## hpgl0779 preprocessing/v2/hpgl0779/outputs/bowtie2_scerevisiae/hpgl0779_forward-trimmed.count.xz
## file
## hpgl0774 null
## hpgl0775 null
## hpgl0776 null
## hpgl0777 null
## hpgl0778 null
## hpgl0779 null
pander::pander(sessionInfo())
R version 3.4.3 (2017-11-30)
**Platform:** x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C
attached base packages: stats4, parallel, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: hpgltools(v.2017.10), TxDb.Scerevisiae.UCSC.sacCer3.sgdGene(v.3.2.2), GenomicFeatures(v.1.30.3), GenomicRanges(v.1.30.1), GenomeInfoDb(v.1.14.0), AnnotationDbi(v.1.40.0), IRanges(v.2.12.0), S4Vectors(v.0.16.0), Biobase(v.2.38.0), AnnotationHub(v.2.10.1) and BiocGenerics(v.0.24.0)
loaded via a namespace (and not attached): bitops(v.1.0-6), matrixStats(v.0.53.0), devtools(v.1.13.4), bit64(v.0.9-7), RColorBrewer(v.1.1-2), progress(v.1.1.2), httr(v.1.3.1), rprojroot(v.1.3-2), backports(v.1.1.2), tools(v.3.4.3), R6(v.2.2.2), DBI(v.0.7), lazyeval(v.0.2.1), colorspace(v.1.3-2), withr(v.2.1.1), prettyunits(v.1.0.2), RMySQL(v.0.10.13.9000), bit(v.1.1-12), curl(v.3.1), compiler(v.3.4.3), xml2(v.1.2.0), DelayedArray(v.0.4.1), rtracklayer(v.1.38.3), scales(v.0.5.0), commonmark(v.1.4), stringr(v.1.2.0), digest(v.0.6.15), Rsamtools(v.1.30.0), rmarkdown(v.1.8), XVector(v.0.18.0), base64enc(v.0.1-3), pkgconfig(v.2.0.1), htmltools(v.0.3.6), rlang(v.0.1.6), RSQLite(v.2.0), BiocInstaller(v.1.28.0), shiny(v.1.0.5), BiocParallel(v.1.12.0), RCurl(v.1.95-4.10), magrittr(v.1.5), GenomeInfoDbData(v.1.0.0), Matrix(v.1.2-12), Rcpp(v.0.12.15), munsell(v.0.4.3), stringi(v.1.1.6), yaml(v.2.1.16), SummarizedExperiment(v.1.8.1), zlibbioc(v.1.24.0), plyr(v.1.8.4), grid(v.3.4.3), blob(v.1.1.0), lattice(v.0.20-35), Biostrings(v.2.46.0), pander(v.0.6.1), knitr(v.1.19), pillar(v.1.1.0), codetools(v.0.2-15), biomaRt(v.2.34.2), XML(v.3.98-1.9), evaluate(v.0.10.1), data.table(v.1.10.4-3), httpuv(v.1.3.5), foreach(v.1.4.4), gtable(v.0.2.0), assertthat(v.0.2.0), ggplot2(v.2.2.1), openxlsx(v.4.0.17), mime(v.0.5), xtable(v.1.8-2), roxygen2(v.6.0.1), tibble(v.1.4.2), iterators(v.1.0.9), GenomicAlignments(v.1.14.1), memoise(v.1.1.0) and interactiveDisplayBase(v.1.16.0)
## Report which hpgltools commit was used for this run.
message(paste0("This is hpgltools commit: ", get_git_commit()))
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 323d5703935c97a779b33794616768dafb9419d9
## R> packrat::restore()
## This is hpgltools commit: Tue Feb 13 15:46:11 2018 -0500: 323d5703935c97a779b33794616768dafb9419d9
## Name the saved file after the Rmd file (minus its extension) and the version.
## The pattern is anchored so that only a trailing ".Rmd" is removed, and the
## 'replacement' argument is spelled out rather than partially matched.
this_save <- paste0(sub(pattern="\\.Rmd$", replacement="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
## Saving to 01_annotation-v20180212.rda.xz
tmp <- sm(saveme(filename=this_save))