There are a few methods of importing annotation data into R. The following are two attempts; the second is currently being used in these analyses.
AnnotationHub is a newer service and has promise to be an excellent top-level resource for gathering annotation data.
## Attach AnnotationHub.  sm() is presumably a helper which silences the startup
## messages -- TODO confirm against its definition.
tt <- sm(library(AnnotationHub))
## Create the hub client used by every query() call which follows.
ah <- AnnotationHub()
## snapshotDate(): 2020-04-27
## Every hub record matching "OrgDB"; not used again in the visible part of this document.
orgdbs <- query(ah, "OrgDB")
## Narrow the search to records matching both "OrgDB" and "Friedlin".
annot_lm <- query(ah, c("OrgDB", "Friedlin"))
lm_name <- names(annot_lm)
## Fetch the second hit by position.  This errors when fewer than two records match
## and picks a different record if the hub ordering ever changes.
## NOTE(review): confirm the second hit is the intended record.
annot_lm <- annot_lm[[lm_name[[2]]]]
## downloading 1 resources
## retrieving 1 resource
## loading from cache
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Every hub record matching "TxDb"; not used again in the visible part of this document.
txtdbs <- query(ah, "TxDb")
## AH48429 appears to be panamensis
## Earlier approach, kept for reference: fetch that record directly by its hub ID.
##annot_lp <- annot_lp[["AH48429"]]
## Current approach: search for records matching both "OrgDB" and "panamensis",
## then fetch the second hit by position.  This errors when fewer than two records
## match and picks a different record if the hub ordering ever changes.
## NOTE(review): confirm the second hit is the intended record.
annot_lp <- query(ah, c("OrgDB", "panamensis"))
lp_name <- names(annot_lp)
annot_lp <- annot_lp[[lp_name[[2]]]]
## downloading 1 resources
## retrieving 1 resource
## loading from cache
Since this document was originally written, I have made substantial changes to how I create, load, and manipulate the eupathdb annotation data. As a result, this needs to be significantly reworked.
AnnotationHub is the new and fancier version of what OrganismDb does. Keith already made these for the parasites, though, so let's try to use one of those.
## Request one eupathdb OrganismDbi package per species of interest.
## Each call receives a fragment of the species name; presumably
## make_eupath_organismdbi() matches it against the eupathdb species list and
## builds the package -- TODO confirm against the hpgltools documentation.
testing_panamensis <- make_eupath_organismdbi("panamensis")
testing_braziliensis <- make_eupath_organismdbi("braziliensis")
testing_donovani <- make_eupath_organismdbi("donovani")
testing_mexicana <- make_eupath_organismdbi("mexicana")
testing_major <- make_eupath_organismdbi("major")
testing_crith <- make_eupath_organismdbi("Crithidia")
Assuming the above packages got created, we may load them and extract the annotation data.
## Look up the package names for L. major and print the name of its OrgDb.
## NOTE(review): in the recorded run below get_eupath_pkgnames() could not be found,
## so major_names was never created and the statements depending on it failed too.
major_names <- get_eupath_pkgnames("major")
## Error in get_eupath_pkgnames("major"): could not find function "get_eupath_pkgnames"
major_names$orgdb
## Error in eval(expr, envir, enclos): object 'major_names' not found
## Annotation columns requested from every OrgDb loaded in this document
## (passed as fields= to load_orgdb_annotations()).  One name per line so that
## additions and removals show up cleanly in a diff.
wanted_fields <- c(
  "cds_length",
  "chromosome",
  "entrez_gene_id",
  "gene_name_or_symbol",
  "gene_strand",
  "gid",
  "go_go_id",
  "go_go_term_name",
  "go_ontology",
  "interpro_description",
  "interpro_e_value",
  "type_gene_type"
)
## Load the wanted annotation fields for each species, keyed by "gid".
## The same three steps repeat per species: look up the package names, print the
## OrgDb name, then load the annotations from that OrgDb.
## NOTE(review): every step failed in the recorded run below because
## get_eupath_pkgnames() could not be found; none of the *_org objects exist afterwards.

## L. major
lm_org <- load_orgdb_annotations(major_names$orgdb, keytype="gid", fields=wanted_fields)
## Error in load_orgdb_annotations(major_names$orgdb, keytype = "gid", fields = wanted_fields): object 'major_names' not found
## L. panamensis
panamensis_names <- get_eupath_pkgnames("panamensis")
## Error in get_eupath_pkgnames("panamensis"): could not find function "get_eupath_pkgnames"
panamensis_names$orgdb
## Error in eval(expr, envir, enclos): object 'panamensis_names' not found
lp_org <- load_orgdb_annotations(panamensis_names$orgdb, keytype="gid", fields=wanted_fields)
## Error in load_orgdb_annotations(panamensis_names$orgdb, keytype = "gid", : object 'panamensis_names' not found
## L. braziliensis
braziliensis_names <- get_eupath_pkgnames("braziliensis")
## Error in get_eupath_pkgnames("braziliensis"): could not find function "get_eupath_pkgnames"
braziliensis_names$orgdb
## Error in eval(expr, envir, enclos): object 'braziliensis_names' not found
lb_org <- load_orgdb_annotations(braziliensis_names$orgdb, keytype="gid", fields=wanted_fields)
## Error in load_orgdb_annotations(braziliensis_names$orgdb, keytype = "gid", : object 'braziliensis_names' not found
## L. donovani
donovani_names <- get_eupath_pkgnames("donovani")
## Error in get_eupath_pkgnames("donovani"): could not find function "get_eupath_pkgnames"
donovani_names$orgdb
## Error in eval(expr, envir, enclos): object 'donovani_names' not found
ld_org <- load_orgdb_annotations(donovani_names$orgdb, keytype="gid", fields=wanted_fields)
## Error in load_orgdb_annotations(donovani_names$orgdb, keytype = "gid", : object 'donovani_names' not found
## L. mexicana
mexicana_names <- get_eupath_pkgnames("mexicana")
## Error in get_eupath_pkgnames("mexicana"): could not find function "get_eupath_pkgnames"
mexicana_names$orgdb
## Error in eval(expr, envir, enclos): object 'mexicana_names' not found
lmex_org <- load_orgdb_annotations(mexicana_names$orgdb, keytype="gid", fields=wanted_fields)
## Error in load_orgdb_annotations(mexicana_names$orgdb, keytype = "gid", : object 'mexicana_names' not found
## Crithidia fasciculata
## NOTE(review): the package creation step earlier in this document used "Crithidia"
## while this lookup uses "rithidia"; confirm the truncated fragment is intentional
## (e.g. to sidestep capitalization) rather than a typo.
fasciculata_names <- get_eupath_pkgnames("rithidia")
## Error in get_eupath_pkgnames("rithidia"): could not find function "get_eupath_pkgnames"
fasciculata_names$orgdb
## Error in eval(expr, envir, enclos): object 'fasciculata_names' not found
cf_org <- load_orgdb_annotations(fasciculata_names$orgdb, keytype="gid", fields=wanted_fields)
## Error in load_orgdb_annotations(fasciculata_names$orgdb, keytype = "gid", : object 'fasciculata_names' not found
In contrast, it is possible to load most annotations of interest directly from the gff files used in the alignments. More in-depth information for the human transcriptome may be extracted from biomart.
## The old way of getting genome/annotation data
## Paths, relative to the working directory, of the annotation (gff/gtf) and
## genome (xz-compressed fasta) files for L. panamensis, L. braziliensis and human.
lp_gff <- "reference/lpanamensis.gff"
lb_gff <- "reference/lbraziliensis.gff"
hs_gff <- "reference/hsapiens.gtf"
lp_fasta <- "reference/lpanamensis.fasta.xz"
lb_fasta <- "reference/lbraziliensis.fasta.xz"
hs_fasta <- "reference/hsapiens.fasta.xz"
## Read the gene-type entries from the L. panamensis gff.
lp_annotations <- sm(load_gff_annotations(lp_gff, type="gene"))
## Rename the rows to "exon_<web_id>.1"; presumably this mirrors the IDs used in the
## parasite count tables -- TODO confirm against one of the count files.
rownames(lp_annotations) <- paste0("exon_", lp_annotations$web_id, ".1")
lb_annotations <- sm(load_gff_annotations(lb_gff, type="gene"))
## The human gtf is read with gene_id as the ID column.
hs_gff_annot <- sm(load_gff_annotations(hs_gff, id_col="gene_id"))
## The human annotations used from here on come from biomart rather than the gtf.
hs_annotations <- sm(load_biomart_annotations())$annotation
## NOTE(review): confirm that the biomart table still has a geneID column; the next
## line keys on ensembl_gene_id instead.  If geneID is absent, assigning NULL here
## silently does nothing and no ID column is created.
hs_annotations$ID <- hs_annotations$geneID
## make.names(unique=TRUE) turns the gene IDs into syntactically valid, unique row names.
rownames(hs_annotations) <- make.names(hs_annotations[["ensembl_gene_id"]], unique=TRUE)
## Try using biomart
## Called with no arguments, so whatever species/host load_biomart_go() defaults to
## is what gets queried -- presumably human; TODO confirm.
hs_go_biomart <- sm(load_biomart_go())
## or the org.Hs.eg.db sqlite database
## Attach the Homo.sapiens package and keep a handle on the object it provides.
tt <- sm(library("Homo.sapiens"))
hs <- Homo.sapiens
## Disabled comparison of the biomart and OrgDb GO tables, kept for reference.
##hs_go_ensembl <- load_orgdb_go(hs, hs_annotations$geneID)
##dim(hs_go_biomart)
##dim(hs_go_ensembl)
##hs_goids <- hs_go_biomart
## While testing, I called this desc, that will need to change.
## Disabled tooltip generation, kept for reference.
##lp_tooltips <- make_tooltips(lp_annotations)
##lb_tooltips <- make_tooltips(lb_annotations)
## Per-gene length tables for the two parasites, taken straight from the gff annotations.
lp_lengths <- lp_annotations[, c("ID", "width")]
lb_lengths <- lb_annotations[, c("ID", "width")]
## The human table comes from biomart.  The previous build of this document stopped
## here with "undefined columns selected" because the biomart table had no
## geneID/length columns.  Prefer the old names when they exist (so older saved data
## behaves exactly as before) and otherwise fall back to the current ones.
## NOTE(review): cds_length is assumed to be the current biomart length column -- confirm.
hs_lengths <- local({
  available <- colnames(hs_annotations)
  ## intersect() keeps the order of its first argument, so the old names win when present.
  id_column <- intersect(c("geneID", "ensembl_gene_id"), available)
  length_column <- intersect(c("length", "cds_length"), available)
  if (length(id_column) == 0 || length(length_column) == 0) {
    stop("hs_annotations lacks a usable ID and/or length column; it provides: ",
         toString(available), call.=FALSE)
  }
  chosen <- hs_annotations[, c(id_column[1], length_column[1])]
  ## Keep the column names that code written against the old table expects.
  colnames(chosen) <- c("geneID", "length")
  chosen
})
## Read the headerless, tab-separated GO tables for both parasites
## (read.delim() is read.csv() with sep="\t" as its default), then label the six columns.
lp_goids <- read.delim(file="reference/lpan_go.txt.xz", header=FALSE)
lb_goids <- read.delim(file="reference/lbraz_go.txt.xz", header=FALSE)
names(lp_goids) <- c("ID", "GO", "ont", "name", "source", "tag")
names(lb_goids) <- c("ID", "GO", "ont", "name", "source", "tag")
The macrophage experiment has samples across two contexts: the host and the parasite. The following block sets up one experiment for each. If you open the all_samples-species.xlsx files, you will note immediately that a few different attempts were made at ascertaining the most likely experimental factors that contributed to the readily apparent batch effects.
Keep in mind that if I change the experimental design with new annotations, I must therefore regenerate the following.
## Keep only the biomart columns wanted as feature data for the host experiment.
hs_final_annotations <- hs_annotations[, c(
  "ensembl_transcript_id",
  "ensembl_gene_id",
  "hgnc_symbol",
  "description",
  "gene_biotype"
)]
## Free-text note stored with the experiment.
note <- "New experimental design factors by snp added 2016-09-20"
## Build the host experiment: the sample sheet names the samples and the
## "humanfile" column points at each sample's count table.
hs_expt <- create_expt(
  "sample_sheets/all_samples-combined.xlsx",
  gene_info = hs_final_annotations,
  file_column = "humanfile",
  notes = note
)
## Reading the sample metadata.
## Dropped 1 rows from the sample metadata because they were blank.
## The sample definitions comprises: 50 rows(samples) and 56 columns(metadata fields).
## Reading count tables.
## Reading count files with read.table().
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0241/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0242/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0243/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0244/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0245/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0246/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0247/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0248/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0315/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0316/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0317/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0318/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0319/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0320/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0321/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0322/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0630/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0631/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0632/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0633/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0634/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0635/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0636/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0637/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0638/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0639/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0640/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0641/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0642/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0643/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0644/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0645/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0646/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0647/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0648/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0649/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0650/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0651/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0652/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0653/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0654/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0655/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0656/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0657/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0658/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0659/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0660/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0661/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0662/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0663/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## Finished reading count data.
## Matched 43897 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 51041 rows and 50 columns.
## Print the first row of the host experiment's design table as a quick check
## that the sample metadata was read as expected.
head(hs_expt$design, n=1)
## sampleid pathogenstrain experimentname tubelabel alias
## HPGL0241 HPGL0241 none macrophage TM130-Nil (Blue label) Nil
## condition batch anotherbatch snpclade snpcladev2 snpcladev3
## HPGL0241 uninf a a undef undef undef
## pathogenstrain1 label donor time pctmappedparasite pctcategory
## HPGL0241 none uninf_1 d130 undef undef 0
## state sourcelab expperson pathogen host hostcelltype
## HPGL0241 uninfected Ade Adriana none Human Human macs
## noofhostcells infectionperiodhpitimeofharvest moiexposure
## HPGL0241 Max 2 mill 2h - 24h chase period <NA>
## parasitespercell pctinf rnangul rnaqcpassed libraryconst libqcpassed
## HPGL0241 unknown unknown 468 Y Wanderson Y
## index descriptonandremarks observation lowercaseid
## HPGL0241 1 Uninfected human macrophages <NA> hpgl0241
## humanfile
## HPGL0241 preprocessing/hpgl0241/outputs/tophat_hsapiens/accepted_paired.count.xz
## parasitefile bcftable salmonreads hssalmonmapped hssalmonmaprate
## HPGL0241 undef undef 46628648 26156539 0.561
## lpsalmonmapped lpsalmonmaprate tophatpairs hstophataligned hstophatpct
## HPGL0241 NA NA 46319335 40905961 0.8831
## hstophatmulti hstophatdiscordant hstophatconcordantpct lptophataligned
## HPGL0241 1374099 1430888 0.8522 NA
## lptophatpct lptophatmulti lptophatdiscordant lpconcordantpct
## HPGL0241 NA NA NA NA
## variantpositions file
## HPGL0241 NA null
## Restrict the host experiment to protein-coding genes.
## The feature data comes from the gene_info handed to create_expt()
## (hs_final_annotations), whose biotype column is "gene_biotype".  Filtering on the
## older name "Type" alone yields an empty selection when that column is absent, so
## use "Type" when it exists (older saved data) and "gene_biotype" otherwise.
cds_entries <- local({
  features <- fData(hs_expt)
  biotype_column <- intersect(c("Type", "gene_biotype"), colnames(features))
  if (length(biotype_column) == 0) {
    stop("The feature data has neither a 'Type' nor a 'gene_biotype' column; it provides: ",
         toString(colnames(features)), call.=FALSE)
  }
  ## %in% rather than == so that a missing biotype counts as "not protein coding"
  ## instead of producing an NA row index.
  features[[biotype_column[1]]] %in% "protein_coding"
})
hs_cds_expt <- hs_expt
## Subset the expressionset inside the copy; the original hs_expt is left untouched.
hs_cds_expt$expressionset <- hs_cds_expt$expressionset[cds_entries, ]
## Feature data of the subset, kept for inspection.
new_cds_entries <- fData(hs_cds_expt)
## Build the parasite-side experiment from the same sample sheet as the host one,
## this time taking the count table paths from the "parasitefile" column and the
## gene information from the L. panamensis gff annotations.
## sm() presumably silences the progress messages create_expt() prints -- TODO confirm.
parasite_expt <- sm(create_expt("sample_sheets/all_samples-combined.xlsx",
gene_info=lp_annotations, file_column="parasitefile"))
## Print the first three rows of the resulting design table as a quick check.
head(parasite_expt$design, n=3)
## sampleid pathogenstrain experimentname tubelabel alias
## HPGL0242 HPGL0242 s2271 macrophage TM130-2271 Self-Healing
## HPGL0243 HPGL0243 s2272 macrophage TM130-2272 Self-Healing
## HPGL0244 HPGL0244 s5433 macrophage TM130-5433 Chronic
## condition batch anotherbatch snpclade snpcladev2 snpcladev3
## HPGL0242 sh a a white whitepink right
## HPGL0243 sh a a white whitepink right
## HPGL0244 chr a a blue_self blue left
## pathogenstrain1 label donor time pctmappedparasite pctcategory
## HPGL0242 s2271 sh_2271 d130 undef 30 3
## HPGL0243 s2272 sh_2272 d130 undef 30 3
## HPGL0244 s5433 chr_5433 d130 undef 15 1
## state sourcelab expperson pathogen host hostcelltype
## HPGL0242 self_heal Ade Adriana Lp Human Human macs
## HPGL0243 self_heal Ade Adriana Lp Human Human macs
## HPGL0244 chronic Ade Adriana Lp Human Human macs
## noofhostcells infectionperiodhpitimeofharvest moiexposure
## HPGL0242 Max 2 mill 2h - 24h chase period 0.0486111111111111
## HPGL0243 Max 2 mill 2h - 24h chase period 0.0486111111111111
## HPGL0244 Max 2 mill 2h - 24h chase period 0.0486111111111111
## parasitespercell pctinf rnangul rnaqcpassed libraryconst libqcpassed
## HPGL0242 unknown unknown 276 Y Wanderson Y
## HPGL0243 unknown unknown 532 Y Wanderson Y
## HPGL0244 unknown unknown 261 Y Wanderson Y
## index descriptonandremarks observation lowercaseid
## HPGL0242 8 Infected human macrophages. <NA> hpgl0242
## HPGL0243 10 Infected human macrophages <NA> hpgl0243
## HPGL0244 27 Infected human macrophages <NA> hpgl0244
## humanfile
## HPGL0242 preprocessing/hpgl0242/outputs/tophat_hsapiens/accepted_paired.count.xz
## HPGL0243 preprocessing/hpgl0243/outputs/tophat_hsapiens/accepted_paired.count.xz
## HPGL0244 preprocessing/hpgl0244/outputs/tophat_hsapiens/accepted_paired.count.xz
## parasitefile
## HPGL0242 preprocessing/hpgl0242/outputs/tophat_lpanamensis/accepted_paired.count.xz
## HPGL0243 preprocessing/hpgl0243/outputs/tophat_lpanamensis/accepted_paired.count.xz
## HPGL0244 preprocessing/hpgl0244/outputs/tophat_lpanamensis/accepted_paired.count.xz
## bcftable salmonreads
## HPGL0242 preprocessing/outputs/hpgl0242_parsed_count.txt 42742857
## HPGL0243 preprocessing/outputs/hpgl0243_parsed_count.txt 46796079
## HPGL0244 preprocessing/outputs/hpgl0244_parsed_count.txt 47150925
## hssalmonmapped hssalmonmaprate lpsalmonmapped lpsalmonmaprate
## HPGL0242 17945935 0.4199 8023463 0.18771
## HPGL0243 21046460 0.4497 6823750 0.14582
## HPGL0244 25281958 0.5362 3761371 0.07977
## tophatpairs hstophataligned hstophatpct hstophatmulti
## HPGL0242 42612353 25394266 0.5959 869649
## HPGL0243 47344642 31160297 0.6582 1000248
## HPGL0244 46925604 36379602 0.7753 1070964
## hstophatdiscordant hstophatconcordantpct lptophataligned lptophatpct
## HPGL0242 784620 0.5775 13117819 0.3078
## HPGL0243 924296 0.6386 11581460 0.2446
## HPGL0244 991929 0.7541 5755998 0.1227
## lptophatmulti lptophatdiscordant lpconcordantpct variantpositions file
## HPGL0242 350277 263923 0.3016 3930 null
## HPGL0243 319338 245169 0.2394 NA null
## HPGL0244 154830 116414 0.1202 85981 null
Table S1 is going to be a summary of the metadata in all_samples-combined. This may also include some of the numbers regarding mapping %, etc.
Wanted columns:

* Sample ID: HPGLxxxx
* Donor Code: TM130 or PG1xx
* Cell Type: Macrophage or PBMC
* Infection Status: Infected or Uninfected
* Disease Outcome: Chronic or Self-Healing or NA
* Batch: A or B (macrophage); NA for PBMC
* Number of reads that passed Illumina filter
* Number of reads after trimming
* Number of reads mapped - human
* % reads mapped - human
* Number of reads mapped - L.panamensis
* % reads mapped - L.panamensis
Use the Tcruzi colors.

* A1 is a large title: “Macrophage Samples”
* Row 2 is the blue column headings
* 3-m contains Macrophage metadata
* m+1 is blank
* m+2 is a large title: “PBMC Samples”
* m+3-n contains PBMC metadata
At this point, we should have everything necessary to perform the various analyses of the 4 sub-experiments. So save the current data for reuse elsewhere.
The experimental design is available here.
## Unless skip_load exists and is TRUE, report the session and save the current data.
## get0() returns NULL when skip_load is undefined, so the default is to run this block.
if (!isTRUE(get0("skip_load"))) {
  ## Record the R and package versions used for this build.
  pander::pander(sessionInfo())
  message(paste0("This is hpgltools commit: ", get_git_commit()))
  ## Output name: the Rmd file name without ".Rmd", plus "-v<ver>.rda.xz".
  ## rmd_file and ver are expected to be defined before this point.
  ## The argument is spelled out as replacement= instead of relying on partial
  ## matching of replace=, which warns under options(warnPartialMatchArgs=TRUE).
  this_save <- paste0(gsub(pattern="\\.Rmd", replacement="", x=rmd_file), "-v", ver, ".rda.xz")
  message(paste0("Saving to ", this_save))
  ## saveme() presumably writes the workspace to this file -- TODO confirm.
  tmp <- sm(saveme(filename=this_save))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 71e9e0510e6a865fc024d8cf57090f2f02602a35
## This is hpgltools commit: Fri Jun 26 13:06:16 2020 -0400: 71e9e0510e6a865fc024d8cf57090f2f02602a35
## Saving to index-v20180822.rda.xz