There are a few methods of importing annotation data into R. The following are two attempts, the second is currently being used in these analyses.
meta <- EuPathDB::download_eupath_metadata(webservice="tritrypdb")
lm_entry <- EuPathDB::get_eupath_entry(species="Leishmania major", metadata=meta)
## Found the following hits: Leishmania major strain Friedlin, Leishmania major strain LV39c5, Leishmania major strain SD 75.1, choosing the first.
## Using: Leishmania major strain Friedlin.
## Found the following hits: Leishmania panamensis MHOM/COL/81/L13, Leishmania panamensis strain MHOM/PA/94/PSC-1, choosing the first.
## Using: Leishmania panamensis MHOM/COL/81/L13.
## Found: Leishmania mexicana MHOM/GT/2001/U1103
## Found: Leishmania amazonensis MHOM/BR/71973/M2269
## Found the following hits: Leishmania braziliensis MHOM/BR/75/M2904, Leishmania braziliensis MHOM/BR/75/M2904 2019, choosing the first.
## Using: Leishmania braziliensis MHOM/BR/75/M2904.
## Found the following hits: Leishmania donovani BPK282A1, Leishmania donovani CL-SL, Leishmania donovani strain BHU 1220, Leishmania donovani strain LV9, choosing the first.
## Using: Leishmania donovani BPK282A1.
## Found: Crithidia fasciculata strain Cf-Cl
## org.Lpanamensis.MHOMCOL81L13.v46.eg.db is already installed.
## org.Lbraziliensis.MHOMBR75M2904.v46.eg.db is already installed.
## org.Ldonovani.BPK282A1.v46.eg.db is already installed.
## org.Lmexicana.MHOMGT2001U1103.v46.eg.db is already installed.
## org.Lmajor.Friedlin.v46.eg.db is already installed.
## org.Cfasciculata.Cf.Cl.v46.eg.db is already installed.
Assuming the above packages got created, we may load them and extract the annotation data.
wanted_fields <- c("annot_cds_length", "annot_chromosome", "annot_gene_entrez_id",
"annot_gene_name", "annot_strand", "gid", "go_go_id",
"go_go_term_name", "go_ontology",
"interpro_description" ,"interpro_e_value", "type_gene_type")
lm_org <- sm(EuPathDB::load_eupath_annotations(query=lm_entry))
lp_org <- sm(EuPathDB::load_eupath_annotations(query=lp_entry))
lb_org <- sm(EuPathDB::load_eupath_annotations(query=lb_entry))
ld_org <- sm(EuPathDB::load_eupath_annotations(query=ld_entry))
lmex_org <- sm(EuPathDB::load_eupath_annotations(query=lmex_entry))
cf_ort <- sm(EuPathDB::load_eupath_annotations(query=crit_entry))
In contrast, it is possible to load most annotations of interest directly from the gff files used in the alignments. More in-depth information for the human transcriptome may be extracted from biomart.
## The old way of getting genome/annotation data
lp_gff <- "reference/lpanamensis.gff"
lb_gff <- "reference/lbraziliensis.gff"
hs_gff <- "reference/hsapiens.gtf"
lp_fasta <- "reference/lpanamensis.fasta.xz"
lb_fasta <- "reference/lbraziliensis.fasta.xz"
hs_fasta <- "reference/hsapiens.fasta.xz"
lp_annotations <- sm(load_gff_annotations(lp_gff, type="gene"))
rownames(lp_annotations) <- paste0("exon_", lp_annotations$web_id, ".1")
lb_annotations <- sm(load_gff_annotations(lb_gff, type="gene"))
hs_gff_annot <- sm(load_gff_annotations(hs_gff, id_col="gene_id"))
hs_annotations <- sm(load_biomart_annotations())$annotation
hs_annotations$ID <- hs_annotations$geneID
rownames(hs_annotations) <- make.names(hs_annotations[["ensembl_gene_id"]], unique=TRUE)
dim(hs_annotations)
## [1] 197995 12
hs_size_dist <- plot_histogram(hs_annotations[["cds_length"]])
hs_size_dist +
ggplot2::scale_x_continuous(limits=c(0, 20000))
## Warning: Removed 103681 rows containing non-finite values (stat_bin).
## Warning: Removed 103681 rows containing non-finite values (stat_density).
## Warning: Removed 2 rows containing missing values (geom_bar).
Maria Adelaida requested adding the xCell cell types to the data.
## Length Class Mode
## spill 3 -none- list
## spill.array 3 -none- list
## signatures 489 GeneSetCollection list
## genes 10808 -none- character
## Loading required package: annotate
## Loading required package: XML
## Loading required package: graph
##
## Attaching package: 'graph'
## The following object is masked from 'package:XML':
##
## addNode
## setName: aDC%HPCA%1.txt
## geneIds: C1QA, C1QB, ..., CCL22 (total: 8)
## geneIdType: Null
## collectionType: Null
## setIdentifier: PEDS-092FVH8-LT:623:Tue Jun 6 14:36:33 2017:2
## description:
## organism:
## pubMedIds:
## urls:
## contributor:
## setVersion: 0.0.1
## creationDate:
## [1] "aDC%HPCA%1.txt" "aDC%HPCA%2.txt"
## [3] "aDC%HPCA%3.txt" "aDC%IRIS%1.txt"
## [5] "aDC%IRIS%2.txt" "aDC%IRIS%3.txt"
## [7] "Adipocytes%ENCODE%1.txt" "Adipocytes%ENCODE%2.txt"
## [9] "Adipocytes%ENCODE%3.txt" "Adipocytes%FANTOM%1.txt"
## Here we see that the signatures are encoded as 3 element lists, the first element is the
## cell type, followed by source, followed by replicate.txt.
cell_types <- unlist(lapply(strsplit(x=names(sigs), split="%"), function(x) { x[[1]] }))
cell_sources <- unlist(lapply(strsplit(x=names(sigs), split="%"), function(x) { x[[2]] }))
type_fact <- as.factor(cell_types)
types <- levels(type_fact)
celltypes_to_genes <- list()
for (c in 1:length(types)) {
type <- types[c]
idx <- cell_types == type
set <- sigs[idx]
genes <- set %>%
geneIds() %>%
unlist()
celltypes_to_genes[[type]] <- as.character(genes)
}
genes_to_celltypes <- Biobase::reverseSplit(celltypes_to_genes)
g2c_df <- data.frame(row.names=unique(names(genes_to_celltypes)))
g2c_df[["found"]] <- 0
for (c in 1:length(celltypes_to_genes)) {
celltype_name <- names(celltypes_to_genes)[[c]]
message("Starting ", c, ": ", celltype_name)
celltype_column <- as.data.frame(celltypes_to_genes[[c]])
colnames(celltype_column) <- celltype_name
rownames(celltype_column) <- make.names(celltype_column[[1]], unique=TRUE)
celltype_column[[1]] <- TRUE
g2c_df <- merge(g2c_df, celltype_column, by="row.names", all.x=TRUE)
rownames(g2c_df) <- g2c_df[[1]]
g2c_df <- g2c_df[, -1]
}
## Starting 1: aDC
## Starting 2: Adipocytes
## Starting 3: Astrocytes
## Starting 4: B-cells
## Starting 5: Basophils
## Starting 6: CD4+ memory T-cells
## Starting 7: CD4+ naive T-cells
## Starting 8: CD4+ T-cells
## Starting 9: CD4+ Tcm
## Starting 10: CD4+ Tem
## Starting 11: CD8+ naive T-cells
## Starting 12: CD8+ T-cells
## Starting 13: CD8+ Tcm
## Starting 14: CD8+ Tem
## Starting 15: cDC
## Starting 16: Chondrocytes
## Starting 17: Class-switched memory B-cells
## Starting 18: CLP
## Starting 19: CMP
## Starting 20: DC
## Starting 21: Endothelial cells
## Starting 22: Eosinophils
## Starting 23: Epithelial cells
## Starting 24: Erythrocytes
## Starting 25: Fibroblasts
## Starting 26: GMP
## Starting 27: Hepatocytes
## Starting 28: HSC
## Starting 29: iDC
## Starting 30: Keratinocytes
## Starting 31: ly Endothelial cells
## Starting 32: Macrophages
## Starting 33: Macrophages M1
## Starting 34: Macrophages M2
## Starting 35: Mast cells
## Starting 36: Megakaryocytes
## Starting 37: Melanocytes
## Starting 38: Memory B-cells
## Starting 39: MEP
## Starting 40: Mesangial cells
## Starting 41: Monocytes
## Starting 42: MPP
## Starting 43: MSC
## Starting 44: mv Endothelial cells
## Starting 45: Myocytes
## Starting 46: naive B-cells
## Starting 47: Neurons
## Starting 48: Neutrophils
## Starting 49: NK cells
## Starting 50: NKT
## Starting 51: Osteoblast
## Starting 52: pDC
## Starting 53: Pericytes
## Starting 54: Plasma cells
## Starting 55: Platelets
## Starting 56: Preadipocytes
## Starting 57: pro B-cells
## Starting 58: Sebocytes
## Starting 59: Skeletal muscle
## Starting 60: Smooth muscle
## Starting 61: Tgd cells
## Starting 62: Th1 cells
## Starting 63: Th2 cells
## Starting 64: Tregs
## found aDC Adipocytes Astrocytes B-cells Basophils CD4+ memory T-cells
## A1CF 0 NA NA NA NA NA NA
## A4GALT 0 NA NA NA NA NA NA
## AAAS 0 NA NA NA NA NA NA
## AADAC 0 NA NA NA NA NA NA
## AAK1 0 NA NA NA NA NA NA
## AAMP 0 NA NA NA NA NA TRUE
## CD4+ naive T-cells CD4+ T-cells CD4+ Tcm CD4+ Tem CD8+ naive T-cells
## A1CF NA NA NA NA NA
## A4GALT NA NA NA NA NA
## AAAS NA NA NA NA NA
## AADAC NA NA NA NA NA
## AAK1 TRUE TRUE TRUE NA NA
## AAMP NA NA NA NA NA
## CD8+ T-cells CD8+ Tcm CD8+ Tem cDC Chondrocytes
## A1CF NA NA NA NA NA
## A4GALT NA NA NA NA NA
## AAAS NA NA NA NA NA
## AADAC NA NA NA NA NA
## AAK1 TRUE NA TRUE NA NA
## AAMP NA NA NA NA NA
## Class-switched memory B-cells CLP CMP DC Endothelial cells Eosinophils
## A1CF NA NA NA NA NA NA
## A4GALT NA NA NA NA NA NA
## AAAS NA NA NA NA NA NA
## AADAC NA NA NA NA NA NA
## AAK1 NA NA NA NA NA NA
## AAMP NA NA NA NA NA NA
## Epithelial cells Erythrocytes Fibroblasts GMP Hepatocytes HSC iDC
## A1CF NA NA NA NA TRUE NA NA
## A4GALT TRUE NA NA NA NA NA NA
## AAAS NA NA NA NA NA NA NA
## AADAC NA NA NA NA TRUE NA NA
## AAK1 NA NA NA NA NA NA NA
## AAMP NA NA NA NA NA NA NA
## Keratinocytes ly Endothelial cells Macrophages Macrophages M1
## A1CF NA NA NA NA
## A4GALT NA NA NA NA
## AAAS NA NA NA NA
## AADAC NA NA NA NA
## AAK1 NA NA NA NA
## AAMP NA NA NA NA
## Macrophages M2 Mast cells Megakaryocytes Melanocytes Memory B-cells MEP
## A1CF NA NA NA NA NA NA
## A4GALT NA NA NA NA NA NA
## AAAS NA NA NA NA NA NA
## AADAC NA NA NA NA NA NA
## AAK1 NA NA NA NA NA NA
## AAMP NA NA NA NA NA NA
## Mesangial cells Monocytes MPP MSC mv Endothelial cells Myocytes
## A1CF NA NA NA NA NA NA
## A4GALT NA NA NA NA NA NA
## AAAS NA NA NA TRUE NA NA
## AADAC NA NA NA NA NA NA
## AAK1 NA NA NA NA NA NA
## AAMP NA NA NA NA NA NA
## naive B-cells Neurons Neutrophils NK cells NKT Osteoblast pDC Pericytes
## A1CF NA NA NA NA NA NA NA NA
## A4GALT NA NA NA NA NA NA NA NA
## AAAS NA NA NA NA NA NA NA NA
## AADAC NA NA NA NA NA NA NA NA
## AAK1 NA NA NA NA NA NA NA NA
## AAMP NA NA NA NA NA NA NA NA
## Plasma cells Platelets Preadipocytes pro B-cells Sebocytes
## A1CF NA NA NA NA NA
## A4GALT NA NA NA NA NA
## AAAS NA NA NA NA NA
## AADAC NA NA NA NA NA
## AAK1 NA NA NA NA NA
## AAMP NA NA NA NA NA
## Skeletal muscle Smooth muscle Tgd cells Th1 cells Th2 cells Tregs
## A1CF NA NA NA NA NA NA
## A4GALT NA NA NA NA NA NA
## AAAS NA NA NA NA NA NA
## AADAC NA NA NA NA NA NA
## AAK1 NA NA NA NA NA NA
## AAMP NA NA NA NA NA NA
Annotation for gene ontologies may be gathered from a similarly large number of sources. The following are a couple.
## Try using biomart
hs_go <- sm(load_biomart_go())
## or the org.Hs.eg.db sqlite database
tt <- sm(library("Homo.sapiens"))
hs <- Homo.sapiens
##hs_go_ensembl <- load_orgdb_go(hs, hs_annotations$geneID)
##dim(hs_go_biomart)
##dim(hs_go_ensembl)
##hs_goids <- hs_go_biomart
## While testing, I called this desc, that will need to change.
##lp_tooltips <- make_tooltips(lp_annotations)
##lb_tooltips <- make_tooltips(lb_annotations)
lp_lengths <- lp_annotations[, c("ID", "width")]
lb_lengths <- lb_annotations[, c("ID", "width")]
hs_lengths <- hs_annotations[, c("ensembl_gene_id", "cds_length")]
colnames(hs_lengths) <- c("ID", "width")
lp_goids <- read.csv(file="reference/lpan_go.txt.xz", sep="\t", header=FALSE)
lb_goids <- read.csv(file="reference/lbraz_go.txt.xz", sep="\t", header=FALSE)
colnames(lp_goids) <- c("ID","GO","ont","name","source","tag")
colnames(lb_goids) <- c("ID","GO","ont","name","source","tag")
The macrophage experiment has samples across 2 contexts, the host and parasite. The following block sets up one experiment for each. If you open the all_samples-species.xlsx files, you will note immediately that a few different attempts were made at ascertaining the most likely experimental factors that contributed to the readily apparent batch effects.
Keep in mind that if I change the experimental design with new annotations, I must therefore regenerate the following.
hs_final_annotations <- hs_annotations
hs_final_annotations <- hs_final_annotations[, c("ensembl_transcript_id", "ensembl_gene_id", "cds_length",
"hgnc_symbol", "description", "gene_biotype")]
hs_final_annotations$rn <- rownames(hs_final_annotations)
note <- "New experimental design factors by snp added 2016-09-20"
hs_final_annotations <- merge(hs_final_annotations, g2c_df,
by.x="hgnc_symbol", by.y="row.names", all.x=TRUE)
rownames(hs_final_annotations) <- hs_final_annotations$rn
hs_final_annotations$rn <- NULL
na_idx <- is.na(hs_final_annotations$xcell_types)
hs_final_annotations[na_idx, "xcell_types"] <- ""
hs_macr <- create_expt(
metadata=glue::glue("sample_sheets/macrophage_samples_{ver}.xlsx"),
gene_info=hs_final_annotations,
file_column="humanfile",
notes=note)
## Reading the sample metadata.
## Dropped 1 rows from the sample metadata because they were blank.
## The sample definitions comprises: 11 rows(samples) and 56 columns(metadata fields).
## Reading count tables.
## Reading count files with read.table().
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0241/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0242/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0243/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0244/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0245/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0246/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0247/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0248/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0637/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0638/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0639/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## Finished reading count data.
## Matched 43897 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 51041 rows and 11 columns.
hs_annotations <- fData(hs_macr)
undef_idx <- hs_annotations == "undefined"
hs_annotations[undef_idx] <- FALSE
fData(hs_macr[["expressionset"]]) <- hs_annotations
knitr::kable(head(hs_macr$design, n=1))
sampleid | pathogenstrain | experimentname | tubelabel | alias | condition | batch | anotherbatch | snpclade | snpcladev2 | snpcladev3 | pathogenstrain1 | label | donor | time | pctmappedparasite | pctcategory | state | sourcelab | expperson | pathogen | host | hostcelltype | noofhostcells | infectionperiodhpitimeofharvest | moiexposure | parasitespercell | pctinf | rnangul | rnaqcpassed | libraryconst | libqcpassed | index | descriptonandremarks | observation | lowercaseid | humanfile | parasitefile | bcftable | salmonreads | hssalmonmapped | hssalmonmaprate | lpsalmonmapped | lpsalmonmaprate | tophatpairs | hstophataligned | hstophatpct | hstophatmulti | hstophatdiscordant | hstophatconcordantpct | lptophataligned | lptophatpct | lptophatmulti | lptophatdiscordant | lpconcordantpct | variantpositions | file | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
HPGL0241 | HPGL0241 | none | macrophage | TM130-Nil (Blue label) | Nil | uninf | a | a | undef | undef | undef | none | uninf_2 | d130 | undef | undef | 0 | uninfected | Ade | Adriana | none | Human | Human macs | Max 2 mill | 2h - 24h chase period | NA | unknown | unknown | 468 | Y | Wanderson | Y | 1 | Uninfected human macrophages | NA | hpgl0241 | preprocessing/hpgl0241/outputs/tophat_hsapiens/accepted_paired.count.xz | undef | undef | 46628648 | 26156539 | 0.561 | NA | NA | 46319335 | 40905961 | 0.8831 | 1374099 | 1430888 | 0.8522 | NA | NA | NA | NA | NA | NA | null |
##cds_entries <- fData(hs_macr)
##cds_entries <- cds_entries[["gene_biotype"]] == "protein_coding"
##hs_cds_macr <- hs_macr
##hs_cds_macr$expressionset <- hs_cds_macr$expressionset[cds_entries, ]
##new_cds_entries <- fData(hs_cds_macr)
hs_cds_macr <- exclude_genes_expt(hs_macr, method="keep",
column="gene_biotype",
patterns="protein_coding")
## Before removal, there were 51041 entries.
## Now there are 18847 entries.
## Percent kept: 93.209, 95.095, 95.545, 96.588, 95.913, 96.249, 95.758, 96.106, 96.506, 96.391, 95.414
## Percent removed: 6.791, 4.905, 4.455, 3.412, 4.087, 3.751, 4.242, 3.894, 3.494, 3.609, 4.586
lp_macr <- sm(create_expt(
metadata=glue::glue("sample_sheets/macrophage_samples_{ver}.xlsx"),
gene_info=lp_annotations, file_column="parasitefile"))
knitr::kable(head(lp_macr$design, n=3),
caption="The first three rows of the parasite experimental design.")
sampleid | pathogenstrain | experimentname | tubelabel | alias | condition | batch | anotherbatch | snpclade | snpcladev2 | snpcladev3 | pathogenstrain1 | label | donor | time | pctmappedparasite | pctcategory | state | sourcelab | expperson | pathogen | host | hostcelltype | noofhostcells | infectionperiodhpitimeofharvest | moiexposure | parasitespercell | pctinf | rnangul | rnaqcpassed | libraryconst | libqcpassed | index | descriptonandremarks | observation | lowercaseid | humanfile | parasitefile | bcftable | salmonreads | hssalmonmapped | hssalmonmaprate | lpsalmonmapped | lpsalmonmaprate | tophatpairs | hstophataligned | hstophatpct | hstophatmulti | hstophatdiscordant | hstophatconcordantpct | lptophataligned | lptophatpct | lptophatmulti | lptophatdiscordant | lpconcordantpct | variantpositions | file | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
HPGL0242 | HPGL0242 | s2271 | macrophage | TM130-2271 | Self-Healing | sh | a | a | white | whitepink | right | s2271 | sh_2271 | d130 | undef | 30 | 3 | self_heal | Ade | Adriana | Lp | Human | Human macs | Max 2 mill | 2h - 24h chase period | 0.0486 | unknown | unknown | 276 | Y | Wanderson | Y | 8 | Infected human macrophages. | NA | hpgl0242 | preprocessing/hpgl0242/outputs/tophat_hsapiens/accepted_paired.count.xz | preprocessing/hpgl0242/outputs/tophat_lpanamensis/accepted_paired.count.xz | preprocessing/outputs/hpgl0242_parsed_count.txt | 42742857 | 17945935 | 0.4199 | 8023463 | 0.1877 | 42612353 | 25394266 | 0.5959 | 869649 | 784620 | 0.5775 | 13117819 | 0.3078 | 350277 | 263923 | 0.3016 | 3930 | null |
HPGL0243 | HPGL0243 | s2272 | macrophage | TM130-2272 | Self-Healing | sh | a | a | white | whitepink | right | s2272 | sh_2272 | d130 | undef | 30 | 3 | self_heal | Ade | Adriana | Lp | Human | Human macs | Max 2 mill | 2h - 24h chase period | 0.0486 | unknown | unknown | 532 | Y | Wanderson | Y | 10 | Infected human macrophages | NA | hpgl0243 | preprocessing/hpgl0243/outputs/tophat_hsapiens/accepted_paired.count.xz | preprocessing/hpgl0243/outputs/tophat_lpanamensis/accepted_paired.count.xz | preprocessing/outputs/hpgl0243_parsed_count.txt | 46796079 | 21046460 | 0.4497 | 6823750 | 0.1458 | 47344642 | 31160297 | 0.6582 | 1000248 | 924296 | 0.6386 | 11581460 | 0.2446 | 319338 | 245169 | 0.2394 | NA | null |
HPGL0244 | HPGL0244 | s5433 | macrophage | TM130-5433 | Chronic | chr | a | a | blue_self | blue | left | s5433 | chr_5433 | d130 | undef | 15 | 1 | chronic | Ade | Adriana | Lp | Human | Human macs | Max 2 mill | 2h - 24h chase period | 0.0486 | unknown | unknown | 261 | Y | Wanderson | Y | 27 | Infected human macrophages | NA | hpgl0244 | preprocessing/hpgl0244/outputs/tophat_hsapiens/accepted_paired.count.xz | preprocessing/hpgl0244/outputs/tophat_lpanamensis/accepted_paired.count.xz | preprocessing/outputs/hpgl0244_parsed_count.txt | 47150925 | 25281958 | 0.5362 | 3761371 | 0.0798 | 46925604 | 36379602 | 0.7753 | 1070964 | 991929 | 0.7541 | 5755998 | 0.1227 | 154830 | 116414 | 0.1202 | 85981 | null |
Table S1 is going to be a summary of the metadata in all_samples-combined This may also include some of the numbers regarding mapping %, etc.
Wanted columns:
This table is maintained as an excel file in the sample_sheets/ directory.
This document is concerned with analyzing RNAseq data of human and parasite during the infection of human macrophages. A few different strains of L. panamensis were used; the experiment is therefore segregated into groups named ‘self-healing’, ‘chronic’, and ‘uninfected’. Two separate sets of libraries were generated, one earlier set with greater coverage, and a later set including only 1 uninfected sample, and 2-3 chronic samples.
The following subset operation extract the samples used for the macrophage experiment. The next three lines then change the colors from the defaults.
new_colors <- c("#009900", "#990000", "#000099")
names(new_colors) <- c("uninf", "chr", "sh")
hs_macr <- set_expt_colors(hs_macr, colors=new_colors)
## The new colors are a character, changing according to condition.
labels <- as.character(pData(hs_macr)[["label"]])
hs_macr <- set_expt_samplenames(hs_macr, labels)
hs_cds_macr <- set_expt_colors(hs_cds_macr, colors=new_colors)
## The new colors are a character, changing according to condition.
In our meeting this morning (20190708), Najib and Maria Adelaida asked for how the data looks depending on batch methodology.
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 6993 low-count genes (11854 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 3 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
## Writing the image to: cpm_quant_filter_pca.png and calling dev.off().
limma_batch <- normalize_expt(hs_cds_macr, filter=TRUE,
convert="cpm", norm="quant",
transform="log2", batch="limma")
## This function will replace the expt$expressionset slot with:
## log2(limma(cpm(quant(cbcb(data)))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Step 1: performing count filter with option: cbcb
## Removing 6993 low-count genes (11854 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 3 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with limma.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 128654 entries are x>1: 99%.
## batch_counts: Before batch/surrogate estimation, 3 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 1737 entries are 0<x<1: 1%.
## The be method chose 1 surrogate variables.
## batch_counts: Using limma's removeBatchEffect to remove batch effect.
## If you receive a warning: 'NANs produced', one potential reason is that the data was quantile normalized.
## There are 2 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Writing the image to: lcqf_limma.png and calling dev.off().
pca_batch <- normalize_expt(hs_cds_macr, filter=TRUE,
convert="cpm", norm="quant",
transform="log2", batch="pca")
## This function will replace the expt$expressionset slot with:
## log2(pca(cpm(quant(cbcb(data)))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Step 1: performing count filter with option: cbcb
## Removing 6993 low-count genes (11854 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 3 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with pca.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 128654 entries are x>1: 99%.
## batch_counts: Before batch/surrogate estimation, 3 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 1737 entries are 0<x<1: 1%.
## The be method chose 1 surrogate variables.
## Attempting pca surrogate estimation with 1 surrogate.
## There are 1 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Writing the image to: lcqf_pca.png and calling dev.off().
svaseq_batch <- normalize_expt(hs_cds_macr, filter=TRUE,
convert="cpm",
transform="log2", batch="svaseq")
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 6993 low-count genes (11854 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 77 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with svaseq.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 128099 entries are x>1: 98%.
## batch_counts: Before batch/surrogate estimation, 77 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 2218 entries are 0<x<1: 2%.
## The be method chose 1 surrogate variables.
## Attempting svaseq estimation with 1 surrogate.
## There are 2 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Writing the image to: lcqf_svaseq.png and calling dev.off().
sva_batch <- normalize_expt(hs_cds_macr, filter=TRUE,
convert="cpm",
transform="log2", batch="fsva")
## This function will replace the expt$expressionset slot with:
## log2(fsva(cpm(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 6993 low-count genes (11854 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 77 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with fsva.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 128099 entries are x>1: 98%.
## batch_counts: Before batch/surrogate estimation, 77 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 2218 entries are 0<x<1: 2%.
## The be method chose 1 surrogate variables.
## Attempting fsva surrogate estimation with 1 surrogate.
## There are 2 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Writing the image to: lcqf_sva.png and calling dev.off().
combat_batch <- normalize_expt(hs_cds_macr, filter=TRUE,
convert="cpm",
transform="log2", batch="combat")
## This function will replace the expt$expressionset slot with:
## log2(combat(cpm(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 6993 low-count genes (11854 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 77 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with combat.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 128099 entries are x>1: 98%.
## batch_counts: Before batch/surrogate estimation, 77 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 2218 entries are 0<x<1: 2%.
## The be method chose 1 surrogate variables.
## batch_counts: Using combat with a prior, no scaling, and a null model.
## There are 13 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Writing the image to: lcqf_combat.png and calling dev.off().
combatnp_batch <- normalize_expt(hs_cds_macr, filter=TRUE,
convert="cpm",
transform="log2", batch="combat_noprior")
## This function will replace the expt$expressionset slot with:
## log2(combat_noprior(cpm(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 6993 low-count genes (11854 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 77 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with combat_noprior.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 128099 entries are x>1: 98%.
## batch_counts: Before batch/surrogate estimation, 77 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 2218 entries are 0<x<1: 2%.
## The be method chose 1 surrogate variables.
## batch_counts: Using combat without a prior and no scaling.
## This takes a long time!
## There are 18 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Writing the image to: lcqf_combatnp.png and calling dev.off().
Figure S1 should include nice versions of the sample metrics. The normalization chosen is batch-in-model.
First, however, we will make some plots of the raw data.
Sample names are going to be ‘infectionstate_strainnumber’ : chr_7721
fig_s1 <- sm(write_expt(
hs_cds_macr, norm="raw", violin=FALSE, convert="cpm",
transform="log2", batch="svaseq", filter=TRUE,
excel=paste0("excel/figure_s1_sample_estimation-v", ver, ".xlsx")))
## Writing the image to: fig_s1a_libsizes.tif and calling dev.off().
## Writing the image to: fig_s1b_heatmap.tif and calling dev.off().
## Writing the image to: fig_s1c_raw_pca.tif and calling dev.off().
## Writing the image to: fig_1a_norm_pca.tif and calling dev.off().
The most likely batch method from the above is svaseq. Let us perform differential expression analyses with it along with a few other methods.
hs_contrasts <- list(
"macro_chr-sh" = c("chr","sh"),
"macro_chr-nil" = c("chr","uninf"),
"macro_sh-nil" = c("sh", "uninf"))
## Set up the data used in the comparative contrast sets.
Print a reminder of what we can expect when doing this with no batch information.
hs_macr_lowfilt <- sm(normalize_expt(hs_cds_macr, filter=TRUE))
hs_lowfilt_pca <- sm(plot_pca(hs_cds_macr, transform="log2"))
hs_lowfilt_pca$plot
## Plotting a PCA before surrogate/batch inclusion.
## Not putting labels on the PC plot.
## Assuming no batch in model for testing pca.
## Not putting labels on the PC plot.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
## wow, all tools including basic agree almost completely
medians_by_condition <- hs_macr_nobatch$basic$medians
excel_file <- glue::glue("excel/{rundate}_hs_macr_nobatch_contr-v{ver}.xlsx")
hs_macr_nobatch_tables <- sm(combine_de_tables(hs_macr_nobatch,
excel=excel_file,
keepers=hs_contrasts,
extra_annot=medians_by_condition))
hs_lowfilt_batch_pca <- sm(plot_pca(hs_cds_macr, transform="log2",
convert="cpm", batch="limma", filter=TRUE))
hs_lowfilt_batch_pca$plot
In this attempt, we add a batch factor in the experimental model and see how it does.
## Here just let all_pairwise run on filtered data and do its normal ~ 0 + condition + batch analyses
hs_macr_batch <- all_pairwise(input=hs_cds_macr, batch=TRUE, limma_method="robust")
## Plotting a PCA before surrogate/batch inclusion.
## Not putting labels on the PC plot.
## Using limma's removeBatchEffect to visualize with(out) batch inclusion.
## Not putting labels on the PC plot.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
medians_by_condition <- hs_macr_batch$basic$medians
excel_file <- glue::glue("excel/{rundate}_hs_macr_batchmodel_contr-v{ver}.xlsx")
hs_macr_batch_tables <- combine_de_tables(
hs_macr_batch,
keepers=hs_contrasts,
extra_annot=medians_by_condition,
excel=excel_file)
## Writing a legend of columns.
## Printing a pca plot before/after surrogates/batch estimation.
## Working on 1/3: macro_chr-sh which is: chr/sh.
## Found inverse table with sh_vs_chr
## Working on 2/3: macro_chr-nil which is: chr/uninf.
## Found inverse table with uninf_vs_chr
## Working on 3/3: macro_sh-nil which is: sh/uninf.
## Found inverse table with uninf_vs_sh
## Adding venn plots for macro_chr-sh.
## Limma expression coefficients for macro_chr-sh; R^2: 0.994; equation: y = 0.999x + 0.0129
## Deseq expression coefficients for macro_chr-sh; R^2: 0.987; equation: y = 0.959x + 0.31
## Edger expression coefficients for macro_chr-sh; R^2: 0.992; equation: y = 0.987x + 0.138
## Adding venn plots for macro_chr-nil.
## Limma expression coefficients for macro_chr-nil; R^2: 0.994; equation: y = 0.999x + 0.0129
## Deseq expression coefficients for macro_chr-nil; R^2: 0.987; equation: y = 0.959x + 0.31
## Edger expression coefficients for macro_chr-nil; R^2: 0.992; equation: y = 0.987x + 0.138
## Adding venn plots for macro_sh-nil.
## Limma expression coefficients for macro_sh-nil; R^2: 0.994; equation: y = 0.999x + 0.0129
## Deseq expression coefficients for macro_sh-nil; R^2: 0.987; equation: y = 0.959x + 0.31
## Edger expression coefficients for macro_sh-nil; R^2: 0.992; equation: y = 0.987x + 0.138
## Writing summary information, compare_plot is: TRUE.
## Performing save of excel/20200710_hs_macr_batchmodel_contr-v20200706.xlsx.
excel_file <- glue::glue("excel/{rundate}_hs_macr_batchmodel_sig-v{ver}.xlsx")
hs_macr_batch_sig <- extract_significant_genes(
hs_macr_batch_tables, excel=excel_file,
according_to="deseq")
## Writing a legend of columns.
## Printing significant genes to the file: excel/20200710_hs_macr_batchmodel_sig-v20200706.xlsx
## 1/3: Creating significant table up_deseq_macro_chr-sh
## 2/3: Creating significant table up_deseq_macro_chr-nil
## The up table macro_sh-nil is empty.
## The down table macro_sh-nil is empty.
## Adding significance bar plots.
hs_lowfilt_sva_pca <- sm(plot_pca(hs_cds_macr, transform="log2",
convert="cpm", model_batch="svaseq", filter=TRUE))
hs_lowfilt_sva_pca$plot
In this attempt, we tell all pairwise to invoke svaseq.
## batch_counts: Before batch/surrogate estimation, 130219 entries are x>1: 100%.
## batch_counts: Before batch/surrogate estimation, 77 entries are x==0: 0%.
## The be method chose 1 surrogate variables.
## Attempting svaseq estimation with 1 surrogate.
## Plotting a PCA before surrogate/batch inclusion.
## Not putting labels on the PC plot.
## Using svaseq to visualize before/after batch inclusion.
## Performing a test normalization with: raw
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 0 low-count genes (11854 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 77 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with svaseq.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 128099 entries are x>1: 98%.
## batch_counts: Before batch/surrogate estimation, 77 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 2218 entries are 0<x<1: 2%.
## The be method chose 1 surrogate variables.
## Attempting svaseq estimation with 1 surrogate.
## There are 2 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Not putting labels on the PC plot.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
medians_by_condition <- hs_macr_sva$basic$medians
excel_file <- glue::glue("excel/{rundate}_hs_macr_svamodel_contr-v{ver}.xlsx")
hs_macr_sva_tables <- combine_de_tables(
hs_macr_sva,
keepers=hs_contrasts,
extra_annot=medians_by_condition,
excel=excel_file)
## Writing a legend of columns.
## Printing a pca plot before/after surrogates/batch estimation.
## Working on 1/3: macro_chr-sh which is: chr/sh.
## Found inverse table with sh_vs_chr
## Working on 2/3: macro_chr-nil which is: chr/uninf.
## Found inverse table with uninf_vs_chr
## Working on 3/3: macro_sh-nil which is: sh/uninf.
## Found inverse table with uninf_vs_sh
## Adding venn plots for macro_chr-sh.
## Limma expression coefficients for macro_chr-sh; R^2: 0.976; equation: y = 0.974x + 0.129
## Deseq expression coefficients for macro_chr-sh; R^2: 0.975; equation: y = 0.958x + 0.344
## Edger expression coefficients for macro_chr-sh; R^2: 0.975; equation: y = 0.958x + 0.27
## Adding venn plots for macro_chr-nil.
## Limma expression coefficients for macro_chr-nil; R^2: 0.976; equation: y = 0.974x + 0.129
## Deseq expression coefficients for macro_chr-nil; R^2: 0.975; equation: y = 0.958x + 0.344
## Edger expression coefficients for macro_chr-nil; R^2: 0.975; equation: y = 0.958x + 0.27
## Adding venn plots for macro_sh-nil.
## Limma expression coefficients for macro_sh-nil; R^2: 0.976; equation: y = 0.974x + 0.129
## Deseq expression coefficients for macro_sh-nil; R^2: 0.975; equation: y = 0.958x + 0.344
## Edger expression coefficients for macro_sh-nil; R^2: 0.975; equation: y = 0.958x + 0.27
## Writing summary information, compare_plot is: TRUE.
## Performing save of excel/20200710_hs_macr_svamodel_contr-v20200706.xlsx.
excel_file <- glue::glue("excel/{rundate}_hs_macr_svamodel_sig-v{ver}.xlsx")
hs_macr_sva_sig <- extract_significant_genes(
hs_macr_sva_tables, excel=excel_file,
according_to="deseq")
## Writing a legend of columns.
## Printing significant genes to the file: excel/20200710_hs_macr_svamodel_sig-v20200706.xlsx
## 1/3: Creating significant table up_deseq_macro_chr-sh
## 2/3: Creating significant table up_deseq_macro_chr-nil
## 3/3: Creating significant table up_deseq_macro_sh-nil
## Adding significance bar plots.
As you know, combat actually changes the data, so it will require a slightly different setup.
## This function will replace the expt$expressionset slot with:
## combat_noprior(cpm(cbcb(data)))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 6993 low-count genes (11854 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## Step 4: not transforming the data.
## Step 5: doing batch correction with combat_noprior.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 128099 entries are x>1: 98%.
## batch_counts: Before batch/surrogate estimation, 77 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 2218 entries are 0<x<1: 2%.
## The be method chose 1 surrogate variables.
## batch_counts: Using combat without a prior and no scaling.
## This takes a long time!
## There are 133 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
In this attempt, we tell all pairwise to invoke svaseq.
hs_macr_combat <- all_pairwise(input=combatnp_input, model_batch=FALSE,
force=TRUE, limma_method="robust")
## Plotting a PCA before surrogate/batch inclusion.
## Not putting labels on the PC plot.
## Assuming no batch in model for testing pca.
## Not putting labels on the PC plot.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
excel_file <- glue::glue("excel/{rundate}_hs_macr_combat_contr-v{ver}.xlsx")
hs_macr_combat_tables <- combine_de_tables(
hs_macr_combat,
keepers=hs_contrasts,
extra_annot=medians_by_condition,
excel=excel_file)
## Writing a legend of columns.
## Printing a pca plot before/after surrogates/batch estimation.
## Working on 1/3: macro_chr-sh which is: chr/sh.
## Found inverse table with sh_vs_chr
## Working on 2/3: macro_chr-nil which is: chr/uninf.
## Found inverse table with uninf_vs_chr
## Working on 3/3: macro_sh-nil which is: sh/uninf.
## Found inverse table with uninf_vs_sh
## Adding venn plots for macro_chr-sh.
## Limma expression coefficients for macro_chr-sh; R^2: 0.98; equation: y = 0.995x - 0.00121
## Deseq expression coefficients for macro_chr-sh; R^2: 0.979; equation: y = 0.934x + 0.334
## Edger expression coefficients for macro_chr-sh; R^2: 0.979; equation: y = 0.935x + 0.361
## Adding venn plots for macro_chr-nil.
## Limma expression coefficients for macro_chr-nil; R^2: 0.98; equation: y = 0.995x - 0.00121
## Deseq expression coefficients for macro_chr-nil; R^2: 0.979; equation: y = 0.934x + 0.334
## Edger expression coefficients for macro_chr-nil; R^2: 0.979; equation: y = 0.935x + 0.361
## Adding venn plots for macro_sh-nil.
## Limma expression coefficients for macro_sh-nil; R^2: 0.98; equation: y = 0.995x - 0.00121
## Deseq expression coefficients for macro_sh-nil; R^2: 0.979; equation: y = 0.934x + 0.334
## Edger expression coefficients for macro_sh-nil; R^2: 0.979; equation: y = 0.935x + 0.361
## Writing summary information, compare_plot is: TRUE.
## Performing save of excel/20200710_hs_macr_combat_contr-v20200706.xlsx.
excel_file <- glue::glue("excel/{rundate}_hs_macr_combat_sig-v{ver}.xlsx")
hs_macr_combat_sig <- extract_significant_genes(
hs_macr_combat_tables, excel=excel_file,
according_to="deseq")
## Writing a legend of columns.
## Printing significant genes to the file: excel/20200710_hs_macr_combat_sig-v20200706.xlsx
## 1/3: Creating significant table up_deseq_macro_chr-sh
## 2/3: Creating significant table up_deseq_macro_chr-nil
## The up table macro_sh-nil is empty.
## The down table macro_sh-nil is empty.
## Adding significance bar plots.
## Testing method: limma.
## Adding method: limma to the set.
## Testing method: deseq.
## Adding method: deseq to the set.
## Testing method: edger.
## Adding method: edger to the set.
## Starting method limma, table macro_chr-sh.
## Starting method limma, table macro_chr-nil.
## Starting method limma, table macro_sh-nil.
## Starting method deseq, table macro_chr-sh.
## Starting method deseq, table macro_chr-nil.
## Starting method deseq, table macro_sh-nil.
## Warning in cor(x = fs[["x"]], y = fs[["y"]], method = cor_method): the standard
## deviation is zero
## Starting method edger, table macro_chr-sh.
## Starting method edger, table macro_chr-nil.
## Starting method edger, table macro_sh-nil.
## macro_chr-sh macro_chr-nil macro_sh-nil
## 0.6481 0.6568 0.9817
## Testing method: limma.
## Adding method: limma to the set.
## Testing method: deseq.
## Adding method: deseq to the set.
## Testing method: edger.
## Adding method: edger to the set.
## Starting method limma, table macro_chr-sh.
## Starting method limma, table macro_chr-nil.
## Starting method limma, table macro_sh-nil.
## Starting method deseq, table macro_chr-sh.
## Starting method deseq, table macro_chr-nil.
## Starting method deseq, table macro_sh-nil.
## Starting method edger, table macro_chr-sh.
## Starting method edger, table macro_chr-nil.
## Starting method edger, table macro_sh-nil.
## macro_chr-sh macro_chr-nil macro_sh-nil
## 0.6972 0.3393 0.4762
## Testing method: limma.
## Adding method: limma to the set.
## Testing method: deseq.
## Adding method: deseq to the set.
## Testing method: edger.
## Adding method: edger to the set.
## Starting method limma, table macro_chr-sh.
## Starting method limma, table macro_chr-nil.
## Starting method limma, table macro_sh-nil.
## Starting method deseq, table macro_chr-sh.
## Starting method deseq, table macro_chr-nil.
## Starting method deseq, table macro_sh-nil.
## Starting method edger, table macro_chr-sh.
## Starting method edger, table macro_chr-nil.
## Starting method edger, table macro_sh-nil.
## macro_chr-sh macro_chr-nil macro_sh-nil
## 0.9207 0.9153 0.8649
## Testing method: limma.
## Adding method: limma to the set.
## Testing method: deseq.
## Adding method: deseq to the set.
## Testing method: edger.
## Adding method: edger to the set.
## Starting method limma, table macro_chr-sh.
## Starting method limma, table macro_chr-nil.
## Starting method limma, table macro_sh-nil.
## Starting method deseq, table macro_chr-sh.
## Starting method deseq, table macro_chr-nil.
## Starting method deseq, table macro_sh-nil.
## Starting method edger, table macro_chr-sh.
## Starting method edger, table macro_chr-nil.
## Starting method edger, table macro_sh-nil.
## macro_chr-sh macro_chr-nil macro_sh-nil
## 0.6693 0.3243 0.3767
batchmodel_volcano <- plot_volcano_de(
table=hs_macr_batch_tables[["data"]][["macro_chr-sh"]],
color_by="state",
fc_col="deseq_logfc",
p_col="deseq_adjp",
shapes_by_state=FALSE,
line_position="top")
## The color list must have 4, setting it to the default.
svamodel_volcano <- plot_volcano_de(
table=hs_macr_sva_tables[["data"]][["macro_chr-sh"]],
color_by="state",
fc_col="deseq_logfc",
p_col="deseq_adjp",
shapes_by_state=FALSE,
line_position="top")
## The color list must have 4, setting it to the default.
## Working on contrast 1: sh_vs_chr.
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Loading required package: edgeR
## Loading required package: limma
##
## Attaching package: 'limma'
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## SS=3,3 SS=5,5 SS=7,7 SS=10,10
## 0.2 0.51 0.54 0.55 NaN
## 0.5 0.61 0.62 0.63 NaN
## 1 0.67 0.67 0.67 NaN
## 2 0.72 0.71 0.71 NaN
## 5 0.77 0.75 0.74 NaN
## 10 0.80 0.78 0.77 NaN
## Working on contrast 2: uninf_vs_chr.
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## SS=3,3 SS=5,5 SS=7,7 SS=10,10
## 0.2 0.54 0.56 0.56 NaN
## 0.5 0.63 0.64 0.64 NaN
## 1 0.69 0.68 0.68 NaN
## 2 0.73 0.72 0.71 NaN
## 5 0.78 0.76 0.75 NaN
## 10 0.81 0.79 0.77 NaN
## Working on contrast 3: uninf_vs_sh.
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in 1:Nreps: numerical expression has 4 elements: only the first used
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, power.lo[, j], stratas, power.hi[, j], length =
## 0.05, : zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, TD.lo[, j], stratas, TD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(stratas, FD.lo[, j], stratas, FD.hi[, j], length = 0.05, :
## zero-length arrow is of indeterminate angle and so skipped
## SS=3,3 SS=5,5 SS=7,7 SS=10,10
## 0.2 0.54 0.55 0.57 NaN
## 0.5 0.63 0.64 0.64 NaN
## 1 0.69 0.68 0.68 NaN
## 2 0.73 0.71 0.71 NaN
## 5 0.77 0.75 0.75 NaN
## 10 0.80 0.79 0.78 NaN
Recall that I made the variables ‘hs_macr_sva_sig’ and ‘hs_macr_sva_abun’ to hold the results of the most significantly changed and abundant genes.
gProfiler is my favorite tool for ontology searching, however they recently had a big update and split their code. The new version has all sorts of cool toys, but as of the last time I tried it, did not work. Thus the following is still using the older methods.
lfs_up <- hs_macr_sva_sig[["deseq"]][["ups"]][["macro_chr-sh"]]
lfs_down <- hs_macr_sva_sig[["deseq"]][["downs"]][["macro_chr-sh"]]
up_gp <- simple_gprofiler(sig_genes=lfs_up, species="hsapiens")
## Performing gProfiler GO search of 108 genes against hsapiens.
## GO search found 48 hits.
## Performing gProfiler KEGG search of 108 genes against hsapiens.
## KEGG search found 0 hits.
## Performing gProfiler REAC search of 108 genes against hsapiens.
## REAC search found 1 hits.
## Performing gProfiler MI search of 108 genes against hsapiens.
## MI search found 0 hits.
## Performing gProfiler TF search of 108 genes against hsapiens.
## TF search found 2 hits.
## Performing gProfiler CORUM search of 108 genes against hsapiens.
## CORUM search found 0 hits.
## Performing gProfiler HP search of 108 genes against hsapiens.
## HP search found 0 hits.
## Performing gProfiler GO search of 55 genes against hsapiens.
## GO search found 10 hits.
## Performing gProfiler KEGG search of 55 genes against hsapiens.
## KEGG search found 2 hits.
## Performing gProfiler REAC search of 55 genes against hsapiens.
## REAC search found 0 hits.
## Performing gProfiler MI search of 55 genes against hsapiens.
## MI search found 0 hits.
## Performing gProfiler TF search of 55 genes against hsapiens.
## TF search found 2 hits.
## Performing gProfiler CORUM search of 55 genes against hsapiens.
## CORUM search found 0 hits.
## Performing gProfiler HP search of 55 genes against hsapiens.
## HP search found 1 hits.
## Using the row names of your table.
## Found 108 genes out of 108 from the sig_genes in the go_db.
## Found 108 genes out of 108 from the sig_genes in the length_db.
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## simple_goseq(): Calculating q-values
## simple_goseq(): Filling godata with terms, this is slow.
## Testing that go categories are defined.
## Removing undefined categories.
## Gathering synonyms.
## Gathering category definitions.
## simple_goseq(): Making pvalue plots for the ontologies.
## Using the row names of your table.
## Found 55 genes out of 55 from the sig_genes in the go_db.
## Found 55 genes out of 55 from the sig_genes in the length_db.
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## simple_goseq(): Calculating q-values
## simple_goseq(): Filling godata with terms, this is slow.
## Testing that go categories are defined.
## Removing undefined categories.
## Gathering synonyms.
## Gathering category definitions.
## simple_goseq(): Making pvalue plots for the ontologies.
## simple_topgo(): Set densities=TRUE for ontology density plots.
## simple_topgo(): Set densities=TRUE for ontology density plots.
## Testing available OrgDb keytypes for the best mapping to entrez.
## Chose keytype: OMIM for all genes because it had 130 out of 61217 genes.
## Chose keytype: ENSEMBL for sig genes because it had 108 out of 108 genes.
## Unable to find the fold-change column in the de table, not doing gsea.
## Calculating GO groups.
## Found 151 MF, 527 BP, and 979 CC hits.
## Calculating enriched GO groups.
## Found 15 MF, 93 BP, and 9 CC enriched hits.
## Found 107 matches between the significant gene list and kegg universe.
## Performing KEGG analyses.
## Reading KEGG annotation online:
##
## Reading KEGG annotation online:
## Found 0 KEGG enriched hits.
## Plotting results.
## Testing available OrgDb keytypes for the best mapping to entrez.
## Chose keytype: OMIM for all genes because it had 130 out of 61217 genes.
## Chose keytype: ENSEMBL for sig genes because it had 55 out of 55 genes.
## Unable to find the fold-change column in the de table, not doing gsea.
## Calculating GO groups.
## Found 151 MF, 527 BP, and 979 CC hits.
## Calculating enriched GO groups.
## Found 16 MF, 18 BP, and 7 CC enriched hits.
## Found 54 matches between the significant gene list and kegg universe.
## Performing KEGG analyses.
## Found 0 KEGG enriched hits.
## Plotting results.
R version 4.0.0 (2020-04-24)
Platform: x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.UTF-8, LC_NUMERIC=C, LC_TIME=en_US.UTF-8, LC_COLLATE=en_US.UTF-8, LC_MONETARY=en_US.UTF-8, LC_MESSAGES=en_US.UTF-8, LC_PAPER=en_US.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.UTF-8 and LC_IDENTIFICATION=C
attached base packages: grid, stats4, parallel, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: Rgraphviz(v.2.32.0), topGO(v.2.40.0), SparseM(v.1.78), edgeR(v.3.30.3), limma(v.3.44.3), ruv(v.0.9.7.1), Homo.sapiens(v.1.3.1), TxDb.Hsapiens.UCSC.hg19.knownGene(v.3.2.2), org.Hs.eg.db(v.3.11.4), GO.db(v.3.11.4), OrganismDbi(v.1.30.0), GenomicFeatures(v.1.40.0), GenomicRanges(v.1.40.0), GenomeInfoDb(v.1.24.2), GSEABase(v.1.50.1), graph(v.1.66.0), annotate(v.1.66.0), XML(v.3.99-0.4), xCell(v.1.1.0), org.Cfasciculata.Cf.Cl.v46.eg.db(v.2020.07), org.Lmexicana.MHOMGT2001U1103.v46.eg.db(v.2020.07), org.Ldonovani.BPK282A1.v46.eg.db(v.2020.07), org.Lbraziliensis.MHOMBR75M2904.v46.eg.db(v.2020.07), org.Lpanamensis.MHOMCOL81L13.v46.eg.db(v.2020.07), org.Lmajor.Friedlin.v46.eg.db(v.2020.07), AnnotationDbi(v.1.50.1), IRanges(v.2.22.2), S4Vectors(v.0.26.1), futile.logger(v.1.4.3), hpgltools(v.1.0), testthat(v.2.3.2), Biobase(v.2.48.0) and BiocGenerics(v.0.34.0)
loaded via a namespace (and not attached): rappdirs(v.0.3.1), rtracklayer(v.1.48.0), AnnotationForge(v.1.30.1), R.methodsS3(v.1.8.0), tidyr(v.1.1.0), ggplot2(v.3.3.2), bit64(v.0.9-7), knitr(v.1.29), R.utils(v.2.9.2), DelayedArray(v.0.14.0), data.table(v.1.12.8), KEGGREST(v.1.28.0), RCurl(v.1.98-1.2), doParallel(v.1.0.15), generics(v.0.0.2), preprocessCore(v.1.50.0), callr(v.3.4.3), cowplot(v.1.0.0), lambda.r(v.1.2.4), usethis(v.1.6.1), RSQLite(v.2.2.0), europepmc(v.0.4), rBiopaxParser(v.2.28.0), bit(v.1.1-15.2), enrichplot(v.1.8.1), xml2(v.1.3.2), httpuv(v.1.5.4), SummarizedExperiment(v.1.18.1), assertthat(v.0.2.1), viridis(v.0.5.1), xfun(v.0.15), hms(v.0.5.3), evaluate(v.0.14), promises(v.1.1.1), DEoptimR(v.1.0-8), fansi(v.0.4.1), progress(v.1.2.2), caTools(v.1.18.0), dbplyr(v.1.4.4), geneplotter(v.1.66.0), igraph(v.1.2.5), DBI(v.1.1.0), purrr(v.0.3.4), ellipsis(v.0.3.1), dplyr(v.1.0.0), backports(v.1.1.8), biomaRt(v.2.44.1), vctrs(v.0.3.1), remotes(v.2.1.1), withr(v.2.2.0), ggforce(v.0.3.2), triebeard(v.0.3.0), robustbase(v.0.93-6), AnnotationHubData(v.1.18.0), GenomicAlignments(v.1.24.0), prettyunits(v.1.1.1), DOSE(v.3.14.0), crayon(v.1.3.4), genefilter(v.1.70.0), pkgconfig(v.2.0.3), labeling(v.0.3), tweenr(v.1.0.1), nlme(v.3.1-148), pkgload(v.1.1.0), devtools(v.2.3.0), rlang(v.0.4.6), lifecycle(v.0.2.0), downloader(v.0.4), BiocFileCache(v.1.12.0), directlabels(v.2020.6.17), AnnotationHub(v.2.20.0), rprojroot(v.1.3-2), polyclip(v.1.10-0), GSVA(v.1.36.2), matrixStats(v.0.56.0), Matrix(v.1.2-18), urltools(v.1.7.3), boot(v.1.3-25), base64enc(v.0.1-3), geneLenDataBase(v.1.24.0), ggridges(v.0.5.2), processx(v.3.4.3), png(v.0.1-7), viridisLite(v.0.3.0), bitops(v.1.0-6), R.oo(v.1.23.0), KernSmooth(v.2.23-17), pander(v.0.6.3), Biostrings(v.2.56.0), blob(v.1.2.1), stringr(v.1.4.0), qvalue(v.2.20.0), gProfileR(v.0.7.0), gridGraphics(v.0.5-0), scales(v.1.1.1), memoise(v.1.1.0), magrittr(v.1.5), plyr(v.1.8.6), gplots(v.3.0.4), gdata(v.2.18.0), zlibbioc(v.1.34.0), compiler(v.4.0.0), scatterpie(v.0.1.4), RColorBrewer(v.1.1-2), lme4(v.1.1-23), DESeq2(v.1.28.1), Rsamtools(v.2.4.0), cli(v.2.0.2), XVector(v.0.28.0), ps(v.1.3.3), formatR(v.1.7), MASS(v.7.3-51.6), mgcv(v.1.8-31), tidyselect(v.1.1.0), stringi(v.1.4.6), EuPathDB(v.1.6.0), highr(v.0.8), yaml(v.2.2.1), GOSemSim(v.2.14.0), askpass(v.1.1), locfit(v.1.5-9.4), ggrepel(v.0.8.2), biocViews(v.1.56.1), fastmatch(v.1.1-0), tools(v.4.0.0), rstudioapi(v.0.11), foreach(v.1.5.0), gridExtra(v.2.3), Rtsne(v.0.15), farver(v.2.0.3), ggraph(v.2.0.3), digest(v.0.6.25), rvcheck(v.0.1.8), BiocManager(v.1.30.10), pracma(v.2.2.9), shiny(v.1.5.0), quadprog(v.1.5-8), Rcpp(v.1.0.5), BiocVersion(v.3.11.1), later(v.1.1.0.1), httr(v.1.4.1), colorspace(v.1.4-1), rvest(v.0.3.5), fs(v.1.4.2), splines(v.4.0.0), RBGL(v.1.64.0), statmod(v.1.4.34), PROPER(v.1.20.0), graphlayouts(v.0.7.0), shinythemes(v.1.1.2), ggplotify(v.0.0.5), sessioninfo(v.1.1.1), xtable(v.1.8-4), jsonlite(v.1.7.0), nloptr(v.1.2.2.2), futile.options(v.1.0.1), tidygraph(v.1.2.0), corpcor(v.1.6.9), Vennerable(v.3.1.0.9000), R6(v.2.4.1), RUnit(v.0.4.32), pillar(v.1.4.4), htmltools(v.0.5.0), mime(v.0.9), glue(v.1.4.1), fastmap(v.1.0.1), minqa(v.1.2.4), clusterProfiler(v.3.16.0), BiocParallel(v.1.22.0), interactiveDisplayBase(v.1.26.3), codetools(v.0.2-16), fgsea(v.1.14.0), pkgbuild(v.1.0.8), lattice(v.0.20-41), tibble(v.3.0.2), sva(v.3.36.0), pbkrtest(v.0.4-8.6), BiasedUrn(v.1.07), curl(v.4.3), colorRamps(v.2.3), gtools(v.3.8.2), zip(v.2.0.4), openxlsx(v.4.1.5), openssl(v.1.4.2), survival(v.3.2-3), rmarkdown(v.2.3), desc(v.1.2.0), munsell(v.0.5.0), fastcluster(v.1.1.25), DO.db(v.2.9), GenomeInfoDbData(v.1.2.3), goseq(v.1.40.0), iterators(v.1.0.12), variancePartition(v.1.18.2), reshape2(v.1.4.4) and gtable(v.0.3.0)
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 29c7dde0a24277fa95ac712188385a347c14c6f1
## This is hpgltools commit: Thu Jul 9 10:37:52 2020 -0400: 29c7dde0a24277fa95ac712188385a347c14c6f1
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
## Saving to macrophage_all_analyses_202007-v20200706.rda.xz