I do not yet know much of the background of these samples. My understanding is that varying titers of vaccine to a balanced set of male and female mice.
I chose to use the ~ 2020 mm38_100 genome.
<- load_biomart_annotations(species = "mmusculus")[["annotation"]] mm_annot
## The biomart annotations file already exists, loading from it.
rownames(mm_annot) <- make.names(mm_annot[["ensembl_gene_id"]], unique = TRUE)
<- grepl(x = rownames(mm_annot), pattern = "\\.")
drop_tx <- mm_annot[!drop_tx, ]
mm_annot <- load_gff_annotations("~/libraries/genome/mm38_100.gff") mm_gff
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo = TRUE)
## Had a successful gff import with rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo = TRUE)
## Returning a df with 32 columns and 3899382 rows.
<- !is.na(mm_gff[["gene_id"]])
mm_gff_idx <- mm_gff[mm_gff_idx, ]
mm_gff rownames(mm_gff) <- make.names(mm_gff[["gene_id"]], unique = TRUE)
<- merge(mm_annot, mm_gff, by = "row.names", all.x = TRUE)
annotations rownames(annotations) <- annotations[["Row.names"]]
"Row.names"]] <- NULL
annotations[[rownames(annotations) <- paste0("gene:", rownames(annotations))
<- create_expt("sample_sheets/initial_metadata_20220221.xlsx",
mm_expt gene_info = annotations)
## Reading the sample metadata.
## The sample definitions comprises: 12 rows(samples) and 12 columns(metadata fields).
## Matched 25661 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 25760 features and 12 samples.
<- gsub(x = rownames(exprs(mm_expt)), pattern = "^gene:", replacement = "")
fixed_ids <- set_expt_genenames(mm_expt, ids = fixed_ids)
mm_expt
<- normalize_expt(mm_expt, transform = "log2",
mm_norm convert = "cpm", norm = "quant", filter = TRUE)
## Removing 14542 low-count genes (11218 remaining).
plot_pca(mm_norm)$plot
<- normalize_expt(mm_expt, transform = "log2", norm = "quant",
mm_nb filter = TRUE, batch = "combat")
## Removing 14542 low-count genes (11218 remaining).
## Setting 48 low elements to zero.
## transform_counts: Found 48 values equal to 0, adding 1 to the matrix.
plot_pca(mm_nb)$plot
<- subset_expt(mm_expt, subset = "batch=='female'") mm_female
## subset_expt(): There were 12, now there are 6 samples.
<- normalize_expt(mm_female, filter = TRUE, transform = "log2",
mm_female_norm norm = "quant", convert = "cpm")
## Removing 14751 low-count genes (11009 remaining).
plot_pca(mm_female_norm)$plot
<- all_pairwise(mm_female, filter = TRUE, model_batch = FALSE) female_de
##
## COPS:FliC PBS
## 3 3
<- list("vaccination" = c("COPSFliC", "PBS"))
keeper <- combine_de_tables(
female_table
female_de,keeper = keeper,
excel = "excel/mm_female_de-v202204.xlsx")
## Deleting the file excel/mm_female_de-v202204.xlsx before writing the tables.
<- extract_significant_genes(lfc = 0.6,
female_sig
female_table,excel = "excel/mm_female_sig-v202204.xlsx",
according_to = "deseq")
## Deleting the file excel/mm_female_sig-v202204.xlsx before writing the tables.
<- female_sig[["deseq"]][["ups"]][["vaccination"]]
ups rownames(ups) <- gsub(x = rownames(ups), pattern = "gene:", replacement = "")
<- female_sig[["deseq"]][["downs"]][["vaccination"]]
downs rownames(downs) <- gsub(x = rownames(downs), pattern = "gene:", replacement = "")
<- simple_gprofiler(ups, species = "mmusculus") up_gp
## No results to show
## Please make sure that the organism is correct or set significant = FALSE
## No results to show
## Please make sure that the organism is correct or set significant = FALSE
## No results to show
## Please make sure that the organism is correct or set significant = FALSE
## No results to show
## Please make sure that the organism is correct or set significant = FALSE
## Error in gprofiler2::gost(query = gene_ids, organism = species, evcodes = evcodes, :
## Bad request, response code 400
## No results to show
## Please make sure that the organism is correct or set significant = FALSE
## No results to show
## Please make sure that the organism is correct or set significant = FALSE
"pvalue_plots"]][["bpp_plot_over"]] up_gp[[
## NULL
"pvalue_plots"]][["mfp_plot_over"]] up_gp[[
## NULL
"pvalue_plots"]][["reactome_plot_over"]] up_gp[[
## NULL
"pvalue_plots"]][["kegg_plot_over"]] up_gp[[
## NULL
"pvalue_plots"]][["tf_plot_over"]] up_gp[[
## NULL
"pvalue_plots"]][["hp_plot_over"]] up_gp[[
## NULL
<- simple_gprofiler(downs, species = "mmusculus") down_gp
## No results to show
## Please make sure that the organism is correct or set significant = FALSE
## No results to show
## Please make sure that the organism is correct or set significant = FALSE
## Error in gprofiler2::gost(query = gene_ids, organism = species, evcodes = evcodes, :
## Bad request, response code 400
## No results to show
## Please make sure that the organism is correct or set significant = FALSE
## No results to show
## Please make sure that the organism is correct or set significant = FALSE
"pvalue_plots"]][["bpp_plot_over"]] down_gp[[
## NULL
"pvalue_plots"]][["mfp_plot_over"]] down_gp[[
## NULL
"pvalue_plots"]][["reactome_plot_over"]] down_gp[[
## NULL
"pvalue_plots"]][["kegg_plot_over"]] down_gp[[
## NULL
"pvalue_plots"]][["tf_plot_over"]] down_gp[[
## NULL
"pvalue_plots"]][["hp_plot_over"]] down_gp[[
## NULL
pp(file = "images/down_gprofiler_bp.png",
image = down_gp[["pvalue_plots"]][["bpp_plot_over"]])
dev.off()
## png
## 2
pp(file = "images/down_gprofiler_mf.png",
image = down_gp[["pvalue_plots"]][["mfp_plot_over"]])
dev.off()
## png
## 2
pp(file = "images/down_gprofiler_reactome.png",
image = down_gp[["pvalue_plots"]][["reactome_plot_over"]])
dev.off()
## png
## 2
pp(file = "images/down_gprofiler_kegg.png",
image = down_gp[["pvalue_plots"]][["kegg_plot_over"]])
dev.off()
## png
## 2
pp(file = "images/down_gprofiler_tf.png",
image = down_gp[["pvalue_plots"]][["tf_plot_over"]], height = 16, width = 8)
dev.off()
## png
## 2
pp(file = "images/down_gprofiler_hp.png",
image = down_gp[["pvalue_plots"]][["hp_plot_over"]], height = 12, width = 8)
dev.off()
## png
## 2
## down_cp <- simple_clusterprofiler(downs, orgdb="org.Mm.eg.db")
<- set_expt_conditions(mm_expt, fact = "batch") %>%
mm_mf_expt set_expt_batches(fact = "titerfactor")
##
## female male
## 6 6
##
## low much some
## 7 2 3
<- all_pairwise(mm_mf_expt, model_batch = "svaseq", filter = TRUE) mm_mf_de
##
## female male
## 6 6
## Removing 0 low-count genes (11218 remaining).
## Setting 46 low elements to zero.
## transform_counts: Found 46 values equal to 0, adding 1 to the matrix.
<- combine_de_tables(
mm_mf_table excel = glue::glue("excel/mf_comparison-v{ver}.xlsx")) mm_mf_de,
## Deleting the file excel/mf_comparison-v20230331.xlsx before writing the tables.
<- load_gmt_signatures(signatures = "reference/m2.all.v2022.1.Mm.entrez.gmt")
broad_m2
<- simple_gsva(mm_expt, signatures = broad_m2, orgdb = "org.Mm.eg.db") mm_gsva
## Converting the rownames() of the expressionset to ENTREZID.
## 4028 ENSEMBL ID's didn't have a matching ENTEREZ ID. Dropping them now.
## Before conversion, the expressionset has 25760 entries.
## After conversion, the expressionset has 22010 entries.
<- get_sig_gsva_categories(
mm_gsva_scored
mm_gsva,excel = glue::glue("excel/mm_m2_gsva-v{ver}.xlsx"))
## Length Class Mode
## title 1 -none- character
## notes 1 -none- character
## initial_metadata 12 data.frame list
## expressionset 1 ExpressionSet S4
## design 12 data.frame list
## conditions 12 factor numeric
## batches 12 factor numeric
## samplenames 12 -none- character
## colors 12 -none- character
## state 5 -none- list
## libsize 12 -none- numeric
## study 1 -none- character
## researcher 1 -none- character
## Starting limma pairwise comparison.
## libsize was not specified, this parameter has profound effects on limma's result.
## Using the libsize from expt$libsize.
## Limma step 1/6: choosing model.
## Assuming this data is similar to a micro array and not performign voom.
## Limma step 3/6: running lmFit with method: ls.
## Limma step 4/6: making and fitting contrasts with no intercept. (~ 0 + factors)
## Limma step 5/6: Running eBayes with robust = FALSE and trend = FALSE.
## Limma step 6/6: Writing limma outputs.
## Limma step 6/6: 1/1: Creating table: PBS_vs_COPSFliC. Adjust = BH
## Limma step 6/6: 1/2: Creating table: COPSFliC. Adjust = BH
## Limma step 6/6: 2/2: Creating table: PBS. Adjust = BH
## Length Class Mode
## title 1 -none- character
## notes 1 -none- character
## initial_metadata 12 data.frame list
## expressionset 1 ExpressionSet S4
## design 12 data.frame list
## conditions 12 factor numeric
## batches 12 factor numeric
## samplenames 12 -none- character
## colors 12 -none- character
## state 5 -none- list
## libsize 12 -none- numeric
## study 1 -none- character
## researcher 1 -none- character
## The factor COPS:FliC has 6 rows.
## The factor PBS has 6 rows.
## Testing each factor against the others.
## Scoring COPS:FliC against everything else.
## Scoring PBS against everything else.
## Deleting the file excel/mm_m2_gsva-v20230331.xlsx before writing the tables.
::pander(sessionInfo()) pander
R version 4.2.0 (2022-04-22)
Platform: x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.UTF-8, LC_NUMERIC=C, LC_TIME=en_US.UTF-8, LC_COLLATE=en_US.UTF-8, LC_MONETARY=en_US.UTF-8, LC_MESSAGES=en_US.UTF-8, LC_PAPER=en_US.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.UTF-8 and LC_IDENTIFICATION=C
attached base packages: stats4, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: org.Hs.eg.db(v.3.16.0), AnnotationDbi(v.1.60.0), org.Mm.eg.db(v.3.16.0), ruv(v.0.9.7.1), hpgltools(v.1.0), testthat(v.3.1.6), reticulate(v.1.28), SummarizedExperiment(v.1.28.0), GenomicRanges(v.1.50.2), GenomeInfoDb(v.1.34.9), IRanges(v.2.32.0), S4Vectors(v.0.36.1), MatrixGenerics(v.1.10.0), matrixStats(v.0.63.0), Biobase(v.2.58.0) and BiocGenerics(v.0.44.0)
loaded via a namespace (and not attached): rappdirs(v.0.3.3), rtracklayer(v.1.58.0), R.methodsS3(v.1.8.2), tidyr(v.1.3.0), ggplot2(v.3.4.1), clusterGeneration(v.1.3.7), bit64(v.4.0.5), knitr(v.1.42), irlba(v.2.3.5.1), R.utils(v.2.12.2), DelayedArray(v.0.24.0), data.table(v.1.14.6), KEGGREST(v.1.38.0), RCurl(v.1.98-1.10), doParallel(v.1.0.17), generics(v.0.1.3), ScaledMatrix(v.1.6.0), preprocessCore(v.1.60.2), GenomicFeatures(v.1.50.4), callr(v.3.7.3), RhpcBLASctl(v.0.23-42), cowplot(v.1.1.1), usethis(v.2.1.6), RSQLite(v.2.2.20), shadowtext(v.0.1.2), bit(v.4.0.5), enrichplot(v.1.18.3), xml2(v.1.3.3), httpuv(v.1.6.8), assertthat(v.0.2.1), viridis(v.0.6.2), xfun(v.0.37), hms(v.1.1.2), jquerylib(v.0.1.4), IHW(v.1.26.0), evaluate(v.0.20), promises(v.1.2.0.1), DEoptimR(v.1.0-11), fansi(v.1.0.4), restfulr(v.0.0.15), progress(v.1.2.2), caTools(v.1.18.2), dbplyr(v.2.3.0), geneplotter(v.1.76.0), igraph(v.1.4.0), DBI(v.1.1.3), htmlwidgets(v.1.6.1), purrr(v.1.0.1), ellipsis(v.0.3.2), crosstalk(v.1.2.0), dplyr(v.1.1.0), backports(v.1.4.1), gprofiler2(v.0.2.1), annotate(v.1.76.0), aod(v.1.3.2), sparseMatrixStats(v.1.10.0), biomaRt(v.2.54.0), SingleCellExperiment(v.1.20.0), vctrs(v.0.5.2), remotes(v.2.4.2), cachem(v.1.0.6), withr(v.2.5.0), ggforce(v.0.4.1), HDO.db(v.0.99.1), robustbase(v.0.95-0), GenomicAlignments(v.1.34.0), treeio(v.1.22.0), fdrtool(v.1.2.17), prettyunits(v.1.1.1), DOSE(v.3.24.2), ape(v.5.6-2), lazyeval(v.0.2.2), crayon(v.1.5.2), genefilter(v.1.80.3), slam(v.0.1-50), labeling(v.0.4.2), edgeR(v.3.40.2), pkgconfig(v.2.0.3), tweenr(v.2.0.2), nlme(v.3.1-162), pkgload(v.1.3.2), devtools(v.2.4.5), rlang(v.1.0.6), lifecycle(v.1.0.3), miniUI(v.0.1.1.1), downloader(v.0.4), filelock(v.1.0.2), BiocFileCache(v.2.6.0), rsvd(v.1.0.5), rprojroot(v.2.0.3), polyclip(v.1.10-4), GSVA(v.1.46.0), graph(v.1.76.0), Matrix(v.1.5-3), aplot(v.0.1.9), lpsymphony(v.1.26.3), Rhdf5lib(v.1.20.0), boot(v.1.3-28.1), processx(v.3.8.0), png(v.0.1-8), viridisLite(v.0.4.1), rjson(v.0.2.21), bitops(v.1.0-7), R.oo(v.1.25.0), gson(v.0.0.9), rhdf5filters(v.1.10.0), KernSmooth(v.2.23-20), pander(v.0.6.5), Biostrings(v.2.66.0), DelayedMatrixStats(v.1.20.0), blob(v.1.2.3), stringr(v.1.5.0), qvalue(v.2.30.0), remaCor(v.0.0.11), gridGraphics(v.0.5-1), beachmat(v.2.14.0), scales(v.1.2.1), memoise(v.2.0.1), GSEABase(v.1.60.0), magrittr(v.2.0.3), plyr(v.1.8.8), gplots(v.3.1.3), zlibbioc(v.1.44.0), compiler(v.4.2.0), scatterpie(v.0.1.8), BiocIO(v.1.8.0), RColorBrewer(v.1.1-3), lme4(v.1.1-31), DESeq2(v.1.38.3), Rsamtools(v.2.14.0), cli(v.3.6.0), XVector(v.0.38.0), urlchecker(v.1.0.1), patchwork(v.1.1.2), ps(v.1.7.2), MASS(v.7.3-58.2), mgcv(v.1.8-41), tidyselect(v.1.2.0), stringi(v.1.7.12), highr(v.0.10), yaml(v.2.3.7), GOSemSim(v.2.24.0), BiocSingular(v.1.14.0), locfit(v.1.5-9.7), ggrepel(v.0.9.3), grid(v.4.2.0), sass(v.0.4.5), fastmatch(v.1.1-3), tools(v.4.2.0), parallel(v.4.2.0), rstudioapi(v.0.14), foreach(v.1.5.2), gridExtra(v.2.3), farver(v.2.1.1), ggraph(v.2.1.0), digest(v.0.6.31), shiny(v.1.7.4), Rcpp(v.1.0.10), broom(v.1.0.3), later(v.1.3.0), httr(v.1.4.4), Rdpack(v.2.4), colorspace(v.2.1-0), brio(v.1.1.3), XML(v.3.99-0.13), fs(v.1.6.1), splines(v.4.2.0), RBGL(v.1.74.0), yulab.utils(v.0.0.6), PROPER(v.1.30.0), tidytree(v.0.4.2), graphlayouts(v.0.8.4), ggplotify(v.0.1.0), plotly(v.4.10.1), sessioninfo(v.1.2.2), xtable(v.1.8-4), jsonlite(v.1.8.4), nloptr(v.2.0.3), ggtree(v.3.6.2), tidygraph(v.1.2.3), corpcor(v.1.6.10), ggfun(v.0.0.9), Vennerable(v.3.1.0.9000), R6(v.2.5.1), RUnit(v.0.4.32), profvis(v.0.3.7), pillar(v.1.8.1), htmltools(v.0.5.4), mime(v.0.12), glue(v.1.6.2), fastmap(v.1.1.0), minqa(v.1.2.5), clusterProfiler(v.4.6.0), BiocParallel(v.1.32.5), codetools(v.0.2-19), fgsea(v.1.24.0), pkgbuild(v.1.4.0), mvtnorm(v.1.1-3), utf8(v.1.2.3), lattice(v.0.20-45), bslib(v.0.4.2), tibble(v.3.1.8), sva(v.3.46.0), pbkrtest(v.0.5.2), curl(v.5.0.0), gtools(v.3.9.4), zip(v.2.2.2), GO.db(v.3.16.0), openxlsx(v.4.2.5.2), survival(v.3.5-3), limma(v.3.54.1), rmarkdown(v.2.20), desc(v.1.4.2), munsell(v.0.5.0), fastcluster(v.1.2.3), rhdf5(v.2.42.0), GenomeInfoDbData(v.1.2.9), iterators(v.1.0.14), HDF5Array(v.1.26.0), variancePartition(v.1.28.4), reshape2(v.1.4.4), gtable(v.0.3.1) and rbibutils(v.2.2.13)
message(paste0("This is hpgltools commit: ", get_git_commit()))
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 465cd7740fb559303acb139de1d478adfc3f6612
## This is hpgltools commit: Fri Mar 24 14:37:21 2023 -0400: 465cd7740fb559303acb139de1d478adfc3f6612
<- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
this_save message(paste0("Saving to ", this_save))
## Saving to index-v20230331.rda.xz
<- sm(saveme(filename=this_save)) tmp
<- loadme(filename = this_save) tmp