I do not yet know much about the background of these samples. My understanding is that varying titers of vaccine were administered to a balanced set of male and female mice.
I chose to use the ~ 2020 mm38_100 genome.
# Load the mouse gene annotations from biomart and name the rows by Ensembl
# gene ID.
mm_annot <- load_biomart_annotations(species="mmusculus")[["annotation"]]
## The biomart annotations file already exists, loading from it.
rownames(mm_annot) <- make.names(mm_annot[["ensembl_gene_id"]], unique=TRUE)
# make.names(unique=TRUE) appends a ".N" suffix to repeated names, so a "." in
# a rowname presumably marks a repeated gene ID (TODO confirm); keep only the
# rows whose name has no ".".
drop_tx <- grepl(x=rownames(mm_annot), pattern="\\.")
mm_annot <- mm_annot[!drop_tx, ]
# Read the GFF annotations for the mm38_100 genome and keep only the entries
# whose biotype is protein_coding.
mm_gff <- load_gff_annotations("~/libraries_fs/genome/mm38_100.gff") %>%
  filter(biotype == 'protein_coding')
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo = TRUE)
## Had a successful gff import with rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo = TRUE)
## Returning a df with 32 columns and 3899382 rows.
# Discard entries without a gene_id so every remaining row can be named by it.
mm_gff_idx <- !is.na(mm_gff[["gene_id"]])
mm_gff <- mm_gff[mm_gff_idx, ]
rownames(mm_gff) <- make.names(mm_gff[["gene_id"]], unique=TRUE)
dim(mm_gff)
## [1] 21936 32
# Join the GFF columns onto the biomart annotations by rowname; all.x=TRUE
# keeps every biomart row even when it has no GFF match.
annotations <- merge(mm_annot, mm_gff, by="row.names", all.x=TRUE)
# merge(by="row.names") returns the join key as a "Row.names" column; move it
# back into the rownames and drop the column.
rownames(annotations) <- annotations[["Row.names"]]
annotations[["Row.names"]] <- NULL
# Prefix every ID with "gene:" -- presumably to match the gene names used in
# the count tables (TODO confirm).
rownames(annotations) <- paste0("gene:", rownames(annotations))
# Build the expressionset from the sample sheet, attaching the merged gene
# annotations as the gene information.
mm_expt <- create_expt("sample_sheets/initial_metadata_20220221.xlsx", gene_info=annotations)
## Reading the sample metadata.
## The sample definitions comprises: 12 rows(samples) and 12 columns(metadata fields).
## Matched 25661 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 25760 rows and 12 columns.
# Normalize for visualization: low-count filtering, cpm conversion, quantile
# normalization and a log2 transform, then plot the PCA.
mm_norm <- normalize_expt(mm_expt, transform="log2", convert="cpm", norm="quant", filter=TRUE)
## Removing 14542 low-count genes (11218 remaining).
plot_pca(mm_norm)$plot
# Same normalization as above, additionally requesting svaseq batch/surrogate
# adjustment, then plot the PCA of the adjusted data.
mm_nb <- normalize_expt(mm_expt, transform="log2", convert="cpm", norm="quant", filter=TRUE, batch="svaseq")
## Warning in normalize_expt(mm_expt, transform = "log2", convert = "cpm", :
## Quantile normalization and sva do not always play well together.
## Removing 14542 low-count genes (11218 remaining).
## batch_counts: Before batch/surrogate estimation, 0 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 1706 entries are 0<x<1: 1%.
## Setting 40 low elements to zero.
## transform_counts: Found 40 values equal to 0, adding 1 to the matrix.
plot_pca(mm_nb)$plot
It looks like the sample ‘9R’ is a significant outlier.
I sort of assume that mouse 9R will confuse any differential expression analysis.
# Run the pairwise differential expression analyses on the low-count-filtered
# data, passing svaseq as the batch model.
vaccine_de <- all_pairwise(mm_expt, filter=TRUE, model_batch="svaseq")
## batch_counts: Before batch/surrogate estimation, 34 entries are x==0: 0%.
## Plotting a PCA before surrogate/batch inclusion.
## Using svaseq to visualize before/after batch inclusion.
## Performing a test normalization with: raw
## Removing 0 low-count genes (11218 remaining).
## batch_counts: Before batch/surrogate estimation, 34 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 1743 entries are 0<x<1: 1%.
## Setting 52 low elements to zero.
## transform_counts: Found 52 values equal to 0, adding 1 to the matrix.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
# Name the single contrast to keep -- the pair COPSFliC and PBS, labelled
# "vaccinated" -- and write the combined DE table to an Excel workbook.
keeper <- list("vaccinated" = c("COPSFliC", "PBS"))
vaccine_table <- combine_de_tables(vaccine_de, keepers=keeper,
                                   excel="excel/vaccine_table.xlsx")
## Deleting the file excel/vaccine_table.xlsx before writing the tables.
# Use the "titerfactor" metadata column as the experimental condition, then
# normalize as before and plot the PCA under that condition.
mm_titer <- set_expt_conditions(mm_expt, fact="titerfactor")
mm_titer_norm <- normalize_expt(mm_titer, transform="log2", convert="cpm", norm="quant", filter=TRUE)
## Removing 14542 low-count genes (11218 remaining).
plot_pca(mm_titer_norm)$plot
I am intrigued by how strongly the sex of the mice appears to dominate the variance among the samples.
# Swap the roles of the metadata columns: "batch" becomes the condition and
# "titerfactor" becomes the batch. NOTE(review): the "batch" column presumably
# holds the sex of each mouse, going by the object names -- confirm against
# the sample sheet.
mm_sex <- set_expt_conditions(mm_expt, fact="batch") %>%
  set_expt_batches(fact="titerfactor")
# Run the pairwise comparisons under the swapped design and export the tables.
sex_de <- all_pairwise(mm_sex)
## Plotting a PCA before surrogate/batch inclusion.
## Using limma's removeBatchEffect to visualize with(out) batch inclusion.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
mm_table <- combine_de_tables(sex_de, excel="excel/mm_sex.xlsx")
## Deleting the file excel/mm_sex.xlsx before writing the tables.
# Record the R session (versions of R and of loaded packages) for reproducibility.
pander::pander(sessionInfo())
R version 4.1.2 (2021-11-01)
Platform: x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.UTF-8, LC_NUMERIC=C, LC_TIME=en_US.UTF-8, LC_COLLATE=en_US.UTF-8, LC_MONETARY=en_US.UTF-8, LC_MESSAGES=en_US.UTF-8, LC_PAPER=en_US.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.UTF-8 and LC_IDENTIFICATION=C
attached base packages: stats4, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: ruv(v.0.9.7.1), hpgltools(v.1.0), testthat(v.3.1.1), SummarizedExperiment(v.1.24.0), GenomicRanges(v.1.46.1), GenomeInfoDb(v.1.30.0), IRanges(v.2.28.0), S4Vectors(v.0.32.3), MatrixGenerics(v.1.6.0), matrixStats(v.0.61.0), Biobase(v.2.54.0) and BiocGenerics(v.0.40.0)
loaded via a namespace (and not attached): rappdirs(v.0.3.3), rtracklayer(v.1.54.0), R.methodsS3(v.1.8.1), tidyr(v.1.1.4), ggplot2(v.3.3.5), bit64(v.4.0.5), knitr(v.1.37), DelayedArray(v.0.20.0), R.utils(v.2.11.0), data.table(v.1.14.2), KEGGREST(v.1.34.0), RCurl(v.1.98-1.5), doParallel(v.1.0.16), generics(v.0.1.1), GenomicFeatures(v.1.46.1), preprocessCore(v.1.56.0), callr(v.3.7.0), usethis(v.2.1.5), RSQLite(v.2.2.9), shadowtext(v.0.1.0), bit(v.4.0.4), enrichplot(v.1.14.1), xml2(v.1.3.3), httpuv(v.1.6.4), assertthat(v.0.2.1), viridis(v.0.6.2), xfun(v.0.29), hms(v.1.1.1), jquerylib(v.0.1.4), evaluate(v.0.14), promises(v.1.2.0.1), IHW(v.1.22.0), DEoptimR(v.1.0-9), fansi(v.0.5.0), restfulr(v.0.0.13), progress(v.1.2.2), caTools(v.1.18.2), dbplyr(v.2.1.1), igraph(v.1.2.10), DBI(v.1.1.2), geneplotter(v.1.72.0), htmlwidgets(v.1.5.4), purrr(v.0.3.4), ellipsis(v.0.3.2), dplyr(v.1.0.7), backports(v.1.4.1), annotate(v.1.72.0), biomaRt(v.2.50.1), vctrs(v.0.3.8), remotes(v.2.4.2), cachem(v.1.0.6), withr(v.2.4.3), ggforce(v.0.3.3), robustbase(v.0.93-9), GenomicAlignments(v.1.30.0), treeio(v.1.18.1), fdrtool(v.1.2.17), prettyunits(v.1.1.1), DOSE(v.3.20.1), ape(v.5.6), lazyeval(v.0.2.2), crayon(v.1.4.2), genefilter(v.1.76.0), edgeR(v.3.36.0), pkgconfig(v.2.0.3), slam(v.0.1-49), labeling(v.0.4.2), tweenr(v.1.0.2), nlme(v.3.1-153), pkgload(v.1.2.4), devtools(v.2.4.3), rlang(v.0.4.12), lifecycle(v.1.0.1), downloader(v.0.4), filelock(v.1.0.2), BiocFileCache(v.2.2.0), rprojroot(v.2.0.2), polyclip(v.1.10-0), graph(v.1.72.0), Matrix(v.1.3-4), aplot(v.0.1.1), lpsymphony(v.1.22.0), boot(v.1.3-28), processx(v.3.5.2), png(v.0.1-7), viridisLite(v.0.4.0), rjson(v.0.2.20), bitops(v.1.0-7), R.oo(v.1.24.0), KernSmooth(v.2.23-20), pander(v.0.6.4), Biostrings(v.2.62.0), blob(v.1.2.2), stringr(v.1.4.0), qvalue(v.2.26.0), gridGraphics(v.0.5-1), scales(v.1.1.1), memoise(v.2.0.1), magrittr(v.2.0.1), plyr(v.1.8.6), gplots(v.3.1.1), zlibbioc(v.1.40.0), compiler(v.4.1.2), scatterpie(v.0.1.7), 
BiocIO(v.1.4.0), RColorBrewer(v.1.1-2), lme4(v.1.1-27.1), DESeq2(v.1.34.0), Rsamtools(v.2.10.0), cli(v.3.1.0), XVector(v.0.34.0), patchwork(v.1.1.1), ps(v.1.6.0), MASS(v.7.3-54), mgcv(v.1.8-38), tidyselect(v.1.1.1), stringi(v.1.7.6), highr(v.0.9), yaml(v.2.2.1), GOSemSim(v.2.20.0), locfit(v.1.5-9.4), ggrepel(v.0.9.1), grid(v.4.1.2), sass(v.0.4.0), fastmatch(v.1.1-3), tools(v.4.1.2), parallel(v.4.1.2), rstudioapi(v.0.13), foreach(v.1.5.1), gridExtra(v.2.3), farver(v.2.1.0), ggraph(v.2.0.5), digest(v.0.6.29), shiny(v.1.7.1), Rcpp(v.1.0.7), broom(v.0.7.10), later(v.1.3.0), httr(v.1.4.2), AnnotationDbi(v.1.56.2), colorspace(v.2.0-2), XML(v.3.99-0.8), fs(v.1.5.2), splines(v.4.1.2), yulab.utils(v.0.0.4), RBGL(v.1.70.0), PROPER(v.1.26.0), tidytree(v.0.3.6), graphlayouts(v.0.7.2), ggplotify(v.0.1.0), plotly(v.4.10.0), sessioninfo(v.1.2.2), xtable(v.1.8-4), jsonlite(v.1.7.2), nloptr(v.1.2.2.3), ggtree(v.3.2.1), tidygraph(v.1.2.0), corpcor(v.1.6.10), ggfun(v.0.0.4), R6(v.2.5.1), Vennerable(v.3.1.0.9000), pillar(v.1.6.4), htmltools(v.0.5.2), mime(v.0.12), glue(v.1.6.0), fastmap(v.1.1.0), minqa(v.1.2.4), clusterProfiler(v.4.2.1), BiocParallel(v.1.28.3), codetools(v.0.2-18), fgsea(v.1.20.0), pkgbuild(v.1.3.1), utf8(v.1.2.2), lattice(v.0.20-45), bslib(v.0.3.1), tibble(v.3.1.6), sva(v.3.42.0), pbkrtest(v.0.5.1), curl(v.4.3.2), gtools(v.3.9.2), zip(v.2.2.0), GO.db(v.3.14.0), openxlsx(v.4.2.5), survival(v.3.2-13), limma(v.3.50.0), rmarkdown(v.2.11), desc(v.1.4.0), munsell(v.0.5.0), DO.db(v.2.9), GenomeInfoDbData(v.1.2.7), iterators(v.1.0.13), variancePartition(v.1.24.0), reshape2(v.1.4.4) and gtable(v.0.3.0)
# Report which hpgltools commit produced this document.
message(paste0("This is hpgltools commit: ", get_git_commit()))
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset cc2fec1cb27347df846cacd2578dfa1a9312f14e
## This is hpgltools commit: Wed Feb 23 13:53:56 2022 -0500: cc2fec1cb27347df846cacd2578dfa1a9312f14e
# Build the save-file name from the Rmd file name (".Rmd" removed) and the
# version string; rmd_file and ver are defined elsewhere in the document.
this_save <- paste0(gsub(pattern="\\.Rmd", replacement="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
## Saving to index-v20220228.rda.xz
# Save to that file; sm() presumably silences saveme()'s output (TODO confirm).
tmp <- sm(saveme(filename=this_save))