Sadly, the authors of the various ontology tools I use (goseq/clusterProfiler/topGO/GOstats/gProfiler) keep changing their input requirements, which makes it hard for me to keep up. Let's see how well I did.
I want to search the sets of up/down genes using goseq. Since its authors changed how they accept annotations, I am changing the goseq search to match the new methods. I think the new methods will be better, but I am not yet certain.
# Inputs shared by the searches below: the GO annotation element of mm_go and
# the first element of the deseq up/down significant-gene lists.
go_db <- mm_go[["go"]]
deseq_sigdown <- isc_mm_sig$deseq$downs[[1]]
deseq_sigup <- isc_mm_sig$deseq$ups[[1]]

# goseq search of the "up" gene set; the results are also written to a
# spreadsheet whose name carries the analysis version `ver`.
goseq_up <- simple_goseq(
  sig_genes = deseq_sigup,
  go_db = go_db,
  length_db = mm_lengths,
  excel = paste0("excel/goseq_up-v", ver, ".xlsx")
)
## Using the row names of your table.
## Found 193 genes out of 198 from the sig_genes in the go_db.
## Found 198 genes out of 198 from the sig_genes in the length_db.
##
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## simple_goseq(): Calculating q-values
## Using GO.db to extract terms and categories.
## Loading required package: GO.db
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, cbind, colMeans,
## colnames, colSums, do.call, duplicated, eval, evalq, Filter,
## Find, get, grep, grepl, intersect, is.unsorted, lapply,
## lengths, Map, mapply, match, mget, order, paste, pmax,
## pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
## rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
## tapply, union, unique, unsplit, which, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## simple_goseq(): Filling godata with terms, this is slow.
## Testing that go categories are defined.
## Removing undefined categories.
## Gathering synonyms.
## Gathering category definitions.
## simple_goseq(): Making pvalue plots for the ontologies.
## Writing data to: excel/goseq_up-v20170614.xlsx.
# goseq search of the "down" gene set, written to its own versioned spreadsheet.
# The query must be deseq_sigdown: passing the up set here merely repeats the
# goseq_up search and stores it under the "down" name (the echoed log that
# follows was produced with the up set and needs a re-run to refresh).
goseq_down <- simple_goseq(
  sig_genes = deseq_sigdown,
  go_db = go_db,
  length_db = mm_lengths,
  excel = paste0("excel/goseq_down-v", ver, ".xlsx")
)
## Using the row names of your table.
## Found 193 genes out of 198 from the sig_genes in the go_db.
## Found 198 genes out of 198 from the sig_genes in the length_db.
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## simple_goseq(): Calculating q-values
## Using GO.db to extract terms and categories.
## simple_goseq(): Filling godata with terms, this is slow.
## Testing that go categories are defined.
## Removing undefined categories.
## Gathering synonyms.
## Gathering category definitions.
## simple_goseq(): Making pvalue plots for the ontologies.
## Writing data to: excel/goseq_down-v20170614.xlsx.
# g:Profiler search of the "up" gene set against mouse ("mmusculus"); the log
# below shows it querying GO, KEGG, REAC, MI, TF, CORUM and HP in turn.
# Results are also written to a versioned spreadsheet.
gprofiler_up <- simple_gprofiler(
  sig_genes = deseq_sigup,
  species = "mmusculus",
  first_col = "deseq_logfc",
  excel = paste0("excel/gprofiler_up-v", ver, ".xlsx")
)
## Performing gProfiler GO search of 198 genes against mmusculus.
## GO search found 260 hits.
## Performing gProfiler KEGG search of 198 genes against mmusculus.
## KEGG search found 6 hits.
## Performing gProfiler REAC search of 198 genes against mmusculus.
## REAC search found 0 hits.
## Performing gProfiler MI search of 198 genes against mmusculus.
## MI search found 1 hits.
## Performing gProfiler TF search of 198 genes against mmusculus.
## TF search found 9 hits.
## Performing gProfiler CORUM search of 198 genes against mmusculus.
## CORUM search found 1 hits.
## Performing gProfiler HP search of 198 genes against mmusculus.
## HP search found 47 hits.
## Writing data to: excel/gprofiler_up-v20170614.xlsx.
## Finished writing data.
# g:Profiler search of the "down" gene set against mouse ("mmusculus"), written
# to its own versioned spreadsheet.
# The query must be deseq_sigdown: passing the up set here merely duplicates
# gprofiler_up under the "down" name (the echoed log that follows was produced
# with the up set and needs a re-run to refresh).
gprofiler_down <- simple_gprofiler(
  sig_genes = deseq_sigdown,
  species = "mmusculus",
  first_col = "deseq_logfc",
  excel = paste0("excel/gprofiler_down-v", ver, ".xlsx")
)
## Performing gProfiler GO search of 198 genes against mmusculus.
## GO search found 260 hits.
## Performing gProfiler KEGG search of 198 genes against mmusculus.
## KEGG search found 6 hits.
## Performing gProfiler REAC search of 198 genes against mmusculus.
## REAC search found 0 hits.
## Performing gProfiler MI search of 198 genes against mmusculus.
## MI search found 1 hits.
## Performing gProfiler TF search of 198 genes against mmusculus.
## TF search found 9 hits.
## Performing gProfiler CORUM search of 198 genes against mmusculus.
## CORUM search found 1 hits.
## Performing gProfiler HP search of 198 genes against mmusculus.
## HP search found 47 hits.
## Writing data to: excel/gprofiler_down-v20170614.xlsx.
## Finished writing data.
# Record the R version, platform, locale and package versions used for this run.
pander::pander(utils::sessionInfo())
R version 3.4.4 (2018-03-15)
**Platform:** x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C
attached base packages: grid, parallel, stats4, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: Vennerable(v.3.1.0.9000), Rgraphviz(v.2.22.0), graph(v.1.56.0), SparseM(v.1.77), topGO(v.2.30.1), GO.db(v.3.5.0), AnnotationDbi(v.1.40.0), IRanges(v.2.12.0), S4Vectors(v.0.16.0), Biobase(v.2.38.0), BiocGenerics(v.0.24.0) and hpgltools(v.2018.03)
loaded via a namespace (and not attached): minqa(v.1.2.4), colorspace(v.1.3-2), colorRamps(v.2.3), rprojroot(v.1.3-2), qvalue(v.2.10.0), htmlTable(v.1.11.2), XVector(v.0.18.0), GenomicRanges(v.1.30.3), base64enc(v.0.1-3), rstudioapi(v.0.7), roxygen2(v.6.0.1), ggrepel(v.0.7.0), bit64(v.0.9-7), xml2(v.1.2.0), codetools(v.0.2-15), splines(v.3.4.4), doParallel(v.1.0.11), robustbase(v.0.92-8), geneplotter(v.1.56.0), knitr(v.1.20), Formula(v.1.2-2), gProfileR(v.0.6.4), nloptr(v.1.0.4), Rsamtools(v.1.30.0), pbkrtest(v.0.4-7), annotate(v.1.56.1), cluster(v.2.0.6), geneLenDataBase(v.1.14.0), httr(v.1.3.1), compiler(v.3.4.4), backports(v.1.1.2), assertthat(v.0.2.0), Matrix(v.1.2-12), lazyeval(v.0.2.1), limma(v.3.34.9), prettyunits(v.1.0.2), acepack(v.1.4.1), htmltools(v.0.3.6), tools(v.3.4.4), gtable(v.0.2.0), GenomeInfoDbData(v.1.0.0), reshape2(v.1.4.3), Rcpp(v.0.12.16), Biostrings(v.2.46.0), gdata(v.2.18.0), nlme(v.3.1-131.1), rtracklayer(v.1.38.3), iterators(v.1.0.9), stringr(v.1.3.0), openxlsx(v.4.0.17), lme4(v.1.1-15), gtools(v.3.5.0), devtools(v.1.13.5), XML(v.3.98-1.10), DEoptimR(v.1.0-8), directlabels(v.2017.03.31), zlibbioc(v.1.24.0), MASS(v.7.3-49), scales(v.0.5.0.9000), RBGL(v.1.54.0), SummarizedExperiment(v.1.8.1), RColorBrewer(v.1.1-2), yaml(v.2.1.18), memoise(v.1.1.0), goseq(v.1.30.0), gridExtra(v.2.3), pander(v.0.6.1), ggplot2(v.2.2.1), biomaRt(v.2.34.2), rpart(v.4.1-13), latticeExtra(v.0.6-28), stringi(v.1.1.7), RSQLite(v.2.0), genefilter(v.1.60.0), foreach(v.1.4.4), RMySQL(v.0.10.14), checkmate(v.1.8.5), GenomicFeatures(v.1.30.3), caTools(v.1.17.1), BiocParallel(v.1.12.0), GenomeInfoDb(v.1.14.0), rlang(v.0.2.0.9001), pkgconfig(v.2.0.1), commonmark(v.1.4), matrixStats(v.0.53.1), bitops(v.1.0-6), evaluate(v.0.10.1), lattice(v.0.20-35), labeling(v.0.3), GenomicAlignments(v.1.14.1), htmlwidgets(v.1.0), bit(v.1.1-12), plyr(v.1.8.4), magrittr(v.1.5), variancePartition(v.1.8.1), DESeq2(v.1.18.1), R6(v.2.2.2), gplots(v.3.0.1), Hmisc(v.4.1-1), DelayedArray(v.0.4.1), 
DBI(v.0.8), pillar(v.1.2.1), foreign(v.0.8-69), withr(v.2.1.2), mgcv(v.1.8-23), survival(v.2.41-3), RCurl(v.1.95-4.10), nnet(v.7.3-12), tibble(v.1.4.2), KernSmooth(v.2.23-15), rmarkdown(v.1.9), progress(v.1.1.2), locfit(v.1.5-9.1), data.table(v.1.10.4-3), blob(v.1.1.0), digest(v.0.6.15), xtable(v.1.8-2), munsell(v.0.4.3), BiasedUrn(v.1.07) and quadprog(v.1.5-5)
# Report which hpgltools commit produced this document; message() joins its
# arguments itself, so no explicit paste0() is required.
message("This is hpgltools commit: ", get_git_commit())
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 7de4503f6bb5724c28cce24af5dbee22bb1c0cae
## R> packrat::restore()
## This is hpgltools commit: Thu Apr 12 22:08:53 2018 -0400: 7de4503f6bb5724c28cce24af5dbee22bb1c0cae
# Announce where the results are about to be saved.
message("Saving to ", savefile)
## Saving to gene_ontology_mmusculus_v20170614_v20170614.rda.xz
# Save the analysis to `savefile` via saveme(). NOTE(review): sm() presumably
# silences saveme()'s messages, and assigning to `tmp` keeps the return value
# from being printed — confirm against hpgltools.
tmp <- sm(saveme(filename=savefile))