index.html preprocessing.html

1 Gene Ontology Searches, Mus musculus: 20170703

Sadly, the authors of the various ontology tools I use (goseq/clusterProfiler/topGO/GOstats/gProfileR) keep changing the input requirements, which makes it hard for me to keep up. Let's see how well I did.

1.1 Searching with goseq

I want to search the sets of up/down genes using goseq. Since its authors changed how annotations are accepted, I am changing the goseq search to match the new methods. I think the new methods will be better, but I am not yet certain.

## Run goseq and gProfiler ontology searches on the DESeq-significant genes,
## once for the up set and once for the down set, then write each result to
## a versioned Excel workbook under tables/.
## NOTE(review): sm() presumably silences the wrapped call's output, and the
## simple_*/write_* helpers presumably come from hpgltools -- confirm.

## Take the first entry of the DESeq 'ups' and 'downs' lists.
deseq_sigup <- isc_mm_sig$deseq$ups[[1]]
deseq_sigdown <- isc_mm_sig$deseq$downs[[1]]

## goseq searches: the down search must be given the down set.  Previously
## both calls were handed deseq_sigup, so goseq_down duplicated goseq_up.
goseq_up <- sm(simple_goseq(sig_genes=deseq_sigup, go_db=mm_go, length_db=mm_lengths))
goseq_down <- sm(simple_goseq(sig_genes=deseq_sigdown, go_db=mm_go, length_db=mm_lengths))

## gProfiler searches: same correction, the down search uses deseq_sigdown.
gprofiler_up <- sm(simple_gprofiler(sig_genes=deseq_sigup, species="mmusculus",
                                    first_col="deseq_logfc"))
gprofiler_down <- sm(simple_gprofiler(sig_genes=deseq_sigdown, species="mmusculus",
                                      first_col="deseq_logfc"))

## Write the goseq results; 'ver' is appended so each run gets its own file.
goseq_up_write <- sm(write_goseq_data(goseq_up,
                                      excel=paste0("tables/goseq_deseq_sigup-v", ver, ".xlsx")))

goseq_down_write <- sm(write_goseq_data(goseq_down,
                                        excel=paste0("tables/goseq_deseq_sigdown-v", ver, ".xlsx")))

## Write the gProfiler results using the same naming scheme.
gprofiler_up_write <- sm(write_gprofiler_data(gprofiler_result=gprofiler_up,
                                              excel=paste0("tables/gprofiler_deseq_sigup-v", ver, ".xlsx")))
gprofiler_down_write <- sm(write_gprofiler_data(gprofiler_result=gprofiler_down,
                                                excel=paste0("tables/gprofiler_deseq_sigdown-v", ver, ".xlsx")))

index.html sample_estimation.html

## Record the R session details (R version, platform, locale, and the
## attached/loaded packages) in the report, formatted by pander.
pander::pander(sessionInfo())

R version 3.3.3 (2017-03-06)

**Platform:** x86_64-pc-linux-gnu (64-bit)

locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C

attached base packages: grid, parallel, stats4, stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: Vennerable(v.3.1.0.9000), Rgraphviz(v.2.18.0), graph(v.1.52.0), SparseM(v.1.77), topGO(v.2.26.0), GO.db(v.3.4.0), AnnotationDbi(v.1.36.2), IRanges(v.2.8.2), S4Vectors(v.0.12.2), Biobase(v.2.34.0), BiocGenerics(v.0.20.0) and hpgltools(v.2017.01)

loaded via a namespace (and not attached): minqa(v.1.2.4), colorspace(v.1.3-2), colorRamps(v.2.3), rprojroot(v.1.2), qvalue(v.2.6.0), htmlTable(v.1.9), XVector(v.0.14.1), GenomicRanges(v.1.26.4), base64enc(v.0.1-3), roxygen2(v.6.0.1), ggrepel(v.0.6.5), bit64(v.0.9-7), xml2(v.1.1.1), codetools(v.0.2-15), splines(v.3.3.3), doParallel(v.1.0.10), robustbase(v.0.92-7), geneplotter(v.1.52.0), knitr(v.1.16), Formula(v.1.2-1), nloptr(v.1.0.4), gProfileR(v.0.6.1), Rsamtools(v.1.26.2), pbkrtest(v.0.4-7), annotate(v.1.52.1), cluster(v.2.0.6), geneLenDataBase(v.1.10.0), backports(v.1.1.0), Matrix(v.1.2-10), lazyeval(v.0.2.0), limma(v.3.30.13), acepack(v.1.4.1), htmltools(v.0.3.6), tools(v.3.3.3), gtable(v.0.2.0), reshape2(v.1.4.2), Rcpp(v.0.12.11), Biostrings(v.2.42.1), gdata(v.2.18.0), nlme(v.3.1-131), rtracklayer(v.1.34.2), iterators(v.1.0.8), stringr(v.1.2.0), openxlsx(v.4.0.17), testthat(v.1.0.2), lme4(v.1.1-13), gtools(v.3.5.0), devtools(v.1.13.2), XML(v.3.98-1.9), DEoptimR(v.1.0-8), zlibbioc(v.1.20.0), MASS(v.7.3-47), scales(v.0.4.1), RBGL(v.1.50.0), SummarizedExperiment(v.1.4.0), RColorBrewer(v.1.1-2), yaml(v.2.1.14), memoise(v.1.1.0), goseq(v.1.26.0), gridExtra(v.2.2.1), pander(v.0.6.0), ggplot2(v.2.2.1), biomaRt(v.2.30.0), rpart(v.4.1-11), latticeExtra(v.0.6-28), stringi(v.1.1.5), RSQLite(v.2.0), genefilter(v.1.56.0), foreach(v.1.4.3), checkmate(v.1.8.3), GenomicFeatures(v.1.26.4), caTools(v.1.17.1), BiocParallel(v.1.8.2), GenomeInfoDb(v.1.10.3), matrixStats(v.0.52.2), rlang(v.0.1.1), pkgconfig(v.2.0.1), commonmark(v.1.2), bitops(v.1.0-6), evaluate(v.0.10.1), lattice(v.0.20-35), GenomicAlignments(v.1.10.1), htmlwidgets(v.0.8), labeling(v.0.3), bit(v.1.1-12), plyr(v.1.8.4), magrittr(v.1.5), variancePartition(v.1.4.2), DESeq2(v.1.14.1), R6(v.2.2.2), gplots(v.3.0.1), Hmisc(v.4.0-3), DBI(v.0.7), foreign(v.0.8-69), withr(v.1.0.2), mgcv(v.1.8-17), survival(v.2.41-3), RCurl(v.1.95-4.8), nnet(v.7.3-12), tibble(v.1.3.3), crayon(v.1.3.2), KernSmooth(v.2.23-15), 
rmarkdown(v.1.6), locfit(v.1.5-9.1), data.table(v.1.10.4), blob(v.1.1.0), digest(v.0.6.12), xtable(v.1.8-2), munsell(v.0.4.3) and BiasedUrn(v.1.07)

## Build the output filename from the Rmd filename with ".Rmd" removed,
## followed by "-v", the version string, and ".rda.xz".
## 'replacement' is spelled out in full rather than relying on partial
## argument matching of 'replace'.
this_save <- paste0(gsub(pattern="\\.Rmd", replacement="", x=rmd_file), "-v", ver, ".rda.xz")
## message() concatenates its arguments itself, so no inner paste0() is needed.
message("Saving to ", this_save)
## Saving to 04_gene_ontology_mmusculus-v20170703.rda.xz
## NOTE(review): saveme() presumably writes the session's data to this file
## and sm() presumably silences its output -- confirm against hpgltools.
tmp <- sm(saveme(filename=this_save))
LS0tCnRpdGxlOiAiSW5mZWN0ZWQgbWljZSB3aXRoIEl4b2RlcyBzY2FwdWxhcmlzIFJOQVNlcTogR2VuZSBPbnRvbG9neSBTZWFyY2hlcy4iCmF1dGhvcjogImF0YiBhYmVsZXdAZ21haWwuY29tIgpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiCm91dHB1dDoKIGh0bWxfZG9jdW1lbnQ6CiAgY29kZV9kb3dubG9hZDogdHJ1ZQogIGNvZGVfZm9sZGluZzogc2hvdwogIGZpZ19jYXB0aW9uOiB0cnVlCiAgZmlnX2hlaWdodDogNwogIGZpZ193aWR0aDogNwogIGhpZ2hsaWdodDogZGVmYXVsdAogIGtlZXBfbWQ6IGZhbHNlCiAgbW9kZTogc2VsZmNvbnRhaW5lZAogIG51bWJlcl9zZWN0aW9uczogdHJ1ZQogIHNlbGZfY29udGFpbmVkOiB0cnVlCiAgdGhlbWU6IHJlYWRhYmxlCiAgdG9jOiB0cnVlCiAgdG9jX2Zsb2F0OgogICAgY29sbGFwc2VkOiBmYWxzZQogICAgc21vb3RoX3Njcm9sbDogZmFsc2UKLS0tCgo8c3R5bGU+CiAgYm9keSAubWFpbi1jb250YWluZXIgewogICAgbWF4LXdpZHRoOiAxNjAwcHg7Cn0KPC9zdHlsZT4KCmBgYHtyIG9wdGlvbnMsIGluY2x1ZGU9RkFMU0V9CiMjIFRoZXNlIGFyZSB0aGUgb3B0aW9ucyBJIHRlbmQgdG8gZmF2b3IKbGlicmFyeSgiaHBnbHRvb2xzIikKdHQgPC0gZGV2dG9vbHM6OmxvYWRfYWxsKCJ+L2hwZ2x0b29scyIpCmtuaXRyOjpvcHRzX2tuaXQkc2V0KHByb2dyZXNzPVRSVUUsCiAgICAgICAgICAgICAgICAgICAgIHZlcmJvc2U9VFJVRSwKICAgICAgICAgICAgICAgICAgICAgd2lkdGg9OTAsCiAgICAgICAgICAgICAgICAgICAgIGVjaG89VFJVRSkKa25pdHI6Om9wdHNfY2h1bmskc2V0KGVycm9yPVRSVUUsCiAgICAgICAgICAgICAgICAgICAgICBmaWcud2lkdGg9OCwKICAgICAgICAgICAgICAgICAgICAgIGZpZy5oZWlnaHQ9OCwKICAgICAgICAgICAgICAgICAgICAgIGRwaT05NikKb2xkX29wdGlvbnMgPC0gb3B0aW9ucyhkaWdpdHM9NCwKICAgICAgICAgICAgICAgICAgICAgICBzdHJpbmdzQXNGYWN0b3JzPUZBTFNFLAogICAgICAgICAgICAgICAgICAgICAgIGtuaXRyLmR1cGxpY2F0ZS5sYWJlbD0iYWxsb3ciKQpnZ3Bsb3QyOjp0aGVtZV9zZXQoZ2dwbG90Mjo6dGhlbWVfYncoYmFzZV9zaXplPTEwKSkKc2V0LnNlZWQoMSkKdmVyIDwtICIyMDE3MDcwMyIKcHJldmlvdXNfZmlsZSA8LSAiMDNfZGlmZmVyZW50aWFsX2V4cHJlc3Npb25fbW11c2N1bHVzLlJtZCIKCnRtcCA8LSBzbShsb2FkbWUoZmlsZW5hbWU9cGFzdGUwKGdzdWIocGF0dGVybj0iXFwuUm1kIiwgcmVwbGFjZT0iIiwgeD1wcmV2aW91c19maWxlKSwgIi12IiwgdmVyLCAiLnJkYS54eiIpKSkKCnJtZF9maWxlIDwtICIwNF9nZW5lX29udG9sb2d5X21tdXNjdWx1cy5SbWQiCmBgYAoKYGBge3IgcmVuZGVyaW5nLCBpbmNsdWRlPUZBTFNFLCBldmFsPUZBTFNFfQpybWFya2Rvd246OnJlbmRlcihybWRfZmlsZSkKCnJtYXJrZG93bjo6cmVuZGVyKHJtZF9maWxlLCBvdXRwdXRfZm9ybWF0PSJwZGZfZG9jdW1lbnQiLCBvdXRwdXRf
b3B0aW9ucz1jKCJza2lwX2h0bWwiKSkKIyMgT3IgdG8gc2F2ZS9sb2FkIGxhcmdlIFJkYXRhIGZpbGVzLgpocGdsdG9vbHM6OjpzYXZlbWUoKQpocGdsdG9vbHM6Ojpsb2FkbWUoKQpybShsaXN0PWxzKCkpCmBgYAoKW2luZGV4Lmh0bWxdKGluZGV4Lmh0bWwpIFtwcmVwcm9jZXNzaW5nLmh0bWxdKHByZXByb2Nlc3NpbmcuaHRtbCkKCiMgR2VuZSBPbnRvbG9neSBTZWFyY2hlcywgTXVzIG11c2N1bHVzOiBgciB2ZXJgCgpTYWRseSwgdGhlIGF1dGhvcnMgb2YgdGhlIHZhcmlvdXMgb250b2xvZ3kgdG9vbHMgSSB1c2UKKGdvc2VxL2NsdXN0ZXJwcm9maWxlci90b3Bnby9nb3N0YXRzL2dwcm9maWxlcikga2VlcCBjaGFuZ2luZyB0aGUgaW5wdXQgcmVxdWlyZW1lbnRzIGFuZCBtYWtlIGl0CmhhcmQgZm9yIG1lIHRvIGtlZXAgdXAuICBMZXRzIHNlZSBob3cgd2VsbCBJIGRpZC4KCiMjIFNlYXJjaGluZyB3aXRoIGdvc2VxCgpJIHdhbnQgdG8gc2VhcmNoIHRoZSBzZXQgb2YgdXAvZG93biBnZW5lcyB1c2luZyBnb3NlcSwgc2luY2UgdGhleSBjaGFuZ2VkIGhvdyB0aGV5IGFjY2VwdAphbm5vdGF0aW9ucywgSSBhbSBjaGFuZ2luZyB0aGUgZ29zZXEgc2VhcmNoIHRvIG1hdGNoIHRoZSBuZXcgbWV0aG9kcy4gIEkgdGhpbmsgdGhleSB3aWxsIGJlIGJldHRlcgpidXQgYW0gbm90IHlldCBjZXJ0YWluLgoKYGBge3IgaW5pdGlhbF9vbnR9CmRlc2VxX3NpZ3VwIDwtIGlzY19tbV9zaWckZGVzZXEkdXBzW1sxXV0KZGVzZXFfc2lnZG93biA8LSBpc2NfbW1fc2lnJGRlc2VxJGRvd25zW1sxXV0KCmdvc2VxX3VwIDwtIHNtKHNpbXBsZV9nb3NlcShzaWdfZ2VuZXM9ZGVzZXFfc2lndXAsIGdvX2RiPW1tX2dvLCBsZW5ndGhfZGI9bW1fbGVuZ3RocykpCmdvc2VxX2Rvd24gPC0gc20oc2ltcGxlX2dvc2VxKHNpZ19nZW5lcz1kZXNlcV9zaWd1cCwgZ29fZGI9bW1fZ28sIGxlbmd0aF9kYj1tbV9sZW5ndGhzKSkKCmdwcm9maWxlcl91cCA8LSBzbShzaW1wbGVfZ3Byb2ZpbGVyKHNpZ19nZW5lcz1kZXNlcV9zaWd1cCwgc3BlY2llcz0ibW11c2N1bHVzIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZmlyc3RfY29sPSJkZXNlcV9sb2dmYyIpKQpncHJvZmlsZXJfZG93biA8LSBzbShzaW1wbGVfZ3Byb2ZpbGVyKHNpZ19nZW5lcz1kZXNlcV9zaWd1cCwgc3BlY2llcz0ibW11c2N1bHVzIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBmaXJzdF9jb2w9ImRlc2VxX2xvZ2ZjIikpCgpnb3NlcV91cF93cml0ZSA8LSBzbSh3cml0ZV9nb3NlcV9kYXRhKGdvc2VxX3VwLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGV4Y2VsPXBhc3RlMCgidGFibGVzL2dvc2VxX2Rlc2VxX3NpZ3VwLXYiLCB2ZXIsICIueGxzeCIpKSkKZ29zZXFfZG93bl93cml0ZSA8LSBzbSh3cml0ZV9nb3NlcV9kYXRhKGdvc2VxX2Rvd24sCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBleGNlbD1wYXN0
ZTAoInRhYmxlcy9nb3NlcV9kZXNlcV9zaWdkb3duLXYiLCB2ZXIsICIueGxzeCIpKSkKCmdwcm9maWxlcl91cF93cml0ZSA8LSBzbSh3cml0ZV9ncHJvZmlsZXJfZGF0YShncHJvZmlsZXJfcmVzdWx0PWdwcm9maWxlcl91cCwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGV4Y2VsPXBhc3RlMCgidGFibGVzL2dwcm9maWxlcl9kZXNlcV9zaWd1cC12IiwgdmVyLCAiLnhsc3giKSkpCmdwcm9maWxlcl9kb3duX3dyaXRlIDwtIHNtKHdyaXRlX2dwcm9maWxlcl9kYXRhKGdwcm9maWxlcl9yZXN1bHQ9Z3Byb2ZpbGVyX2Rvd24sCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGV4Y2VsPXBhc3RlMCgidGFibGVzL2dwcm9maWxlcl9kZXNlcV9zaWdkb3duLXYiLCB2ZXIsICIueGxzeCIpKSkKYGBgCgpbaW5kZXguaHRtbF0oaW5kZXguaHRtbCkgW3NhbXBsZV9lc3RpbWF0aW9uLmh0bWxdKHNhbXBsZV9lc3RpbWF0aW9uLmh0bWwpCgpgYGB7ciBzYXZlbWV9CnBhbmRlcjo6cGFuZGVyKHNlc3Npb25JbmZvKCkpCnRoaXNfc2F2ZSA8LSBwYXN0ZTAoZ3N1YihwYXR0ZXJuPSJcXC5SbWQiLCByZXBsYWNlPSIiLCB4PXJtZF9maWxlKSwgIi12IiwgdmVyLCAiLnJkYS54eiIpCm1lc3NhZ2UocGFzdGUwKCJTYXZpbmcgdG8gIiwgdGhpc19zYXZlKSkKdG1wIDwtIHNtKHNhdmVtZShmaWxlbmFtZT10aGlzX3NhdmUpKQpgYGAK