1 Sample Estimation version: 20171012

This document should make clear the suitability of our yeast data for differential expression analyses. It should also give some ideas about the depth and distribution of the data.

1.1 Visualizing raw data

There are lots of toys we have learned to use to play with with raw data and explore stuff like batch effects or non-canonical distributions or skewed counts. hpgltools provides some functionality to make this process easier. The graphs shown below and many more are generated with the wrapper ‘graph_metrics()’ but that takes away the chance to explain the graphs as I generate them.

1.2 Start with 9 samples

The paper describes the data for 9 samples and includes a table of FPKM values for them. I want to be able to prove to myself that I have the same data.

previous_fpkm <- read.csv("paper/s12_fpkm.csv")
rownames(previous_fpkm) <- previous_fpkm[["Gene.ID"]]
previous_fpkm <- previous_fpkm[, -1]
for (c in 1:ncol(previous_fpkm)) {
  previous_fpkm[[c]] <- as.numeric(gsub(pattern="\\,", replacement="", x=as.character(previous_fpkm[[c]])))
}
rsem_test <- read.csv(file="preprocessing/sample_02/outputs/rsem_lmexicana/lmexicana.genes.results", sep="\t")
rownames(rsem_test) <- gsub(pattern="^LmxM\\.\\d\\d_exon_", replacement="", x=rsem_test[["gene_id"]])
rownames(rsem_test) <- make.names(gsub(pattern="\\-\\d$", replacement="", x=rownames(rsem_test)), unique=TRUE)

rsem_prev_test <- merge(rsem_test, previous_fpkm, by="row.names")
tt <- rsem_prev_test[, c("AMA1", "AMA2", "AMA3", "PRO1", "PRO2", "PRO3", "AXA1", "AXA2", "AXA3", "FPKM")]
cor(tt, method="spearman")
##        AMA1   AMA2   AMA3   PRO1   PRO2   PRO3   AXA1   AXA2   AXA3   FPKM
## AMA1 1.0000 0.8774 0.8919 0.7247 0.6338 0.6909 0.9177 0.8269 0.8227 0.8641
## AMA2 0.8774 1.0000 0.9740 0.7188 0.7423 0.7671 0.8875 0.8951 0.8993 0.7473
## AMA3 0.8919 0.9740 1.0000 0.7390 0.7500 0.7795 0.9020 0.9142 0.9126 0.7684
## PRO1 0.7247 0.7188 0.7390 1.0000 0.8877 0.9493 0.8211 0.8645 0.8448 0.6162
## PRO2 0.6338 0.7423 0.7500 0.8877 1.0000 0.9604 0.7207 0.8735 0.8814 0.5338
## PRO3 0.6909 0.7671 0.7795 0.9493 0.9604 1.0000 0.7911 0.9074 0.8976 0.5894
## AXA1 0.9177 0.8875 0.9020 0.8211 0.7207 0.7911 1.0000 0.9184 0.9026 0.7872
## AXA2 0.8269 0.8951 0.9142 0.8645 0.8735 0.9074 0.9184 1.0000 0.9878 0.7104
## AXA3 0.8227 0.8993 0.9126 0.8448 0.8814 0.8976 0.9026 0.9878 1.0000 0.7018
## FPKM 0.8641 0.7473 0.7684 0.6162 0.5338 0.5894 0.7872 0.7104 0.7018 1.0000
maddy_table <- read.csv("/home/alizadeh/lmex_lmajor_files/relinfo/relinfo_b1/lmex_no_axaama_relinfo_b1.csv")
their_table <- read.csv("paper/S15_Table.csv", stringsAsFactors=FALSE)
pander::pander(sessionInfo())

R version 3.4.4 (2018-03-15)

Platform: x86_64-pc-linux-gnu (64-bit)

locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C

attached base packages: stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: hpgltools(v.2018.03)

loaded via a namespace (and not attached): Rcpp(v.0.12.16), xml2(v.1.2.0), roxygen2(v.6.0.1), knitr(v.1.20), magrittr(v.1.5), devtools(v.1.13.5), BiocGenerics(v.0.24.0), munsell(v.0.4.3), colorspace(v.1.3-2), R6(v.2.2.2), rlang(v.0.2.0.9001), foreach(v.1.4.4), plyr(v.1.8.4), stringr(v.1.3.0), tools(v.3.4.4), parallel(v.3.4.4), grid(v.3.4.4), Biobase(v.2.38.0), data.table(v.1.10.4-3), gtable(v.0.2.0), withr(v.2.1.2), commonmark(v.1.4), htmltools(v.0.3.6), iterators(v.1.0.9), lazyeval(v.0.2.1), yaml(v.2.1.18), rprojroot(v.1.3-2), digest(v.0.6.15), tibble(v.1.4.2), ggplot2(v.2.2.1), base64enc(v.0.1-3), codetools(v.0.2-15), memoise(v.1.1.0), evaluate(v.0.10.1), rmarkdown(v.1.9), stringi(v.1.1.7), pander(v.0.6.1), pillar(v.1.2.1), compiler(v.3.4.4), scales(v.0.5.0.9000) and backports(v.1.1.2)

message(paste0("This is hpgltools commit: ", get_git_commit()))
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 1b009834267dea125ee94934203413fbd606e783
## R> packrat::restore()
## This is hpgltools commit: Mon Apr 23 14:59:56 2018 -0400: 1b009834267dea125ee94934203413fbd606e783
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
## Saving to 02_sample_estimation-v20171012.rda.xz
tmp <- sm(saveme(filename=this_save))
LS0tCnRpdGxlOiAiTC5tZXhpY2FuYSAyMDE3OiBzYW1wbGUgZXN0aW1hdGlvbiBvZiBleHRlcm5hbCBzYW1wbGVzIHdpdGggTWFkZHkuIgphdXRob3I6ICJhdGIgYWJlbGV3QGdtYWlsLmNvbSIKZGF0ZTogImByIFN5cy5EYXRlKClgIgpvdXRwdXQ6CiBodG1sX2RvY3VtZW50OgogIGNvZGVfZG93bmxvYWQ6IHRydWUKICBjb2RlX2ZvbGRpbmc6IHNob3cKICBmaWdfY2FwdGlvbjogdHJ1ZQogIGZpZ19oZWlnaHQ6IDcKICBmaWdfd2lkdGg6IDcKICBoaWdobGlnaHQ6IGRlZmF1bHQKICBrZWVwX21kOiBmYWxzZQogIG1vZGU6IHNlbGZjb250YWluZWQKICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICBzZWxmX2NvbnRhaW5lZDogdHJ1ZQogIHRoZW1lOiByZWFkYWJsZQogIHRvYzogdHJ1ZQogIHRvY19mbG9hdDoKICAgIGNvbGxhcHNlZDogZmFsc2UKICAgIHNtb290aF9zY3JvbGw6IGZhbHNlCi0tLQoKPHN0eWxlPgogIGJvZHkgLm1haW4tY29udGFpbmVyIHsKICAgIG1heC13aWR0aDogMTYwMHB4OwogIH0KPC9zdHlsZT4KCmBgYHtyIG9wdGlvbnMsIGluY2x1ZGU9RkFMU0V9CmxpYnJhcnkoImhwZ2x0b29scyIpCnR0IDwtIGRldnRvb2xzOjpsb2FkX2FsbCgifi9ocGdsdG9vbHMiKQprbml0cjo6b3B0c19rbml0JHNldChwcm9ncmVzcz1UUlVFLAogICAgICAgICAgICAgICAgICAgICB2ZXJib3NlPVRSVUUsCiAgICAgICAgICAgICAgICAgICAgIHdpZHRoPTkwLAogICAgICAgICAgICAgICAgICAgICBlY2hvPVRSVUUpCmtuaXRyOjpvcHRzX2NodW5rJHNldChlcnJvcj1UUlVFLAogICAgICAgICAgICAgICAgICAgICAgZmlnLndpZHRoPTgsCiAgICAgICAgICAgICAgICAgICAgICBmaWcuaGVpZ2h0PTgsCiAgICAgICAgICAgICAgICAgICAgICBkcGk9OTYpCm9sZF9vcHRpb25zIDwtIG9wdGlvbnMoZGlnaXRzPTQsCiAgICAgICAgICAgICAgICAgICAgICAgc3RyaW5nc0FzRmFjdG9ycz1GQUxTRSwKICAgICAgICAgICAgICAgICAgICAgICBrbml0ci5kdXBsaWNhdGUubGFiZWw9ImFsbG93IikKZ2dwbG90Mjo6dGhlbWVfc2V0KGdncGxvdDI6OnRoZW1lX2J3KGJhc2Vfc2l6ZT0xMCkpCnNldC5zZWVkKDEpCnZlciA8LSAiMjAxNzEwMTIiCnByZXZpb3VzX2ZpbGUgPC0gIjAxX2Fubm90YXRpb24uUm1kIgoKdG1wIDwtIHRyeShzbShsb2FkbWUoZmlsZW5hbWU9cGFzdGUwKGdzdWIocGF0dGVybj0iXFwuUm1kIiwgcmVwbGFjZT0iIiwgeD1wcmV2aW91c19maWxlKSwgIi12IiwgdmVyLCAiLnJkYS54eiIpKSkpCnJtZF9maWxlIDwtICIwMl9zYW1wbGVfZXN0aW1hdGlvbi5SbWQiCmBgYAoKIyBTYW1wbGUgRXN0aW1hdGlvbiB2ZXJzaW9uOiBgciB2ZXJgCgpUaGlzIGRvY3VtZW50IHNob3VsZCBtYWtlIGNsZWFyIHRoZSBzdWl0YWJpbGl0eSBvZiBvdXIgeWVhc3QgZGF0YSBmb3IgZGlmZmVyZW50aWFsIGV4cHJlc3Npb24KYW5hbHlzZXMuICBJdCBzaG91bGQgYWxzbyBnaXZlIHNvbWUgaWRlYXMgYWJvdXQgdGhlIGRlcHRoIGFuZCBkaXN0cmlidXRpb24gb2YKdGhlIGRhdGEuCgojIyBWaXN1YWxpemluZyByYXcgZGF0YQoKVGhlcmUgYXJlIGxvdHMgb2YgdG95cyB3ZSBoYXZlIGxlYXJuZWQgdG8gdXNlIHRvIHBsYXkgd2l0aCB3aXRoIHJhdyBkYXRhIGFuZCBleHBsb3JlIHN0dWZmIGxpa2UKYmF0Y2ggZWZmZWN0cyBvciBub24tY2Fub25pY2FsIGRpc3RyaWJ1dGlvbnMgb3Igc2tld2VkIGNvdW50cy4gIGhwZ2x0b29scyBwcm92aWRlcyBzb21lIGZ1bmN0aW9uYWxpdHkKdG8gbWFrZSB0aGlzIHByb2Nlc3MgZWFzaWVyLiAgVGhlIGdyYXBocyBzaG93biBiZWxvdyBhbmQgbWFueSBtb3JlIGFyZSBnZW5lcmF0ZWQgd2l0aCB0aGUgd3JhcHBlcgonZ3JhcGhfbWV0cmljcygpJyBidXQgdGhhdCB0YWtlcyBhd2F5IHRoZSBjaGFuY2UgdG8gZXhwbGFpbiB0aGUgZ3JhcGhzIGFzIEkgZ2VuZXJhdGUgdGhlbS4KCiMjIFN0YXJ0IHdpdGggOSBzYW1wbGVzCgpUaGUgcGFwZXIgZGVzY3JpYmVzIHRoZSBkYXRhIGZvciA5IHNhbXBsZXMgYW5kIGluY2x1ZGVzIGEgdGFibGUgb2YgRlBLTSB2YWx1ZXMgZm9yIHRoZW0uICBJIHdhbnQgdG8KYmUgYWJsZSB0byBwcm92ZSB0byBteXNlbGYgdGhhdCBJIGhhdmUgdGhlIHNhbWUgZGF0YS4KCmBgYHtyIGNvbXBhcmVfOV9zYW1wbGVzfQpwcmV2aW91c19mcGttIDwtIHJlYWQuY3N2KCJwYXBlci9zMTJfZnBrbS5jc3YiKQpyb3duYW1lcyhwcmV2aW91c19mcGttKSA8LSBwcmV2aW91c19mcGttW1siR2VuZS5JRCJdXQpwcmV2aW91c19mcGttIDwtIHByZXZpb3VzX2Zwa21bLCAtMV0KZm9yIChjIGluIDE6bmNvbChwcmV2aW91c19mcGttKSkgewogIHByZXZpb3VzX2Zwa21bW2NdXSA8LSBhcy5udW1lcmljKGdzdWIocGF0dGVybj0iXFwsIiwgcmVwbGFjZW1lbnQ9IiIsIHg9YXMuY2hhcmFjdGVyKHByZXZpb3VzX2Zwa21bW2NdXSkpKQp9CnJzZW1fdGVzdCA8LSByZWFkLmNzdihmaWxlPSJwcmVwcm9jZXNzaW5nL3NhbXBsZV8wMi9vdXRwdXRzL3JzZW1fbG1leGljYW5hL2xtZXhpY2FuYS5nZW5lcy5yZXN1bHRzIiwgc2VwPSJcdCIpCnJvd25hbWVzKHJzZW1fdGVzdCkgPC0gZ3N1YihwYXR0ZXJuPSJeTG14TVxcLlxcZFxcZF9leG9uXyIsIHJlcGxhY2VtZW50PSIiLCB4PXJzZW1fdGVzdFtbImdlbmVfaWQiXV0pCnJvd25hbWVzKHJzZW1fdGVzdCkgPC0gbWFrZS5uYW1lcyhnc3ViKHBhdHRlcm49IlxcLVxcZCQiLCByZXBsYWNlbWVudD0iIiwgeD1yb3duYW1lcyhyc2VtX3Rlc3QpKSwgdW5pcXVlPVRSVUUpCgpyc2VtX3ByZXZfdGVzdCA8LSBtZXJnZShyc2VtX3Rlc3QsIHByZXZpb3VzX2Zwa20sIGJ5PSJyb3cubmFtZXMiKQp0dCA8LSByc2VtX3ByZXZfdGVzdFssIGMoIkFNQTEiLCAiQU1BMiIsICJBTUEzIiwgIlBSTzEiLCAiUFJPMiIsICJQUk8zIiwgIkFYQTEiLCAiQVhBMiIsICJBWEEzIiwgIkZQS00iKV0KY29yKHR0LCBtZXRob2Q9InNwZWFybWFuIikKCm1hZGR5X3RhYmxlIDwtIHJlYWQuY3N2KCIvaG9tZS9hbGl6YWRlaC9sbWV4X2xtYWpvcl9maWxlcy9yZWxpbmZvL3JlbGluZm9fYjEvbG1leF9ub19heGFhbWFfcmVsaW5mb19iMS5jc3YiKQp0aGVpcl90YWJsZSA8LSByZWFkLmNzdigicGFwZXIvUzE1X1RhYmxlLmNzdiIsIHN0cmluZ3NBc0ZhY3RvcnM9RkFMU0UpCmBgYAoKYGBge3Igc2F2ZW1lfQpwYW5kZXI6OnBhbmRlcihzZXNzaW9uSW5mbygpKQptZXNzYWdlKHBhc3RlMCgiVGhpcyBpcyBocGdsdG9vbHMgY29tbWl0OiAiLCBnZXRfZ2l0X2NvbW1pdCgpKSkKdGhpc19zYXZlIDwtIHBhc3RlMChnc3ViKHBhdHRlcm49IlxcLlJtZCIsIHJlcGxhY2U9IiIsIHg9cm1kX2ZpbGUpLCAiLXYiLCB2ZXIsICIucmRhLnh6IikKbWVzc2FnZShwYXN0ZTAoIlNhdmluZyB0byAiLCB0aGlzX3NhdmUpKQp0bXAgPC0gc20oc2F2ZW1lKGZpbGVuYW1lPXRoaXNfc2F2ZSkpCmBgYAo=