## Time-stamp: <Sat Jun 28 23:16:49 2014 Ashton Trey Belew (abelew@gmail.com)>
load("RData")
source("R/myr.R")
##rm(list=ls())
##save(list = ls(all=TRUE), file="RData")

Genome annotation input

I am pulling data from 3 primary sources: * The gas gff annotations/fasta genome * The NCBI reference genome for the strain 5005 * The microbesonline.org tab delimited file.

More information may be found in the reference/ directory.

annotations = import.gff3("reference/gff/nz131.gff.gz", asRangedData=FALSE)
annotation_info = as.data.frame(annotations)
genes = annotation_info[annotation_info$type=="gene",]
gene_annotations = subset(genes, select = c("start", "end", "width", "strand", "Name", "protein_id", "ID"))
short_annotations = gene_annotations[,c("ID", "Name")]
short_annotations$spy = rownames(short_annotations)
colnames(short_annotations) = c("ID", "Name", "Spy")
write_xls(short_annotations, "genes", rowname="ID")

microbes = read.csv(file="reference/microbesonline/nz131_annotations.tab.gz", header=1, sep="\t")
microbes_go = microbes[,c("sysName","GO")]
go_entries = strsplit(as.character(microbes_go$GO), split=",", perl=TRUE)
microbes_go_oneperrow = data.frame(name = rep(microbes_go$sysName, sapply(go_entries, length)), GO = unlist(go_entries))
microbes_go = microbes_go_oneperrow
rm(microbes_go_oneperrow)
rm(go_entries)
## These are used for gene ontology stuff...
microbes_lengths = microbes[,c("sysName", "start","stop")]
microbes_lengths$length = abs(microbes$start - microbes$stop)
microbes_lengths = microbes_lengths[,c("sysName","length")]

Make tooltips for interactive graphs

I am going to be potentially throwing tooltips all over the place, so let us make a quick datastructure for them.

tooltip_data = genes
tooltip_data = tooltip_data[,c("ID","Name", "locus_tag")]
tooltip_data$tooltip = paste(tooltip_data$Name, tooltip_data$locus_tag, sep=": ")
tooltip_data$tooltip = gsub("\\+", " ", tooltip_data$tooltip)
tooltip_data = tooltip_data[-1]
tooltip_data = tooltip_data[-1]
head(tooltip_data)

##     locus_tag          tooltip
## 12 Spy49_0001 dnaA: Spy49_0001
## 16 Spy49_0002 dnaN: Spy49_0002
## 20       <NA>   Spy49_0003: NA
## 24       <NA>   Spy49_0004: NA
## 28 Spy49_0005  pth: Spy49_0005
## 32 Spy49_0006 trcF: Spy49_0006

Set up the experimental design

I have a sample design csv file called all_samples.csv which should contain all the needed information for this process.

The count tables come from the preprocessing directory. In each HPGL directory there is a counts directory. I am pulling from these the count tables generated from bowtie runs which asked for a randomly assigned multimatch (-M 1) with 0 mismatches (-v 0). These files are called something like: 05v0M1gen_NZ131_t0_genome.count as an example.

library_colors = list(lib1="yellow4",
    lib2="darkgreen",
    lib3="red",
    lib4="darkblue")
time_colors = list(t0="lightgray",
    t1="darkgray",
    t2="black")

## The all_samples.csv controls the experimental settings
## all the following lines manipulate the information therein
## in order to set up media types, replicates, etc
sample_definitions = read.csv(file="all_samples_nz131.csv", sep=",")
## If I have summary lines in the csv, they will not start with 'HPGL' and so should be dropped.
sample_definitions = sample_definitions[grepl('^HPGL', sample_definitions$Sample.ID, perl=TRUE),]
## Pre set the color scheme
sample_definitions$colors = library_colors
## This long statement just writes out a computer path name containing the count files to read by sample name.
sample_definitions$counts = paste("data/count_tables/", sample_definitions$Strain, "-", sample_definitions$Replicate, sample_definitions$Time, ".count.gz", sep="")
sample_definitions = as.data.frame(sample_definitions)
rownames(sample_definitions) = sample_definitions$Sample.ID
## my_read_files does just that, it reads the count tables and makes a large raw count table from them.
all_count_tables = my_read_files(as.character(sample_definitions$Sample.ID), as.character(sample_definitions$counts))
## make it into a matrix for use as an expressionset
all_count_matrix = as.matrix(all_count_tables)

gene_info = all_count_matrix[rownames(all_count_matrix) %in% microbes$sysName,]
all_count_matrix = all_count_matrix[rownames(all_count_matrix) %in% microbes$sysName,]
metadata = new("AnnotatedDataFrame", data.frame(sample=sample_definitions$Sample.ID,
    condition=sample_definitions$Time,
    batch=sample_definitions$Library,
    strain=sample_definitions$Strain,
    color=as.character(sample_definitions$colors),
    counts=sample_definitions$counts))

## Now generate the expressionset object
sampleNames(metadata) = colnames(all_count_matrix)
feature_data = new("AnnotatedDataFrame", as.data.frame(gene_info))
featureNames(feature_data) = rownames(all_count_matrix)
experiment = new("ExpressionSet", exprs=all_count_matrix,
    phenoData=metadata, featureData=feature_data)
## print some information to see that it worked
print(experiment)

## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 1764 features, 4 samples 
##   element names: exprs 
## protocolData: none
## phenoData
##   sampleNames: HPGL0426 HPGL0427 HPGL0428 HPGL0429
##   varLabels: sample condition ... counts (6 total)
##   varMetadata: labelDescription
## featureData
##   featureNames: Spy49_0001 Spy49_0002 ... Spy49_tRNAval4 (1764
##     total)
##   fvarLabels: HPGL0426 HPGL0427 HPGL0428 HPGL0429
##   fvarMetadata: labelDescription
## experimentData: use 'experimentData(object)'
## Annotation:

summary(exprs(experiment))

##     HPGL0426       HPGL0427        HPGL0428        HPGL0429    
##  Min.   :   0   Min.   :    0   Min.   :    0   Min.   :    0  
##  1st Qu.:   0   1st Qu.:    0   1st Qu.:    0   1st Qu.:    0  
##  Median :   7   Median :   13   Median :    3   Median :    0  
##  Mean   :  20   Mean   :   67   Mean   :   65   Mean   :   18  
##  3rd Qu.:  22   3rd Qu.:   52   3rd Qu.:   13   3rd Qu.:    2  
##  Max.   :5800   Max.   :42458   Max.   :73486   Max.   :15212

head(fData(experiment))

##            HPGL0426 HPGL0427 HPGL0428 HPGL0429
## Spy49_0001        0        5        2        0
## Spy49_0002        2        5        3        0
## Spy49_0003        7       14        2        0
## Spy49_0004        9       16        7        3
## Spy49_0005        0        0        0        0
## Spy49_0006      109      366      168       27

head(pData(experiment))

##            sample condition batch strain     color
## HPGL0426 HPGL0426        t0     1  NZ131   yellow4
## HPGL0427 HPGL0427        t1     1  NZ131 darkgreen
## HPGL0428 HPGL0428        t2     1  NZ131       red
## HPGL0429 HPGL0429        t3     1  NZ131  darkblue
##                                         counts
## HPGL0426 data/count_tables/NZ131-l1t0.count.gz
## HPGL0427 data/count_tables/NZ131-l1t1.count.gz
## HPGL0428 data/count_tables/NZ131-l1t2.count.gz
## HPGL0429 data/count_tables/NZ131-l1t3.count.gz

dim(fData(experiment))

## [1] 1764    4

Graph metrics

I can use my normalization toy on this data, but that seems like a terrible idea

all_expt = expt_subset(experiment, "")
head(exprs(all_expt$expressionset))

##            HPGL0426 HPGL0427 HPGL0428 HPGL0429
## Spy49_0001        0        5        2        0
## Spy49_0002        2        5        3        0
## Spy49_0003        7       14        2        0
## Spy49_0004        9       16        7        3
## Spy49_0005        0        0        0        0
## Spy49_0006      109      366      168       27

head(microbes)

##   locusId      accession        GI scaffoldId start stop strand    sysName
## 1 5671438 YP_002285060.1 209558588     119384   232 1587      + Spy49_0001
## 2 5671439 YP_002285061.1 209558589     119384  1742 2878      + Spy49_0002
## 3 5671440 YP_002285062.1 209558590     119384  2953 3150      + Spy49_0003
## 4 5671441 YP_002285063.1 209558591     119384  3480 4595      + Spy49_0004
## 5 5671442 YP_002285064.1 209558592     119384  4665 5234      + Spy49_0005
## 6 5671443 YP_002285065.1 209558593     119384  5321 8740      + Spy49_0006
##         name                                                       desc
## 1       dnaA    Chromosomal replication initiator protein dnaA (RefSeq)
## 2       dnaN                Beta subunit of DNA polymerase III (RefSeq)
## 3 Spy49_0003                              hypothetical protein (RefSeq)
## 4 Spy49_0004 GTP-binding and nucleic acid-binding protein YchF (RefSeq)
## 5        pth                           Peptidyl-tRNA hydrolase (RefSeq)
## 6       trcF              Transcription-repair coupling factor (RefSeq)
##       COG COGFun
## 1  COG593      L
## 2  COG592      L
## 3 COG4481      S
## 4   COG12      J
## 5  COG193      J
## 6 COG1197     LK
##                                                          COGDesc
## 1                  ATPase involved in DNA replication initiation
## 2            DNA polymerase sliding clamp subunit (PCNA homolog)
## 3                  Uncharacterized protein conserved in bacteria
## 4                  Predicted GTPase, probable translation factor
## 5                                        Peptidyl-tRNA hydrolase
## 6 Transcription-repair coupling factor (superfamily II helicase)
##                                                           TIGRFam
## 1 TIGR00362 chromosomal replication initiator protein DnaA [dnaA]
## 2               TIGR00663 DNA polymerase III, beta subunit [dnaN]
## 3                                                                
## 4                       TIGR00092 GTP-binding protein YchF [ychF]
## 5                         TIGR00447 peptidyl-tRNA hydrolase [pth]
## 6            TIGR00580 transcription-repair coupling factor [mfd]
##                                                   TIGRRoles
## 1 DNA metabolism:DNA replication, recombination, and repair
## 2 DNA metabolism:DNA replication, recombination, and repair
## 3                                                          
## 4                                  Unknown function:General
## 5                                   Protein synthesis:Other
## 6 DNA metabolism:DNA replication, recombination, and repair
##                                                                                                                                                                                           GO
## 1                                                                                                                                     GO:0006270,GO:0006275,GO:0003688,GO:0017111,GO:0005524
## 2 GO:0006260,GO:0016451,GO:0003890,GO:0003895,GO:0016000,GO:0016452,GO:0003891,GO:0016448,GO:0019984,GO:0003677,GO:0003893,GO:0008408,GO:0016449,GO:0003889,GO:0003894,GO:0015999,GO:0016450
## 3                                                                                                                                                                                           
## 4                                                                                                                          GO:0015684,GO:0009276,GO:0005622,GO:0016021,GO:0015093,GO:0005525
## 5                                                                                                                                                                      GO:0006412,GO:0004045
## 6                                                                                                                          GO:0006281,GO:0006355,GO:0005524,GO:0003684,GO:0003700,GO:0008026
##         EC                       ECDesc
## 1                                      
## 2  2.7.7.7 DNA-directed DNA polymerase.
## 3                                      
## 4                                      
## 5 3.1.1.29    Aminoacyl-tRNA hydrolase.
## 6  3.6.1.-

microbes$width = abs(microbes$start - microbes$stop)
annotations = microbes
annotations = annotations[grep('pseudo', annotations$sysName, perl=TRUE, invert=TRUE),]
annotations = annotations[grep('^$', annotations$sysName, perl=TRUE, invert=TRUE),]
rownames(annotations) = annotations$sysName
all_qrpkm = my_norm(expt=all_expt, norm="quant", filter="log2", filter_low=FALSE, out_type="rpkm", annotations=annotations)$counts

## [1] "Applying normalization: quant"
## [1] "Setting output type as: rpkm"
## [1] "Applying: log2 filter"

all_norm_expt = all_expt
## I did that so that I can replace the expressionset with the
## normalized data more easily...
normalized_expressionset = all_norm_expt$expressionset
exprs(normalized_expressionset) = all_qrpkm
all_norm_expt$expressionset = normalized_expressionset

all_design = data.frame(all_norm_expt$design)
all_design$longnames = paste(all_design$strain,  all_design$batch, all_design$condition, sep="_")

my_boxplot(expt=all_norm_expt, names=all_design$longnames)

## Using id as id variables

plot of chunk graph_metrics

my_disheat(expt=all_norm_expt, names=all_design$longnames)

## [1] "Generating distance matrix using: euclidean"

plot of chunk graph_metrics

my_corheat(expt=all_norm_expt, names=all_design$longnames)

plot of chunk graph_metrics

Get comparisons for Yoann

## Compare library t0 t1
comp = exprs(expt_subset(all_norm_expt, "condition=='t0'|condition=='t1'")$expressionset)
pdf(file="figures/comparisons/nz131/t0_vs_t1_scatter.pdf")
sc = my_linear_scatter(comp)

## [1] "Calculating correlation between the axes."
## 
##  Pearson's product-moment correlation
## 
## data:  df[, 1] and df[, 2]
## t = 76.54, df = 1762, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8655 0.8872
## sample estimates:
##    cor 
## 0.8768 
## 
## [1] "Calculating linear model between the axes"
## 
## Call:
## lmrob(formula = second ~ first, data = df, method = "SMDM")
##  \--> method = "SMDM"
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.712 -0.426 -0.106  0.531  9.039 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.10568    0.04229     2.5    0.013 *  
## first        0.98439    0.00629   156.4   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Robust residual standard error: 1.02 
## Multiple R-squared:  0.928,  Adjusted R-squared:  0.927 
## Convergence in 8 IRWLS iterations
## 
## Robustness weights: 
##  6 observations c(605,632,941,1577,1605,1711)
##   are outliers with |weight| = 0 ( < 5.7e-05); 
##  1244 weights are ~= 1. The remaining 514 ones are summarized as
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0003  0.2640  0.7210  0.6070  0.9480  0.9990 
## Algorithmic parameters: 
## tuning.chi1 tuning.chi2 tuning.chi3 tuning.chi4          bb tuning.psi1 
##    -5.0e-01     1.5e+00          NA     5.0e-01     5.0e-01    -5.0e-01 
## tuning.psi2 tuning.psi3 tuning.psi4  refine.tol     rel.tol   solve.tol 
##     1.5e+00     9.5e-01          NA     1.0e-07     1.0e-07     1.0e-07 
##      nResample         max.it       best.r.s       k.fast.s          k.max 
##            500             50              2              1            200 
##    maxit.scale      trace.lev            mts     compute.rd      numpoints 
##            200              0           1000              0             10 
## fast.s.large.n 
##           2000 
##           psi   subsampling           cov 
##         "lqq" "nonsingular"     ".vcov.w" 
## seed : int(0) 
## [1] "Generating histogram of the x axis."
## [1] "No binwidth provided, setting it to  in order to have 10000 bins."
## [1] "Generating histogram of the y axis."
## [1] "No binwidth provided, setting it to  in order to have 10000 bins."
## [1] "Generating a histogram comparing the axes."
## [1] "Summarise the data."
##      first           second     
##  Min.   : 0.00   Min.   : 0.00  
##  1st Qu.: 0.00   1st Qu.: 0.00  
##  Median : 6.84   Median : 6.91  
##  Mean   : 5.46   Mean   : 5.41  
##  3rd Qu.: 8.43   3rd Qu.: 8.48  
##  Max.   :18.44   Max.   :18.44  
## [1] "Uncorrected t test(s) between columns:"
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  play_all$expression and play_all$cond 
## 
##        first
## second 0.64 
## 
## P value adjustment method: none 
## [1] "Bon Ferroni corrected t test(s) between columns:"
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  play_all$expression and play_all$cond 
## 
##        first
## second 0.64 
## 
## P value adjustment method: bonferroni 
## [1] "No binwidth provided, setting it to 0.00184380094599986 in order to have 10000 bins."

sc$correlation

## 
##  Pearson's product-moment correlation
## 
## data:  df[, 1] and df[, 2]
## t = 76.54, df = 1762, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8655 0.8872
## sample estimates:
##    cor 
## 0.8768

dev.off()

## Cairo 
##     2

## Compare library t0 t2
comp = exprs(expt_subset(all_norm_expt, "condition=='t0'|condition=='t2'")$expressionset)
pdf(file="figures/comparisons/nz131/t0_vs_t2_scatter.pdf")
sc = my_linear_scatter(comp)

## [1] "Calculating correlation between the axes."
## 
##  Pearson's product-moment correlation
## 
## data:  df[, 1] and df[, 2]
## t = 55.87, df = 1762, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7820 0.8157
## sample estimates:
##    cor 
## 0.7995 
## 
## [1] "Calculating linear model between the axes"
## 
## Call:
## lmrob(formula = second ~ first, data = df, method = "SMDM")
##  \--> method = "SMDM"
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.4429  -0.6674  -0.0115   0.7285  12.6039 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.0115     0.0679    0.17     0.87    
## first         0.9768     0.0102   95.55   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Robust residual standard error: 1.72 
## Multiple R-squared:  0.818,  Adjusted R-squared:  0.818 
## Convergence in 9 IRWLS iterations
## 
## Robustness weights: 
##  1287 weights are ~= 1. The remaining 477 ones are summarized as
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0019  0.2870  0.7140  0.6200  0.9440  0.9990 
## Algorithmic parameters: 
## tuning.chi1 tuning.chi2 tuning.chi3 tuning.chi4          bb tuning.psi1 
##    -5.0e-01     1.5e+00          NA     5.0e-01     5.0e-01    -5.0e-01 
## tuning.psi2 tuning.psi3 tuning.psi4  refine.tol     rel.tol   solve.tol 
##     1.5e+00     9.5e-01          NA     1.0e-07     1.0e-07     1.0e-07 
##      nResample         max.it       best.r.s       k.fast.s          k.max 
##            500             50              2              1            200 
##    maxit.scale      trace.lev            mts     compute.rd      numpoints 
##            200              0           1000              0             10 
## fast.s.large.n 
##           2000 
##           psi   subsampling           cov 
##         "lqq" "nonsingular"     ".vcov.w" 
## seed : int(0) 
## [1] "Generating histogram of the x axis."
## [1] "No binwidth provided, setting it to  in order to have 10000 bins."
## [1] "Generating histogram of the y axis."
## [1] "No binwidth provided, setting it to  in order to have 10000 bins."
## [1] "Generating a histogram comparing the axes."
## [1] "Summarise the data."
##      first           second     
##  Min.   : 0.00   Min.   : 0.00  
##  1st Qu.: 0.00   1st Qu.: 0.00  
##  Median : 6.84   Median : 6.88  
##  Mean   : 5.46   Mean   : 5.03  
##  3rd Qu.: 8.43   3rd Qu.: 8.51  
##  Max.   :18.44   Max.   :18.44  
## [1] "Uncorrected t test(s) between columns:"
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  play_all$expression and play_all$cond 
## 
##        first  
## second 0.00078
## 
## P value adjustment method: none 
## [1] "Bon Ferroni corrected t test(s) between columns:"
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  play_all$expression and play_all$cond 
## 
##        first  
## second 0.00078
## 
## P value adjustment method: bonferroni 
## [1] "No binwidth provided, setting it to 0.00184409346271565 in order to have 10000 bins."

sc$correlation

## 
##  Pearson's product-moment correlation
## 
## data:  df[, 1] and df[, 2]
## t = 55.87, df = 1762, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7820 0.8157
## sample estimates:
##    cor 
## 0.7995

dev.off()

## Cairo 
##     2

## Compare library t0 t3
comp = exprs(expt_subset(all_norm_expt, "condition=='t0'|condition=='t3'")$expressionset)
pdf(file="figures/comparisons/nz131/t0_vs_t3_scatter.pdf")
sc = my_linear_scatter(comp)

## [1] "Calculating correlation between the axes."
## 
##  Pearson's product-moment correlation
## 
## data:  df[, 1] and df[, 2]
## t = 20.41, df = 1762, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3988 0.4743
## sample estimates:
##    cor 
## 0.4373 
## 
## [1] "Calculating linear model between the axes"
## 
## Call:
## lmrob(formula = second ~ first, data = df, method = "SMDM")
##  \--> method = "SMDM"
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -4.68100 -1.80218  0.00757  1.67505  8.76040 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.6549     0.0956    48.7   <2e-16 ***
## first         0.2751     0.0146    18.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Robust residual standard error: 2.2 
## Multiple R-squared:  0.173,  Adjusted R-squared:  0.172 
## Convergence in 5 IRWLS iterations
## 
## Robustness weights: 
##  1174 weights are ~= 1. The remaining 590 ones are summarized as
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.175   0.918   0.955   0.930   0.986   0.999 
## Algorithmic parameters: 
## tuning.chi1 tuning.chi2 tuning.chi3 tuning.chi4          bb tuning.psi1 
##    -5.0e-01     1.5e+00          NA     5.0e-01     5.0e-01    -5.0e-01 
## tuning.psi2 tuning.psi3 tuning.psi4  refine.tol     rel.tol   solve.tol 
##     1.5e+00     9.5e-01          NA     1.0e-07     1.0e-07     1.0e-07 
##      nResample         max.it       best.r.s       k.fast.s          k.max 
##            500             50              2              1            200 
##    maxit.scale      trace.lev            mts     compute.rd      numpoints 
##            200              0           1000              0             10 
## fast.s.large.n 
##           2000 
##           psi   subsampling           cov 
##         "lqq" "nonsingular"     ".vcov.w" 
## seed : int(0) 
## [1] "Generating histogram of the x axis."
## [1] "No binwidth provided, setting it to  in order to have 10000 bins."
## [1] "Generating histogram of the y axis."
## [1] "No binwidth provided, setting it to  in order to have 10000 bins."
## [1] "Generating a histogram comparing the axes."
## [1] "Summarise the data."
##      first           second     
##  Min.   : 0.00   Min.   : 2.25  
##  1st Qu.: 0.00   1st Qu.: 4.22  
##  Median : 6.84   Median : 5.46  
##  Mean   : 5.46   Mean   : 6.19  
##  3rd Qu.: 8.43   3rd Qu.: 8.29  
##  Max.   :18.44   Max.   :18.49  
## [1] "Uncorrected t test(s) between columns:"
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  play_all$expression and play_all$cond 
## 
##        first  
## second 3.1e-12
## 
## P value adjustment method: none 
## [1] "Bon Ferroni corrected t test(s) between columns:"
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  play_all$expression and play_all$cond 
## 
##        first  
## second 3.1e-12
## 
## P value adjustment method: bonferroni 
## [1] "No binwidth provided, setting it to 0.00184879482325277 in order to have 10000 bins."

sc$correlation

## 
##  Pearson's product-moment correlation
## 
## data:  df[, 1] and df[, 2]
## t = 20.41, df = 1762, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3988 0.4743
## sample estimates:
##    cor 
## 0.4373

dev.off()

## Cairo 
##     2

## Compare library t1 t2
comp = exprs(expt_subset(all_norm_expt, "condition=='t1'|condition=='t2'")$expressionset)
pdf(file="figures/comparisons/nz131/t1_vs_t2_scatter.pdf")
sc = my_linear_scatter(comp)

## [1] "Calculating correlation between the axes."
## 
##  Pearson's product-moment correlation
## 
## data:  df[, 1] and df[, 2]
## t = 84.96, df = 1762, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8870 0.9053
## sample estimates:
##    cor 
## 0.8965 
## 
## [1] "Calculating linear model between the axes"
## 
## Call:
## lmrob(formula = second ~ first, data = df, method = "SMDM")
##  \--> method = "SMDM"
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.6638 -0.4533 -0.0503  0.2463  8.5292 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.05033    0.02662    1.89    0.059 .  
## first        0.99574    0.00398  249.88   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Robust residual standard error: 0.662 
## Multiple R-squared:  0.973,  Adjusted R-squared:  0.973 
## Convergence in 7 IRWLS iterations
## 
## Robustness weights: 
##  94 observations c(59,68,132,176,236,247,294,351,364,372,458,464,471,484,509,517,535,537,552,555,558,562,564,567,615,649,670,686,704,707,714,715,728,750,796,838,852,874,876,892,893,904,909,914,915,919,922,939,940,947,948,949,972,985,1014,1024,1046,1061,1066,1068,1102,1105,1115,1117,1118,1119,1120,1166,1175,1202,1211,1227,1251,1288,1321,1327,1328,1345,1353,1361,1378,1424,1453,1459,1534,1592,1595,1622,1624,1631,1635,1640,1661,1674)
##   are outliers with |weight| <= 1.4e-05 ( < 5.7e-05); 
##  1179 weights are ~= 1. The remaining 491 ones are summarized as
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0001  0.3380  0.7630  0.6330  0.9330  0.9990 
## Algorithmic parameters: 
## tuning.chi1 tuning.chi2 tuning.chi3 tuning.chi4          bb tuning.psi1 
##    -5.0e-01     1.5e+00          NA     5.0e-01     5.0e-01    -5.0e-01 
## tuning.psi2 tuning.psi3 tuning.psi4  refine.tol     rel.tol   solve.tol 
##     1.5e+00     9.5e-01          NA     1.0e-07     1.0e-07     1.0e-07 
##      nResample         max.it       best.r.s       k.fast.s          k.max 
##            500             50              2              1            200 
##    maxit.scale      trace.lev            mts     compute.rd      numpoints 
##            200              0           1000              0             10 
## fast.s.large.n 
##           2000 
##           psi   subsampling           cov 
##         "lqq" "nonsingular"     ".vcov.w" 
## seed : int(0) 
## [1] "Generating histogram of the x axis."
## [1] "No binwidth provided, setting it to  in order to have 10000 bins."
## [1] "Generating histogram of the y axis."
## [1] "No binwidth provided, setting it to  in order to have 10000 bins."
## [1] "Generating a histogram comparing the axes."
## [1] "Summarise the data."
##      first           second     
##  Min.   : 0.00   Min.   : 0.00  
##  1st Qu.: 0.00   1st Qu.: 0.00  
##  Median : 6.91   Median : 6.88  
##  Mean   : 5.41   Mean   : 5.03  
##  3rd Qu.: 8.48   3rd Qu.: 8.51  
##  Max.   :18.44   Max.   :18.44  
## [1] "Uncorrected t test(s) between columns:"
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  play_all$expression and play_all$cond 
## 
##        first 
## second 0.0041
## 
## P value adjustment method: none 
## [1] "Bon Ferroni corrected t test(s) between columns:"
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  play_all$expression and play_all$cond 
## 
##        first 
## second 0.0041
## 
## P value adjustment method: bonferroni 
## [1] "No binwidth provided, setting it to 0.00184409346271565 in order to have 10000 bins."

sc$correlation

## 
##  Pearson's product-moment correlation
## 
## data:  df[, 1] and df[, 2]
## t = 84.96, df = 1762, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8870 0.9053
## sample estimates:
##    cor 
## 0.8965

dev.off()

## Cairo 
##     2

## Compare library t1 t3
comp = exprs(expt_subset(all_norm_expt, "condition=='t1'|condition=='t3'")$expressionset)
pdf(file="figures/comparisons/nz131/t1_vs_t3_scatter.pdf")
sc = my_linear_scatter(comp)

## [1] "Calculating correlation between the axes."
## 
##  Pearson's product-moment correlation
## 
## data:  df[, 1] and df[, 2]
## t = 24.56, df = 1762, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4693 0.5389
## sample estimates:
##    cor 
## 0.5049 
## 
## [1] "Calculating linear model between the axes"
## 
## Call:
## lmrob(formula = second ~ first, data = df, method = "SMDM")
##  \--> method = "SMDM"
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.422 -1.705  0.123  1.596  8.276 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.4955     0.0900    50.0   <2e-16 ***
## first         0.3101     0.0137    22.6   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Robust residual standard error: 2.12 
## Multiple R-squared:  0.233,  Adjusted R-squared:  0.233 
## Convergence in 5 IRWLS iterations
## 
## Robustness weights: 
##  1171 weights are ~= 1. The remaining 593 ones are summarized as
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.185   0.917   0.952   0.930   0.984   0.999 
## Algorithmic parameters: 
## tuning.chi1 tuning.chi2 tuning.chi3 tuning.chi4          bb tuning.psi1 
##    -5.0e-01     1.5e+00          NA     5.0e-01     5.0e-01    -5.0e-01 
## tuning.psi2 tuning.psi3 tuning.psi4  refine.tol     rel.tol   solve.tol 
##     1.5e+00     9.5e-01          NA     1.0e-07     1.0e-07     1.0e-07 
##      nResample         max.it       best.r.s       k.fast.s          k.max 
##            500             50              2              1            200 
##    maxit.scale      trace.lev            mts     compute.rd      numpoints 
##            200              0           1000              0             10 
## fast.s.large.n 
##           2000 
##           psi   subsampling           cov 
##         "lqq" "nonsingular"     ".vcov.w" 
## seed : int(0) 
## [1] "Generating histogram of the x axis."
## [1] "No binwidth provided, setting it to  in order to have 10000 bins."
## [1] "Generating histogram of the y axis."
## [1] "No binwidth provided, setting it to  in order to have 10000 bins."
## [1] "Generating a histogram comparing the axes."
## [1] "Summarise the data."
##      first           second     
##  Min.   : 0.00   Min.   : 2.25  
##  1st Qu.: 0.00   1st Qu.: 4.22  
##  Median : 6.91   Median : 5.46  
##  Mean   : 5.41   Mean   : 6.19  
##  3rd Qu.: 8.48   3rd Qu.: 8.29  
##  Max.   :18.44   Max.   :18.49  
## [1] "Uncorrected t test(s) between columns:"
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  play_all$expression and play_all$cond 
## 
##        first  
## second 1.5e-13
## 
## P value adjustment method: none 
## [1] "Bon Ferroni corrected t test(s) between columns:"
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  play_all$expression and play_all$cond 
## 
##        first  
## second 1.5e-13
## 
## P value adjustment method: bonferroni 
## [1] "No binwidth provided, setting it to 0.00184879482325277 in order to have 10000 bins."

sc$correlation

## 
##  Pearson's product-moment correlation
## 
## data:  df[, 1] and df[, 2]
## t = 24.56, df = 1762, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4693 0.5389
## sample estimates:
##    cor 
## 0.5049

dev.off()

## Cairo 
##     2

## Compare library t2 t3
comp = exprs(expt_subset(all_norm_expt, "condition=='t2'|condition=='t3'")$expressionset)
pdf(file="figures/comparisons/nz131/t2_vs_t3_scatter.pdf")
sc = my_linear_scatter(comp)

## [1] "Calculating correlation between the axes."
## 
##  Pearson's product-moment correlation
## 
## data:  df[, 1] and df[, 2]
## t = 26.12, df = 1762, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4938 0.5611
## sample estimates:
##    cor 
## 0.5283 
## 
## [1] "Calculating linear model between the axes"
## 
## Call:
## lmrob(formula = second ~ first, data = df, method = "SMDM")
##  \--> method = "SMDM"
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.353 -1.719  0.241  1.536  8.259 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.6665     0.0806    57.9   <2e-16 ***
## first         0.3017     0.0125    24.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Robust residual standard error: 2.08 
## Multiple R-squared:  0.259,  Adjusted R-squared:  0.258 
## Convergence in 6 IRWLS iterations
## 
## Robustness weights: 
##  1155 weights are ~= 1. The remaining 609 ones are summarized as
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.177   0.918   0.948   0.929   0.982   0.999 
## Algorithmic parameters: 
## tuning.chi1 tuning.chi2 tuning.chi3 tuning.chi4          bb tuning.psi1 
##    -5.0e-01     1.5e+00          NA     5.0e-01     5.0e-01    -5.0e-01 
## tuning.psi2 tuning.psi3 tuning.psi4  refine.tol     rel.tol   solve.tol 
##     1.5e+00     9.5e-01          NA     1.0e-07     1.0e-07     1.0e-07 
##      nResample         max.it       best.r.s       k.fast.s          k.max 
##            500             50              2              1            200 
##    maxit.scale      trace.lev            mts     compute.rd      numpoints 
##            200              0           1000              0             10 
## fast.s.large.n 
##           2000 
##           psi   subsampling           cov 
##         "lqq" "nonsingular"     ".vcov.w" 
## seed : int(0) 
## [1] "Generating histogram of the x axis."
## [1] "No binwidth provided, setting it to  in order to have 10000 bins."
## [1] "Generating histogram of the y axis."
## [1] "No binwidth provided, setting it to  in order to have 10000 bins."
## [1] "Generating a histogram comparing the axes."
## [1] "Summarise the data."
##      first           second     
##  Min.   : 0.00   Min.   : 2.25  
##  1st Qu.: 0.00   1st Qu.: 4.22  
##  Median : 6.88   Median : 5.46  
##  Mean   : 5.03   Mean   : 6.19  
##  3rd Qu.: 8.51   3rd Qu.: 8.29  
##  Max.   :18.44   Max.   :18.49  
## [1] "Uncorrected t test(s) between columns:"
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  play_all$expression and play_all$cond 
## 
##        first 
## second <2e-16
## 
## P value adjustment method: none 
## [1] "Bon Ferroni corrected t test(s) between columns:"
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  play_all$expression and play_all$cond 
## 
##        first 
## second <2e-16
## 
## P value adjustment method: bonferroni 
## [1] "No binwidth provided, setting it to 0.00184879482325277 in order to have 10000 bins."

sc$correlation

## 
##  Pearson's product-moment correlation
## 
## data:  df[, 1] and df[, 2]
## t = 26.12, df = 1762, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4938 0.5611
## sample estimates:
##    cor 
## 0.5283

dev.off()

## Cairo 
##     2