This worksheet is a copy of 20180726, but this time with feeling! (or new samples) A minor caveat: I had some difficulties converting the new data to an OpenMS-suitable format. I am not certain whether I messed up a priori, or whether a change in the version of the software on the spectrometer caused the problem, but for future reference:
To properly convert the data to mzXML, make sure to use the TPP MSConvert utility and have ‘TPP’ compatibility on.
I am using my slightly modified copy of SWATH2stats. This seeks to ensure that changes in the case of columns in the metadata from one version of OpenMS to another do not trouble me.
I am separately performing the analysis for our home-grown libraries and the publicly available ones (documented in the 201806 worksheets).
## Load the TRIC-aligned OpenSWATH results for our home-grown (comet) library.
## 'ver' is the worksheet's version/date string, defined earlier in the document.
our_tric_data <- read.csv(paste0("results/tric/", ver, "/whole_8mz/comet_HCD.tsv"), sep="\t")
## Read the per-sample metadata from the versioned xlsx sample sheet.
sample_annot <- extract_metadata(paste0("sample_sheets/Mtb_dia_samples_", ver, ".xlsx"))
## Drop rows whose names begin with "s.." -- presumably commented-out samples,
## where make.names() turned a leading "##" in the sample id into dots
## (see the older, commented-out logic below) -- TODO confirm.
kept <- ! grepl(x=rownames(sample_annot), pattern="^s\\.\\.")
sample_annot <- sample_annot[kept, ]
##sample_annot <- openxlsx::read.xlsx("sample_sheets/Mtb_dia_samples_20180611.xlsx")
##rownames(sample_annot) <- make.names(paste0("s", sample_annot[["sampleid"]]), unique=TRUE)
## Drop samples starting with comments
##keep_idx <- ! grepl(pattern="##", x=sample_annot[["sampleid"]])
##sample_annot <- sample_annot[keep_idx, ]
##colnames(sample_annot) <- gsub(x=colnames(sample_annot), pattern="\\.", replacement="")
## Set the mzXML column to match the filename column in the data.
## Load my locally modified SWATH2stats (case-insensitive column handling).
loaded <- sm(devtools::load_all("~/scratch/git/SWATH2stats"))
## s2s, my witty way of shortening SWATH2stats...
## Attach the sample annotations to the tric data.  check_files=FALSE because
## not every mzXML path in the annotation appears in this data file -- the
## warnings recorded below list the missing ones.
our_s2s_exp <- sample_annotation(data=our_tric_data,
sample_annotation=sample_annot,
run_column="run", check_files=FALSE,
fullpeptidename_column="fullunimodpeptidename")
## Not checking that the files are identical between the annotation and data.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/01mzXML/dia/
## 20180806/2018_0726Briken03.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/01mzXML/dia/
## 20180806/2018_0726Briken11.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0315Briken01.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0315Briken02.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0315Briken03.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0315Briken04.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0315Briken05.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0315Briken06.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0315Briken21.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0315Briken22.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0315Briken23.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0315Briken24.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0315Briken25.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0315Briken26.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0502BrikenDIA01.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0502BrikenDIA02.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0502BrikenDIA03.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0502BrikenDIA04.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0502BrikenDIA05.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0502BrikenDIA06.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0502BrikenDIA07.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0502BrikenDIA08.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0502BrikenDIA09.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0502BrikenDIA11.mzXML' in the data file.
## Warning in sample_annotation(data = our_tric_data, sample_annotation =
## sample_annot, : No measurement value found for sample 'results/mzXML/dia/
## 20180611/2018_0502BrikenDIA12.mzXML' in the data file.
This just repeats the previous stanza, but uses the tuberculist transition-library swath data as its input rather than our own comet data.
There is one important caveat in the following block: I used a regex to remove the second half of geneID_geneName so that later when I merge in the annotation data I have it will match.
## Load the tric output generated against the tuberculist transition library.
tb_tric_data <- read.csv(paste0("results/tric/", ver, "/whole_8mz_tuberculist/comet_HCD.tsv"),
sep="\t")
## Strip the trailing "_geneName" from "geneID_geneName" ProteinName entries so
## the IDs match the annotation data merged in later.  The regex is greedy, so
## only the text after the LAST underscore is removed.
tb_tric_data[["ProteinName"]] <- gsub(pattern="^(.*)_.*$", replacement="\\1",
x=tb_tric_data[["ProteinName"]])
## Annotate as for the comet data; run_column is left at its default here --
## NOTE(review): confirm the default matches the "run" column used above.
tb_s2s_exp <- sample_annotation(data=tb_tric_data, check_files=FALSE,
sample_annotation=sample_annot,
fullpeptidename_column="fullunimodpeptidename")
## Not checking that the files are identical between the annotation and data.
## Warning in sample_annotation(data = tb_tric_data, check_files = FALSE,
## sample_annotation = sample_annot, : No measurement value found for sample
## 'results/01mzXML/dia/20180806/2018_0726Briken03.mzXML' in the data file.
## Warning in sample_annotation(data = tb_tric_data, check_files = FALSE,
## sample_annotation = sample_annot, : No measurement value found for sample
## 'results/01mzXML/dia/20180806/2018_0726Briken11.mzXML' in the data file.
Now I have a couple data structures which should prove useful for the metrics provided by SWATH2stats, MSstats, and my own hpgltools.
The various metrics and filters provided by SWATH2stats seem quite reasonable to me. The only thing that really bothers me is that they are all case sensitive and I found that the most recent tric changed the capitalization of a column, causing these to all fall down. Therefore I went in and made everything case insensitive in a fashion similar to that done by MSstats (except I hate capital letters, so I used tolower() rather than toupper()).
## Get correlations on a sample by sample basis
## Open a png device for the sample correlation plot; dev.off() below flushes it.
pp(file="images/20180611_our_swath2stats_sample_cor.png")
## Going to write the image to: images/20180611_our_swath2stats_sample_cor.png when dev.off() is called.
## Pairwise sample correlations, aggregating intensities by sum.
sample_cor <- plot_correlation_between_samples(our_s2s_exp, size=2,
fun.aggregate=sum,
column.values="intensity")
dev.off()
## png
## 2
## The same correlations, but split out by condition/bioreplicate/run.
sample_cond_rep_cor <- plot_correlation_between_samples(our_s2s_exp, size=2,
comparison=transition_group_id ~
condition + bioreplicate + run,
fun.aggregate=sum,
column.values="intensity")
## How many decoy peptides survived scoring?
decoy_lists <- assess_decoy_rate(our_s2s_exp)
## Number of non-decoy peptides: 3105
## Number of decoy peptides: 258
## Decoy rate: 0.0831
## This seems a bit high to me, yesno?
## Overall and per-run FDR assessments (FFT: fraction of false targets,
## per the SWATH2stats documentation).
fdr_overall <- assess_fdr_overall(our_s2s_exp, output="Rconsole", plot=TRUE)
byrun_fdr <- assess_fdr_byrun(our_s2s_exp, FFT=0.7, plot=TRUE, output="Rconsole")
## The average FDR by run on assay level is 0.014
## The average FDR by run on peptide level is 0.015
## The average FDR by run on protein level is 0.037
## Find the m-score cutoff achieving a 2% assay-level FDR...
chosen_mscore <- mscore4assayfdr(our_s2s_exp, FFT=0.7, fdr_target=0.02)
## Target assay FDR: 0.02
## Required overall m-score cutoff: 0.0035481
## achieving assay FDR: 0.0196
## ...and the cutoff achieving a 2% protein-level FDR.
prot_score <- mscore4protfdr(our_s2s_exp, FFT=0.7, fdr_target=0.02)
## Target protein FDR: 0.02
## Required overall m-score cutoff: 0.001
## achieving protein FDR: 0.0186
## Filter the experiment at the assay-level m-score cutoff chosen above.
our_mscore_filtered <- filter_mscore(our_s2s_exp, chosen_mscore)
## Original dimension: 24787, new dimension: 23309, difference: 1478.
## Frequency-of-observation filter: presumably m-score <= 0.01 in at least 80%
## of conditions, judging by the percentage-based message below -- TODO confirm.
our_freq_mscore <- filter_mscore_freqobs(our_s2s_exp, 0.01, 0.8, rm.decoy=FALSE)
## Peptides need to have been quantified in more conditions than: 12.8 in order to pass this percentage-based threshold.
## Fraction of peptides selected: 0.16
## Original dimension: 25349, new dimension: 8014, difference: 17335.
## Apply the protein-FDR-derived cutoff, capping the global peptide FDR at 5%.
our_data_filtered_fdr <- filter_mscore_fdr(our_mscore_filtered, FFT=0.7,
overall_protein_fdr_target=prot_score,
upper_overall_peptide_fdr_limit=0.05)
## Target protein FDR: 0.001
## Required overall m-score cutoff: 0.01
## achieving protein FDR: 0
## filter_mscore_fdr is filtering the data...
## finding m-score cutoff to achieve desired protein FDR in protein master list..
## finding m-score cutoff to achieve desired global peptide FDR..
## Target peptide FDR: 0.05
## Required overall m-score cutoff: 0.01
## Achieving peptide FDR: 0
## Proteins selected:
## Total proteins selected: 927
## Final target proteins: 927
## Final decoy proteins: 0
## Peptides mapping to these protein entries selected:
## Total mapping peptides: 3008
## Final target peptides: 3008
## Final decoy peptides: 0
## Total peptides selected from:
## Total peptides: 3008
## Final target peptides: 3008
## Final decoy peptides: 0
## Individual run FDR quality of the peptides was not calculated
## as not every run contains a decoy.
## The decoys have been removed from the returned data.
## Keep only peptides mapping to a single protein (proteotypic).
our_only_proteotypic <- filter_proteotypic_peptides(our_data_filtered_fdr)
## Number of proteins detected: 931
## Protein identifiers: Rv3036c, Rv1133c, Rv1098c, Rv2241, Rv2799, Rv0652
## Number of proteins detected that are supported by a proteotypic peptide: 911
## Number of proteotypic peptides detected: 2965
our_all_filtered <- filter_all_peptides(our_only_proteotypic)
## Number of proteins detected: 911
## First 6 protein identifiers: Rv3036c, Rv1133c, Rv1098c, Rv2241, Rv2799, Rv0652
## Keep at most the 10 strongest peptides per protein...
our_only_strong <- filter_on_max_peptides(data=our_all_filtered, n_peptides=10)
## Before filtering:
## Number of proteins: 911
## Number of peptides: 2965
##
## Percentage of peptides removed: 8.67%
##
## After filtering:
## Number of proteins: 908
## Number of peptides: 2708
## ...and require at least 3 peptides per protein.
our_only_minimum <- filter_on_min_peptides(data=our_only_strong, n_peptides=3)
## Before filtering:
## Number of proteins: 908
## Number of peptides: 2708
##
## Percentage of peptides removed: 0.07%
##
## After filtering:
## Number of proteins: 820
## Number of peptides: 2706
## I think these matrices are probably smarter to use than the raw outmatrix from tric.
## But I am not a fan of rewriting the sample column names.
## Ensure the output directory for the swath2stats matrices exists.
## dir.exists() is the correct test here: file.exists() would also return TRUE
## for a plain file of the same name, which would then make dir.create() fail.
## recursive=TRUE creates any missing parents ("results", "results/swath2stats").
matrix_prefix <- file.path("results", "swath2stats", ver)
if (!dir.exists(matrix_prefix)) {
  dir.create(matrix_prefix, recursive=TRUE)
}
## Write protein/peptide overview matrices at each stage of filtering so the
## effect of the filters can be compared side by side.
protein_matrix_all <- write_matrix_proteins(
our_s2s_exp, write.csv=TRUE,
filename=file.path(matrix_prefix, "our_protein_all.csv"))
## Protein overview matrix results/swath2stats/20180806/our_protein_all.csv written to working folder.
dim(protein_matrix_all)
## [1] 1156 17
protein_matrix_mscore <- write_matrix_proteins(
our_mscore_filtered, write.csv=TRUE,
filename=file.path(matrix_prefix, "our_protein_matrix_mscore.csv"))
## Protein overview matrix results/swath2stats/20180806/our_protein_matrix_mscore.csv written to working folder.
dim(protein_matrix_mscore)
## [1] 927 17
peptide_matrix_mscore <- write_matrix_peptides(
our_mscore_filtered, write.csv=TRUE,
filename=file.path(matrix_prefix, "our_peptide_matrix_mscore.csv"))
## Peptide overview matrix results/swath2stats/20180806/our_peptide_matrix_mscore.csv written to working folder.
dim(peptide_matrix_mscore)
## [1] 3008 17
protein_matrix_minimum <- write_matrix_proteins(
our_only_minimum, write.csv=TRUE,
filename=file.path(matrix_prefix, "our_protein_matrix_minimum.csv"))
## Protein overview matrix results/swath2stats/20180806/our_protein_matrix_minimum.csv written to working folder.
dim(protein_matrix_minimum)
## [1] 820 17
peptide_matrix_minimum <- write_matrix_peptides(
our_only_minimum, write.csv=TRUE,
filename=file.path(matrix_prefix, "our_peptide_matrix_minimum.csv"))
## Peptide overview matrix results/swath2stats/20180806/our_peptide_matrix_minimum.csv written to working folder.
dim(peptide_matrix_minimum)
## [1] 19837 17
rt_cor <- plot_correlation_between_samples(
our_all_filtered, column.values="intensity", fun.aggregate=sum, size=2)
## I have no effing clue what this plot means.
variation <- plot_variation(our_all_filtered, fun.aggregate=sum)
cols <- colnames(our_all_filtered)
## Expand the data to one row per transition, the shape MSstats expects.
our_disaggregated <- disaggregate(our_all_filtered, all.columns=TRUE)
## The library contains6transitions per precursor.
## The data table was transformed into a table containing one row per transition.
## Convert to the MSstats input format; missing required columns are NA-filled.
our_msstats_input <- convert_MSstats(our_disaggregated)
## One or several columns required by MSstats were not in the data. The columns were created and filled with NAs.
## Missing columns: productcharge, isotopelabeltype
## isotopelabeltype was filled with light.
summary(our_msstats_input)
## proteinname peptidesequence precursorcharge fragmention
## Length:137316 Length:137316 Min. :2.00 Length:137316
## Class :character Class :character 1st Qu.:2.00 Class :character
## Mode :character Mode :character Median :2.00 Mode :character
## Mean :2.19
## 3rd Qu.:2.00
## Max. :4.00
## productcharge isotopelabeltype intensity bioreplicate
## Mode:logical Length:137316 Min. :1.20e+04 Length:137316
## NA's:137316 Class :character 1st Qu.:2.08e+06 Class :character
## Mode :character Median :7.88e+06 Mode :character
## Mean :5.49e+07
## 3rd Qu.:3.12e+07
## Max. :1.57e+10
## condition run
## wt_filtrate :24384 Length:137316
## wt_whole :38442 Class :character
## delta_whole :19704 Mode :character
## comp_whole :29286
## delta_filtrate: 9756
## comp_filtrate :15744
Repeat the above stanza using the tuberculist data.
## Get correlations on a sample by sample basis
## Repeat the QC/FDR pipeline for the tuberculist-library experiment.
pp(file="images/20180611_tb_swath2stats_sample_cor.png")
## Going to write the image to: images/20180611_tb_swath2stats_sample_cor.png when dev.off() is called.
sample_cor <- plot_correlation_between_samples(tb_s2s_exp, size=2,
fun.aggregate=sum,
column.values="intensity")
dev.off()
## png
## 2
sample_cond_rep_cor <- plot_correlation_between_samples(tb_s2s_exp, size=2,
comparison=transition_group_id ~
condition + bioreplicate + run,
fun.aggregate=sum,
column.values="intensity")
decoy_lists <- assess_decoy_rate(tb_s2s_exp)
## Number of non-decoy peptides: 24166
## Number of decoy peptides: 1626
## Decoy rate: 0.0673
## This seems a bit high to me, yesno?
fdr_overall <- assess_fdr_overall(tb_s2s_exp, output="Rconsole", plot=TRUE)
byrun_fdr <- assess_fdr_byrun(tb_s2s_exp, FFT=0.7, plot=TRUE, output="Rconsole")
## The average FDR by run on assay level is 0.007
## The average FDR by run on peptide level is 0.008
## The average FDR by run on protein level is 0.035
## m-score cutoffs for 2% assay-level and 2% protein-level FDR, as above.
chosen_mscore <- mscore4assayfdr(tb_s2s_exp, FFT=0.7, fdr_target=0.02)
## Target assay FDR: 0.02
## Required overall m-score cutoff: 0.0050119
## achieving assay FDR: 0.0186
prot_score <- mscore4protfdr(tb_s2s_exp, FFT=0.7, fdr_target=0.02)
## Target protein FDR: 0.02
## Required overall m-score cutoff: 0.00056234
## achieving protein FDR: 0.0177
tb_mscore_filtered <- filter_mscore(tb_s2s_exp, chosen_mscore)
## Original dimension: 329180, new dimension: 305033, difference: 24147.
tb_freq_mscore <- filter_mscore_freqobs(tb_s2s_exp, 0.01, 0.8, rm.decoy=FALSE)
## Peptides need to have been quantified in more conditions than: 31.2 in order to pass this percentage-based threshold.
## Fraction of peptides selected: 0.036
## Original dimension: 333450, new dimension: 39657, difference: 293793.
## Apply the protein-FDR-derived cutoff, capping the global peptide FDR at 5%.
tb_data_filtered_fdr <- filter_mscore_fdr(tb_mscore_filtered, FFT=0.7,
overall_protein_fdr_target=prot_score,
upper_overall_peptide_fdr_limit=0.05)
## Target protein FDR: 0.000562341325190349
## Required overall m-score cutoff: 0.01
## achieving protein FDR: 0
## filter_mscore_fdr is filtering the data...
## finding m-score cutoff to achieve desired protein FDR in protein master list..
## finding m-score cutoff to achieve desired global peptide FDR..
## Target peptide FDR: 0.05
## Required overall m-score cutoff: 0.01
## Achieving peptide FDR: 0
## Proteins selected:
## Total proteins selected: 3091
## Final target proteins: 3091
## Final decoy proteins: 0
## Peptides mapping to these protein entries selected:
## Total mapping peptides: 22927
## Final target peptides: 22927
## Final decoy peptides: 0
## Total peptides selected from:
## Total peptides: 22927
## Final target peptides: 22927
## Final decoy peptides: 0
## Individual run FDR quality of the peptides was not calculated
## as not every run contains a decoy.
## The decoys have been removed from the returned data.
## Proteotypic-peptide, max-10 and min-3 peptide filters, as for the comet data.
tb_only_proteotypic <- filter_proteotypic_peptides(tb_data_filtered_fdr)
## Number of proteins detected: 3104
## Protein identifiers: Rv1270c, Rv0161, Rv1613, Rv2540c, Rv0440, Rv1327c
## Number of proteins detected that are supported by a proteotypic peptide: 2966
## Number of proteotypic peptides detected: 22765
tb_all_filtered <- filter_all_peptides(tb_only_proteotypic)
## Number of proteins detected: 2968
## First 6 protein identifiers: Rv1270c, Rv0161, Rv1613, Rv2540c, Rv0440, Rv1327c
tb_only_strong <- filter_on_max_peptides(data=tb_all_filtered, n_peptides=10)
## Before filtering:
## Number of proteins: 2966
## Number of peptides: 22765
##
## Percentage of peptides removed: 23.31%
##
## After filtering:
## Number of proteins: 2941
## Number of peptides: 17459
tb_only_minimum <- filter_on_min_peptides(data=tb_only_strong, n_peptides=3)
## Before filtering:
## Number of proteins: 2941
## Number of peptides: 17459
##
## Percentage of peptides removed: 0.02%
##
## After filtering:
## Number of proteins: 2701
## Number of peptides: 17455
## I think these matrices are probably smarter to use than the raw outmatrix from tric.
## But I am not a fan of rewriting the sample column names.
## Ensure the output directory for the swath2stats matrices exists.
## dir.exists() is the correct test here: file.exists() would also return TRUE
## for a plain file of the same name, which would then make dir.create() fail.
## recursive=TRUE creates any missing parents ("results", "results/swath2stats").
matrix_prefix <- file.path("results", "swath2stats", ver)
if (!dir.exists(matrix_prefix)) {
  dir.create(matrix_prefix, recursive=TRUE)
}
## Write the tuberculist protein/peptide matrices at each filtering stage.
protein_matrix_all <- write_matrix_proteins(
tb_s2s_exp, write.csv=TRUE,
filename=file.path(matrix_prefix, "tb_protein_all.csv"))
## Protein overview matrix results/swath2stats/20180806/tb_protein_all.csv written to working folder.
dim(protein_matrix_all)
## [1] 4488 40
protein_matrix_mscore <- write_matrix_proteins(
tb_mscore_filtered, write.csv=TRUE,
filename=file.path(matrix_prefix, "tb_protein_matrix_mscore.csv"))
## Protein overview matrix results/swath2stats/20180806/tb_protein_matrix_mscore.csv written to working folder.
dim(protein_matrix_mscore)
## [1] 3091 40
peptide_matrix_mscore <- write_matrix_peptides(
tb_mscore_filtered, write.csv=TRUE,
filename=file.path(matrix_prefix, "tb_peptide_matrix_mscore.csv"))
## Peptide overview matrix results/swath2stats/20180806/tb_peptide_matrix_mscore.csv written to working folder.
dim(peptide_matrix_mscore)
## [1] 22927 40
protein_matrix_minimum <- write_matrix_proteins(
tb_only_minimum, write.csv=TRUE,
filename=file.path(matrix_prefix, "tb_protein_matrix_minimum.csv"))
## Protein overview matrix results/swath2stats/20180806/tb_protein_matrix_minimum.csv written to working folder.
dim(protein_matrix_minimum)
## [1] 2701 40
peptide_matrix_minimum <- write_matrix_peptides(
tb_only_minimum, write.csv=TRUE,
filename=file.path(matrix_prefix, "tb_peptide_matrix_minimum.csv"))
## Peptide overview matrix results/swath2stats/20180806/tb_peptide_matrix_minimum.csv written to working folder.
dim(peptide_matrix_minimum)
## [1] 221200 40
rt_cor <- plot_correlation_between_samples(
tb_only_minimum, column.values="intensity", fun.aggregate=sum, size=2)
## I have no effing clue what this plot means.
variation <- plot_variation(tb_only_minimum, fun.aggregate=sum)
cols <- colnames(tb_all_filtered)
## Expand to one row per transition and convert to the MSstats input format.
tb_disaggregated <- disaggregate(tb_all_filtered, all.columns=TRUE)
## The library contains5transitions per precursor.
## The data table was transformed into a table containing one row per transition.
tb_msstats_input <- convert_MSstats(tb_disaggregated)
## One or several columns required by MSstats were not in the data. The columns were created and filled with NAs.
## Missing columns: productcharge, isotopelabeltype
## isotopelabeltype was filled with light.
In response to some interesting queries from Yan, I made a few little functions which query and plot data from the scored data provided by openswath/pyprophet. Let us look at their results here.
## Gather the pyprophet-scored tsv files listed in the sample sheet into one
## structure for plotting.  Some files are absent from the metadata, hence the
## warning recorded below.
our_pyprophet_fun <- extract_pyprophet_data(
metadata=paste0("sample_sheets/Mtb_dia_samples_", ver, ".xlsx"))
## Warning in extract_pyprophet_data(metadata = paste0("sample_sheets/
## Mtb_dia_samples_", : It appears that some files are missing in the
## metadata.
## Attempting to read the tsv file for: 2018_0315Briken01: results/pyprophet/20180611/whole_8mz_comet/2018_0315Briken01_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0315Briken02: results/pyprophet/20180611/whole_8mz_comet/2018_0315Briken02_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0315Briken03: results/pyprophet/20180611/whole_8mz_comet/2018_0315Briken03_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0315Briken04: results/pyprophet/20180611/whole_8mz_comet/2018_0315Briken04_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0315Briken05: results/pyprophet/20180611/whole_8mz_comet/2018_0315Briken05_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0315Briken06: results/pyprophet/20180611/whole_8mz_comet/2018_0315Briken06_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0315Briken21: results/pyprophet/20180611/whole_8mz_comet/2018_0315Briken21_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0315Briken22: results/pyprophet/20180611/whole_8mz_comet/2018_0315Briken22_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0315Briken23: results/pyprophet/20180611/whole_8mz_comet/2018_0315Briken23_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0315Briken24: results/pyprophet/20180611/whole_8mz_comet/2018_0315Briken24_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0315Briken25: results/pyprophet/20180611/whole_8mz_comet/2018_0315Briken25_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0315Briken26: results/pyprophet/20180611/whole_8mz_comet/2018_0315Briken26_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0502BrikenDIA07: results/pyprophet/20180611/whole_8mz_comet/2018_0502BrikenDIA07_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0502BrikenDIA08: results/pyprophet/20180611/whole_8mz_comet/2018_0502BrikenDIA08_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0502BrikenDIA09: results/pyprophet/20180611/whole_8mz_comet/2018_0502BrikenDIA09_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0502BrikenDIA11: results/pyprophet/20180611/whole_8mz_comet/2018_0502BrikenDIA11_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0502BrikenDIA12: results/pyprophet/20180611/whole_8mz_comet/2018_0502BrikenDIA12_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0502BrikenDIA01: results/pyprophet/20180611/whole_8mz_comet/2018_0502BrikenDIA01_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0502BrikenDIA02: results/pyprophet/20180611/whole_8mz_comet/2018_0502BrikenDIA02_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0502BrikenDIA03: results/pyprophet/20180611/whole_8mz_comet/2018_0502BrikenDIA03_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0502BrikenDIA04: results/pyprophet/20180611/whole_8mz_comet/2018_0502BrikenDIA04_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0502BrikenDIA05: results/pyprophet/20180611/whole_8mz_comet/2018_0502BrikenDIA05_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0502BrikenDIA06: results/pyprophet/20180611/whole_8mz_comet/2018_0502BrikenDIA06_vs_20180611_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0726Briken01: results/08pyprophet/20180806/whole_8mz/2018_0726Briken01_vs_20180806_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0726Briken02: results/08pyprophet/20180806/whole_8mz/2018_0726Briken02_vs_20180806_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0726Briken04: results/08pyprophet/20180806/whole_8mz/2018_0726Briken04_vs_20180806_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0726Briken05: results/08pyprophet/20180806/whole_8mz/2018_0726Briken05_vs_20180806_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0726Briken06: results/08pyprophet/20180806/whole_8mz/2018_0726Briken06_vs_20180806_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0726Briken07: results/08pyprophet/20180806/whole_8mz/2018_0726Briken07_vs_20180806_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0726Briken08: results/08pyprophet/20180806/whole_8mz/2018_0726Briken08_vs_20180806_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0726Briken09: results/08pyprophet/20180806/whole_8mz/2018_0726Briken09_vs_20180806_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0726Briken12: results/08pyprophet/20180806/whole_8mz/2018_0726Briken12_vs_20180806_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0726Briken13: results/08pyprophet/20180806/whole_8mz/2018_0726Briken13_vs_20180806_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0726Briken14: results/08pyprophet/20180806/whole_8mz/2018_0726Briken14_vs_20180806_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0726Briken15: results/08pyprophet/20180806/whole_8mz/2018_0726Briken15_vs_20180806_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0726Briken16: results/08pyprophet/20180806/whole_8mz/2018_0726Briken16_vs_20180806_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0726Briken17: results/08pyprophet/20180806/whole_8mz/2018_0726Briken17_vs_20180806_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0726Briken18: results/08pyprophet/20180806/whole_8mz/2018_0726Briken18_vs_20180806_whole_HCD_dia_scored.tsv.
## Attempting to read the tsv file for: 2018_0726Briken19: results/08pyprophet/20180806/whole_8mz/2018_0726Briken19_vs_20180806_whole_HCD_dia_scored.tsv.
## Distribution plots of the pyprophet score columns; each call returns a list
## of plots and the [["violin"]] element is printed to render it.
our_mass_plot <- sm(plot_pyprophet_distribution(our_pyprophet_fun, column="mass"))
our_mass_plot[["violin"]]
## delta_rt distributions: everything, targets only, then decoys only.
our_deltart_plot_all <- sm(plot_pyprophet_distribution(
our_pyprophet_fun, column="delta_rt"))
our_deltart_plot_all[["violin"]]
our_deltart_plot_real <- sm(plot_pyprophet_distribution(
our_pyprophet_fun,
column="delta_rt", keep_decoys=FALSE))
our_deltart_plot_real[["violin"]]
our_deltart_plot_decoys <- sm(plot_pyprophet_distribution(
our_pyprophet_fun,
column="delta_rt", keep_real=FALSE))
our_deltart_plot_decoys[["violin"]]
our_dscore_plot <- sm(plot_pyprophet_distribution(
our_pyprophet_fun,
column="d_score"))
our_dscore_plot[["violin"]]
our_mscore_plot <- sm(plot_pyprophet_distribution(
our_pyprophet_fun,
column="m_score"))
our_mscore_plot[["violin"]]
our_widthvsmass <- sm(plot_pyprophet_data(our_pyprophet_fun, legend=FALSE))
our_widthvsmass$plot
## The same set of plots for the tuberculist-scored data; the scored-file
## column in the sample sheet is selected via pyprophet_column.
tb_pyprophet_fun <- sm(extract_pyprophet_data(
metadata=paste0("sample_sheets/Mtb_dia_samples_", ver, ".xlsx"),
pyprophet_column="tuberculistscored"))
tb_mass_plot <- sm(plot_pyprophet_distribution(tb_pyprophet_fun, column="mass"))
tb_mass_plot[["violin"]]
tb_deltart_plot_all <- sm(plot_pyprophet_distribution(
tb_pyprophet_fun, column="delta_rt"))
tb_deltart_plot_all[["violin"]]
tb_deltart_plot_real <- sm(plot_pyprophet_distribution(
tb_pyprophet_fun,
column="delta_rt", keep_decoys=FALSE))
tb_deltart_plot_real[["violin"]]
tb_deltart_plot_decoys <- sm(plot_pyprophet_distribution(
tb_pyprophet_fun,
column="delta_rt", keep_real=FALSE))
tb_deltart_plot_decoys[["violin"]]
msstats.org seems to provide a complete solution for computing reasonable metrics on this data.
I am currently reading: http://msstats.org/wp-content/uploads/2017/01/MSstats_v3.7.3_manual.pdf
I made some moderately intrusive changes to MSstats to make it clearer, as well.
Let us invoke MSstats with the SWATH2stats derived data.
## Load my locally modified MSstats.
tt <- sm(devtools::load_all("~/scratch/git/MSstats"))
## dataProcess() is slow, so cache its result in an .rda checkpoint and reload
## it when present; delete the file to force a recompute.
checkpoint <- "our_dataprocess.rda"
if (file.exists(checkpoint)) {
load(file=checkpoint)
} else {
our_msstats_quant <- sm(dataProcess(our_msstats_input))
save(file=checkpoint, list=c("our_msstats_quant"))
}
## Likewise checkpoint the QC plots.
checkpoint <- "our_plots.rda"
if (file.exists(checkpoint)) {
load(file=checkpoint)
} else {
our_msstats_plots <- sm(dataProcessPlots(our_msstats_quant, type="QCPLOT"))
save(file=checkpoint, list=c("our_msstats_plots"))
}
my_levels <- levels(as.factor(our_msstats_input$condition))
my_levels
## [1] "wt_filtrate" "wt_whole" "delta_whole" "comp_whole"
## [5] "delta_filtrate" "comp_filtrate"
## Build the contrast matrix for the seven pairwise comparisons of interest
## (numerator vs denominator, one row per contrast).
comparisons <- ghetto_contrast_matrix(
numerators=c("wt_filtrate", "delta_filtrate", "comp_filtrate",
"delta_filtrate", "comp_filtrate", "delta_whole",
"comp_whole"),
denominators=c("wt_whole", "delta_whole", "comp_whole",
"wt_filtrate", "wt_filtrate", "wt_whole",
"wt_whole"))
## Run each contrast through MSstats::groupComparison() one row at a time,
## caching the full result list in an rda checkpoint.
## Fixes: the loop variable was `c`, which masks base::c(); and
## 1:length(rownames(comparisons)) is the classic empty-input footgun, so use
## seq_len(nrow(...)) instead.  A "Finished" message is added to match the
## equivalent tb loop below.
our_results <- list()
checkpoint <- "our_group.rda"
if (file.exists(checkpoint)) {
  load(file=checkpoint)
} else {
  for (i in seq_len(nrow(comparisons))) {
    name <- rownames(comparisons)[i]
    message("Starting ", name)
    ## groupComparison() expects a named 1-row contrast matrix.
    comp <- t(as.matrix(comparisons[i, ]))
    rownames(comp) <- name
    our_results[[name]] <- sm(MSstats::groupComparison(contrast.matrix=comp,
                                                       data=our_msstats_quant))
    message("Finished ", name)
  }
  save(file=checkpoint, list=c("our_results"))
}
## Cache the MSstats dataProcess() run for the tuberculist-library data.
checkpoint <- "tb_dataprocess.rda"
if (file.exists(checkpoint)) {
load(file=checkpoint)
} else {
tb_msstats_quant <- sm(dataProcess(tb_msstats_input))
save(file=checkpoint, list=c("tb_msstats_quant"))
}
## And cache its QC plots as well.
checkpoint <- "tb_plots.rda"
if (file.exists(checkpoint)) {
load(file=checkpoint)
} else {
tb_msstats_plots <- sm(dataProcessPlots(tb_msstats_quant, type="QCPLOT"))
save(file=checkpoint, list=c("tb_msstats_plots"))
}
## Same contrast definitions as the our-library analysis above, but against
## the tuberculist-library conditions.
my_levels <- levels(as.factor(tb_msstats_input$condition))
my_levels
## [1] "wt_filtrate" "wt_whole" "delta_whole" "comp_whole"
## [5] "delta_filtrate" "comp_filtrate"
comparisons <- ghetto_contrast_matrix(
numerators=c("wt_filtrate", "delta_filtrate", "comp_filtrate",
"delta_filtrate", "comp_filtrate", "delta_whole",
"comp_whole"),
denominators=c("wt_whole", "delta_whole", "comp_whole",
"wt_filtrate", "wt_filtrate", "wt_whole",
"wt_whole"))
## Run each contrast through MSstats::groupComparison() for the
## tuberculist-library quantification, caching the result list.
## Fixes: the loop variable was `c`, which masks base::c(); and
## 1:length(rownames(comparisons)) is replaced with seq_len(nrow(...)) so an
## empty contrast matrix does not iterate over c(1, 0).
tb_results <- list()
checkpoint <- "tb_group.rda"
if (file.exists(checkpoint)) {
  load(file=checkpoint)
} else {
  for (i in seq_len(nrow(comparisons))) {
    name <- rownames(comparisons)[i]
    message("Starting ", name)
    ## groupComparison() expects a named 1-row contrast matrix.
    comp <- t(as.matrix(comparisons[i, ]))
    rownames(comp) <- name
    tb_results[[name]] <- sm(MSstats::groupComparison(contrast.matrix=comp,
                                                      data=tb_msstats_quant))
    message("Finished ", name)
  }
  save(file=checkpoint, list=c("tb_results"))
}
Yan asked for the PE/PPE protein QC plots. Since I changed dataProcessPlots to return something useful, generating them should now be possible.
## Collect the QC plots for the annotated PE/PPE genes into a single pdf.
pe_genes <- read.table("reference/annotated_pe_genes.txt")[[1]]
## The names did not get set in my changed version of dataProcessPlots, so
## name the plot list from the processed protein levels, dropping the "1/"
## prefix that OpenSWATH prepends to protein IDs.
plotlst <- our_msstats_plots$QCPLOT
available_plots <- gsub(pattern="^1/", replacement="",
                        x=levels(our_msstats_quant$ProcessedData$PROTEIN))
names(plotlst) <- available_plots
## Keep only the PE genes which actually have a QC plot available.
pe_in_avail_idx <- pe_genes %in% available_plots
pe_in_avail <- pe_genes[pe_in_avail_idx]
pe_plots <- plotlst[pe_in_avail]
pdf(file="pe_qc_plots.pdf")
## Iterate over values rather than 1:length(pe_plots), which would run one
## bogus iteration (indices 1 and 0) if no PE gene had a plot.
for (pe_plot in pe_plots) {
  plot(pe_plot)
}
## Warning: Removed NNN rows containing non-finite values (stat_boxplot).
## (one such warning per plotted gene; 17 in the recorded run)
dev.off()
## png
## 2
length(pe_plots)
## [1] 19
Since I am not certain I understand these data, I will take the intensities from SWATH2stats, metadata, and annotation data; attempt to create a ‘normal’ expressionset; poke at it to see what I can learn.
I want to use the same metadata as were used for MSstats. It has a few important differences from the requirements of hpgltools: pretty much only that I do not allow rownames/sampleIDs to start with a number.
I do not want the '1/' prefix before the protein names; I already merged them into one entry per gene via SWATH2stats.
## Read the SWATH2stats protein-intensity matrices.  For each: move the
## protein-name column (minus its "1/" prefix) into the rownames, drop that
## column, and rewrite the sample column names to the "s<date...>" scheme
## used by the sample annotations.
our_prot_mtrx <- read.csv(file.path("results", "swath2stats", ver, "our_protein_matrix_minimum.csv"))
rownames(our_prot_mtrx) <- gsub(pattern="^1\\/", replacement="", x=our_prot_mtrx[["proteinname"]])
our_prot_mtrx <- our_prot_mtrx[, -1]
## Important question: Did SWATH2stats reorder my data?
colnames(our_prot_mtrx) <- gsub(pattern="^(.*)(2018.*)$", replacement="s\\2", x=colnames(our_prot_mtrx))
## Same treatment for the tuberculist-library matrix.
tb_prot_mtrx <- read.csv(file.path("results", "swath2stats", ver, "tb_protein_matrix_minimum.csv"))
rownames(tb_prot_mtrx) <- gsub(pattern="^1\\/", replacement="", x=tb_prot_mtrx[["proteinname"]])
tb_prot_mtrx <- tb_prot_mtrx[, -1]
## Important question: Did SWATH2stats reorder my data?
colnames(tb_prot_mtrx) <- gsub(pattern="^(.*)(2018.*)$", replacement="s\\2", x=colnames(tb_prot_mtrx))
Now we should have sufficient pieces to make an expressionset.
While here, I will also split the data into a cf and whole-cell pair of data structures.
## Build expressionsets from the protein matrices + sample annotations +
## Mtb gene annotations, then split each into whole-cell and
## culture-filtrate subsets.
## Drop the metadata not in the protein matrix:
## And ensure that they are the same order.
reordered <- colnames(our_prot_mtrx)
metadata <- sample_annot[reordered, ]
our_protein_expt <- create_expt(metadata=metadata,
count_dataframe=our_prot_mtrx,
gene_info=mtb_annotations)
## Reading the sample metadata.
## The sample definitions comprises: 16 rows(samples) and 27 columns(metadata fields).
## Matched 817 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
our_whole_expt <- subset_expt(our_protein_expt, subset="collectiontype=='whole'")
## There were 16, now there are 8 samples.
our_cf_expt <- subset_expt(our_protein_expt, subset="collectiontype=='filtrate'")
## There were 16, now there are 8 samples.
## Repeat for the tuberculist-library matrix.
## Drop the metadata not in the protein matrix:
## And ensure that they are the same order.
reordered <- colnames(tb_prot_mtrx)
metadata <- sample_annot[reordered, ]
tb_protein_expt <- sm(create_expt(metadata,
count_dataframe=tb_prot_mtrx,
gene_info=mtb_annotations))
tb_whole_expt <- subset_expt(tb_protein_expt, subset="collectiontype=='whole'")
## There were 39, now there are 19 samples.
tb_cf_expt <- subset_expt(tb_protein_expt, subset="collectiontype=='filtrate'")
## There were 39, now there are 20 samples.
## Raw, quantile-normalized, and fsva-batch-adjusted views of both full
## protein datasets, each with its graph metrics.
our_protein_metrics <- sm(graph_metrics(our_protein_expt))
our_protein_norm <- sm(normalize_expt(our_protein_expt, transform="log2", convert="cpm",
                                      norm="quant", filter=TRUE))
our_protein_norm_metrics <- sm(graph_metrics(our_protein_norm))
## Fix: convert="cpm" added for consistency -- this was the only fsva
## normalization in the worksheet missing it (cf. the tb call below and the
## whole/cf calls later).
our_protein_fsva <- sm(normalize_expt(our_protein_expt, transform="log2", convert="cpm",
                                      batch="fsva", filter=TRUE))
our_protein_fsva_metrics <- sm(graph_metrics(our_protein_fsva))
tb_protein_metrics <- sm(graph_metrics(tb_protein_expt))
tb_protein_norm <- sm(normalize_expt(tb_protein_expt, transform="log2", convert="cpm",
                                     norm="quant", filter=TRUE))
tb_protein_norm_metrics <- sm(graph_metrics(tb_protein_norm))
tb_protein_fsva <- sm(normalize_expt(tb_protein_expt, transform="log2", convert="cpm",
                                     batch="fsva", filter=TRUE))
tb_protein_fsva_metrics <- sm(graph_metrics(tb_protein_fsva))
## Metrics for the our-library whole-cell subset, plus variance partitioning
## and coefficient-of-variation plots against several metadata factors.
our_whole_metrics <- sm(graph_metrics(our_whole_expt))
our_whole_norm <- sm(normalize_expt(our_whole_expt, transform="log2", convert="cpm",
norm="quant", filter=TRUE))
our_whole_norm_metrics <- sm(graph_metrics(our_whole_norm))
our_whole_fsva <- sm(normalize_expt(our_whole_expt, transform="log2", convert="cpm",
batch="fsva", filter=TRUE))
our_whole_fsva_metrics <- sm(graph_metrics(our_whole_fsva))
our_whole_varpart <- varpart(our_whole_expt)
## varpart sees only 1 batch, adjusting the model accordingly.
## Attempting mixed linear model with: ~ (1|condition)
## Fitting the expressionset to the model, this is slow.
## Projected run time: ~ 0.04 min
## Placing factor: condition at the beginning of the model.
our_whole_varpart$partition_plot
## NOTE(review): despite the "whole" name, the cv plots below use
## our_protein_expt (the full 16-sample dataset), not our_whole_expt --
## confirm this is intended.
our_whole_cv <- plot_variance_coefficients(our_protein_expt)
## Naively calculating coefficient of variation and quartile dispersion with respect to condition.
## Finished calculating dispersion estimates.
our_whole_cv$disp
our_batch_cv <- plot_variance_coefficients(our_protein_expt, x_axis="batch")
## Naively calculating coefficient of variation and quartile dispersion with respect to batch.
## Finished calculating dispersion estimates.
our_batch_cv$disp
our_genotype_cv <- plot_variance_coefficients(our_protein_expt, x_axis="genotype")
## Naively calculating coefficient of variation and quartile dispersion with respect to genotype.
## Finished calculating dispersion estimates.
our_genotype_cv$disp
our_prep_cv <- plot_variance_coefficients(our_protein_expt, x_axis="prepdate")
## Naively calculating coefficient of variation and quartile dispersion with respect to prepdate.
## Finished calculating dispersion estimates.
our_prep_cv$disp
## Same treatment for the tuberculist-library whole-cell subset.
tb_whole_metrics <- sm(graph_metrics(tb_whole_expt))
tb_whole_norm <- sm(normalize_expt(tb_whole_expt, transform="log2", convert="cpm",
norm="quant", filter=TRUE))
tb_whole_norm_metrics <- sm(graph_metrics(tb_whole_norm))
tb_whole_fsva <- sm(normalize_expt(tb_whole_expt, transform="log2", convert="cpm",
batch="fsva", filter=TRUE))
tb_whole_fsva_metrics <- sm(graph_metrics(tb_whole_fsva))
tb_whole_varpart <- varpart(tb_whole_expt)
## Attempting mixed linear model with: ~ (1|condition) + (1|batch)
## Fitting the expressionset to the model, this is slow.
## Projected run time: ~ 0.2 min
## Placing factor: condition at the beginning of the model.
tb_whole_varpart$partition_plot
## NOTE(review): as above, the cv plots use tb_protein_expt (all 39 samples),
## not tb_whole_expt -- confirm this is intended.
tb_whole_cv <- plot_variance_coefficients(tb_protein_expt)
## Naively calculating coefficient of variation and quartile dispersion with respect to condition.
## Finished calculating dispersion estimates.
tb_whole_cv$disp
tb_batch_cv <- plot_variance_coefficients(tb_protein_expt, x_axis="batch")
## Naively calculating coefficient of variation and quartile dispersion with respect to batch.
## Finished calculating dispersion estimates.
tb_batch_cv$disp
tb_genotype_cv <- plot_variance_coefficients(tb_protein_expt, x_axis="genotype")
## Naively calculating coefficient of variation and quartile dispersion with respect to genotype.
## Finished calculating dispersion estimates.
tb_genotype_cv$disp
tb_prep_cv <- plot_variance_coefficients(tb_protein_expt, x_axis="prepdate")
## Naively calculating coefficient of variation and quartile dispersion with respect to prepdate.
## Finished calculating dispersion estimates.
tb_prep_cv$disp
vio <- plot_boxplot(tb_protein_expt, violin=TRUE)
## This data will benefit from being displayed on the log scale.
## If this is not desired, set scale='raw'
## Some entries are 0. We are on log scale, adding 1 to the data.
## Changed 50738 zero count features.
## Metrics for the culture-filtrate subsets of both libraries: raw,
## quantile-normalized, and fsva-batch-adjusted.
our_cf_metrics <- sm(graph_metrics(our_cf_expt))
our_cf_norm <- sm(normalize_expt(our_cf_expt, transform="log2", convert="cpm",
norm="quant", filter=TRUE))
our_cf_norm_metrics <- sm(graph_metrics(our_cf_norm))
our_cf_fsva <- sm(normalize_expt(our_cf_expt, transform="log2", convert="cpm",
batch="fsva", filter=TRUE))
our_cf_fsva_metrics <- sm(graph_metrics(our_cf_fsva))
tb_cf_metrics <- sm(graph_metrics(tb_cf_expt))
tb_cf_norm <- sm(normalize_expt(tb_cf_expt, transform="log2", convert="cpm",
norm="quant", filter=TRUE))
tb_cf_norm_metrics <- sm(graph_metrics(tb_cf_norm))
tb_cf_fsva <- sm(normalize_expt(tb_cf_expt, transform="log2", convert="cpm",
batch="fsva", filter=TRUE))
tb_cf_fsva_metrics <- sm(graph_metrics(tb_cf_fsva))
## Write the QC images for the full our-library protein set.
## Fix: several filenames here used generic suffixes (e.g. "_fsva_pca.png",
## "_norm_corheat.png") that were reused verbatim by the cf and tb sections
## below, so later pp() calls silently overwrote these images.  Every
## filename now carries an explicit "our_" prefix.
pp(image=our_protein_metrics$libsize,
   file=file.path("images", paste0(ver, "_our_libsize.png")))
## It seems to me that the scale of the data is all within an order of magnitude or two.
## I cannot get used to these absurdly large numbers though.
pp(image=our_protein_norm_metrics$pcaplot,
   file=file.path("images", paste0(ver, "_our_norm_pca.png")))
## Too few points to calculate an ellipse (message repeated 24 times)
## There appears to be a nice split in the data, however the un-assayable batch
## effect is a problem.
pp(image=our_protein_fsva_metrics$pcaplot,
   file=file.path("images", paste0(ver, "_our_fsva_pca.png")))
## Too few points to calculate an ellipse (message repeated 24 times)
## fsva seems to get some handle on the data, but I don't think we should rely
## upon it.
pp(image=our_protein_norm_metrics$corheat,
   file=file.path("images", paste0(ver, "_our_norm_corheat.png")))
## Once again, the whole-cell/culture-filtrate split is very large.
pp(image=our_protein_metrics$density,
   file=file.path("images", paste0(ver, "_our_raw_density.png")))
## There are two obvious distributions in the data, once again split between types.
pp(image=our_protein_metrics$boxplot,
   file=file.path("images", paste0(ver, "_our_boxplot.png")))
## This recapitulates the previous plot.
## QC images for the our-library whole-cell subset.
## Fix: filenames are prefixed "our_whole_" instead of the bare "_whole_"
## used before, which collided with (and was overwritten by) the tb
## whole-cell images written later.
pp(image=our_whole_metrics$libsize,
   file=file.path("images", paste0(ver, "_our_whole_libsize.png")))
pp(image=our_whole_norm_metrics$pcaplot,
   file=file.path("images", paste0(ver, "_our_whole_norm_pca.png")))
## Too few points to calculate an ellipse (message repeated 12 times)
pp(image=our_whole_fsva_metrics$pcaplot,
   file=file.path("images", paste0(ver, "_our_whole_fsva_pca.png")))
## Too few points to calculate an ellipse (message repeated 12 times)
pp(image=our_whole_norm_metrics$corheat,
   file=file.path("images", paste0(ver, "_our_whole_norm_corheat.png")))
pp(image=our_whole_metrics$density,
   file=file.path("images", paste0(ver, "_our_whole_raw_density.png")))
pp(image=our_whole_metrics$boxplot,
   file=file.path("images", paste0(ver, "_our_whole_boxplot.png")))
## QC images for the our-library culture-filtrate subset.
## Fix: the original filenames ("_libsize.png", "_norm_pca.png",
## "_fsva_pca.png", ...) carried no subset prefix at all and both collided
## with the earlier our-protein images and were overwritten by the tb
## sections below; each now carries an "our_cf_" prefix.
pp(image=our_cf_metrics$libsize,
   file=file.path("images", paste0(ver, "_our_cf_libsize.png")))
pp(image=our_cf_norm_metrics$pcaplot,
   file=file.path("images", paste0(ver, "_our_cf_norm_pca.png")))
## Too few points to calculate an ellipse (message repeated 12 times)
pp(image=our_cf_fsva_metrics$pcaplot,
   file=file.path("images", paste0(ver, "_our_cf_fsva_pca.png")))
## Too few points to calculate an ellipse (message repeated 12 times)
pp(image=our_cf_norm_metrics$corheat,
   file=file.path("images", paste0(ver, "_our_cf_norm_corheat.png")))
pp(image=our_cf_metrics$density,
   file=file.path("images", paste0(ver, "_our_cf_raw_density.png")))
pp(image=our_cf_metrics$boxplot,
   file=file.path("images", paste0(ver, "_our_cf_boxplot.png")))
## QC images for the full tuberculist-library protein set.
## Fix: only the first two filenames carried the "_tb_" prefix; the rest
## reused the same generic names as the our-library section and therefore
## overwrote those images.  All filenames are now uniformly prefixed "tb_".
pp(image=tb_protein_metrics$libsize,
   file=file.path("images", paste0(ver, "_tb_libsize.png")))
## It seems to me that the scale of the data is all within an order of magnitude or two.
## I cannot get used to these absurdly large numbers though.
pp(image=tb_protein_norm_metrics$pcaplot,
   file=file.path("images", paste0(ver, "_tb_norm_pca.png")))
## There appears to be a nice split in the data, however the un-assayable batch
## effect is a problem.
pp(image=tb_protein_fsva_metrics$pcaplot,
   file=file.path("images", paste0(ver, "_tb_fsva_pca.png")))
## fsva seems to get some handle on the data, but I don't think we should rely
## upon it.
pp(image=tb_protein_norm_metrics$corheat,
   file=file.path("images", paste0(ver, "_tb_norm_corheat.png")))
## Once again, the whole-cell/culture-filtrate split is very large.
pp(image=tb_protein_metrics$density,
   file=file.path("images", paste0(ver, "_tb_raw_density.png")))
## There are two obvious distributions in the data, once again split between types.
pp(image=tb_protein_metrics$boxplot,
   file=file.path("images", paste0(ver, "_tb_boxplot.png")))
## This recapitulates the previous plot.
## QC images for the tuberculist-library whole-cell subset.
## Fix: the bare "_whole_" filenames clobbered the our-library whole-cell
## images written earlier; prefix with "tb_whole_".
pp(image=tb_whole_metrics$libsize,
   file=file.path("images", paste0(ver, "_tb_whole_libsize.png")))
pp(image=tb_whole_norm_metrics$pcaplot,
   file=file.path("images", paste0(ver, "_tb_whole_norm_pca.png")))
pp(image=tb_whole_fsva_metrics$pcaplot,
   file=file.path("images", paste0(ver, "_tb_whole_fsva_pca.png")))
pp(image=tb_whole_norm_metrics$corheat,
   file=file.path("images", paste0(ver, "_tb_whole_norm_corheat.png")))
pp(image=tb_whole_metrics$density,
   file=file.path("images", paste0(ver, "_tb_whole_raw_density.png")))
pp(image=tb_whole_metrics$boxplot,
   file=file.path("images", paste0(ver, "_tb_whole_boxplot.png")))
## QC images for the tuberculist-library culture-filtrate subset.
## Fix: the original unprefixed filenames overwrote images from every prior
## section; prefix with "tb_cf_".
pp(image=tb_cf_metrics$libsize,
   file=file.path("images", paste0(ver, "_tb_cf_libsize.png")))
pp(image=tb_cf_norm_metrics$pcaplot,
   file=file.path("images", paste0(ver, "_tb_cf_norm_pca.png")))
pp(image=tb_cf_fsva_metrics$pcaplot,
   file=file.path("images", paste0(ver, "_tb_cf_fsva_pca.png")))
pp(image=tb_cf_norm_metrics$corheat,
   file=file.path("images", paste0(ver, "_tb_cf_norm_corheat.png")))
pp(image=tb_cf_metrics$density,
   file=file.path("images", paste0(ver, "_tb_cf_raw_density.png")))
pp(image=tb_cf_metrics$boxplot,
   file=file.path("images", paste0(ver, "_tb_cf_boxplot.png")))
I am thinking that a simple sum of the data in the technical replicates should be sufficient.
## Sum technical replicates into one column per biological replicate.
## (The our-library data has no technical replicates -- 16 in, 16 out --
## while the tb data collapses 39 runs to 18 samples.)
our_compressed <- concatenate_runs(our_protein_expt, column="bioreplicate")
## The original expressionset has 16 samples.
## The final expressionset has 16 samples.
tb_compressed <- concatenate_runs(tb_protein_expt, column="bioreplicate")
## The original expressionset has 39 samples.
## The final expressionset has 18 samples.
our_whole_compressed <- concatenate_runs(our_whole_expt, column="bioreplicate")
## The original expressionset has 8 samples.
## The final expressionset has 8 samples.
our_cf_compressed <- concatenate_runs(our_cf_expt, column="bioreplicate")
## The original expressionset has 8 samples.
## The final expressionset has 8 samples.
tb_whole_compressed <- concatenate_runs(tb_whole_expt, column="bioreplicate")
## The original expressionset has 19 samples.
## The final expressionset has 9 samples.
tb_cf_compressed <- concatenate_runs(tb_cf_expt, column="bioreplicate")
## The original expressionset has 20 samples.
## The final expressionset has 9 samples.
## Metrics and normalized views of the compressed datasets.
our_compressed_metrics <- sm(graph_metrics(our_compressed))
our_compressed_norm <- sm(normalize_expt(our_compressed, transform="log2", convert="cpm",
norm="quant", filter=TRUE))
our_compressed_norm_metrics <- sm(graph_metrics(our_compressed_norm))
tb_compressed_metrics <- sm(graph_metrics(tb_compressed))
tb_compressed_norm <- sm(normalize_expt(tb_compressed, transform="log2", convert="cpm",
norm="quant", filter=TRUE))
tb_compressed_norm_metrics <- sm(graph_metrics(tb_compressed_norm))
our_compressed_norm_metrics$pcaplot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
tb_compressed_norm_metrics$pcaplot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Run all pairwise differential-expression contrasts on the compressed
## datasets (no batch term in the model; force=TRUE to accept the
## non-integer protein intensities).
our_compressed_de <- all_pairwise(our_compressed, model_batch=FALSE, force=TRUE)
## There is just one batch in this data.
## Assuming no batch in model for testing pca.
## There is just one batch in this data.
## Finished running DE analyses, collecting outputs.
## Comparing analyses 1/15: comp_whole_vs_comp_filtrate
## Comparing analyses 2/15: delta_filtrate_vs_comp_filtrate
## Comparing analyses 3/15: delta_whole_vs_comp_filtrate
## Comparing analyses 4/15: wt_filtrate_vs_comp_filtrate
## Comparing analyses 5/15: wt_whole_vs_comp_filtrate
## Comparing analyses 6/15: delta_filtrate_vs_comp_whole
## Comparing analyses 7/15: delta_whole_vs_comp_whole
## Comparing analyses 8/15: wt_filtrate_vs_comp_whole
## Comparing analyses 9/15: wt_whole_vs_comp_whole
## Comparing analyses 10/15: delta_whole_vs_delta_filtrate
## Comparing analyses 11/15: wt_filtrate_vs_delta_filtrate
## Comparing analyses 12/15: wt_whole_vs_delta_filtrate
## Comparing analyses 13/15: wt_filtrate_vs_delta_whole
## Comparing analyses 14/15: wt_whole_vs_delta_whole
## Comparing analyses 15/15: wt_whole_vs_wt_filtrate
tb_compressed_de <- all_pairwise(tb_compressed, model_batch=FALSE, force=TRUE)
## Assuming no batch in model for testing pca.
## Finished running DE analyses, collecting outputs.
## Comparing analyses 1/15: comp_whole_vs_comp_filtrate
## Comparing analyses 2/15: delta_filtrate_vs_comp_filtrate
## Comparing analyses 3/15: delta_whole_vs_comp_filtrate
## Comparing analyses 4/15: wt_filtrate_vs_comp_filtrate
## Comparing analyses 5/15: wt_whole_vs_comp_filtrate
## Comparing analyses 6/15: delta_filtrate_vs_comp_whole
## Comparing analyses 7/15: delta_whole_vs_comp_whole
## Comparing analyses 8/15: wt_filtrate_vs_comp_whole
## Comparing analyses 9/15: wt_whole_vs_comp_whole
## Comparing analyses 10/15: delta_whole_vs_delta_filtrate
## Comparing analyses 11/15: wt_filtrate_vs_delta_filtrate
## Comparing analyses 12/15: wt_whole_vs_delta_filtrate
## Comparing analyses 13/15: wt_filtrate_vs_delta_whole
## Comparing analyses 14/15: wt_whole_vs_delta_whole
## Comparing analyses 15/15: wt_whole_vs_wt_filtrate
## Re-run on the low-count-filtered data, with and without an fsva batch
## surrogate in the model, for comparison.
our_pairwise_filt <- sm(normalize_expt(our_compressed, filter=TRUE))
our_pairwise_comp <- sm(all_pairwise(our_pairwise_filt, model_batch="fsva", force=TRUE))
our_pairwise_nobatch <- sm(all_pairwise(our_pairwise_filt, model_batch=FALSE, force=TRUE))
tb_pairwise_filt <- sm(normalize_expt(tb_compressed, filter=TRUE))
tb_pairwise_comp <- sm(all_pairwise(tb_pairwise_filt, model_batch="fsva", force=TRUE))
tb_pairwise_nobatch <- sm(all_pairwise(tb_pairwise_filt, model_batch=FALSE, force=TRUE))
## Heatmaps of agreement among the DE methods.
our_compressed_de$comparison$heat
tb_compressed_de$comparison$heat
## wt filtrate vs wt whole-cell: merge the MSstats comparison result into the
## combined DE tables, then check rank agreement between the MSstats log2fc
## and the limma logfc.
keepers <- list(
"wtcf_vs_wtwhole" = c("wt_filtrate", "wt_whole"))
set_name <- "wt_filtrate_vs_wt_whole"
msstats_result <- our_results[[set_name]][["ComparisonResult"]]
## Exclude rows whose log2fc column is 'undefined' when writing the tables.
droppers <- c("undefined")
names(droppers) <- "log2fc"
## Make sure to set the rownames so it will merge into the excel file.
rownames(msstats_result) <- msstats_result[["Protein"]]
our_wtcf_wtwhole_tables <- sm(combine_de_tables(
our_compressed_de, keepers=keepers, extra_annot=msstats_result,
excludes=droppers,
excel=paste0("excel/our_wtcf_vs_wtwhole_tables-v", ver, ".xlsx")))
our_wtcf_nobatch_wtwhole_tables <- sm(combine_de_tables(
our_pairwise_nobatch, keepers=keepers, extra_annot=msstats_result,
excludes=droppers,
excel=paste0("excel/our_wtcf_vs_wtwhole_nobatch_tables-v", ver, ".xlsx")))
## Spearman correlation of MSstats vs limma fold changes (batch model).
comp_table <- our_wtcf_wtwhole_tables$data[[set_name]]
cor.test(comp_table$log2fc, comp_table$limma_logfc, method="spearman")
## Warning in cor.test.default(comp_table$log2fc, comp_table$limma_logfc,
## method = "spearman"): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: comp_table$log2fc and comp_table$limma_logfc
## S = 3.6e+07, p-value <2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5859
## Same correlation for the no-batch model.
comp_table <- our_wtcf_nobatch_wtwhole_tables$data[[set_name]]
cor.test(comp_table$log2fc, comp_table$limma_logfc, method="spearman")
## Warning in cor.test.default(comp_table$log2fc, comp_table$limma_logfc,
## method = "spearman"): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: comp_table$log2fc and comp_table$limma_logfc
## S = 3.6e+07, p-value <2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5883
## The same wt filtrate-vs-whole tables and correlations for the
## tuberculist-library results (reuses keepers/set_name from above).
msstats_result <- tb_results[[set_name]][["ComparisonResult"]]
## Exclude rows whose log2fc column is 'undefined' when writing the tables.
droppers <- c("undefined")
names(droppers) <- "log2fc"
## Make sure to set the rownames so it will merge into the excel file.
rownames(msstats_result) <- msstats_result[["Protein"]]
tb_wtcf_wtwhole_tables <- sm(combine_de_tables(
tb_pairwise_comp, keepers=keepers, extra_annot=msstats_result,
excludes=droppers,
excel=paste0("excel/tb_wtcf_vs_wtwhole_tables-v", ver, ".xlsx")))
tb_wtcf_nobatch_wtwhole_tables <- sm(combine_de_tables(
tb_pairwise_nobatch, keepers=keepers, extra_annot=msstats_result,
excludes=droppers,
excel=paste0("excel/tb_wtcf_vs_wtwhole_nobatch_tables-v", ver, ".xlsx")))
## Spearman correlation of MSstats vs limma fold changes (batch model).
comp_table <- tb_wtcf_wtwhole_tables$data[[set_name]]
cor.test(comp_table$log2fc, comp_table$limma_logfc, method="spearman")
## Warning in cor.test.default(comp_table$log2fc, comp_table$limma_logfc,
## method = "spearman"): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: comp_table$log2fc and comp_table$limma_logfc
## S = 1.8e+09, p-value <2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.3503
## Same correlation for the no-batch model.
comp_table <- tb_wtcf_nobatch_wtwhole_tables$data[[set_name]]
cor.test(comp_table$log2fc, comp_table$limma_logfc, method="spearman")
## Warning in cor.test.default(comp_table$log2fc, comp_table$limma_logfc,
## method = "spearman"): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: comp_table$log2fc and comp_table$limma_logfc
## S = 1.8e+09, p-value <2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.3437
## delta filtrate vs delta whole-cell, for both libraries, with the MSstats
## comparison result merged in as extra annotations.
keepers <- list(
"deltacf_vs_deltawhole" = c("delta_filtrate", "delta_whole"))
set_name <- "delta_filtrate_vs_delta_whole"
msstats_result <- our_results[[set_name]][["ComparisonResult"]]
## Make sure to set the rownames so it will merge into the excel file.
rownames(msstats_result) <- msstats_result[["Protein"]]
our_deltacf_deltawhole_tables <- sm(combine_de_tables(
our_pairwise_comp, keepers=keepers, extra_annot=msstats_result,
excel=paste0("excel/our_deltacf_vs_deltawhole_tables-v", ver, ".xlsx")))
msstats_result <- tb_results[[set_name]][["ComparisonResult"]]
## Make sure to set the rownames so it will merge into the excel file.
rownames(msstats_result) <- msstats_result[["Protein"]]
tb_deltacf_deltawhole_tables <- sm(combine_de_tables(
tb_pairwise_comp, keepers=keepers, extra_annot=msstats_result,
excel=paste0("excel/tb_deltacf_vs_deltawhole_tables-v", ver, ".xlsx")))
## Contrast: complemented-strain culture filtrate vs. complemented whole cell.
keepers <- list(
  "compcf_vs_compwhole" = c("comp_filtrate", "comp_whole"))
set_name <- "comp_filtrate_vs_comp_whole"
msstats_result <- our_results[[set_name]][["ComparisonResult"]]
## Make sure to set the rownames so it will merge into the excel file.
rownames(msstats_result) <- msstats_result[["Protein"]]
## BUGFIX: both results in this section were assigned to the same name
## ('compcf_compwhole_tables'), so the our-library result was immediately
## clobbered by the tb-library result.  Give the our-library result its own
## name, matching the our_/tb_ pattern used for the other contrasts.
our_compcf_compwhole_tables <- sm(combine_de_tables(
  our_pairwise_comp, keepers = keepers, extra_annot = msstats_result,
  excel = paste0("excel/our_compcf_vs_compwhole_tables-v", ver, ".xlsx")))
msstats_result <- tb_results[[set_name]][["ComparisonResult"]]
rownames(msstats_result) <- msstats_result[["Protein"]]
## The tb-library result keeps the original name so any downstream
## references to 'compcf_compwhole_tables' see the same (final) value.
compcf_compwhole_tables <- sm(combine_de_tables(
  tb_pairwise_comp, keepers = keepers, extra_annot = msstats_result,
  excel = paste0("excel/tb_compcf_vs_compwhole_tables-v", ver, ".xlsx")))
## Contrast: delta-strain culture filtrate vs. wild-type culture filtrate.
keepers <- list(
  "deltacf_vs_wtcf" = c("delta_filtrate", "wt_filtrate"))
set_name <- "delta_filtrate_vs_wt_filtrate"
msstats_result <- our_results[[set_name]][["ComparisonResult"]]
## Make sure to set the rownames so it will merge into the excel file.
rownames(msstats_result) <- msstats_result[["Protein"]]
## BUGFIX: this result was previously assigned to 'deltacf_wtcf_tables' and
## then clobbered by the tb-library result below; name it 'our_' so the
## our-library tables remain available, as in the other contrast sections.
our_deltacf_wtcf_tables <- sm(combine_de_tables(
  our_pairwise_comp, keepers = keepers, extra_annot = msstats_result,
  excel = paste0("excel/our_deltacf_vs_wtcf_tables-v", ver, ".xlsx")))
msstats_result <- tb_results[[set_name]][["ComparisonResult"]]
rownames(msstats_result) <- msstats_result[["Protein"]]
## The tb-library result keeps the original name; the correlation below
## (and any later code) therefore still sees the same value it did before.
deltacf_wtcf_tables <- sm(combine_de_tables(
  tb_pairwise_comp, keepers = keepers, extra_annot = msstats_result,
  excel = paste0("excel/tb_deltacf_vs_wtcf_tables-v", ver, ".xlsx")))
## Correlate MSstats vs limma fold changes for the last 50 rows only.
comp_table <- deltacf_wtcf_tables[["data"]][[set_name]]
cor.test(tail(comp_table[["log2fc"]], n = 50), tail(comp_table[["limma_logfc"]], n = 50),
         method = "spearman")
## Warning in cor.test.default(tail(comp_table$log2fc, n = 50),
## tail(comp_table$limma_logfc, : Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: tail(comp_table$log2fc, n = 50) and tail(comp_table$limma_logfc, n = 50)
## S = 27000, p-value = 0.008
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.3741
## Contrast: complemented-strain culture filtrate vs. wild-type culture filtrate.
keepers <- list(
  "compcf_vs_wtcf" = c("comp_filtrate", "wt_filtrate"))
set_name <- "comp_filtrate_vs_wt_filtrate"
msstats_result <- our_results[[set_name]][["ComparisonResult"]]
## Make sure to set the rownames so it will merge into the excel file.
rownames(msstats_result) <- msstats_result[["Protein"]]
## BUGFIX: both results in this section were assigned to 'compcf_wtcf_tables',
## losing the our-library result; give it its own 'our_' name.
our_compcf_wtcf_tables <- sm(combine_de_tables(
  our_pairwise_comp, keepers = keepers, extra_annot = msstats_result,
  excel = paste0("excel/our_compcf_vs_wtcf_tables-v", ver, ".xlsx")))
msstats_result <- tb_results[[set_name]][["ComparisonResult"]]
rownames(msstats_result) <- msstats_result[["Protein"]]
## The tb-library result keeps the original name for backward compatibility.
compcf_wtcf_tables <- sm(combine_de_tables(
  tb_pairwise_comp, keepers = keepers, extra_annot = msstats_result,
  excel = paste0("excel/tb_compcf_vs_wtcf_tables-v", ver, ".xlsx")))
## Contrast: delta-strain whole cell vs. wild-type whole cell.
## BUGFIX (copy-paste error): the keeper label said 'wtcf_vs_wtwhole' even
## though the contrast -- and the excel filenames below -- are delta_whole
## vs. wt_whole; relabel the keeper accordingly.
keepers <- list(
  "deltawhole_vs_wtwhole" = c("delta_whole", "wt_whole"))
set_name <- "delta_whole_vs_wt_whole"
msstats_result <- our_results[[set_name]][["ComparisonResult"]]
rownames(msstats_result) <- msstats_result[["Protein"]]
## BUGFIX: the our-library result was assigned to the same (also misnamed)
## variable as the tb-library result and was clobbered; give it its own name.
our_deltawhole_wtwhole_tables <- sm(combine_de_tables(
  our_pairwise_comp, keepers = keepers, extra_annot = msstats_result,
  excel = paste0("excel/our_deltawhole_vs_wtwhole_tables-v", ver, ".xlsx")))
msstats_result <- tb_results[[set_name]][["ComparisonResult"]]
rownames(msstats_result) <- msstats_result[["Protein"]]
## NOTE: 'wtcf_wtwhole_tables' is a misleading leftover name; it is kept so
## any downstream references still resolve, with a correctly named alias.
wtcf_wtwhole_tables <- sm(combine_de_tables(
  tb_pairwise_comp, keepers = keepers, extra_annot = msstats_result,
  excel = paste0("excel/tb_deltawhole_vs_wtwhole_tables-v", ver, ".xlsx")))
deltawhole_wtwhole_tables <- wtcf_wtwhole_tables
## Contrast: complemented-strain whole cell vs. wild-type whole cell.
keepers <- list(
  "compwhole_vs_wtwhole" = c("comp_whole", "wt_whole"))
set_name <- "comp_whole_vs_wt_whole"
msstats_result <- our_results[[set_name]][["ComparisonResult"]]
## Make sure to set the rownames so it will merge into the excel file.
rownames(msstats_result) <- msstats_result[["Protein"]]
## BUGFIX: both results here were assigned to 'compwhole_wtwhole_tables',
## so the our-library result was lost; give it its own 'our_' name.
our_compwhole_wtwhole_tables <- sm(combine_de_tables(
  our_pairwise_comp, keepers = keepers, extra_annot = msstats_result,
  excel = paste0("excel/our_compwhole_vs_wtwhole_tables-v", ver, ".xlsx")))
msstats_result <- tb_results[[set_name]][["ComparisonResult"]]
rownames(msstats_result) <- msstats_result[["Protein"]]
## The tb-library result keeps the original name for backward compatibility.
compwhole_wtwhole_tables <- sm(combine_de_tables(
  tb_pairwise_comp, keepers = keepers, extra_annot = msstats_result,
  excel = paste0("excel/tb_compwhole_vs_wtwhole_tables-v", ver, ".xlsx")))
## Compare the deseq fold changes between the two libraries for the
## wt whole vs. wt filtrate contrast.
our_table <- our_pairwise_comp[["deseq"]][["all_tables"]][["wt_whole_vs_wt_filtrate"]]
tb_table <- tb_pairwise_comp[["deseq"]][["all_tables"]][["wt_whole_vs_wt_filtrate"]]
## Strip the trailing '_suffix' from the tb rownames so the IDs line up
## with the rownames of the our-library table before merging.
rownames(tb_table) <- gsub(pattern = "^(.*)_.*", replacement = "\\1",
                           x = rownames(tb_table))
test_table <- merge(our_table, tb_table, by = "row.names")
cor.test(test_table[["logFC.x"]], test_table[["logFC.y"]])
##
## Pearson's product-moment correlation
##
## data: test_table$logFC.x and test_table$logFC.y
## t = 13, df = 820, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3472 0.4618
## sample estimates:
## cor
## 0.4061
## Save the session state to an .rda.xz named after the Rmd file, unless a
## 'skip_load' flag was set (e.g. when this worksheet was re-rendered from
## a previously saved state).
if (!isTRUE(get0("skip_load"))) {
  ## message() concatenates its arguments itself; no paste0() needed.
  message("This is hpgltools commit: ", get_git_commit())
  ## FIX: 'replace=' relied on partial argument matching; the gsub()
  ## argument is spelled 'replacement='.
  this_save <- paste0(gsub(pattern = "\\.Rmd", replacement = "", x = rmd_file),
                      "-v", ver, ".rda.xz")
  message("Saving to ", this_save)
  tmp <- sm(saveme(filename = this_save))
  pander::pander(sessionInfo())
}