For this set of data I will look only at the April 20 samples.
# Read the DDA sample sheet and keep only the rows whose sampledate is
# 20180420, then render the remaining metadata as a table.
sample_sheet <- "sample_sheets/dda_samples.xlsx"
# File name handed to load()/extract_mzxml_data() for this sample set.
savefile <- "mzxml_dda_data_201805.rda"
metadata <- openxlsx::read.xlsx(sample_sheet)
# %in% never returns NA, so a row with a missing sampledate is dropped
# instead of becoming an all-NA row when the mask is used as a row index.
keeper_idx <- metadata[["sampledate"]] %in% "20180420"
metadata <- metadata[keeper_idx, ]
knitr::kable(metadata)
sampleid | sampletype | condition | batch | enzyme | sampledate | rundate | runinfo | rawfile | mzxmlfile | note | |
---|---|---|---|---|---|---|---|---|---|---|---|
3 | 2018_0420Briken01 | WT H37Rv Whole Cell Lysate | wt | e | Trypsin | 20180420 | 20180420 | High resolution MS2 | raw/2018_0420Briken07.raw | mzXML/dda_201805_whole/2018_0420Briken01.mzXML | NA |
4 | 2018_0420Briken02 | WT H37Rv Whole Cell Lysate | wt | e | Trypsin | 20180420 | 20180420 | High resolution MS2 | raw/2018_0420Briken07.raw | mzXML/dda_201805_whole/2018_0420Briken02.mzXML | NA |
5 | 2018_0420Briken03 | WT H37Rv Whole Cell Lysate | wt | e | Trypsin | 20180420 | 20180420 | High resolution MS2 | raw/2018_0420Briken07.raw | mzXML/dda_201805_whole/2018_0420Briken03.mzXML | NA |
6 | 2018_0420Briken04 | WT H37Rv Whole Cell Lysate | wt | e | Trypsin | 20180420 | 20180420 | High resolution MS2 | raw/2018_0420Briken07.raw | mzXML/dda_201805_whole/2018_0420Briken04.mzXML | NA |
7 | 2018_0420Briken05 | WT H37Rv Whole Cell Lysate | wt | e | Trypsin | 20180420 | 20180420 | High resolution MS2 | raw/2018_0420Briken07.raw | mzXML/dda_201805_whole/2018_0420Briken05.mzXML | NA |
8 | 2018_0420Briken06 | WT H37Rv Whole Cell Lysate | wt | e | Trypsin | 20180420 | 20180420 | High resolution MS2 | raw/2018_0420Briken07.raw | mzXML/dda_201805_whole/2018_0420Briken06.mzXML | NA |
9 | 2018_0420Briken07 | WT H37Rv Whole Cell Lysate | wt | e | Trypsin | 20180420 | 20180420 | High resolution MS2 | raw/2018_0420Briken07.raw | mzXML/dda_201805_whole/2018_0420Briken07.mzXML | NA |
10 | 2018_0420Briken08 | WT H37Rv Whole Cell Lysate | wt | e | Trypsin | 20180420 | 20180420 | High resolution MS2 | raw/2018_0420Briken08.raw | mzXML/dda_201805_whole/2018_0420Briken08.mzXML | NA |
11 | 2018_0420Briken11 | WT H37Rv Whole Cell Lysate | wt | e | Trypsin | 20180420 | 20180420 | High resolution MS2 | raw/2018_0420Briken11.raw | mzXML/dda_201805_cf/2018_0420Briken11.mzXML | NA |
12 | 2018_0420Briken12 | WT H37Rv Whole Cell Lysate | wt | e | Trypsin | 20180420 | 20180420 | High resolution MS2 | raw/2018_0420Briken12.raw | mzXML/dda_201805_cf/2018_0420Briken12.mzXML | NA |
13 | 2018_0420Briken13 | WT H37Rv Whole Cell Lysate | wt | e | Trypsin | 20180420 | 20180420 | High resolution MS2 | raw/2018_0420Briken13.raw | mzXML/dda_201805_cf/2018_0420Briken13.mzXML | NA |
14 | 2018_0420Briken14 | WT H37Rv Whole Cell Lysate | wt | e | Trypsin | 20180420 | 20180420 | High resolution MS2 | raw/2018_0420Briken14.raw | mzXML/dda_201805_cf/2018_0420Briken14.mzXML | NA |
15 | 2018_0420Briken15 | WT H37Rv Whole Cell Lysate | wt | e | Trypsin | 20180420 | 20180420 | High resolution MS2 | raw/2018_0420Briken15.raw | mzXML/dda_201805_cf/2018_0420Briken15.mzXML | NA |
16 | 2018_0420Briken16 | WT H37Rv Whole Cell Lysate | wt | e | Trypsin | 20180420 | 20180420 | High resolution MS2 | raw/2018_0420Briken16.raw | mzXML/dda_201805_cf/2018_0420Briken16.mzXML | NA |
17 | 2018_0420Briken17 | WT H37Rv Whole Cell Lysate | wt | e | Trypsin | 20180420 | 20180420 | High resolution MS2 | raw/2018_0420Briken17.raw | mzXML/dda_201805_cf/2018_0420Briken17.mzXML | NA |
18 | 2018_0420Briken18 | WT H37Rv Whole Cell Lysate | wt | e | Trypsin | 20180420 | 20180420 | High resolution MS2 | raw/2018_0420Briken18.raw | mzXML/dda_201805_cf/2018_0420Briken18.mzXML | NA |
# Only extract from the mzXML files named in the "mzxmlfile" column when no
# savefile is present; otherwise restore whatever objects the savefile holds.
# savefile is also passed to the extraction (presumably so it gets written
# there -- confirm against extract_mzxml_data()).
if (!file.exists(savefile)) {
  mzxml_data <- sm(extract_mzxml_data(
    metadata,
    file_column="mzxmlfile",
    savefile=savefile))
} else {
  load(savefile)
}
# Draw a series of plots from the DDA mzxml_data and hand each one to pp()
# together with the image file it should be written to.  The '## ' lines are
# the rendered output of the pp() calls.

# Boxplot using plot_mzxml_boxplot()'s default table/column.
intensity_boxplot <- sm(plot_mzxml_boxplot(mzxml_data))
pp(file="images/mzxml_intensities.png", image=intensity_boxplot)
## Writing the image to: images/mzxml_intensities.png and calling dev.off().
# Boxplot of the "peakscount" column of the "scans" table.
# NOTE(review): the variable and file are named 'retention', but the column
# requested is peakscount -- confirm which was intended.
retention_boxplot <- sm(plot_mzxml_boxplot(mzxml_data, table="scans", column="peakscount"))
pp(file="images/mzxml_retention.png", image=retention_boxplot)
## Writing the image to: images/mzxml_retention.png and calling dev.off().
# Boxplot of the "basepeakmz" column of the "scans" table.
mz_boxplot <- sm(plot_mzxml_boxplot(mzxml_data, table="scans", column="basepeakmz"))
pp(file="images/mzxml_mzbase.png", image=mz_boxplot)
## Writing the image to: images/mzxml_mzbase.png and calling dev.off().
# Boxplot of the "basepeakintensity" column of the "scans" table.
scanintensity_boxplot <- sm(plot_mzxml_boxplot(mzxml_data, table="scans", column="basepeakintensity"))
pp(file="images/mzxml_scanintensity.png", image=scanintensity_boxplot)
## Writing the image to: images/mzxml_scanintensity.png and calling dev.off().
# Intensity against m/z with both axes on the log scale.
# NOTE(review): the output file name contains 'dia' although this section
# works on the DDA samples -- confirm the name is intended.
intensity_wrt_mz <- sm(plot_intensity_mz(mzxml_data, x_scale="log", y_scale="log"))
# Only the $plot element of the result is passed on here.
pp(file="images/intensity_wrt_mz_dia.png", image=intensity_wrt_mz$plot)
## Writing the image to: images/intensity_wrt_mz_dia.png and calling dev.off().
## Warning in guide_merge.legend(init, x[[i]]): Duplicated override.aes is
## ignored.
## Warning in guide_merge.legend(init, x[[i]]): Duplicated override.aes is
## ignored.
Remember that extract_mzxml_data() writes the acquisition window files which OpenSwathWorkflow requires in order to run, so make sure it has been run.
# Read the DIA sample sheet, keep the rows belonging to the may2018
# experiment, and render them as a table.
sample_sheet <- "sample_sheets/Mtb_dia_samples.xlsx"
savefile <- "mzxml_dia_data_201805.rda"
metadata <- openxlsx::read.xlsx(sample_sheet)
# %in% yields FALSE rather than NA for rows without an expt_id, so those rows
# are excluded by the mask.
keeper_idx <- metadata[["expt_id"]] %in% "may2018"
metadata <- metadata[keeper_idx, ]
knitr::kable(metadata)
sampleid | TubeID | Sample.Description | BioReplicate | techinical_replicate | Run | expt_id | Condition | batch | windowsize | enzyme | harvestdate | prepdate | rundate | runinfo | rawfile | Filename | filtered_result | unfiltered_result | note | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
31 | 2018_0502BrikenDIA07 | 4.4.18–1 | H37Rv ΔEsx-5A; Whole Cell Lysate | br8 | tr5 | 2018_0502BrikenDIA07 | may2018 | delta_whole | unknown | 8 | Trypsin | 20171019 | 20180403 | 20180502 | NA | raw/2018_0502BrikenDIA07.raw | mzXML/dia_201805/2018_0502BrikenDIA07.mzXML | NA | NA | for trypsin samples, instrument cleaned two days before. New trap column installed, analytical column relatively new |
32 | 2018_0502BrikenDIA08 | 4.4.18–2 | H37Rv ΔEsx-5A; Whole Cell Lysate | br9 | tr5 | 2018_0502BrikenDIA08 | may2018 | delta_whole | unknown | 8 | Trypsin | 20171019 | 20180403 | 20180502 | NA | raw/2018_0502BrikenDIA08.raw | mzXML/dia_201805/2018_0502BrikenDIA08.mzXML | NA | NA | NA |
33 | 2018_0502BrikenDIA09 | 4.4.18–3 | H37Rv ΔEsx-5A; Whole Cell Lysate | br10 | tr5 | 2018_0502BrikenDIA09 | may2018 | delta_whole | unknown | 8 | Trypsin | 20171019 | 20180403 | 20180502 | NA | raw/2018_0502BrikenDIA09.raw | mzXML/dia_201805/2018_0502BrikenDIA09.mzXML | NA | NA | NA |
34 | 2018_0502BrikenDIA10 | 4.4.18–4 | H37Rv ΔEsx-5A Complement; Whole Cell Lysate | br11 | tr5 | 2018_0502BrikenDIA10 | may2018 | comp_whole | unknown | 8 | Trypsin | 20171019 | 20180403 | 20180502 | NA | raw/2018_0502BrikenDIA10.raw | mzXML/dia_201805/2018_0502BrikenDIA10.mzXML | NA | NA | NA |
35 | 2018_0502BrikenDIA11 | 4.4.18–5 | H37Rv ΔEsx-5A Complement; Whole Cell Lysate | br12 | tr5 | 2018_0502BrikenDIA11 | may2018 | comp_whole | unknown | 8 | Trypsin | 20171019 | 20180403 | 20180502 | NA | raw/2018_0502BrikenDIA11.raw | mzXML/dia_201805/2018_0502BrikenDIA11.mzXML | NA | NA | NA |
36 | 2018_0502BrikenDIA12 | 4.4.18–6 | H37Rv ΔEsx-5A Complement; Whole Cell Lysate | br13 | tr5 | 2018_0502BrikenDIA12 | may2018 | comp_whole | unknown | 8 | Trypsin | 20171019 | 20180403 | 20180502 | NA | raw/2018_0502BrikenDIA12.raw | mzXML/dia_201805/2018_0502BrikenDIA12.mzXML | NA | NA | NA |
37 | 2018_0502BrikenDIA01 | 4.4.18–7 | H37Rv ΔEsx-5A; Culture Filtrate | br14 | tr5 | 2018_0502BrikenDIA01 | may2018 | delta_cf | unknown | 8 | Trypsin | 20171019 | 20180403 | 20180502 | NA | raw/2018_0502BrikenDIA01.raw | mzXML/dia_201805/2018_0502BrikenDIA01.mzXML | NA | NA | NA |
38 | 2018_0502BrikenDIA02 | 4.4.18–8 | H37Rv ΔEsx-5A; Culture Filtrate | br15 | tr5 | 2018_0502BrikenDIA02 | may2018 | delta_cf | unknown | 8 | Trypsin | 20171019 | 20180403 | 20180502 | NA | raw/2018_0502BrikenDIA02.raw | mzXML/dia_201805/2018_0502BrikenDIA02.mzXML | NA | NA | NA |
39 | 2018_0502BrikenDIA03 | 4.4.18–9 | H37Rv ΔEsx-5A; Culture Filtrate | br16 | tr5 | 2018_0502BrikenDIA03 | may2018 | delta_cf | unknown | 8 | Trypsin | 20171019 | 20180403 | 20180502 | NA | raw/2018_0502BrikenDIA03.raw | mzXML/dia_201805/2018_0502BrikenDIA03.mzXML | NA | NA | NA |
40 | 2018_0502BrikenDIA04 | 4.4.18–10 | H37Rv ΔEsx-5A Complement; Culture Filtrate | br17 | tr5 | 2018_0502BrikenDIA04 | may2018 | comp_cf | unknown | 8 | Trypsin | 20171019 | 20180403 | 20180502 | NA | raw/2018_0502BrikenDIA04.raw | mzXML/dia_201805/2018_0502BrikenDIA04.mzXML | NA | NA | NA |
41 | 2018_0502BrikenDIA05 | 4.4.18–11 | H37Rv ΔEsx-5A Complement; Culture Filtrate | br18 | tr5 | 2018_0502BrikenDIA05 | may2018 | comp_cf | unknown | 8 | Trypsin | 20171019 | 20180403 | 20180502 | NA | raw/2018_0502BrikenDIA05.raw | mzXML/dia_201805/2018_0502BrikenDIA05.mzXML | NA | NA | NA |
42 | 2018_0502BrikenDIA06 | 4.4.18–12 | H37Rv ΔEsx-5A Complement; Culture Filtrate | br19 | tr5 | 2018_0502BrikenDIA06 | may2018 | comp_cf | unknown | 8 | Trypsin | 20171019 | 20180403 | 20180502 | NA | raw/2018_0502BrikenDIA06.raw | mzXML/dia_201805/2018_0502BrikenDIA06.mzXML | NA | NA | NA |
Now let us plot the DIA sample data.
# Only extract from the mzXML files named in the "Filename" column when no
# savefile is present; otherwise restore whatever objects the savefile holds.
# savefile is also passed to the extraction (presumably so it gets written
# there -- confirm against extract_mzxml_data()).
if (!file.exists(savefile)) {
  mzxml_data <- sm(extract_mzxml_data(
    metadata,
    file_column="Filename",
    savefile=savefile))
} else {
  load(savefile)
}
# Boxplot of the DIA mzxml_data using plot_mzxml_boxplot()'s default
# table/column.  The call is not wrapped in sm(), and the helper's messages
# are reproduced in the '## ' lines below.
intensity_boxplot <- plot_mzxml_boxplot(mzxml_data)
## Adding 2018_0502BrikenDIA07
## Adding 2018_0502BrikenDIA08
## Adding 2018_0502BrikenDIA09
## Adding 2018_0502BrikenDIA10
## Adding 2018_0502BrikenDIA11
## Adding 2018_0502BrikenDIA12
## Adding 2018_0502BrikenDIA01
## Adding 2018_0502BrikenDIA02
## Adding 2018_0502BrikenDIA03
## Adding 2018_0502BrikenDIA04
## Adding 2018_0502BrikenDIA05
## Adding 2018_0502BrikenDIA06
## This data will benefit from being displayed on the log scale.
## If this is not desired, set scale='raw'
## Some entries are 0. We are on log scale, adding 1 to the data.
## Changed 213990 zero count features.
# Hand the plot to pp() along with the image file to write.
pp(file="images/201805_dia_mzxml_intensities.png", image=intensity_boxplot)
## Writing the image to: images/201805_dia_mzxml_intensities.png and calling dev.off().
# Boxplot of the "peakscount" column of the "scans" table for the DIA data.
# NOTE(review): the variable and file are named 'retention', but the column
# requested is peakscount -- confirm which was intended.
retention_boxplot <- plot_mzxml_boxplot(mzxml_data, table="scans", column="peakscount")
## Adding 2018_0502BrikenDIA07
## Adding 2018_0502BrikenDIA08
## Adding 2018_0502BrikenDIA09
## Adding 2018_0502BrikenDIA10
## Adding 2018_0502BrikenDIA11
## Adding 2018_0502BrikenDIA12
## Adding 2018_0502BrikenDIA01
## Adding 2018_0502BrikenDIA02
## Adding 2018_0502BrikenDIA03
## Adding 2018_0502BrikenDIA04
## Adding 2018_0502BrikenDIA05
## Adding 2018_0502BrikenDIA06
# Hand the plot to pp() along with the image file to write.
pp(file="images/201805_dia_mzxml_retention.png", image=retention_boxplot)
## Writing the image to: images/201805_dia_mzxml_retention.png and calling dev.off().
# Boxplot of the "basepeakmz" column of the "scans" table for the DIA data.
mz_boxplot <- plot_mzxml_boxplot(mzxml_data, table="scans", column="basepeakmz")
## Adding 2018_0502BrikenDIA07
## Adding 2018_0502BrikenDIA08
## Adding 2018_0502BrikenDIA09
## Adding 2018_0502BrikenDIA10
## Adding 2018_0502BrikenDIA11
## Adding 2018_0502BrikenDIA12
## Adding 2018_0502BrikenDIA01
## Adding 2018_0502BrikenDIA02
## Adding 2018_0502BrikenDIA03
## Adding 2018_0502BrikenDIA04
## Adding 2018_0502BrikenDIA05
## Adding 2018_0502BrikenDIA06
# Hand the plot to pp() along with the image file to write.
pp(file="images/201805_dia_mzxml_mzbase.png", image=mz_boxplot)
## Writing the image to: images/201805_dia_mzxml_mzbase.png and calling dev.off().
# Boxplot of the "basepeakintensity" column of the "scans" table for the DIA
# data.
scanintensity_boxplot <- plot_mzxml_boxplot(mzxml_data, table="scans", column="basepeakintensity")
## Adding 2018_0502BrikenDIA07
## Adding 2018_0502BrikenDIA08
## Adding 2018_0502BrikenDIA09
## Adding 2018_0502BrikenDIA10
## Adding 2018_0502BrikenDIA11
## Adding 2018_0502BrikenDIA12
## Adding 2018_0502BrikenDIA01
## Adding 2018_0502BrikenDIA02
## Adding 2018_0502BrikenDIA03
## Adding 2018_0502BrikenDIA04
## Adding 2018_0502BrikenDIA05
## Adding 2018_0502BrikenDIA06
# Hand the plot to pp() along with the image file to write.
pp(file="images/201805_dia_mzxml_scanintensity.png", image=scanintensity_boxplot)
## Writing the image to: images/201805_dia_mzxml_scanintensity.png and calling dev.off().
# Intensity against m/z for the DIA data with both axes on the log scale.
intensity_wrt_mz <- plot_intensity_mz(mzxml_data, x_scale="log", y_scale="log")
## Adding 2018_0502BrikenDIA07
## Adding 2018_0502BrikenDIA08
## Adding 2018_0502BrikenDIA09
## Adding 2018_0502BrikenDIA10
## Adding 2018_0502BrikenDIA11
## Adding 2018_0502BrikenDIA12
## Adding 2018_0502BrikenDIA01
## Adding 2018_0502BrikenDIA02
## Adding 2018_0502BrikenDIA03
## Adding 2018_0502BrikenDIA04
## Adding 2018_0502BrikenDIA05
## Adding 2018_0502BrikenDIA06
# NOTE(review): the whole return value is passed to pp() here, whereas the
# DDA section passes intensity_wrt_mz$plot -- confirm which form is intended.
pp(file="images/201805_dia_intensity_wrt_mz_dia.png", image=intensity_wrt_mz)
## Writing the image to: images/201805_dia_intensity_wrt_mz_dia.png and calling dev.off().
## Warning in guide_merge.legend(init, x[[i]]): Duplicated override.aes is
## ignored.
## Warning in guide_merge.legend(init, x[[i]]): Duplicated override.aes is
## ignored.
# Run the transition-library and DIA invocation scripts, first with
# type="whole" and then with type="cf", logging each script's output.
#
# The file redirection is written first and 2>&1 last so that stderr ends up
# in the same log file as stdout.  With the reverse order (2>&1 1>file) only
# stdout reaches the file and stderr stays on the original stdout.
export type="whole"
./transition_library_201805.sh 1>transition_library_hcd_201805.out 2>&1
cat transition_library_hcd_201805.out
export type="whole"
./dia_invocation_hcd_201805.sh 1>openswath_invocation_201805.out 2>&1
# NOTE(review): the cf runs below write to the same log file names as the
# whole runs above, so the earlier logs are overwritten -- confirm intended.
export type="cf"
./transition_library_201805.sh 1>transition_library_hcd_201805.out 2>&1
cat transition_library_hcd_201805.out
./dia_invocation_hcd_201805.sh 1>openswath_invocation_201805.out 2>&1
# Unless skip_load is set to TRUE, report the hpgltools commit, build a
# versioned save-file name from rmd_file and ver, pass it to saveme(), and
# print the session information.
if (!isTRUE(get0("skip_load"))) {
  message(paste0("This is hpgltools commit: ", get_git_commit()))
  # Remove only a trailing ".Rmd" extension.  The argument is spelled
  # 'replacement' in full rather than relying on partial argument matching.
  this_save <- paste0(gsub(pattern="\\.Rmd$", replacement="", x=rmd_file), "-v", ver, ".rda.xz")
  message(paste0("Saving to ", this_save))
  tmp <- sm(saveme(filename=this_save))
  pander::pander(sessionInfo())
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 8de6008c66ed31502abbdef703c69345f86a6eed
## R> packrat::restore()
## This is hpgltools commit: Thu May 17 11:08:19 2018 -0400: 8de6008c66ed31502abbdef703c69345f86a6eed
## Saving to 01_preprocessing_comet_20180508-v20180215.rda.xz
R version 3.4.4 (2018-03-15)
Platform: x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C
attached base packages: stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: hpgltools(v.2018.03)
loaded via a namespace (and not attached): Rcpp(v.0.12.16), RColorBrewer(v.1.1-2), compiler(v.3.4.4), pillar(v.1.2.2), plyr(v.1.8.4), highr(v.0.6), base64enc(v.0.1-3), iterators(v.1.0.9), tools(v.3.4.4), digest(v.0.6.15), evaluate(v.0.10.1), memoise(v.1.1.0), tibble(v.1.4.2), gtable(v.0.2.0), rlang(v.0.2.0.9001), openxlsx(v.4.0.17), foreach(v.1.4.4), commonmark(v.1.5), yaml(v.2.1.19), parallel(v.3.4.4), withr(v.2.1.2), stringr(v.1.3.1), knitr(v.1.20), roxygen2(v.6.0.1), xml2(v.1.2.0), devtools(v.1.13.5), rprojroot(v.1.3-2), grid(v.3.4.4), data.table(v.1.11.2), Biobase(v.2.38.0), R6(v.2.2.2), rmarkdown(v.1.9), pander(v.0.6.1), ggplot2(v.2.2.1), magrittr(v.1.5), backports(v.1.1.2), scales(v.0.5.0.9000), codetools(v.0.2-15), htmltools(v.0.3.6), BiocGenerics(v.0.24.0), colorspace(v.1.3-2), stringi(v.1.2.2), lazyeval(v.0.2.1) and munsell(v.0.4.3)