The following invocations of OpenMS/pyprophet/tric are included in:
This script includes the process for generating a local, comet-based transition library from DDA samples generated locally as well as the steps used when processing the downloaded transition libraries.
In this first block, I will set a couple of variables and source a file containing the parameters for the rest of the script.
The raw data files provide opportunities to make sure that the later invocations of openMS/etc will actually work; for example, if there are too few transitions observed here, one should not be surprised if pyprophet and tric fail later.
sample_sheet <- glue::glue("sample_sheets/Mtb_dia_samples_{ver}.xlsx")
savefile <- "mzxml_dia_data_20180913.rda"
metadata <- openxlsx::read.xlsx(sample_sheet)
## Error in read.xlsx.default(sample_sheet): File does not exist.
## Error in grepl(pattern = "^#", x = metadata[["sampleid"]]): object 'metadata' not found
## Error in eval(expr, envir, enclos): object 'metadata' not found
## Error in knitr::kable(metadata): object 'metadata' not found
## Error in grep(pattern = "2018_0817", x = metadata[["sampleid"]]): object 'metadata' not found
## Error in eval(expr, envir, enclos): object 'metadata' not found
if (file.exists(savefile)) {
load(savefile)
} else {
mzxml_data <- extract_msraw_data(metadata, parallel=FALSE,
allow_window_overlap=FALSE,
file_column="Filename",
savefile=savefile)
mzml_data <- extract_msraw_data(metadata, parallel=FALSE,
format="mzML",
allow_window_overlap=FALSE,
file_column="mzmlfile",
savefile="testing.rda")
}
## Error in extract_msraw_data(metadata, parallel = FALSE, allow_window_overlap = FALSE, : object 'metadata' not found
## Error in plot_mzxml_boxplot(mzxml_data): object 'mzxml_data' not found
## Error in pp(file = paste0("images/20180913_dia_mzxml_intensities-v", ver, : object 'intensity_boxplot' not found
## Error in plot_mzxml_boxplot(mzxml_data, table = "scans", column = "peakscount"): object 'mzxml_data' not found
## Error in pp(file = paste0("images/20180913_dia_mzxml_retention-v", ver, : object 'retention_boxplot' not found
## Error in plot_mzxml_boxplot(mzxml_data, table = "scans", column = "basepeakmz"): object 'mzxml_data' not found
## Error in pp(file = paste0("images/20180913_dia_mzxml_mzbase-v", ver, ".png"), : object 'mz_boxplot' not found
## Error in plot_mzxml_boxplot(mzxml_data, table = "scans", column = "basepeakintensity"): object 'mzxml_data' not found
## Error in pp(file = paste0("images/20180913_dia_mzxml_scanintensity-v", : object 'scanintensity_boxplot' not found
In block 15 of dia_invocation_20180913.sh the command used is prested and copied here for interactive running.
echo "Converting the Tuberculist libraries to pqp."
TUBERCULIST_TRAML="preprocessing/05spectral_libraries/Mtb_TubercuList-R27_iRT_UPS_noMox_noMC_sall_osw_decoy.TraML"
echo "The input is: ${TUBERCULIST_TRAML}"
echo "The output is: ${TUBERCULIST_PQP}"
mkdir -p $(dirname ${TUBERCULIST_PQP})
TargetedFileConverter \
-in "${TUBERCULIST_TRAML}" \
-in_type TraML \
-out "${TUBERCULIST_PQP}" \
-out_type pqp \
2>"${TUBERCULIST_PQP}_convert.log" 1>&2
block 16 contains the commands used to run openswathworkflow and pyprophet. Those are repeated here in order to test them interactively when needed.
echo "Invoking the OpenSwathWorkflow using the tuberculist transitions."
base_mzxmldir="preprocessing/01mzXML/dia/${VERSION}"
swath_inputs=$(/bin/ls "${base_mzxmldir}")
echo "Checking in, the inputs are: ${swath_inputs}"
mkdir -p "${TUBERCULIST_OUTDIR}"
pypdir="${PYPROPHET_OUTDIR}_tuberculist"
echo "Creating pyprophet output directory: ${pypdir}."
mkdir -p "${pypdir}"
for input in ${swath_inputs}
do
in_mzxml="${base_mzxmldir}/${input}"
name=$(basename "${input}" .mzXML)
echo "Starting openswath run of ${name} using ${MZ_WINDOWS} windows at $(date)."
tb_output_prefix="${TUBERCULIST_OUTDIR}/${name}_vs_${VERSION}_${TYPE}_${DDA_METHOD}_dia"
pyprophet_output_prefix="${pypdir}/${name}_vs_${VERSION}_${TYPE}_${DDA_METHOD}_dia"
echo "Deleting previous swath output file: ${tb_output_prefix}.osw"
rm -f "${tb_output_prefix}.osw"
OpenSwathWorkflow \
-force \
-ini "parameters/openms_${VERSION}.ini" \
-in "${in_mzxml}" \
-swath_windows_file "windows/openswath_${name}.txt" \
-tr "${TUBERCULIST_PQP}" \
-out_osw "${tb_output_prefix}.osw" \
2>"${tb_output_prefix}_osw.log" 1>&2
if [[ "$?" -ne "0" ]]; then
echo "OpenSwathWorkflow for ${name} failed."
fi
rm -f "${tb_output_prefix}_scored.osw"
echo "Scoring individual swath run: ${tb_output_prefix}"
pyprophet \
score \
--in "${tb_output_prefix}.osw" \
--out "${pyprophet_output_prefix}_scored.osw" \
2>>"${pyprophet_output_prefix}_pyprophet_all.log" 1>&2
rm -f "${pyprophet_output_prefix}_scored.tsv"
echo "Exporting individual swath run: to ${pyprophet_output_prefix}_scored.tsv"
pyprophet \
export \
--in "${pyprophet_output_prefix}_scored.osw" \
--out "${pyprophet_output_prefix}_scored.tsv" \
2>>"${pyprophet_output_prefix}_pyprophet_export.log" 1>&2
## ok something is fubar, the stupid tsv files are being written in the cwd as run_filename.tsv
## No matter what I do!
mv "${input}.tsv" "${pyprophet_output_prefix}_scored.tsv"
if [[ "$?" -ne "0" ]]; then
echo "Exporting ${pyprophet_output_prefix}_scored.tsv failed."
fi
done
Finally, block 17 of the invocation script provides the command used to make the final, feature-aligned data which is used by SWATH2stats and friends. In addition, it generates a matrix of intensities by sample and some metadata. Once again, it is copy/pasted here to allow interactive testing.
tric_tb="${TRIC_OUTDIR}_tuberculist"
mkdir -p "${tric_tb}"
feature_alignment.py \
--force \
--in "./${pypdir}/"*.tsv \
--out "${tric_tb}/${SEARCH_METHOD}_${DDA_METHOD}.tsv" \
--out_matrix "${tric_tb}/${DDA_METHOD}_outmatrix.tsv" \
--out_meta "${tric_tb}/${DDA_METHOD}_meta.tsv" \
2>"${tric_tb}/feature_alignment.err" \
1>"${tric_tb}/feature_alignment.out"
echo "Wrote final output to ${tric_tb}/${SEARCH_METHOD}_${DDA_METHOD}.tsv"
if (!isTRUE(get0("skip_load"))) {
message(paste0("This is hpgltools commit: ", get_git_commit()))
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
tmp <- sm(saveme(filename=this_save))
pander::pander(sessionInfo())
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 8ca465bb9928ffe95082f64aed9cf64799bbf8e6
## This is hpgltools commit: Wed Jul 31 16:40:59 2019 -0400: 8ca465bb9928ffe95082f64aed9cf64799bbf8e6
## Saving to 01_preprocessing_comet_20180806-v20180913.rda.xz
R version 3.6.0 (2019-04-26)
Platform: x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.UTF-8, LC_NUMERIC=C, LC_TIME=en_US.UTF-8, LC_COLLATE=en_US.UTF-8, LC_MONETARY=en_US.UTF-8, LC_MESSAGES=en_US.UTF-8, LC_PAPER=en_US.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.UTF-8 and LC_IDENTIFICATION=C
attached base packages: parallel, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: hpgltools(v.1.0), Biobase(v.2.44.0) and BiocGenerics(v.0.30.0)
loaded via a namespace (and not attached): backports(v.1.1.4), fastmatch(v.1.1-0), plyr(v.1.8.4), igraph(v.1.2.4.1), lazyeval(v.0.2.2), splines(v.3.6.0), BiocParallel(v.1.18.1), usethis(v.1.5.1), GenomeInfoDb(v.1.20.0), ggplot2(v.3.2.1), urltools(v.1.7.3), sva(v.3.32.1), digest(v.0.6.20), foreach(v.1.4.7), htmltools(v.0.3.6), GOSemSim(v.2.10.0), viridis(v.0.5.1), GO.db(v.3.8.2), gdata(v.2.18.0), magrittr(v.1.5), memoise(v.1.1.0), doParallel(v.1.0.15), openxlsx(v.4.1.0.1), limma(v.3.40.6), remotes(v.2.1.0), Biostrings(v.2.52.0), annotate(v.1.62.0), matrixStats(v.0.54.0), enrichplot(v.1.4.0), prettyunits(v.1.0.2), colorspace(v.1.4-1), blob(v.1.2.0), ggrepel(v.0.8.1), xfun(v.0.8), dplyr(v.0.8.3), callr(v.3.3.1), crayon(v.1.3.4), RCurl(v.1.95-4.12), jsonlite(v.1.6), genefilter(v.1.66.0), lme4(v.1.1-21), zeallot(v.0.1.0), survival(v.2.44-1.1), iterators(v.1.0.12), glue(v.1.3.1), polyclip(v.1.10-0), gtable(v.0.3.0), zlibbioc(v.1.30.0), XVector(v.0.24.0), UpSetR(v.1.4.0), DelayedArray(v.0.10.0), pkgbuild(v.1.0.4), scales(v.1.0.0), DOSE(v.3.10.2), DBI(v.1.0.0), Rcpp(v.1.0.2), viridisLite(v.0.3.0), xtable(v.1.8-4), progress(v.1.2.2), gridGraphics(v.0.4-1), bit(v.1.1-14), europepmc(v.0.3), stats4(v.3.6.0), httr(v.1.4.1), fgsea(v.1.10.0), gplots(v.3.0.1.1), RColorBrewer(v.1.1-2), pkgconfig(v.2.0.2), XML(v.3.98-1.20), farver(v.1.1.0), ggplotify(v.0.0.4), tidyselect(v.0.2.5), rlang(v.0.4.0), reshape2(v.1.4.3), AnnotationDbi(v.1.46.0), munsell(v.0.5.0), tools(v.3.6.0), cli(v.1.1.0), RSQLite(v.2.1.2), ggridges(v.0.5.1), devtools(v.2.1.0), evaluate(v.0.14), stringr(v.1.4.0), yaml(v.2.2.0), processx(v.3.4.1), knitr(v.1.24), bit64(v.0.9-7), fs(v.1.3.1), pander(v.0.6.3), zip(v.2.0.3), caTools(v.1.17.1.2), purrr(v.0.3.2), ggraph(v.1.0.2), nlme(v.3.1-141), xml2(v.1.2.2), DO.db(v.2.9), biomaRt(v.2.40.3), compiler(v.3.6.0), pbkrtest(v.0.4-7), rstudioapi(v.0.10), variancePartition(v.1.14.0), testthat(v.2.2.1), tibble(v.2.1.3), tweenr(v.1.0.1), stringi(v.1.4.3), ps(v.1.3.0), GenomicFeatures(v.1.36.4), desc(v.1.2.0), lattice(v.0.20-38), Matrix(v.1.2-17), nloptr(v.1.2.1), vctrs(v.0.2.0), pillar(v.1.4.2), triebeard(v.0.3.0), data.table(v.1.12.2), cowplot(v.1.0.0), bitops(v.1.0-6), rtracklayer(v.1.44.2), GenomicRanges(v.1.36.0), qvalue(v.2.16.0), colorRamps(v.2.3), R6(v.2.4.0), KernSmooth(v.2.23-15), gridExtra(v.2.3), IRanges(v.2.18.1), sessioninfo(v.1.1.1), codetools(v.0.2-16), boot(v.1.3-23), MASS(v.7.3-51.4), gtools(v.3.8.1), assertthat(v.0.2.1), pkgload(v.1.0.2), SummarizedExperiment(v.1.14.1), rprojroot(v.1.3-2), withr(v.2.1.2), GenomicAlignments(v.1.20.1), Rsamtools(v.2.0.0), S4Vectors(v.0.22.0), GenomeInfoDbData(v.1.2.1), mgcv(v.1.8-28), hms(v.0.5.0), clusterProfiler(v.3.12.0), grid(v.3.6.0), tidyr(v.0.8.3), minqa(v.1.2.4), rvcheck(v.0.1.3), rmarkdown(v.1.14), ggforce(v.0.3.0) and base64enc(v.0.1-3)