In the following blocks I use DIA-Umpire to create transition libraries for OpenSWATH, then run OpenSWATH and score the resulting runs.
## Run DIA-Umpire signal extraction on every ${type} file found under
## ${base_input}, skipping inputs whose <name>_Q1.mgf already exists there.
## NOTE(review): this assumes DIA_Umpire_SE.jar writes its _Q1.mgf next to the
## input file -- confirm against the DIA-Umpire documentation.
cd ~/scratch/proteomics/mycobacterium_tuberculosis_2018
module add openms
type="mzXML"
export VERSION="20190228"
basedir="${HOME}/scratch/proteomics/mycobacterium_tuberculosis_2018"
base_input="${basedir}/results/01${type}/dia/${VERSION}/"
umpire_inputs=$(/usr/bin/find "${base_input}" -name "*.${type}" | sort)
echo "Checking in: ${umpire_inputs}"
## The list is intentionally word-split; input paths must not contain whitespace.
for input in ${umpire_inputs}; do
  in_name=$(basename "${input}" ".${type}")
  out_name="${in_name}_Q1.mgf"
  ## Only skip when the output is really there (the original test was inverted
  ## and reported "already exists" for missing files).
  if [[ -f "${base_input}/${out_name}" ]]; then
    echo "The output file: ${out_name} already exists."
  else
    java -jar DIA_Umpire_SE.jar "${input}" diaumpire_se.params
  fi
done
## Run ProteinProphet on the xinteract peptide results; the second path
## (combined.prot.xml) is presumably the output file -- confirm against the TPP docs.
ProteinProphet \
results/04_dia_umpire_xinteract/interact.comet.pep.xml \
results/05_dia_umpire_prophet/combined.prot.xml
## Run InterProphetParser on the same pep.xml with DECOY=DECOY; the second
## path (iProphet.pep.xml) is presumably the output file.
InterProphetParser \
DECOY=DECOY \
results/04_dia_umpire_xinteract/interact.comet.pep.xml \
results/05_dia_umpire_prophet/iProphet.pep.xml
## Run Mayu on the iProphet result (-A) with reference/mtb_irt.fasta (-C);
## -E DECOY presumably names the decoy prefix, matching DECOY= above.
Mayu.pl \
-A results/05_dia_umpire_prophet/iProphet.pep.xml \
-C reference/mtb_irt.fasta \
-E DECOY
## Rerunning because writing the file failed.
## First spectrast pass over iProphet.pep.xml. The filter string excludes
## entries whose Protein matches DECOY_, and reference/irt.txt is passed via
## -c_IRT. NOTE(review): -cN presumably sets the library name (SpecLib) and
## -cP the probability cutoff -- confirm against the SpectraST documentation.
spectrast \
-cNSpecLib -cICID-QTOF \
-cf "Protein! ~ DECOY_" \
-cP0.4237 \
-c_IRTreference/irt.txt \
-c_IRR results/05_dia_umpire_prophet/iProphet.pep.xml
## Second spectrast pass over SpecLib.splib, named SpecLib_cons
## (presumably the consensus library -- TODO confirm the meaning of -cAC).
spectrast \
-cNSpecLib_cons \
-cICID-QTOF \
-cAC SpecLib.splib
## Convert SpecLib_cons.sptxt into SpecLib_cons_openswath.tsv (-a) in the
## "openswath" format (-k), using the window definitions given with -w.
## The remaining flags are spectrast2tsv.py selection options; see its help
## output for their exact meaning.
spectrast2tsv.py \
-l 350,2000 \
-s b,y \
-x 1,2 \
-o 6 \
-n 6 \
-p 0.05 \
-d -e \
-k openswath \
-w windows/2018_0817BrikenTrypsinDIA19.txt \
-a SpecLib_cons_openswath.tsv \
SpecLib_cons.sptxt
## Convert the openswath-format tsv transition list to TraML.
TargetedFileConverter \
-in SpecLib_cons_openswath.tsv \
-in_type tsv \
-out SpecLib_cons_openswath.TraML \
-out_type TraML
## Write SpecLib_cons_openswath_decoy.TraML from the TraML library using the
## "shuffle" method. The similarity/identity options below are left disabled.
OpenSwathDecoyGenerator \
-in SpecLib_cons_openswath.TraML \
-out SpecLib_cons_openswath_decoy.TraML \
-method shuffle
## -exclude_similar \
## -similarity_threshold 0.05 \
## -identity_threshold 0.7
## Convert the decoy TraML library back to tsv ...
TargetedFileConverter \
-in SpecLib_cons_openswath_decoy.TraML \
-in_type TraML \
-out SpecLib_cons_openswath_decoy.tsv \
-out_type tsv
## ... and to pqp, which is the file handed to OpenSwathWorkflow (-tr) and
## to pyprophet merge (--template) later in this document.
TargetedFileConverter \
-in SpecLib_cons_openswath_decoy.TraML \
-in_type TraML \
-out SpecLib_cons_openswath_decoy.pqp \
-out_type pqp
## Run OpenSwathWorkflow on every ${input_type} file of the chosen VERSION,
## using the DIA-Umpire derived transition library, writing one .tsv and one
## .osw per input.
## SWATH_OUTDIR, PYPROPHET_OUTDIR, MZ_WINDOWS and DDA_METHOD are not set here;
## presumably they come from the sourced settings file -- TODO confirm.
export VERSION="${VERSION:-20190327}"
echo "Loading environment modules and parameters for version: ${VERSION}."
source "parameters/${VERSION}_settings.sh"
echo "Invoking the OpenSwathWorkflow using local comet-derived transitions."
type="diaumpire"
input_type="mzXML"
export TRANSITION_PREFIX="SpecLib_cons_openswath_decoy"
echo "Checking in, the transition library is: ${TRANSITION_PREFIX}.pqp"
base_mzxmldir="results/01${input_type}/dia/${VERSION}"
## The -name pattern must be quoted, otherwise the shell may expand the glob
## against the current directory before find ever sees it.
swath_inputs=$(/usr/bin/find "${base_mzxmldir}" -name "*.${input_type}" -print | sort)
echo "Checking in, the inputs are: ${swath_inputs}"
mkdir -p "${SWATH_OUTDIR}_${type}"
pypdir="${PYPROPHET_OUTDIR}_${type}"
mkdir -p "${pypdir}"
## The list is intentionally word-split; input paths must not contain whitespace.
for input in ${swath_inputs}; do
  name=$(basename "${input}" ".${input_type}")
  echo "Starting openswath run, library type ${type} for ${name} using ${MZ_WINDOWS} windows at $(date)."
  swath_output_prefix="${SWATH_OUTDIR}_${type}/${name}_${DDA_METHOD}"
  pyprophet_output_prefix="${PYPROPHET_OUTDIR}_${type}/${name}_${DDA_METHOD}"
  echo "Deleting previous swath output file: ${swath_output_prefix}.osw"
  rm -f "${swath_output_prefix}.osw"
  rm -f "${swath_output_prefix}.tsv"
  ## Arguments shared by both invocations; only the output flag differs.
  osw_args=(
    -in "${input}"
    -force
    -sort_swath_maps
    -min_upper_edge_dist 1
    -mz_correction_function "quadratic_regression_delta_ppm"
    -Scoring:TransitionGroupPicker:background_subtraction "original"
    -Scoring:stop_report_after_feature "5"
    -swath_windows_file "windows/openswath_${name}.txt"
    -tr "${TRANSITION_PREFIX}.pqp"
  )
  ## The workflow is run twice on purpose: once for the tsv output and once
  ## for the osw output that pyprophet merges afterwards.
  OpenSwathWorkflow "${osw_args[@]}" -out_tsv "${swath_output_prefix}.tsv"
  OpenSwathWorkflow "${osw_args[@]}" -out_osw "${swath_output_prefix}.osw"
  ##2>"${swath_output_prefix}_osw.log" 1>&2
done
## Merge the per-run .osw files into one openswath_merged.osw, score it with
## pyprophet, export it, and move the export next to the merged file.
## Relies on swath_output_prefix, pyprophet_output_prefix and
## TRANSITION_PREFIX left behind by the OpenSwathWorkflow loop.
swath_out=$(dirname "${swath_output_prefix}")
pyprophet_out="$(dirname "${pyprophet_output_prefix}")/openswath_merged.osw"
echo "Merging osw files to ${pyprophet_out}"
pyprophet merge \
  --template "${TRANSITION_PREFIX}.pqp" \
  --out="${pyprophet_out}" \
  "${swath_out}"/*.osw
pyprophet score --in="${pyprophet_out}"
pyprophet export --in="${pyprophet_out}" --out "test.tsv"
## pyprophet always exports to the current working directory.
final_name="$(dirname "${pyprophet_out}")/$(basename "${pyprophet_out}" ".osw").tsv"
echo "${final_name}"
## The original mv had no destination and therefore always failed; move the
## export to the name computed above.
mv "test.tsv" "${final_name}"
ls -ld "${pyprophet_out}"
## Align features across runs with TRIC's feature_alignment.py, reading every
## exported pyprophet tsv in ${pypdir} and writing the aligned table, the
## matrix and the metadata into ${tric_tb}.
## TRIC_OUTDIR, SEARCH_METHOD and DDA_METHOD are not set here; presumably they
## come from the sourced settings file -- TODO confirm.
tric_tb="${TRIC_OUTDIR}_tuberculist"
mkdir -p "${tric_tb}"
## Every argument line needs its continuation backslash: without the one after
## --out_meta the two redirections ran as a separate, empty command and the
## tool's output was never captured.
feature_alignment.py \
  --force \
  --in "./${pypdir}/"*.tsv \
  --out "${tric_tb}/${SEARCH_METHOD}_${DDA_METHOD}.tsv" \
  --out_matrix "${tric_tb}/${DDA_METHOD}_outmatrix.tsv" \
  --out_meta "${tric_tb}/${DDA_METHOD}_meta.tsv" \
  2>"${tric_tb}/feature_alignment.err" \
  1>"${tric_tb}/feature_alignment.out"
echo "Wrote final output to ${tric_tb}/${SEARCH_METHOD}_${DDA_METHOD}.tsv"
Thanks to Vivek, I am now aware of DEP, which does everything I wish MSstats did. I think the matrix produced by TRIC's feature_alignment.py, together with my annotations and sample sheet, provides what DEP requires.
Let us see if this is true.
## Paths to the reference files used in this document.
mtb_cds <- "reference/mtb_cds.fasta"
mtb_genome <- "reference/mtuberculosis_h37rv_genbank.fasta"
mtb_gff <- "reference/mycobacterium_tuberculosis_h37rv_2.gff.gz"

## Load the gene-level gff annotations and tidy them: drop dots from the
## column names, turn '+' into spaces in the free-text columns, and use the
## ID column as row names.
mtb_annotations <- sm(load_gff_annotations(mtb_gff, type="gene"))
colnames(mtb_annotations) <- gsub("\\.", "", colnames(mtb_annotations))
for (text_column in c("description", "function")) {
  mtb_annotations[[text_column]] <- gsub("\\+", " ", mtb_annotations[[text_column]])
}
rownames(mtb_annotations) <- mtb_annotations[["ID"]]

## Additional annotations from microbesonline for id 83332.
mtb_microbes <- load_microbesonline_annotations(id=83332)
## The species being downloaded is: Mycobacterium tuberculosis H37Rv
## Read the TRIC output of the DIA-Umpire based run for this version.
ver <- "20190327"
ump_data <- read.csv(
  file=paste0("results/tric/", ver, "/whole_8mz_dia_umpire/comet_HCD.tsv"),
  sep="\t")
## Keep only the part of each protein name before its last underscore.
ump_data[["ProteinName"]] <- gsub("^(.*)_.*$", "\\1", ump_data[["ProteinName"]])
## Sample sheet for these runs; print its column names for reference.
sample_annot <- extract_metadata("sample_sheets/Mtb_dia_samples_20190521.xlsx")
colnames(sample_annot)
## [1] "sampleid" "tubeid" "tubelabel"
## [4] "figurereplicate" "figurename" "sampledescription"
## [7] "bioreplicate" "lcrun" "msrun"
## [10] "technicalreplicate" "replicatestate" "run"
## [13] "exptid" "genotype" "collectiontype"
## [16] "condition" "batch" "windowsize"
## [19] "enzyme" "harvestdate" "prepdate"
## [22] "rundate" "runinfo" "rawfile"
## [25] "filename" "mzmlfile" "diascored"
## [28] "tuberculistscored" "includeexclude"
## [1] "s2018_0315Briken01" "s2018_0315Briken02"
## [3] "s2018_0315Briken03" "s2018_0315Briken04"
## [5] "s2018_0315Briken05" "s2018_0315Briken06"
## [7] "s2018_0315Briken21" "s2018_0315Briken22"
## [9] "s2018_0315Briken23" "s2018_0315Briken24"
## [11] "s2018_0315Briken25" "s2018_0315Briken26"
## [13] "s2018_0502BrikenDIA01" "s2018_0502BrikenDIA02"
## [15] "s2018_0502BrikenDIA03" "s2018_0502BrikenDIA04"
## [17] "s2018_0502BrikenDIA05" "s2018_0502BrikenDIA06"
## [19] "s2018_0502BrikenDIA07" "s2018_0502BrikenDIA08"
## [21] "s2018_0502BrikenDIA09" "s2018_0502BrikenDIA10"
## [23] "s2018_0502BrikenDIA11" "s2018_0502BrikenDIA12"
## [25] "s2018_0726Briken01" "s2018_0726Briken02"
## [27] "s2018_0726Briken03" "s2018_0726Briken04"
## [29] "s2018_0726Briken05" "s2018_0726Briken06"
## [31] "s2018_0726Briken07" "s2018_0726Briken08"
## [33] "s2018_0726Briken09" "s2018_0726Briken11"
## [35] "s2018_0726Briken12" "s2018_0726Briken13"
## [37] "s2018_0726Briken14" "s2018_0726Briken15"
## [39] "s2018_0726Briken16" "s2018_0726Briken17"
## [41] "s2018_0726Briken18" "s2018_0726Briken19"
## [43] "s2018_0817BrikenTrypsinDIA01" "s2018_0817BrikenTrypsinDIA02"
## [45] "s2018_0817BrikenTrypsinDIA03" "s2018_0817BrikenTrypsinDIA04"
## [47] "s2018_0817BrikenTrypsinDIA05" "s2018_0817BrikenTrypsinDIA06"
## [49] "s2018_0817BrikenTrypsinDIA07" "s2018_0817BrikenTrypsinDIA08"
## [51] "s2018_0817BrikenTrypsinDIA09" "s2018_0817BrikenTrypsinDIA11"
## [53] "s2018_0817BrikenTrypsinDIA12" "s2018_0817BrikenTrypsinDIA13"
## [55] "s2018_0817BrikenTrypsinDIA14" "s2018_0817BrikenTrypsinDIA15"
## [57] "s2018_0817BrikenTrypsinDIA16" "s2018_0817BrikenTrypsinDIA17"
## [59] "s2018_0817BrikenTrypsinDIA18" "s2018_0817BrikenTrypsinDIA19"
## [1] "results/01mzXML/dia/20190327/2018_0315Briken01.mzXML"
## [2] "results/01mzXML/dia/20190327/2018_0315Briken02.mzXML"
## [3] "results/01mzXML/dia/20190327/2018_0315Briken03.mzXML"
## [4] "results/01mzXML/dia/20190327/2018_0315Briken04.mzXML"
## [5] "results/01mzXML/dia/20190327/2018_0315Briken05.mzXML"
## [6] "results/01mzXML/dia/20190327/2018_0315Briken06.mzXML"
## [7] "results/01mzXML/dia/20190327/2018_0315Briken21.mzXML"
## [8] "results/01mzXML/dia/20190327/2018_0315Briken22.mzXML"
## [9] "results/01mzXML/dia/20190327/2018_0315Briken23.mzXML"
## [10] "results/01mzXML/dia/20190327/2018_0315Briken24.mzXML"
## [11] "results/01mzXML/dia/20190327/2018_0315Briken25.mzXML"
## [12] "results/01mzXML/dia/20190327/2018_0315Briken26.mzXML"
## [13] "results/01mzXML/dia/20190327/2018_0502BrikenDIA01.mzXML"
## [14] "results/01mzXML/dia/20190327/2018_0502BrikenDIA02.mzXML"
## [15] "results/01mzXML/dia/20190327/2018_0502BrikenDIA03.mzXML"
## [16] "results/01mzXML/dia/20190327/2018_0502BrikenDIA04.mzXML"
## [17] "results/01mzXML/dia/20190327/2018_0502BrikenDIA05.mzXML"
## [18] "results/01mzXML/dia/20190327/2018_0502BrikenDIA06.mzXML"
## [19] "results/01mzXML/dia/20190327/2018_0502BrikenDIA07.mzXML"
## [20] "results/01mzXML/dia/20190327/2018_0502BrikenDIA08.mzXML"
## [21] "results/01mzXML/dia/20190327/2018_0502BrikenDIA09.mzXML"
## [22] "results/01mzXML/dia/20190327/2018_0502BrikenDIA10.mzXML"
## [23] "results/01mzXML/dia/20190327/2018_0502BrikenDIA11.mzXML"
## [24] "results/01mzXML/dia/20190327/2018_0502BrikenDIA12.mzXML"
## [25] "results/01mzXML/dia/20190327/2018_0726Briken01.mzXML"
## [26] "results/01mzXML/dia/20190327/2018_0726Briken02.mzXML"
## [27] "results/01mzXML/dia/20190327/2018_0726Briken03.mzXML"
## [28] "results/01mzXML/dia/20190327/2018_0726Briken04.mzXML"
## [29] "results/01mzXML/dia/20190327/2018_0726Briken05.mzXML"
## [30] "results/01mzXML/dia/20190327/2018_0726Briken06.mzXML"
## [31] "results/01mzXML/dia/20190327/2018_0726Briken07.mzXML"
## [32] "results/01mzXML/dia/20190327/2018_0726Briken08.mzXML"
## [33] "results/01mzXML/dia/20190327/2018_0726Briken09.mzXML"
## [34] "results/01mzXML/dia/20190327/2018_0726Briken11.mzXML"
## [35] "results/01mzXML/dia/20190327/2018_0726Briken12.mzXML"
## [36] "results/01mzXML/dia/20190327/2018_0726Briken13.mzXML"
## [37] "results/01mzXML/dia/20190327/2018_0726Briken14.mzXML"
## [38] "results/01mzXML/dia/20190327/2018_0726Briken15.mzXML"
## [39] "results/01mzXML/dia/20190327/2018_0726Briken16.mzXML"
## [40] "results/01mzXML/dia/20190327/2018_0726Briken17.mzXML"
## [41] "results/01mzXML/dia/20190327/2018_0726Briken18.mzXML"
## [42] "results/01mzXML/dia/20190327/2018_0726Briken19.mzXML"
## [43] "results/01mzXML/dia/20190327/2018_0817BrikenTrypsinDIA01.mzXML"
## [44] "results/01mzXML/dia/20190327/2018_0817BrikenTrypsinDIA02.mzXML"
## [45] "results/01mzXML/dia/20190327/2018_0817BrikenTrypsinDIA03.mzXML"
## [46] "results/01mzXML/dia/20190327/2018_0817BrikenTrypsinDIA04.mzXML"
## [47] "results/01mzXML/dia/20190327/2018_0817BrikenTrypsinDIA05.mzXML"
## [48] "results/01mzXML/dia/20190327/2018_0817BrikenTrypsinDIA06.mzXML"
## [49] "results/01mzXML/dia/20190327/2018_0817BrikenTrypsinDIA07.mzXML"
## [50] "results/01mzXML/dia/20190327/2018_0817BrikenTrypsinDIA08.mzXML"
## [51] "results/01mzXML/dia/20190327/2018_0817BrikenTrypsinDIA09.mzXML"
## [52] "results/01mzXML/dia/20190327/2018_0817BrikenTrypsinDIA11.mzXML"
## [53] "results/01mzXML/dia/20190327/2018_0817BrikenTrypsinDIA12.mzXML"
## [54] "results/01mzXML/dia/20190327/2018_0817BrikenTrypsinDIA13.mzXML"
## [55] "results/01mzXML/dia/20190327/2018_0817BrikenTrypsinDIA14.mzXML"
## [56] "results/01mzXML/dia/20190327/2018_0817BrikenTrypsinDIA15.mzXML"
## [57] "results/01mzXML/dia/20190327/2018_0817BrikenTrypsinDIA16.mzXML"
## [58] "results/01mzXML/dia/20190327/2018_0817BrikenTrypsinDIA17.mzXML"
## [59] "results/01mzXML/dia/20190327/2018_0817BrikenTrypsinDIA18.mzXML"
## [60] "results/01mzXML/dia/20190327/2018_0817BrikenTrypsinDIA19.mzXML"
## Loading SWATH2stats
## Found the same mzXML files in the annotations and data.
## Number of non-decoy peptides: 17081
## Number of decoy peptides: 1801
## Decoy rate: 0.1054
## The average FDR by run on assay level is 0.015
## The average FDR by run on peptide level is 0.016
## The average FDR by run on protein level is 0.001
## Target assay FDR: 0.02
## Required overall m-score cutoff: 0.0031623
## achieving assay FDR: 0.0194
## Target protein FDR: 0.02
## Required overall m-score cutoff: 0.01
## achieving protein FDR: 0.00115
## Original dimension: 221952, new dimension: 211415, difference: 10537.
## Peptides need to have been quantified in more conditions than: 48 in order to pass this percentage-based threshold.
## Fraction of peptides selected: 0.00058
## Original dimension: 224796, new dimension: 680, difference: 224116.
## Target protein FDR: 0.01
## Required overall m-score cutoff: 0.01
## achieving protein FDR: 0
## filter_mscore_fdr is filtering the data...
## finding m-score cutoff to achieve desired protein FDR in protein master list..
## finding m-score cutoff to achieve desired global peptide FDR..
## Target peptide FDR: 0.05
## Required overall m-score cutoff: 0.01
## Achieving peptide FDR: 0
## Proteins selected:
## Total proteins selected: 2412
## Final target proteins: 2412
## Final decoy proteins: 0
## Peptides mapping to these protein entries selected:
## Total mapping peptides: 16868
## Final target peptides: 16868
## Final decoy peptides: 0
## Total peptides selected from:
## Total peptides: 16868
## Final target peptides: 16868
## Final decoy peptides: 0
## Individual run FDR quality of the peptides was not calculated
## as not every run contains a decoy.
## The decoys have been removed from the returned data.
## Number of proteins detected: 2363
## Protein identifiers: Rv0577, Rv0242c, Rv3012c, Rv2467, Rv3715c, Rv2220
## Number of proteins detected that are supported by a proteotypic peptide: 2337
## Number of proteotypic peptides detected: 16728
## Number of proteins detected: 2337
## First 6 protein identifiers: Rv0577, Rv0242c, Rv3012c, Rv2467, Rv3715c, Rv2220
## Before filtering:
## Number of proteins: 2337
## Number of peptides: 16728
##
## Percentage of peptides removed: 25.94%
##
## After filtering:
## Number of proteins: 2331
## Number of peptides: 12388
## Error in is.data.frame(x): object 'ump_filtered_ms_fdr_pr_all_str' not found
## Error in file.exists(matrix_prefix): object 'matrix_prefix' not found
## Error in file.path(matrix_prefix, "ump_protein_all.csv"): object 'matrix_prefix' not found
## Error in eval(expr, envir, enclos): object 'protein_matrix_all' not found
## Error in file.path(matrix_prefix, "ump_protein_matrix_mscore.csv"): object 'matrix_prefix' not found
## Error in eval(expr, envir, enclos): object 'protein_matrix_mscore' not found
## Error in file.path(matrix_prefix, "ump_peptide_matrix_mscore.csv"): object 'matrix_prefix' not found
## Error in eval(expr, envir, enclos): object 'peptide_matrix_mscore' not found
## Error in aggregate(data[, "intensity"], by = list(data[["proteinname"]], : object 'ump_filtered_all_filters' not found
## Error in eval(expr, envir, enclos): object 'protein_matrix_filtered' not found
## Error in is.data.frame(x): object 'ump_filtered_all_filters' not found
## Error in eval(expr, envir, enclos): object 'peptide_matrix_filtered' not found
## Reading results/01mzML/dia/20190327/2018_0315Briken01.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0315Briken02.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0315Briken03.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0315Briken04.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0315Briken05.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0315Briken06.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0315Briken21.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0315Briken22.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0315Briken23.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0315Briken24.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0315Briken25.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0315Briken26.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0502BrikenDIA01.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0502BrikenDIA02.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0502BrikenDIA03.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0502BrikenDIA04.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0502BrikenDIA05.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0502BrikenDIA06.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0502BrikenDIA07.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0502BrikenDIA08.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0502BrikenDIA09.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0502BrikenDIA10.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0502BrikenDIA11.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0502BrikenDIA12.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0726Briken01.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0726Briken02.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0726Briken03.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0726Briken04.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0726Briken05.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0726Briken06.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0726Briken07.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0726Briken08.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0726Briken09.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0726Briken11.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0726Briken12.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0726Briken13.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0726Briken14.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0726Briken15.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0726Briken16.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0726Briken17.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0726Briken18.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0726Briken19.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0817BrikenTrypsinDIA01.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0817BrikenTrypsinDIA02.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0817BrikenTrypsinDIA03.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0817BrikenTrypsinDIA04.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0817BrikenTrypsinDIA05.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0817BrikenTrypsinDIA06.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0817BrikenTrypsinDIA07.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0817BrikenTrypsinDIA08.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0817BrikenTrypsinDIA09.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0817BrikenTrypsinDIA11.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0817BrikenTrypsinDIA12.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0817BrikenTrypsinDIA13.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0817BrikenTrypsinDIA14.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0817BrikenTrypsinDIA15.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0817BrikenTrypsinDIA16.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0817BrikenTrypsinDIA17.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0817BrikenTrypsinDIA18.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Reading results/01mzML/dia/20190327/2018_0817BrikenTrypsinDIA19.mzML
## Error in loadNamespace(name) : there is no package called 'mzR'
## Error in names(res) <- rownames(sample_definitions): 'names' attribute [60] must be the same length as the vector [0]
## Error in eval(expr, envir, enclos): object 'protein_matrix_filtered' not found
## Error in colnames(intensities): object 'intensities' not found
## Error in cols[[1]] <- "Protein": object 'cols' not found
## Error in eval(expr, envir, enclos): object 'cols' not found
## Error in eval(expr, envir, enclos): object 'intensities' not found
## Error in intensities[["Protein"]] <- NULL: object 'intensities' not found
## Error in colnames(intensities): object 'intensities' not found
## Error in colnames(intensities): object 'intensities' not found
## Error in colnames(intensities): object 'intensities' not found
## Error in `[.data.frame`(sample_annot, reordered, ): object 'reordered' not found
## Reading the sample metadata.
## The sample definitions comprises: 60 rows(samples) and 29 columns(metadata fields).
## Error in create_expt(sample_annot, count_dataframe = intensities, gene_info = mtb_annotations): object 'intensities' not found
## Loading DEP
## Warning in (function (dep_name, dep_ver = "*") : Dependency package
## 'MSnbase' not available.
## Warning in (function (dep_name, dep_ver = "*") : Dependency package 'vsn'
## not available.
## Warning in (function (dep_name, dep_ver = "*") : Dependency package
## 'fdrtool' not available.
## Warning in (function (dep_name, dep_ver = "*") : Dependency package
## 'ComplexHeatmap' not available.
## Warning in (function (dep_name, dep_ver = "*") : Dependency package
## 'circlize' not available.
## Warning in (function (dep_name, dep_ver = "*") : Dependency package
## 'shinydashboard' not available.
## Warning in (function (dep_name, dep_ver = "*") : Dependency package 'DT'
## not available.
## Warning in (function (dep_name, dep_ver = "*") : Dependency package
## 'imputeLCMD' not available.
## Dependency package(s)
## 'MSnbase','vsn','fdrtool','ComplexHeatmap','circlize','shinydashboard','DT','imputeLCMD'
## not available.
## Build a SummarizedExperiment from a table of unique proteins, the indices of
## its intensity columns, and an experimental design.
##
## This appears to be a locally modified copy of DEP's make_se(): the only
## intended difference is that an existing 'ID' column in the design is kept
## instead of always being rebuilt from condition and replicate.
##
## @param proteins_unique data.frame with 'name' and 'ID' columns plus the
##   intensity columns.
## @param columns integer vector with the positions of the intensity columns.
## @param expdesign data.frame with 'label', 'condition' and 'replicate'
##   columns, optionally with an 'ID' column.
## @return A SummarizedExperiment holding the log2 intensities, with the
##   design as colData and the remaining protein columns as rowData.
##
## NOTE(review): delete_prefix() and SummarizedExperiment() are not defined in
## this function; they must be visible in the calling environment.
wtf <- function (proteins_unique, columns, expdesign) {
  assertthat::assert_that(is.data.frame(proteins_unique), is.integer(columns),
                          is.data.frame(expdesign))
  if (any(!c("name", "ID") %in% colnames(proteins_unique))) {
    stop("'name' and/or 'ID' columns are not present in '",
         deparse(substitute(proteins_unique)), "'.\nRun make_unique() to obtain the required columns",
         call. = FALSE)
  }
  if (any(!c("label", "condition", "replicate") %in% colnames(expdesign))) {
    stop("'label', 'condition' and/or 'replicate' columns",
         "are not present in the experimental design", call. = FALSE)
  }
  if (any(!apply(proteins_unique[, columns], 2, is.numeric))) {
    stop("specified 'columns' should be numeric", "\nRun make_se_parse() with the appropriate columns as argument",
         call. = FALSE)
  }
  if (tibble::is.tibble(proteins_unique))
    proteins_unique <- as.data.frame(proteins_unique)
  if (tibble::is.tibble(expdesign))
    expdesign <- as.data.frame(expdesign)
  rownames(proteins_unique) <- proteins_unique$name
  ## Zero intensities are treated as missing before the log2 transform.
  raw <- proteins_unique[, columns]
  raw[raw == 0] <- NA
  raw <- log2(raw)
  ## Base-R equivalent of mutate(condition = make.names(condition)), so the
  ## function does not depend on dplyr being attached.
  expdesign[["condition"]] <- make.names(expdesign[["condition"]])
  ## I changed the following because it didn't make sense to me.
  ## Only build an ID when the design does not already provide one. The new
  ## column must be named ID: the previous call named it 'condition', so the
  ## ID used for row and column names below was never created.
  if (is.null(expdesign[["ID"]])) {
    expdesign <- tidyr::unite(expdesign, ID, condition, replicate, remove=FALSE)
  }
  rownames(expdesign) <- expdesign$ID
  ## Match design labels to intensity column names after stripping prefixes.
  matched <- match(make.names(delete_prefix(expdesign$label)),
                   make.names(delete_prefix(colnames(raw))))
  if (any(is.na(matched))) {
    stop("None of the labels in the experimental design match ",
         "with column names in 'proteins_unique'", "\nRun make_se() with the correct labels in the experimental design",
         "and/or correct columns specification")
  }
  ## Rename the intensity columns to the sample IDs and order them like the design.
  colnames(raw)[matched] <- expdesign$ID
  raw <- raw[, !is.na(colnames(raw))][rownames(expdesign)]
  row_data <- proteins_unique[, -columns]
  rownames(row_data) <- row_data$name
  se <- SummarizedExperiment(assays = as.matrix(raw), colData = expdesign,
                             rowData = row_data)
  return(se)
}
## Error in library(DEP): there is no package called 'DEP'
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: DelayedArray
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:hpgltools':
##
## anyMissing, rowMedians
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
## Loading required package: BiocParallel
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following objects are masked from 'package:base':
##
## aperm, apply, rowsum
## Error in pData(protein_expt): object 'protein_expt' not found
## Error in design[["sampleid"]]: object of type 'closure' is not subsettable
## Error in design[["sampleid"]]: object of type 'closure' is not subsettable
## Error in exprs(protein_expt): object 'protein_expt' not found
## Error in exprs(protein_expt): object 'protein_expt' not found
## Error in rownames(mtb_unique): object 'mtb_unique' not found
## Error in rownames(mtb_unique): object 'mtb_unique' not found
## Error in make_se(mtb_unique, intensity_columns, design): could not find function "make_se"
## Error: 'plot_frequency' is not an exported object from 'namespace:DEP'
## Error in assay(mtb_se): object 'mtb_se' not found
## Error: 'filter_missval' is not an exported object from 'namespace:DEP'
## Error in assay(mtb_filt): object 'mtb_filt' not found
## Error: 'plot_numbers' is not an exported object from 'namespace:DEP'
## Error: 'plot_coverage' is not an exported object from 'namespace:DEP'
## Error: 'normalize_vsn' is not an exported object from 'namespace:DEP'
## Error: 'plot_normalization' is not an exported object from 'namespace:DEP'
## Error: 'plot_missval' is not an exported object from 'namespace:DEP'
## Error: 'plot_detect' is not an exported object from 'namespace:DEP'
## Error: 'impute' is not an exported object from 'namespace:DEP'
## Error: 'impute' is not an exported object from 'namespace:DEP'
## Error: 'plot_imputation' is not an exported object from 'namespace:DEP'
## Error: 'test_diff' is not an exported object from 'namespace:DEP'
## Error: 'test_diff' is not an exported object from 'namespace:DEP'
## Error: 'add_rejections' is not an exported object from 'namespace:DEP'
## Error: 'plot_cor' is not an exported object from 'namespace:DEP'
## Error: 'plot_heatmap' is not an exported object from 'namespace:DEP'
## Error: 'plot_heatmap' is not an exported object from 'namespace:DEP'
## Error: 'plot_volcano' is not an exported object from 'namespace:DEP'
## Error: 'plot_volcano' is not an exported object from 'namespace:DEP'
## Error: 'plot_cond' is not an exported object from 'namespace:DEP'
## Error: 'get_results' is not an exported object from 'namespace:DEP'
## Error in plot_single(mtb_dep, proteins = c("Rv0287", "Rv0288")): could not find function "plot_single"
## Error in hpgltools::write_xls(data = mtb_result, excel = "excel/dep_result.xlsx"): object 'mtb_result' not found
DEP has a neat function to plot missing values. Sadly, it returns only the plot and not the underlying matrix. The plot is nice, but I need the matrix itself, hence this minor change.
## Error in assay(se): object 'mtb_se' not found
## Error in summary(def_mtrx): object 'def_mtrx' not found
## Error in rowSums(def_mtrx): object 'def_mtrx' not found
## Error in head(defined_by_protein): object 'defined_by_protein' not found
## Error in colSums(def_mtrx): object 'def_mtrx' not found
## Error in eval(expr, envir, enclos): object 'defined_by_sample' not found
Compare our ‘normal’ OpenSWATH output, analyzed via hpgltools, against the DIA-Umpire version. Secondary goal: compare the results with and without imputation.
## Read the TRIC output of the tuberculist-library run for this version.
ver <- "20180913"
tric_data <- read.csv(
  file=paste0("results/tric/", ver, "/whole_8mz_tuberculist/comet_HCD.tsv"),
  sep="\t")
## Keep only the part of each protein name before its last underscore.
tric_data[["ProteinName"]] <- gsub("^(.*)_.*$", "\\1", tric_data[["ProteinName"]])
## Load the matching sample sheet and drop rows whose name starts with "s..".
sample_annot <- extract_metadata(paste0("sample_sheets/Mtb_dia_samples_", ver, ".xlsx"))
kept <- !grepl(pattern="^s\\.\\.", x=rownames(sample_annot))
sample_annot <- sample_annot[kept, ]
## Use the local SWATH2stats fork rather than an installed release.
devtools::load_all("~/scratch/git/SWATH2stats_myforked")
## Loading SWATH2stats
## Found the same mzXML files in the annotations and data.
## Number of non-decoy peptides: 21557
## Number of decoy peptides: 939
## Decoy rate: 0.0436
## The average FDR by run on assay level is 0.009
## The average FDR by run on peptide level is 0.01
## The average FDR by run on protein level is 0.047
## Target assay FDR: 0.02
## Required overall m-score cutoff: 0.0070795
## achieving assay FDR: 0.0181
## Target protein FDR: 0.02
## Required overall m-score cutoff: 0.00089125
## achieving protein FDR: 0.0182
## Original dimension: 133447, new dimension: 128204, difference: 5243.
## Peptides need to have been quantified in more conditions than: 9.6 in order to pass this percentage-based threshold.
## Fraction of peptides selected: 0.11
## Original dimension: 135427, new dimension: 33028, difference: 102399.
## Target protein FDR: 0.000891250938133746
## Required overall m-score cutoff: 0.01
## achieving protein FDR: 0
## filter_mscore_fdr is filtering the data...
## finding m-score cutoff to achieve desired protein FDR in protein master list..
## finding m-score cutoff to achieve desired global peptide FDR..
## Target peptide FDR: 0.05
## Required overall m-score cutoff: 0.01
## Achieving peptide FDR: 0
## Proteins selected:
## Total proteins selected: 2999
## Final target proteins: 2999
## Final decoy proteins: 0
## Peptides mapping to these protein entries selected:
## Total mapping peptides: 20921
## Final target peptides: 20921
## Final decoy peptides: 0
## Total peptides selected from:
## Total peptides: 20921
## Final target peptides: 20921
## Final decoy peptides: 0
## Individual run FDR quality of the peptides was not calculated
## as not every run contains a decoy.
## The decoys have been removed from the returned data.
## Number of proteins detected: 3016
## Protein identifiers: Rv2524c, Rv3716c, Rv1270c, Rv0724, Rv0161, Rv2535c
## Number of proteins detected that are supported by a proteotypic peptide: 2888
## Number of proteotypic peptides detected: 20772
## Number of proteins detected: 2890
## First 6 protein identifiers: Rv2524c, Rv3716c, Rv1270c, Rv0724, Rv0161, Rv2535c
## Before filtering:
## Number of proteins: 2888
## Number of peptides: 20772
##
## Percentage of peptides removed: 21.87%
##
## After filtering:
## Number of proteins: 2861
## Number of peptides: 16230
## Before filtering:
## Number of proteins: 2861
## Number of peptides: 16230
##
## Percentage of peptides removed: 0.04%
##
## After filtering:
## Number of proteins: 2603
## Number of peptides: 16223
## Protein overview matrix results/swath2stats/20180913/osw_protein_all.csv written to working folder.
## [1] 3873 13
## Protein overview matrix results/swath2stats/20180913/osw_protein_matrix_mscore.csv written to working folder.
## [1] 2999 13
## Peptide overview matrix results/swath2stats/20180913/osw_peptide_matrix_mscore.csv written to working folder.
## [1] 20921 13
## Protein overview matrix results/swath2stats/20180913/osw_protein_matrix_filtered.csv written to working folder.
## [1] 2603 13
## Peptide overview matrix results/swath2stats/20180913/osw_peptide_matrix_filtered.csv written to working folder.
## [1] 93860 13
## The library contains 5 transitions per precursor.
## The data table was transformed into a table containing one row per transition.
## One or several columns required by MSstats were not in the data. The columns were created and filled with NAs.
## Missing columns: productcharge, isotopelabeltype
## isotopelabeltype was filled with light.
## Turn the filtered SWATH2stats protein matrix into an expt.
prot_mtrx <- protein_matrix_filtered
## Row names come from the proteinname column with any leading "1/" removed;
## the first column (presumably that same name column -- confirm) is then dropped.
rownames(prot_mtrx) <- gsub("^1\\/", "", prot_mtrx[["proteinname"]])
prot_mtrx <- prot_mtrx[, -1]
## Important question: Did SWATH2stats reorder my data?
## Rename every column containing "2018" to "s" plus everything from its last
## "2018" onward, then index the annotations by those names so the metadata
## rows follow the column order of the matrix.
colnames(prot_mtrx) <- gsub("^(.*)(2018.*)$", "s\\2", colnames(prot_mtrx))
reordered <- colnames(prot_mtrx)
metadata <- sample_annot[reordered, ]
## sm() wraps the call; NOTE(review): presumably it silences create_expt()'s messages.
osw_expt <- sm(create_expt(metadata, count_dataframe=prot_mtrx, gene_info=mtb_annotations))
## Load the EncyclopeDIA sample sheet plus the protein and peptide reports.
ver <- "20190327"
enc_metadata <- hpgltools:::read_metadata(
  "sample_sheets/Mtb_dia_samples_encyclopedia_20190327.xlsx")
## Metadata row names are the sample IDs with an "s" prefix.
rownames(enc_metadata) <- paste0("s", enc_metadata[["sampleid"]])
enc_matrix <- read.table("encyclopedia/most_samples_quant_report.elib.proteins.txt",
                         header=TRUE)
enc_pep_matrix <- read.table("encyclopedia/most_samples_quant_report.elib.peptides.txt",
                             header=TRUE)
## Index the protein table by protein ID, then drop its first three columns
## in a single step.
rownames(enc_matrix) <- enc_matrix[["Protein"]]
enc_matrix <- enc_matrix[, -(1:3)]
## Print the remaining column names for inspection.
colnames(enc_matrix)
## [1] "X2018_0502BrikenDIA01.mzML"
## [2] "X2018_0502BrikenDIA02.mzML"
## [3] "X2018_0502BrikenDIA03.mzML"
## [4] "X2018_0502BrikenDIA04.mzML"
## [5] "X2018_0502BrikenDIA05.mzML"
## [6] "X2018_0502BrikenDIA06.mzML"
## [7] "X2018_0502BrikenDIA07.mzML"
## [8] "X2018_0502BrikenDIA08.mzML"
## [9] "X2018_0502BrikenDIA09.mzML"
## [10] "X2018_0502BrikenDIA10.mzML"
## [11] "X2018_0502BrikenDIA11.mzML"
## [12] "X2018_0502BrikenDIA12.mzML"
## [13] "X2018_0726Briken01.mzML"
## [14] "X2018_0726Briken02.mzML"
## [15] "X2018_0726Briken03.mzML"
## [16] "X2018_0726Briken04.mzML"
## [17] "X2018_0726Briken05.mzML"
## [18] "X2018_0726Briken06.mzML"
## [19] "X2018_0726Briken07.mzML"
## [20] "X2018_0726Briken08.mzML"
## [21] "X2018_0726Briken09.mzML"
## [22] "X2018_0726Briken11.mzML"
## [23] "X2018_0726Briken12.mzML"
## [24] "X2018_0726Briken13.mzML"
## [25] "X2018_0726Briken14.mzML"
## [26] "X2018_0726Briken15.mzML"
## [27] "X2018_0726Briken16.mzML"
## [28] "X2018_0726Briken17.mzML"
## [29] "X2018_0726Briken18.mzML"
## [30] "X2018_0726Briken19.mzML"
## [31] "X2018_0817BrikenTrypsinDIA01.mzML"
## [32] "X2018_0817BrikenTrypsinDIA02.mzML"
## [33] "X2018_0817BrikenTrypsinDIA03.mzML"
## [34] "X2018_0817BrikenTrypsinDIA04.mzML"
## [35] "X2018_0817BrikenTrypsinDIA05.mzML"
## [36] "X2018_0817BrikenTrypsinDIA06.mzML"
## [37] "X2018_0817BrikenTrypsinDIA07.mzML"
## [38] "X2018_0817BrikenTrypsinDIA08.mzML"
## [39] "X2018_0817BrikenTrypsinDIA09.mzML"
## [40] "X2018_0817BrikenTrypsinDIA11.mzML"
## [41] "X2018_0817BrikenTrypsinDIA12.mzML"
## [42] "X2018_0817BrikenTrypsinDIA13.mzML"
## [43] "X2018_0817BrikenTrypsinDIA14.mzML"
## [44] "X2018_0817BrikenTrypsinDIA15.mzML"
## [45] "X2018_0817BrikenTrypsinDIA16.mzML"
## [46] "X2018_0817BrikenTrypsinDIA17.mzML"
## [47] "X2018_0817BrikenTrypsinDIA18.mzML"
## [48] "X2018_0817BrikenTrypsinDIA19.mzML"
## Tidy the EncyclopeDIA column names: swap the leading "X" for the same "s"
## prefix given to the metadata row names, and remove the trailing ".mzML".
## Both patterns are anchored so that an "X" or ".mzML" occurring elsewhere in
## a sample name is left untouched; the earlier unanchored "X" replacement
## would have rewritten every "X" and made the "^X" replacement a no-op.
colnames(enc_matrix) <- sub(pattern="^X", replacement="s", x=colnames(enc_matrix))
colnames(enc_matrix) <- sub(pattern="\\.mzML$", replacement="", x=colnames(enc_matrix))
## Replace missing quantifications with 0 before building the expt.
na_idx <- is.na(enc_matrix)
enc_matrix[na_idx] <- 0
## No gene annotations are attached to this expt (gene_info=NULL).
enc_expt <- create_expt(metadata=enc_metadata, count_dataframe=enc_matrix, gene_info=NULL)
## Reading the sample metadata.
## The sample definitions comprises: 48 rows(samples) and 28 columns(metadata fields).
## Matched 2632 annotations and counts.
## Bringing together the count matrix and gene information.
## The final expressionset has 2632 rows and 48 columns.
For the first and simplest comparison, I will take the median by condition for these three data sets and see how they compare. Then I will subset the data into whole vs. filtrate samples, do the logFC comparisons, and compare again. Finally, I will repeat these processes with my version of the imputation provided by DEP.
## The factor delta_filtrate has 3 rows.
## The factor delta_whole has 3 rows.
## The factor wt_filtrate has 3 rows.
## The factor wt_whole has 3 rows.
## The factor delta_filtrate has 9 rows.
## The factor comp_filtrate has 10 rows.
## The factor delta_whole has 8 rows.
## The factor comp_whole has 9 rows.
## The factor wt_filtrate has 6 rows.
## The factor wt_whole has 6 rows.
## Error in pData(data): object 'ump_expt' not found
## Error in merge(all, ump_medians, by = "row.names"): object 'ump_medians' not found
## Error in `[.data.frame`(all, , c("delta_filtrate", "delta_filtrate.x", : undefined columns selected
## Error in cor.test(test_df[[1]], test_df[[2]], method = "spearman"): object 'test_df' not found
## Error in cor.test(test_df[[1]], test_df[[3]], method = "spearman"): object 'test_df' not found
## Error in `[.data.frame`(all, , c("delta_filtrate", "delta_filtrate.y")): undefined columns selected
## Error in cor.test(test_df[[1]], test_df[[2]]): object 'test_df' not found
## Error in normalize_expt(ump_expt, filter = TRUE): object 'ump_expt' not found
## Error in normalize_expt(input, filter = TRUE, batch = FALSE, transform = "log2", : object 'ump_norm' not found
## Error in combine_de_tables(ump_de, keepers = keepers, excel = paste0("excel/diaumpire_tables-v", : object 'ump_de' not found
## Found 11011 zeros in the data.
## The data has not been filtered.
## Filtering the data, turn on force to stop this.
## This function will replace the expt$expressionset slot with:
## pofa(data)
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: pofa
## Removing 471 low-count genes (2132 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
## Error in as(exprs_set, "MSnSet"): no method or default for coercing "ExpressionSet" to "MSnSet"
## Found 46806 zeros in the data.
## The data has not been filtered.
## Filtering the data, turn on force to stop this.
## This function will replace the expt$expressionset slot with:
## pofa(data)
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: pofa
## Removing 868 low-count genes (1764 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
## Error in as(exprs_set, "MSnSet"): no method or default for coercing "ExpressionSet" to "MSnSet"
## Error in exprs(expt): object 'ump_expt' not found
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 70c617a152947724e6b6f07de9c37021b60133ec
## This is hpgltools commit: Mon Jun 3 11:41:31 2019 -0400: 70c617a152947724e6b6f07de9c37021b60133ec
## Saving to dia_umpire_20190308-v20190327.rda.xz
R version 3.6.0 (2019-04-26)
Platform: x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.UTF-8, LC_NUMERIC=C, LC_TIME=en_US.UTF-8, LC_COLLATE=en_US.UTF-8, LC_MONETARY=en_US.UTF-8, LC_MESSAGES=en_US.UTF-8, LC_PAPER=en_US.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.UTF-8 and LC_IDENTIFICATION=C
attached base packages: stats4, parallel, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: SWATH2stats(v.1.13.5), SummarizedExperiment(v.1.14.0), DelayedArray(v.0.10.0), BiocParallel(v.1.18.0), matrixStats(v.0.54.0), GenomicRanges(v.1.36.0), GenomeInfoDb(v.1.20.0), IRanges(v.2.18.1), S4Vectors(v.0.22.0), testthat(v.2.1.1), hpgltools(v.1.0), Biobase(v.2.44.0) and BiocGenerics(v.0.30.0)
loaded via a namespace (and not attached): backports(v.1.1.4), fastmatch(v.1.1-0), selectr(v.0.4-1), plyr(v.1.8.4), igraph(v.1.2.4.1), lazyeval(v.0.2.2), splines(v.3.6.0), usethis(v.1.5.0), ggplot2(v.3.2.0), urltools(v.1.7.3), sva(v.3.32.1), digest(v.0.6.19), foreach(v.1.4.4), htmltools(v.0.3.6), GOSemSim(v.2.10.0), viridis(v.0.5.1), GO.db(v.3.8.2), gdata(v.2.18.0), magrittr(v.1.5), memoise(v.1.1.0), cluster(v.2.1.0), doParallel(v.1.0.14), openxlsx(v.4.1.0.1), limma(v.3.40.2), remotes(v.2.0.4), readr(v.1.3.1), Biostrings(v.2.52.0), annotate(v.1.62.0), enrichplot(v.1.4.0), prettyunits(v.1.0.2), colorspace(v.1.4-1), rvest(v.0.3.4), blob(v.1.1.1), ggrepel(v.0.8.1), xfun(v.0.7), dplyr(v.0.8.1), callr(v.3.2.0), crayon(v.1.3.4), RCurl(v.1.95-4.12), jsonlite(v.1.6), genefilter(v.1.66.0), lme4(v.1.1-21), survival(v.2.44-1.1), iterators(v.1.0.10), glue(v.1.3.1), polyclip(v.1.10-0), gtable(v.0.3.0), zlibbioc(v.1.30.0), XVector(v.0.24.0), UpSetR(v.1.4.0), pkgbuild(v.1.0.3), scales(v.1.0.0), DOSE(v.3.10.1), DBI(v.1.0.0), Rcpp(v.1.0.1), viridisLite(v.0.3.0), xtable(v.1.8-4), progress(v.1.2.2), gridGraphics(v.0.4-1), bit(v.1.1-14), europepmc(v.0.3), httr(v.1.4.0), fgsea(v.1.10.0), gplots(v.3.0.1.1), RColorBrewer(v.1.1-2), pkgconfig(v.2.0.2), XML(v.3.98-1.20), farver(v.1.1.0), later(v.0.8.0), labeling(v.0.3), ggplotify(v.0.0.3), tidyselect(v.0.2.5), rlang(v.0.3.4), reshape2(v.1.4.3), AnnotationDbi(v.1.46.0), munsell(v.0.5.0), tools(v.3.6.0), cli(v.1.1.0), RSQLite(v.2.1.1), ggridges(v.0.5.1), devtools(v.2.0.2), evaluate(v.0.14), stringr(v.1.4.0), yaml(v.2.2.0), processx(v.3.3.1), knitr(v.1.23), bit64(v.0.9-7), fs(v.1.3.1), pander(v.0.6.3), zip(v.2.0.2), caTools(v.1.17.1.2), purrr(v.0.3.2), ggraph(v.1.0.2), packrat(v.0.5.0), nlme(v.3.1-140), mime(v.0.7), xml2(v.1.2.0), DO.db(v.2.9), biomaRt(v.2.40.0), compiler(v.3.6.0), pbkrtest(v.0.4-7), rstudioapi(v.0.10), curl(v.3.3), variancePartition(v.1.14.0), tibble(v.2.1.3), tweenr(v.1.0.1), stringi(v.1.4.3), ps(v.1.3.0), 
GenomicFeatures(v.1.36.1), desc(v.1.2.0), lattice(v.0.20-38), Matrix(v.1.2-17), DEP(v.1.5.3), nloptr(v.1.2.1), pillar(v.1.4.1), triebeard(v.0.3.0), data.table(v.1.12.2), cowplot(v.0.9.4), bitops(v.1.0-6), httpuv(v.1.5.1), rtracklayer(v.1.44.0), qvalue(v.2.16.0), colorRamps(v.2.3), R6(v.2.4.0), promises(v.1.0.1), KernSmooth(v.2.23-15), gridExtra(v.2.3), sessioninfo(v.1.1.1), codetools(v.0.2-16), boot(v.1.3-22), MASS(v.7.3-51.4), gtools(v.3.8.1), assertthat(v.0.2.1), pkgload(v.1.0.2), rprojroot(v.1.3-2), withr(v.2.1.2), GenomicAlignments(v.1.20.1), Rsamtools(v.2.0.0), GenomeInfoDbData(v.1.2.1), mgcv(v.1.8-28), hms(v.0.4.2), clusterProfiler(v.3.12.0), grid(v.3.6.0), tidyr(v.0.8.3), minqa(v.1.2.4), rvcheck(v.0.1.3), rmarkdown(v.1.13), ggforce(v.0.2.2), shiny(v.1.3.2) and base64enc(v.0.1-3)