Let's see if we can get anything out of the data.
Can I extract the intensities and do something with them?
## Take the outputs from tric (feature_alignment.py)
cid_intensities <- read.csv(file="results/tric/CID_outmatrix.tsv", sep="\t")
hcd_intensities <- read.csv(file="results/tric/HCD_outmatrix.tsv", sep="\t")
## Pull the protein names into a scratch column called "rownames"
cid_intensities[["rownames"]] <- cid_intensities[["Protein"]]
hcd_intensities[["rownames"]] <- hcd_intensities[["Protein"]]
## Simplify them by dropping a leading "<digits>/" prefix
cid_intensities[["rownames"]] <- gsub(pattern="^[[:digit:]]+\\/",
                                      replacement="",
                                      x=cid_intensities[["rownames"]])
hcd_intensities[["rownames"]] <- gsub(pattern="^[[:digit:]]+\\/",
                                      replacement="",
                                      x=hcd_intensities[["rownames"]])
## make suitable rownames (syntactically valid and unique)
rownames(cid_intensities) <- make.names(cid_intensities[["rownames"]], unique=TRUE)
rownames(hcd_intensities) <- make.names(hcd_intensities[["rownames"]], unique=TRUE)
## Simplify the column names because they are way too long: everything from
## the last "_vs" onwards is dropped, then a per-method prefix is added.
## Each table is renamed from its OWN header.  Reusing the CID names for the
## HCD table would mislabel (or fail on) any HCD column that differs in name,
## order, or count from its CID counterpart.
shorter_colnames <- colnames(cid_intensities)
shorter_colnames <- gsub(pattern="^(.*)_vs.*$", replacement="\\1", x=shorter_colnames)
colnames(cid_intensities) <- paste0("cid_", shorter_colnames)
hcd_shorter_colnames <- gsub(pattern="^(.*)_vs.*$", replacement="\\1",
                             x=colnames(hcd_intensities))
colnames(hcd_intensities) <- paste0("hcd_", hcd_shorter_colnames)
## Make a data table from them.  The prefixed "rownames" column is made
## syntactically valid and unique here as well.
cid <- data.table::as.data.table(cid_intensities)
cid[["cid_rownames"]] <- make.names(cid[["cid_rownames"]], unique=TRUE)
hcd <- data.table::as.data.table(hcd_intensities)
hcd[["hcd_rownames"]] <- make.names(hcd[["hcd_rownames"]], unique=TRUE)
## Zero-fill the missing cells and flag the decoys, one table at a time.
## CID: every NA cell becomes 0, then "decoy" is 1 where the protein name
## starts with "DECOY_" and 0 everywhere else.
nas <- is.na(cid)
cid[nas] <- 0
decoys <- grepl(pattern="^DECOY_", x=cid[["cid_Protein"]])
cid[["decoy"]] <- as.numeric(decoys)
## HCD: identical treatment.
nas <- is.na(hcd)
hcd[nas] <- 0
decoys <- grepl(pattern="^DECOY_", x=hcd[["hcd_Protein"]])
hcd[["decoy"]] <- as.numeric(decoys)
## Make some medians for the columns of interest.
## For each table, the columns are picked by a name pattern and the median is
## taken across them for every row.
intensity_cols <- grep(pattern="Intensity", x=colnames(cid))
intense <- cid[, intensity_cols, with=FALSE]
cid[["median_intense"]] <- matrixStats::rowMedians(as.matrix(intense))
intensity_cols <- grep(pattern="Intensity", x=colnames(hcd))
intense <- hcd[, intensity_cols, with=FALSE]
hcd[["median_intense"]] <- matrixStats::rowMedians(as.matrix(intense))
## Repeat for RT
rt_cols <- grep(pattern="_RT_", x=colnames(cid))
rt <- cid[, rt_cols, with=FALSE]
cid[["median_rt"]] <- matrixStats::rowMedians(as.matrix(rt))
rt_cols <- grep(pattern="_RT_", x=colnames(hcd))
rt <- hcd[, rt_cols, with=FALSE]
hcd[["median_rt"]] <- matrixStats::rowMedians(as.matrix(rt))
## And score
score_cols <- grep(pattern="_score_", x=colnames(cid))
score <- cid[, score_cols, with=FALSE]
cid[["median_score"]] <- matrixStats::rowMedians(as.matrix(score))
## The HCD score median must come from the "_score_" columns; selecting
## "_RT_" here would silently make median_score a copy of median_rt.
score_cols <- grep(pattern="_score_", x=colnames(hcd))
score <- hcd[, score_cols, with=FALSE]
hcd[["median_score"]] <- matrixStats::rowMedians(as.matrix(score))
## Now plot them, starting with the CID median scores.
library(ggplot2)
## Melted copy of the per-row summary columns, taken before decoy is
## converted to a factor.
cid_scores <- cid[, c("median_score", "median_rt", "median_intense", "decoy")]
melted <- reshape2::melt(cid_scores)
## melt() reports: No id variables; using all as measure variables
## The fill colours are keyed on "0"/"1", so decoy has to be a factor.
cid[["decoy"]] <- as.factor(cid[["decoy"]])
## Count-scaled density of median_score on a log2 x axis, split by decoy.
cid_score_dist <- ggplot(data=cid, aes_string(x="median_score", fill="decoy"))
cid_score_dist <- cid_score_dist +
  geom_density(aes_string(x="median_score", y="..count..", fill="decoy"))
cid_score_dist <- cid_score_dist +
  scale_x_continuous(trans=scales::log2_trans())
cid_score_dist <- cid_score_dist +
  scale_fill_manual(values=c("0"="darkblue", "1"="darkred"))
cid_score_dist
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 2734 rows containing non-finite values (stat_density).
## HCD median scores: decoy becomes a factor so the "0"/"1" fill colours
## apply, then a count-scaled density is drawn on a log2 x axis.
hcd[["decoy"]] <- as.factor(hcd[["decoy"]])
hcd_score_dist <- ggplot(data=hcd, aes_string(x="median_score", fill="decoy"))
hcd_score_dist <- hcd_score_dist +
  geom_density(aes_string(x="median_score", y="..count..", fill="decoy"))
hcd_score_dist <- hcd_score_dist +
  scale_x_continuous(trans=scales::log2_trans())
hcd_score_dist <- hcd_score_dist +
  scale_fill_manual(values=c("0"="darkblue", "1"="darkred"))
hcd_score_dist
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 1863 rows containing non-finite values (stat_density).
library(dplyr)
## CID median intensities.
## Split the rows into decoys and non-decoys and take the mean of
## median_intense in each group; the means become dashed vertical lines.
decoy_idx <- cid[, "decoy"] == 1
cid_df <- as.data.frame(cid)
decoys <- cid_df[decoy_idx, ]
nodecoys <- cid_df[!decoy_idx, ]
decoy_xint <- mean(decoys[["median_intense"]], na.rm=TRUE)
nodecoy_xint <- mean(nodecoys[["median_intense"]], na.rm=TRUE)
## Count-scaled density on a log2 x axis, split by decoy.
cid_intense_dist <- ggplot(data=cid, aes_string(x="median_intense", fill="decoy"))
cid_intense_dist <- cid_intense_dist +
  geom_density(aes_string(x="median_intense", y="..count..", fill="decoy")) +
  scale_x_continuous(trans=scales::log2_trans())
## Group means: red for decoys, blue for everything else.
cid_intense_dist <- cid_intense_dist +
  geom_vline(xintercept=decoy_xint, color="darkred", linetype="dashed", size=1) +
  geom_vline(xintercept=nodecoy_xint, color="darkblue", linetype="dashed", size=1)
cid_intense_dist <- cid_intense_dist +
  scale_fill_manual(values=c("0"="darkblue", "1"="darkred"))
cid_intense_dist
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 2734 rows containing non-finite values (stat_density).
## HCD median intensities.
## Split the rows into decoys and non-decoys and take the mean of
## median_intense in each group; the means become dashed vertical lines.
decoy_idx <- hcd[, "decoy"] == 1
hcd_df <- as.data.frame(hcd)
decoys <- hcd_df[decoy_idx, ]
nodecoys <- hcd_df[!decoy_idx, ]
decoy_xint <- mean(decoys[["median_intense"]], na.rm=TRUE)
nodecoy_xint <- mean(nodecoys[["median_intense"]], na.rm=TRUE)
## Count-scaled density split by decoy.
hcd_intense_dist <- ggplot(data=hcd, aes_string(x="median_intense", fill="decoy"))
hcd_intense_dist <- hcd_intense_dist +
  geom_density(aes_string(x="median_intense", y="..count..", fill="decoy"))
## Group means: red for decoys, blue for everything else.
hcd_intense_dist <- hcd_intense_dist +
  geom_vline(xintercept=decoy_xint, color="darkred", linetype="dashed", size=1) +
  geom_vline(xintercept=nodecoy_xint, color="darkblue", linetype="dashed", size=1)
## log2 x axis and the fixed decoy colours.
hcd_intense_dist <- hcd_intense_dist +
  scale_x_continuous(trans=scales::log2_trans()) +
  scale_fill_manual(values=c("0"="darkblue", "1"="darkred"))
hcd_intense_dist
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 3147 rows containing non-finite values (stat_density).
## CID median retention times.
## Split the rows into decoys and non-decoys and take the mean of median_rt
## in each group; the means become dashed vertical lines.
decoy_idx <- cid[, "decoy"] == 1
cid_df <- as.data.frame(cid)
decoys <- cid_df[decoy_idx, ]
nodecoys <- cid_df[!decoy_idx, ]
decoy_xint <- mean(decoys[["median_rt"]], na.rm=TRUE)
nodecoy_xint <- mean(nodecoys[["median_rt"]], na.rm=TRUE)
## Count-scaled density on a log2 x axis, split by decoy.
cid_rt_dist <- ggplot(data=cid, aes_string(x="median_rt", fill="decoy"))
cid_rt_dist <- cid_rt_dist +
  geom_density(aes_string(x="median_rt", y="..count..", fill="decoy")) +
  scale_x_continuous(trans=scales::log2_trans())
## Group means: red for decoys, blue for everything else.
cid_rt_dist <- cid_rt_dist +
  geom_vline(xintercept=decoy_xint, color="darkred", linetype="dashed", size=1) +
  geom_vline(xintercept=nodecoy_xint, color="darkblue", linetype="dashed", size=1)
cid_rt_dist <- cid_rt_dist +
  scale_fill_manual(values=c("0"="darkblue", "1"="darkred"))
cid_rt_dist
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 1448 rows containing non-finite values (stat_density).
## HCD median retention times.
## Split the rows into decoys and non-decoys and take the mean of median_rt
## in each group; the means become dashed vertical lines.
decoy_idx <- hcd[, "decoy"] == 1
hcd_df <- as.data.frame(hcd)
decoys <- hcd_df[decoy_idx, ]
nodecoys <- hcd_df[!decoy_idx, ]
decoy_xint <- mean(decoys[["median_rt"]], na.rm=TRUE)
nodecoy_xint <- mean(nodecoys[["median_rt"]], na.rm=TRUE)
## Count-scaled density on a log2 x axis, split by decoy.
hcd_rt_dist <- ggplot(data=hcd, aes_string(x="median_rt", fill="decoy"))
hcd_rt_dist <- hcd_rt_dist +
  geom_density(aes_string(x="median_rt", y="..count..", fill="decoy")) +
  scale_x_continuous(trans=scales::log2_trans())
## Group means: red for decoys, blue for everything else.
hcd_rt_dist <- hcd_rt_dist +
  geom_vline(xintercept=decoy_xint, color="darkred", linetype="dashed", size=1) +
  geom_vline(xintercept=nodecoy_xint, color="darkblue", linetype="dashed", size=1)
hcd_rt_dist <- hcd_rt_dist +
  scale_fill_manual(values=c("0"="darkblue", "1"="darkred"))
hcd_rt_dist
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 1863 rows containing non-finite values (stat_density).