hs_biopsy <- subset_expt(hs_expt, subset="experimentname=='biopsy'")
hs_biopsy <- set_expt_colors(expt=hs_biopsy,
colors=c("darkblue", "darkred", "lightblue", "red"))
##biopsy_expt$colors <- c("lightblue","lightblue","darkblue","darkblue","red","darkred")
hs_biopsy$batches
## HPGL0644 HPGL0645 HPGL0646 HPGL0647 HPGL0648 HPGL0649
## "d1016" "d1016" "d1021" "d1021" "d2040" "d2040"
hs_biopsy_metrics <- graph_metrics(hs_biopsy)
## Graphing number of non-zero genes with respect to CPM by library.
## Graphing library sizes.
## Graphing a boxplot.
## This data will benefit from being displayed on the log scale.
## If this is not desired, set scale='raw'
## Some entries are 0. We are on log scale, adding 1 to the data.
## Changed 170832 zero count features.
## Graphing a correlation heatmap.
## Graphing a standard median correlation.
## Performing correlation.
## Graphing a distance heatmap.
## Graphing a standard median distance.
## Performing distance.
## Graphing a PCA plot.
## Graphing a T-SNE plot.
## Plotting a density plot.
## This data will benefit from being displayed on the log scale.
## If this is not desired, set scale='raw'
## Some entries are 0. We are on log scale, setting them to 0.5.
## Changed 170832 zero count features.
## Plotting the representation of the top-n genes.
## Printing a color to condition legend.
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
hs_biopsy_norm <- normalize_expt(hs_biopsy, transform="log2", filter=TRUE, batch="fsva")
## This function will replace the expt$expressionset slot with:
## log2(fsva(hpgl(data)))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: hpgl
## Removing 36873 low-count genes (14168 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: transforming the data with log2.
## transform_counts: Found 146 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with fsva.
## Note to self: If you get an error like 'x contains missing values'; I think this
## means that the data has too many 0's and needs to have a better low-count filter applied.
## batch_counts: Before batch correction, 146 entries are >= 0.
## batch_counts: Using sva::fsva for batch correction.
## The number of elements which are < 0 after batch correction is: 45
## The variable low_to_zero sets whether to change <0 values to 0 and is: FALSE
hs_biopsynorm_metrics <- graph_metrics(hs_biopsy_norm)
## Graphing number of non-zero genes with respect to CPM by library.
## Graphing library sizes.
## Graphing a boxplot.
## Graphing a correlation heatmap.
## Graphing a standard median correlation.
## Performing correlation.
## Graphing a distance heatmap.
## Graphing a standard median distance.
## Performing distance.
## Graphing a PCA plot.
## Graphing a T-SNE plot.
## Plotting a density plot.
## Plotting the representation of the top-n genes.
## Printing a color to condition legend.
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
info <- pca_information(hs_biopsy_norm,
expt_factors=c("condition","batch","time","rnaqcpassed","rnangul"))
## More shallow curves in these plots suggest more genes in this principle component.
## The u and v components of SVD have only 4 columns, but the list of factors is 5 long.
## Therefore, only searching for 4 PCs.
We set up some raw and normalized metrics above, now lets print them.
hs_biopsy_metrics$libsize
hs_biopsy_metrics$pcaplot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
hs_biopsy_metrics$corheat
hs_biopsy_metrics$disheat
hs_biopsynorm_metrics$pcaplot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
hs_biopsynorm_metrics$tsneplot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
hs_biopsynorm_metrics$corheat
hs_biopsynorm_metrics$disheat
info$pca_cor
## PC1 PC2 PC3 PC4
## condition 0.47749 -0.01659 0.75286 -0.45269
## batch 0.47125 -0.71043 0.14568 -0.05035
## time 0.06693 -0.36733 0.80537 -0.46040
## rnaqcpassed 0.27960 0.95764 -0.01728 -0.06678
## rnangul -0.58879 0.59724 -0.05961 0.23292
info$anova_fstat_heatmap
Then see if anything jumps out that makes sense.
hs_biopsy_prepost <- set_expt_conditions(expt=hs_biopsy, fact="time")
hs_biopsy_prepost_norm <- normalize_expt(hs_biopsy_prepost, transform="log2", filter=TRUE, batch="fsva")
## This function will replace the expt$expressionset slot with:
## log2(fsva(hpgl(data)))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: hpgl
## Removing 36873 low-count genes (14168 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: transforming the data with log2.
## transform_counts: Found 146 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with fsva.
## Note to self: If you get an error like 'x contains missing values'; I think this
## means that the data has too many 0's and needs to have a better low-count filter applied.
## batch_counts: Before batch correction, 146 entries are >= 0.
## batch_counts: Using sva::fsva for batch correction.
## The number of elements which are < 0 after batch correction is: 64
## The variable low_to_zero sets whether to change <0 values to 0 and is: FALSE
hs_biopsy_prepost_metrics <- graph_metrics(hs_biopsy_prepost_norm)
## Graphing number of non-zero genes with respect to CPM by library.
## Graphing library sizes.
## Graphing a boxplot.
## Graphing a correlation heatmap.
## Graphing a standard median correlation.
## Performing correlation.
## Graphing a distance heatmap.
## Graphing a standard median distance.
## Performing distance.
## Graphing a PCA plot.
## Graphing a T-SNE plot.
## Plotting a density plot.
## Plotting the representation of the top-n genes.
## Printing a color to condition legend.
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
hs_biopsy_prepost_metrics$pcaplot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
hs_biopsy_prepost_norm <- normalize_expt(hs_biopsy_prepost_norm, norm="quant", conver="cpm")
## This function will replace the expt$expressionset slot with:
## cpm(quant(data))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Filter is false, this should likely be set to something, good
## choices include cbcb, kofa, pofa (anything but FALSE). If you want this to
## stay FALSE, keep in mind that if other normalizations are performed, then the
## resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: not doing count filtering.
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
hs_biopsy_prepost_metrics <- graph_metrics(hs_biopsy_prepost_norm)
## Graphing number of non-zero genes with respect to CPM by library.
## Graphing library sizes.
## Graphing a boxplot.
## Graphing a correlation heatmap.
## Graphing a standard median correlation.
## Performing correlation.
## Graphing a distance heatmap.
## Graphing a standard median distance.
## Performing distance.
## Graphing a PCA plot.
## Graphing a T-SNE plot.
## Plotting a density plot.
## Plotting the representation of the top-n genes.
## Printing a color to condition legend.
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
hs_biopsy_prepost_metrics$tsneplot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
hs_biopsy_prepost <- set_expt_conditions(expt=hs_biopsy, fact="state")
hs_biopsy_prepost_norm <- normalize_expt(hs_biopsy_prepost, transform="log2", filter=TRUE, batch="fsva")
## This function will replace the expt$expressionset slot with:
## log2(fsva(hpgl(data)))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: hpgl
## Removing 36873 low-count genes (14168 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: transforming the data with log2.
## transform_counts: Found 146 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with fsva.
## Note to self: If you get an error like 'x contains missing values'; I think this
## means that the data has too many 0's and needs to have a better low-count filter applied.
## batch_counts: Before batch correction, 146 entries are >= 0.
## batch_counts: Using sva::fsva for batch correction.
## The number of elements which are < 0 after batch correction is: 54
## The variable low_to_zero sets whether to change <0 values to 0 and is: FALSE
hs_biopsy_prepost_metrics <- graph_metrics(hs_biopsy_prepost_norm)
## Graphing number of non-zero genes with respect to CPM by library.
## Graphing library sizes.
## Graphing a boxplot.
## Graphing a correlation heatmap.
## Graphing a standard median correlation.
## Performing correlation.
## Graphing a distance heatmap.
## Graphing a standard median distance.
## Performing distance.
## Graphing a PCA plot.
## Graphing a T-SNE plot.
## Plotting a density plot.
## Plotting the representation of the top-n genes.
## Printing a color to condition legend.
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
hs_biopsy_prepost_metrics$pcaplot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
hs_biopsy_prepost_norm <- normalize_expt(hs_biopsy_prepost_norm, norm="quant", conver="cpm")
## This function will replace the expt$expressionset slot with:
## cpm(quant(data))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Filter is false, this should likely be set to something, good
## choices include cbcb, kofa, pofa (anything but FALSE). If you want this to
## stay FALSE, keep in mind that if other normalizations are performed, then the
## resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: not doing count filtering.
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
hs_biopsy_prepost_metrics <- graph_metrics(hs_biopsy_prepost_norm)
## Graphing number of non-zero genes with respect to CPM by library.
## Graphing library sizes.
## Graphing a boxplot.
## Graphing a correlation heatmap.
## Graphing a standard median correlation.
## Performing correlation.
## Graphing a distance heatmap.
## Graphing a standard median distance.
## Performing distance.
## Graphing a PCA plot.
## Graphing a T-SNE plot.
## Plotting a density plot.
## Plotting the representation of the top-n genes.
## Printing a color to condition legend.
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
hs_biopsy_prepost_metrics$pcaplot
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
if (!isTRUE(get0("skip_load"))) {
pander::pander(sessionInfo())
message(paste0("This is hpgltools commit: ", get_git_commit()))
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
tmp <- sm(saveme(filename=this_save))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 2a0661d6e37f8a3d8831eb3bbd6347c0d9c4b3b7
## R> packrat::restore()
## This is hpgltools commit: Thu Mar 29 16:59:07 2018 -0400: 2a0661d6e37f8a3d8831eb3bbd6347c0d9c4b3b7
## Saving to 02_estimation_biopsy-v20171110.rda.xz