## Let us see what we can learn about our mystery samples!
# Build an expt object for reads quantified against the Esmeraldo-like
# haplotype; file_column selects the sample-sheet column holding each
# sample's count-file path (salmon outputs, read via tximport per the log).
esmer <- create_expt("sample_sheets/all_samples.xlsx", file_column="esmerfile")
## Reading the sample metadata.
## The sample definitions comprises: 7 rows(samples) and 7 columns(metadata fields).
## Reading count tables.
## Reading salmon data with tximport.
## Finished reading count tables.
## Matched 10473 annotations and counts.
## Bringing together the count matrix and gene information.
# Same sample sheet, but counts quantified against the non-Esmeraldo
# haplotype (note: 10958 genes matched vs. 10473 for esmer above).
nonesmer <- create_expt("sample_sheets/all_samples.xlsx", file_column="nonesmerfile")
## Reading the sample metadata.
## The sample definitions comprises: 7 rows(samples) and 7 columns(metadata fields).
## Reading count tables.
## Reading salmon data with tximport.
## Finished reading count tables.
## Matched 10958 annotations and counts.
## Bringing together the count matrix and gene information.
# Counts for reads assigned to neither haplotype — a much smaller set
# (2507 annotations matched).
unassigned <- create_expt("sample_sheets/all_samples.xlsx", file_column="unassignedfile")
## Reading the sample metadata.
## The sample definitions comprises: 7 rows(samples) and 7 columns(metadata fields).
## Reading count tables.
## Reading salmon data with tximport.
## Finished reading count tables.
## Matched 2507 annotations and counts.
## Bringing together the count matrix and gene information.
# Counts against the full T. cruzi target set, this time from hisat2
# count tables (read with read.table(), not tximport — see the log).
# NOTE(review): 'all' masks base::all() in the calling environment;
# a rename (e.g. all_expt) would be safer, but is left as-is because
# later chunks may reference this name.
all <- create_expt("sample_sheets/all_samples.xlsx", file_column="allhisatfile")
## Reading the sample metadata.
## The sample definitions comprises: 7 rows(samples) and 7 columns(metadata fields).
## Reading count tables.
## Reading count tables with read.table().
## /cbcb/nelsayed-scratch/atb/rnaseq/tcruzi_2019/preprocessing/hpgl1168/outputs/hisat2_tcruzi_all/forward.count.xz contains 25114 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/tcruzi_2019/preprocessing/hpgl1169/outputs/hisat2_tcruzi_all/forward.count.xz contains 25114 rows and merges to 25114 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/tcruzi_2019/preprocessing/hpgl1170/outputs/hisat2_tcruzi_all/forward.count.xz contains 25114 rows and merges to 25114 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/tcruzi_2019/preprocessing/hpgl1171/outputs/hisat2_tcruzi_all/forward.count.xz contains 25114 rows and merges to 25114 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/tcruzi_2019/preprocessing/hpgl1172/outputs/hisat2_tcruzi_all/forward.count.xz contains 25114 rows and merges to 25114 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/tcruzi_2019/preprocessing/hpgl1173/outputs/hisat2_tcruzi_all/forward.count.xz contains 25114 rows and merges to 25114 rows.
## /cbcb/nelsayed-scratch/atb/rnaseq/tcruzi_2019/preprocessing/hpgl1174/outputs/hisat2_tcruzi_all/forward.count.xz contains 25114 rows and merges to 25114 rows.
## Finished reading count tables.
## Matched 25109 annotations and counts.
## Bringing together the count matrix and gene information.
# Write a versioned xlsx report of the esmer data (raw + normalized reads,
# plots, per-factor medians). 'ver' is assumed to be set in an earlier
# chunk — TODO confirm.
# The zip deprecation note below is emitted by the xlsx-writing backend
# (presumably openxlsx) and is harmless.
esmer_write <- write_expt(esmer, excel=glue::glue("excel/esmer-v{ver}.xlsx"))
## Writing the first sheet, containing a legend and some summary data.
## Writing the raw reads.
## Graphing the raw reads.
## Writing the normalized reads.
## Line 2336 of expt.r, removed normalization of pca plotting.
## Graphing the normalized reads.
## Writing the median reads by factor.
## The factor wt has 3 rows.
## The factor unknown has 4 rows.
## Note: zip::zip() is deprecated, please use zip::zipr() instead
# Normalize the esmer data for visualization: cbcb low-count filter,
# quantile normalization, cpm conversion, then log2 transform (applied in
# the step order shown in the log below). This transformed copy is for
# plotting/PCA; DE tools get raw counts (see all_filt below).
esmer_norm <- normalize_expt(esmer, norm="quant", transform="log2", convert="cpm", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 493 low-count genes (9980 remaining).
## Step 2: normalizing the data with quant.
## Using normalize.quantiles.robust due to a thread error in preprocessCore.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 707 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
# PCA of the normalized esmeraldo-only data; display the plot element
# explicitly (exact [[ extraction instead of partially-matching $).
esmer_pca <- plot_pca(esmer_norm)
print(esmer_pca[["plot"]])
# Same visualization normalization (filter/quant/cpm/log2) for the
# non-Esmeraldo data.
non_norm <- normalize_expt(nonesmer, norm="quant", transform="log2", convert="cpm", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 526 low-count genes (10432 remaining).
## Step 2: normalizing the data with quant.
## Using normalize.quantiles.robust due to a thread error in preprocessCore.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 563 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
# PCA of the normalized non-esmeraldo data; display the plot element
# explicitly (exact [[ extraction instead of partially-matching $).
non_pca <- plot_pca(non_norm)
print(non_pca[["plot"]])
# Visualization normalization for the unassigned reads. Note the filter
# removes 0 of the 2507 genes, and 2669 zero values are shifted by +1
# before log2 — this is a sparse dataset.
unas_norm <- normalize_expt(unassigned, norm="quant", transform="log2", convert="cpm", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 0 low-count genes (2507 remaining).
## Step 2: normalizing the data with quant.
## Using normalize.quantiles.robust due to a thread error in preprocessCore.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 2669 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
# PCA of the normalized unassigned-read data.
# FIX: the original called plot_pca(non_norm), re-plotting the
# non-esmeraldo data under the unas_pca name. Every other *_pca in this
# file plots its own normalized expt (esmer_pca <- esmer_norm,
# non_pca <- non_norm, all_pca <- all_norm), so this must use unas_norm.
unas_pca <- plot_pca(unas_norm)
unas_pca$plot
# Quick differential-expression pass on the raw esmer expt; all_pairwise
# applies its own normalization internally (same filter/quant/cpm/log2
# pipeline, per the log) and model_batch=FALSE keeps batch out of the
# model ("Assuming no batch in model for testing pca").
quick <- all_pairwise(esmer, model_batch=FALSE)
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 493 low-count genes (9980 remaining).
## Step 2: normalizing the data with quant.
## Using normalize.quantiles.robust due to a thread error in preprocessCore.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 707 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
## Plotting a PCA before surrogates/batch inclusion.
## Assuming no batch in model for testing pca.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
# Merge the per-method DE results into combined tables; only one contrast
# exists here (wt_vs_unknown). No excel= argument, so nothing is written
# to disk for this quick look.
quick_tables <- combine_de_tables(quick)
## Writing a legend of columns.
## Working on table 1/1: wt_vs_unknown
## 20181210 a pthread error in normalize.quantiles leads me to robust.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
# Pull the first (and only) contrast table and sort its rows by ascending
# DESeq2 log2 fold change so the most down-regulated genes come first.
test_table <- quick_tables[["data"]][[1]]
test_idx <- order(test_table[["deseq_logfc"]])
test_table <- test_table[test_idx, ]
# Visualization normalization (filter/quant/cpm/log2) for the full
# hisat2-based dataset; the cbcb filter drops 5600 of 25109 genes.
all_norm <- normalize_expt(all, norm="quant", transform="log2", convert="cpm", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 5600 low-count genes (19509 remaining).
## Step 2: normalizing the data with quant.
## Using normalize.quantiles.robust due to a thread error in preprocessCore.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 1163 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
# PCA of the normalized all-target (hisat2) data; display the plot element
# explicitly (exact [[ extraction instead of partially-matching $).
all_pca <- plot_pca(all_norm)
print(all_pca[["plot"]])
# Filter-only pass: drop low-count genes but keep the counts raw
# (unnormalized, unconverted, untransformed) — the log below notes this is
# what DESeq requires. This is the input handed to all_pairwise next.
all_filt <- normalize_expt(all, filter=TRUE)
## This function will replace the expt$expressionset slot with:
## cbcb(data)
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 5600 low-count genes (19509 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
# DE analyses on the pre-filtered full dataset, again without batch in the
# model. all_pairwise re-runs its internal normalization; its filter step
# removes 0 genes because all_filt was already filtered (see the log).
all_de <- all_pairwise(all_filt, model_batch=FALSE)
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 0 low-count genes (19509 remaining).
## Step 2: normalizing the data with quant.
## Using normalize.quantiles.robust due to a thread error in preprocessCore.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 1163 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
## Plotting a PCA before surrogates/batch inclusion.
## Assuming no batch in model for testing pca.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
# Combine the DE tables for the full dataset and write them to an xlsx
# report (any existing file is deleted first, per the log). The three
# method coefficients below (limma/EdgeR/DESeq2, R^2 ~0.95) indicate the
# methods broadly agree on this contrast.
all_tables <- combine_de_tables(all_de, excel="excel/all_tables.xlsx")
## Deleting the file excel/all_tables.xlsx before writing the tables.
## Writing a legend of columns.
## Printing a pca plot before/after surrogates/batch estimation.
## Working on table 1/1: wt_vs_unknown
## 20181210 a pthread error in normalize.quantiles leads me to robust.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Adding venn plots for wt_vs_unknown.
## Limma expression coefficients for wt_vs_unknown; R^2: 0.954; equation: y = 0.969x + 0.155
## Edger expression coefficients for wt_vs_unknown; R^2: 0.955; equation: y = 0.951x + 0.309
## DESeq2 expression coefficients for wt_vs_unknown; R^2: 0.955; equation: y = 0.951x + 0.301
## Writing summary information.
## Attempting to add the comparison plot to pairwise_summary at row: 23 and column: 1
## Performing save of the workbook.
# End-of-chunk bookkeeping: print session info, report the hpgltools git
# commit, and save the workspace — skipped when 'skip_load' is set (e.g.
# when this chunk is re-sourced on top of an existing session).
# NOTE(review): 'savefile' and the helpers get_git_commit()/saveme()/sm()
# are assumed to come from hpgltools / an earlier chunk — confirm they are
# in scope when running this file standalone.
if (!isTRUE(get0("skip_load"))) {
pander::pander(sessionInfo())
message(paste0("This is hpgltools commit: ", get_git_commit()))
message(paste0("Saving to ", savefile))
# sm() silences the save; saveme() writes the compressed .rda.xz image.
tmp <- sm(saveme(filename=savefile))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 801f62ba31ba525c6db9453255c6262fb1162d5c
## This is hpgltools commit: Thu Feb 21 14:03:11 2019 -0500: 801f62ba31ba525c6db9453255c6262fb1162d5c
## Saving to 01_annotation_v20190320.rda.xz