hs_annot <- load_biomart_annotations()$annotation
## The biomart annotations file already exists, loading from it.
rownames(hs_annot) <- make.names(
paste0(hs_annot[["ensembl_transcript_id"]], ".",
hs_annot[["transcript_version"]]),
unique=TRUE)
hs_tx_gene <- hs_annot[, c("ensembl_gene_id", "ensembl_transcript_id")]
hs_tx_gene[["id"]] <- rownames(hs_tx_gene)
hs_tx_gene <- hs_tx_gene[, c("id", "ensembl_gene_id")]
new_hs_annot <- hs_annot
rownames(new_hs_annot) <- make.names(hs_annot[["ensembl_gene_id"]], unique=TRUE)
lots <- create_expt("sample_sheets/many_samples.xlsx",
gene_info=new_hs_annot,
tx_gene_map=hs_tx_gene)
## Reading the sample metadata.
## The sample definitions comprises: 285 rows(samples) and 31 columns(metadata fields).
## Reading count tables.
## Using the transcript to gene mapping.
## Reading salmon data with tximport.
## Finished reading count tables.
## Matched 19629 annotations and counts.
## Bringing together the count matrix and gene information.
## The mapped IDs are not the rownames of your gene information, changing them now.
## Some annotations were lost in merging, setting them to 'undefined'.
What is the ‘signature’ of an infected macrophage? ** If one had to pick 10-20 marker genes which characterize an infected macrophage (up or down compared to uninfected), what would they be? ** If given a new sample that is unknown, what can we use to tell if it is infected or uninfected?
Can we make ‘better signatures’? ** Presumably signatures which define uninfected vs. infected.
initial <- plot_libsize(lots)
initial$plot
lots_met <- graph_metrics(lots)
## Graphing number of non-zero genes with respect to CPM by library.
## Graphing library sizes.
## Graphing a boxplot.
## This data will benefit from being displayed on the log scale.
## If this is not desired, set scale='raw'
## Some entries are 0. We are on log scale, adding 1 to the data.
## Changed 1284413 zero count features.
## Graphing a correlation heatmap.
## Graphing a standard median correlation.
## Performing correlation.
## Graphing a distance heatmap.
## Graphing a standard median distance.
## Performing distance.
## Graphing a PCA plot.
## Graphing a T-SNE plot.
## Plotting a density plot.
## This data will benefit from being displayed on the log scale.
## If this is not desired, set scale='raw'
## Some entries are 0. We are on log scale, setting them to 0.5.
## Changed 1284413 zero count features.
## Plotting a CV plot.
## Naively calculating coefficient of variation/dispersion with respect to condition.
## Finished calculating dispersion estimates.
## Plotting the representation of the top-n genes.
## Plotting the expression of the top-n PC loaded genes.
## Printing a color to condition legend.
lots_norm <- normalize_expt(lots, norm="quant", filter=TRUE, convert="cpm",
transform="log2")
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(hpgl(data))))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: hpgl
## Removing 3762 low-count genes (15867 remaining).
## Step 2: normalizing the data with quant.
## Using normalize.quantiles.robust due to a thread error in preprocessCore.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 11575 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
norm_met <- graph_metrics(lots_norm)
## Graphing number of non-zero genes with respect to CPM by library.
## Graphing library sizes.
## Graphing a boxplot.
## Graphing a correlation heatmap.
## Graphing a standard median correlation.
## Performing correlation.
## Graphing a distance heatmap.
## Graphing a standard median distance.
## Performing distance.
## Graphing a PCA plot.
## Graphing a T-SNE plot.
## Plotting a density plot.
## Plotting a CV plot.
## Naively calculating coefficient of variation/dispersion with respect to condition.
## Finished calculating dispersion estimates.
## Plotting the representation of the top-n genes.
## Plotting the expression of the top-n PC loaded genes.
## Printing a color to condition legend.
test_factors <- pca_information(lots_norm, num_components=4, plot_pcas=TRUE)
## More shallow curves in these plots suggest more genes in this principle component.
lots_pca <- plot_pca(lots_norm, plot_label=FALSE)
## Not putting labels on the plot.
lots_pca$plot
no_biopsy <- subset_expt(lots, subset="celltype!='skin'")
## There were 266, now there are 224 samples.
no_biopsy <- set_expt_conditions(no_biopsy, fact="infectstate")
macrophages <- subset_expt(lots, subset="celltype=='macrophage'")
## There were 266, now there are 203 samples.
uninf <- subset_expt(macrophages, subset="state=='uninfected'")
## There were 203, now there are 74 samples.
uninf_norm <- normalize_expt(uninf, convert="cpm", norm="quant", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## cpm(quant(hpgl(data)))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: hpgl
## Removing 6428 low-count genes (13201 remaining).
## Step 2: normalizing the data with quant.
## Using normalize.quantiles.robust due to a thread error in preprocessCore.
## Step 3: converting the data with cpm.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
test_uninf <- simple_xcell(uninf_norm, column="cds_length")
## The biomart annotations file already exists, loading from it.
## xCell strongly perfers rpkm values, re-normalizing now.
## This function will replace the expt$expressionset slot with:
## rpkm(data)
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Filter is false, this should likely be set to something, good
## choices include cbcb, kofa, pofa (anything but FALSE). If you want this to
## stay FALSE, keep in mind that if other normalizations are performed, then the
## resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: not doing count filtering.
## Step 2: not normalizing the data.
## Step 3: converting the data with rpkm.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
## Loading required namespace: xCell
test_uninf$heatmap
macrophages_norm <- normalize_expt(macrophages, convert="rpkm", column="cds_length", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## rpkm(hpgl(data))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: hpgl
## Removing 5595 low-count genes (14034 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with rpkm.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
na_idx <- is.na(exprs(macrophages_norm))
macrophages_norm[na_idx] <- 0
mtrx <- exprs(macrophages_norm)
mtrx[na_idx] <- 0
Biobase::exprs(macrophages_norm$expressionset) <- mtrx
test_macrophages <- simple_xcell(macrophages, column="cds_length", filter=TRUE, label_size=0.3, width=15)
## The biomart annotations file already exists, loading from it.
## xCell strongly perfers rpkm values, re-normalizing now.
## This function will replace the expt$expressionset slot with:
## rpkm(hpgl(data))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: hpgl
## Removing 5595 low-count genes (14034 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with rpkm.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
pp(file="images/macrophage_xcell_heatmap.pdf", image=test_macrophages$heatmap)
## Writing the image to: images/macrophage_xcell_heatmap.pdf and calling dev.off().
macr_infuninf <- set_expt_conditions(macrophages, fact="infectstate")
macr_norm <- normalize_expt(macr_infuninf, filter=TRUE, transform="log2",
convert="cpm", batch="svaseq")
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(hpgl(data))))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: hpgl
## Removing 5595 low-count genes (14034 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 193530 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with svaseq.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## batch_counts: Before batch correction, 373431 entries 0<=x<1.
## batch_counts: Before batch correction, 193530 entries are >= 0.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 1 entries are 0<=x<1.
## batch_counts: Before batch/surrogate estimation, 193530 entries are x<=0.
## The be method chose 19 surrogate variable(s).
## Attempting svaseq estimation with 19 surrogates.
## The number of elements which are < 0 after batch correction is: 37156
## The variable low_to_zero sets whether to change <0 values to 0 and is: FALSE
macr_pca <- plot_pca(macr_norm, plot_label=FALSE)
## Not putting labels on the plot.
threed <- make_3d_pca(macr_pca)
threed$plot
pp(file="images/simple_macrophages.png", image=macr_pca$plot)
## Writing the image to: images/simple_macrophages.png and calling dev.off().
macr_tsne <- plot_tsne(macr_norm, plot_label=FALSE, iterations=5000)
## Not putting labels on the plot.
macr_tsne$plot
no_norm <- normalize_expt(no_biopsy, filter=TRUE, transform="log2",
convert="cpm", batch="svaseq")
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(hpgl(data))))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: hpgl
## Removing 5272 low-count genes (14357 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 234757 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with svaseq.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## batch_counts: Before batch correction, 438636 entries 0<=x<1.
## batch_counts: Before batch correction, 234757 entries are >= 0.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 1 entries are 0<=x<1.
## batch_counts: Before batch/surrogate estimation, 234757 entries are x<=0.
## The be method chose 20 surrogate variable(s).
## Attempting svaseq estimation with 20 surrogates.
## The number of elements which are < 0 after batch correction is: 45136
## The variable low_to_zero sets whether to change <0 values to 0 and is: FALSE
no_pca <- plot_pca(no_norm, plot_label=FALSE, x_pc=1, y_pc=2, cis=FALSE)
## Not putting labels on the plot.
no_pca$plot
ggplt <- ggplotly(no_pca$plot)
## Error in ggplotly(no_pca$plot): could not find function "ggplotly"
widget <- htmlwidgets::saveWidget(as_widget(ggplt), "no_biopsy_svaseq_pca.html")
## Error in as_widget(ggplt): could not find function "as_widget"
## Get the top-1000 genes from cpm normalized data.
## This is not actually the top-1000 genes, it removes samples with coverage lower than x.
high_cov <- subset_expt(no_biopsy, coverage=10000000)
## There were 224, now there are 212 samples.
topsva <- normalize_expt(high_cov, filter=TRUE, transform="log2", batch="svaseq", num_surrogates=4)
## This function will replace the expt$expressionset slot with:
## log2(svaseq(hpgl(data)))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: hpgl
## Removing 5543 low-count genes (14086 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: transforming the data with log2.
## transform_counts: Found 168888 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with svaseq.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## batch_counts: Before batch correction, 4813 entries 0<=x<1.
## batch_counts: Before batch correction, 168888 entries are >= 0.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 168888 entries are x<=0.
## The be method chose 20 surrogate variable(s).
## Attempting svaseq estimation with 20 surrogates.
## The number of elements which are < 0 after batch correction is: 27723
## The variable low_to_zero sets whether to change <0 values to 0 and is: FALSE
toppca <- plot_pca(topsva, plot_labels=FALSE, cis=FALSE)
## Not putting labels on the plot.
toppca$plot
topn_expt <- semantic_expt_filter(no_biopsy, topn=1000)
topn_norm <- normalize_expt(topn_expt, norm="quant", convert="cpm", transform="log2", batch="svaseq")
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(quant(data))))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Filter is false, this should likely be set to something, good
## choices include cbcb, kofa, pofa (anything but FALSE). If you want this to
## stay FALSE, keep in mind that if other normalizations are performed, then the
## resulting libsizes are likely to be strange (potentially negative!)
## Warning in normalize_expt(topn_expt, norm = "quant", convert = "cpm",
## transform = "log2", : Quantile normalization and sva do not always play
## well together.
## Step 1: not doing count filtering.
## Step 2: normalizing the data with quant.
## Using normalize.quantiles.robust due to a thread error in preprocessCore.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## Step 5: doing batch correction with svaseq.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## Passing off to all_adjusters.
## The be method chose 14 surrogate variable(s).
## Attempting svaseq estimation with 14 surrogates.
plot_pca(topn_norm, plot_labels=FALSE)$plot
## Not putting labels on the plot.
tt <- plot_pca_genes(topn_norm, pc_method="tsne")
## Warning in if (plot_labels == FALSE) {: the condition has length > 1 and
## only the first element will be used
## Warning in if (plot_labels == "normal") {: the condition has length > 1 and
## only the first element will be used
## Warning in if (plot_labels == "repel") {: the condition has length > 1 and
## only the first element will be used
## Warning in if (plot_labels == "dlsmart") {: the condition has length > 1
## and only the first element will be used
tt <- plot_pca_genes(topn_norm, pc_method="uwot")$plot
## Warning in if (plot_labels == FALSE) {: the condition has length > 1 and
## only the first element will be used
## Warning in if (plot_labels == "normal") {: the condition has length > 1 and
## only the first element will be used
## Warning in if (plot_labels == "repel") {: the condition has length > 1 and
## only the first element will be used
## Warning in if (plot_labels == "dlsmart") {: the condition has length > 1
## and only the first element will be used
ggplotly(tt)
## Error in ggplotly(tt): could not find function "ggplotly"
tt <- plot_pca_genes(topn_norm)
## Warning in if (plot_labels == FALSE) {: the condition has length > 1 and
## only the first element will be used
## Warning in if (plot_labels == "normal") {: the condition has length > 1 and
## only the first element will be used
## Warning in if (plot_labels == "repel") {: the condition has length > 1 and
## only the first element will be used
## Warning in if (plot_labels == "dlsmart") {: the condition has length > 1
## and only the first element will be used
no_batch_norm <- normalize_expt(no_biopsy, filter=TRUE, convert="cpm",
transform="log2", batch="svaseq", num_surrogates=3)
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(hpgl(data))))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: hpgl
## Removing 5272 low-count genes (14357 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 234757 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with svaseq.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## batch_counts: Before batch correction, 438636 entries 0<=x<1.
## batch_counts: Before batch correction, 234757 entries are >= 0.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 1 entries are 0<=x<1.
## batch_counts: Before batch/surrogate estimation, 234757 entries are x<=0.
## The be method chose 20 surrogate variable(s).
## Attempting svaseq estimation with 20 surrogates.
## The number of elements which are < 0 after batch correction is: 45136
## The variable low_to_zero sets whether to change <0 values to 0 and is: FALSE
noba <- plot_pca(no_batch_norm, plot_labels=FALSE)
## Not putting labels on the plot.
noba$plot
no_test <- pca_information(no_norm,
expt_factors=c("condition", "batch", "expttime"),
num_components=5, plot_pcas=TRUE)
## More shallow curves in these plots suggest more genes in this principle component.
no_test$anova_neglogp_heatmap
lots_tsne <- plot_tsne(lots_norm, plot_label=FALSE)
## Not putting labels on the plot.
lots_tsne$plot
##norm_met <- graph_metrics(lots_norm)
##norm_met$pcaplot
gsva_big <- simple_gsva(lots, current_id="ENSEMBL")
## gsva requires the annotation field to be filled in.
## Converting the rownames() of the expressionset to ENTREZID.
## Before conversion, the expressionset has 19629 entries.
## After conversion, the expressionset has 19054 entries.
## Warning in .local(expr, gset.idx.list, ...): 233 genes with constant
## expression values throuhgout the samples.
## Warning in .local(expr, gset.idx.list, ...): Since argument method!
## ="ssgsea", genes with constant expression values are discarded.
## Mapping identifiers between gene sets and feature names
## Estimating GSVA scores for 3000 gene sets.
## Computing observed enrichment scores
## Estimating ECDFs with Poisson kernels
## Using parallel with 8 cores
##
|
| | 0%
gsva_expt <- gsva_big$expt
gsva_cor <- plot_corheat(gsva_expt)
pp(file="images/gsva_correlation.pdf", image=gsva_cor$plot)
## Writing the image to: images/gsva_correlation.pdf and calling dev.off().
test_xcell <- simple_xcell(lots, column="cds_length")
## The biomart annotations file already exists, loading from it.
## xCell strongly perfers rpkm values, re-normalizing now.
## This function will replace the expt$expressionset slot with:
## rpkm(data)
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Filter is false, this should likely be set to something, good
## choices include cbcb, kofa, pofa (anything but FALSE). If you want this to
## stay FALSE, keep in mind that if other normalizations are performed, then the
## resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: not doing count filtering.
## Step 2: not normalizing the data.
## Step 3: converting the data with rpkm.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
test_xcell$heatmap
if (!isTRUE(get0("skip_load"))) {
pander::pander(sessionInfo())
message(paste0("This is hpgltools commit: ", get_git_commit()))
## message(paste0("Saving to ", savefile))
## tmp <- sm(saveme(filename=savefile))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset b0e11455e9f02944597c1bb5027f8ecbeb14201b
## This is hpgltools commit: Fri Jan 18 13:42:11 2019 -0500: b0e11455e9f02944597c1bb5027f8ecbeb14201b