1 Lots of samples!

1.1 Load all the data

hs_annot <- load_biomart_annotations()$annotation
## The biomart annotations file already exists, loading from it.
rownames(hs_annot) <- make.names(
  paste0(hs_annot[["ensembl_transcript_id"]], ".",
         hs_annot[["transcript_version"]]),
  unique=TRUE)
hs_tx_gene <- hs_annot[, c("ensembl_gene_id", "ensembl_transcript_id")]
hs_tx_gene[["id"]] <- rownames(hs_tx_gene)
hs_tx_gene <- hs_tx_gene[, c("id", "ensembl_gene_id")]
new_hs_annot <- hs_annot
rownames(new_hs_annot) <- make.names(hs_annot[["ensembl_gene_id"]], unique=TRUE)

lots <- create_expt("sample_sheets/many_samples.xlsx",
                    gene_info=new_hs_annot,
                    tx_gene_map=hs_tx_gene)
## Reading the sample metadata.
## The sample definitions comprises: 285 rows(samples) and 31 columns(metadata fields).
## Reading count tables.
## Using the transcript to gene mapping.
## Reading salmon data with tximport.
## Finished reading count tables.
## Matched 19629 annotations and counts.
## Bringing together the count matrix and gene information.
## The mapped IDs are not the rownames of your gene information, changing them now.
## Some annotations were lost in merging, setting them to 'undefined'.

1.2 Queries from Najib:

  • What are the characteristics of an infected macrophage?
  • What is the ‘signature’ of an infected macrophage?
      • If one had to pick 10-20 marker genes which characterize an infected
        macrophage (up or down compared to uninfected), what would they be?
      • If given a new sample of unknown state, what can we use to tell whether
        it is infected or uninfected? (A hedged scoring sketch follows this
        list.)

  • What makes ADC light up?
  • Can we make ‘better signatures’?
      • Presumably signatures which distinguish uninfected from infected.
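One way to start on the signature questions is to rank an infected-vs-uninfected differential expression table by effect size and score unknown samples with the resulting small gene set. The lines below are a hedged, hypothetical sketch only; de_table and new_sample_cpm are placeholders that do not exist in this workflow yet.

## Hypothetical sketch: keep the 20 strongest significant genes from a DE table
## (columns logFC and adj.P.Val assumed), then compute a signed score for an
## unknown sample's named log2(CPM) vector.
sig_candidates <- de_table[de_table[["adj.P.Val"]] < 0.05, ]
sig_candidates <- sig_candidates[order(abs(sig_candidates[["logFC"]]), decreasing=TRUE), ]
signature <- head(sig_candidates, n=20)
## Each marker contributes its expression weighted by the sign of its fold
## change; under these assumptions, higher scores look more 'infected'.
shared <- intersect(rownames(signature), names(new_sample_cpm))
signature_score <- sum(sign(signature[shared, "logFC"]) * new_sample_cpm[shared])
signature_score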

1.3 Generate initial plots

initial <- plot_libsize(lots)
initial$plot
lots_met <- graph_metrics(lots)
## Graphing number of non-zero genes with respect to CPM by library.
## Graphing library sizes.
## Graphing a boxplot.
## This data will benefit from being displayed on the log scale.
## If this is not desired, set scale='raw'
## Some entries are 0.  We are on log scale, adding 1 to the data.
## Changed 1284413 zero count features.
## Graphing a correlation heatmap.
## Graphing a standard median correlation.
## Performing correlation.
## Graphing a distance heatmap.
## Graphing a standard median distance.
## Performing distance.
## Graphing a PCA plot.
## Graphing a T-SNE plot.
## Plotting a density plot.
## This data will benefit from being displayed on the log scale.
## If this is not desired, set scale='raw'
## Some entries are 0.  We are on log scale, setting them to 0.5.
## Changed 1284413 zero count features.
## Plotting a CV plot.
## Naively calculating coefficient of variation/dispersion with respect to condition.
## Finished calculating dispersion estimates.
## Plotting the representation of the top-n genes.
## Plotting the expression of the top-n PC loaded genes.
## Printing a color to condition legend.
lots_norm <- normalize_expt(lots, norm="quant", filter=TRUE, convert="cpm",
                            transform="log2")
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(hpgl(data))))
## It backs up the current data into a slot named:
##  expt$backup_expressionset. It will also save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
##  when invoking limma.  The appropriate libsize is the non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: hpgl
## Removing 3762 low-count genes (15867 remaining).
## Step 2: normalizing the data with quant.
## Using normalize.quantiles.robust due to a thread error in preprocessCore.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 11575 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
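The messages above note where normalize_expt() backs up the per-step libsizes and that the non-log CPM libsize is the one limma should see. Retrieving them, using the slot names quoted verbatim in the log (assumed to be present on lots_norm), looks like:

## Pull out the backed-up libsizes named in the messages above.
norm_libsizes <- lots_norm$normalized$intermediate_counts$normalization$libsizes
best_libsize <- lots_norm$best_libsize
summary(norm_libsizes)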
norm_met <- graph_metrics(lots_norm)
## Graphing number of non-zero genes with respect to CPM by library.
## Graphing library sizes.
## Graphing a boxplot.
## Graphing a correlation heatmap.
## Graphing a standard median correlation.
## Performing correlation.
## Graphing a distance heatmap.
## Graphing a standard median distance.
## Performing distance.
## Graphing a PCA plot.
## Graphing a T-SNE plot.
## Plotting a density plot.
## Plotting a CV plot.
## Naively calculating coefficient of variation/dispersion with respect to condition.
## Finished calculating dispersion estimates.
## Plotting the representation of the top-n genes.
## Plotting the expression of the top-n PC loaded genes.
## Printing a color to condition legend.
test_factors <- pca_information(lots_norm, num_components=4, plot_pcas=TRUE)
## More shallow curves in these plots suggest more genes in this principle component.

1.4 Show initial plots

lots_pca <- plot_pca(lots_norm, plot_label=FALSE)
## Not putting labels on the plot.
lots_pca$plot

1.5 Make subsets

no_biopsy <- subset_expt(lots, subset="celltype!='skin'")
## There were 266, now there are 224 samples.
no_biopsy <- set_expt_conditions(no_biopsy, fact="infectstate")

macrophages <- subset_expt(lots, subset="celltype=='macrophage'")
## There were 266, now there are 203 samples.
uninf <- subset_expt(macrophages, subset="state=='uninfected'")
## There were 203, now there are 74 samples.

1.6 Run xCell

uninf_norm <- normalize_expt(uninf, convert="cpm", norm="quant", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## cpm(quant(hpgl(data)))
## It backs up the current data into a slot named:
##  expt$backup_expressionset. It will also save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
##  when invoking limma.  The appropriate libsize is the non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
##  some metrics are easier to see when the data is log2 transformed, but
##  EdgeR/DESeq do not accept transformed data.
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: hpgl
## Removing 6428 low-count genes (13201 remaining).
## Step 2: normalizing the data with quant.
## Using normalize.quantiles.robust due to a thread error in preprocessCore.
## Step 3: converting the data with cpm.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
test_uninf <- simple_xcell(uninf_norm, column="cds_length")
## The biomart annotations file already exists, loading from it.
## xCell strongly perfers rpkm values, re-normalizing now.
## This function will replace the expt$expressionset slot with:
## rpkm(data)
## It backs up the current data into a slot named:
##  expt$backup_expressionset. It will also save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
##  when invoking limma.  The appropriate libsize is the non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Filter is false, this should likely be set to something, good
##  choices include cbcb, kofa, pofa (anything but FALSE).  If you want this to
##  stay FALSE, keep in mind that if other normalizations are performed, then the
##  resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data in its current base format, keep in mind that
##  some metrics are easier to see when the data is log2 transformed, but
##  EdgeR/DESeq do not accept transformed data.
## Leaving the data unnormalized.  This is necessary for DESeq, but
##  EdgeR/limma might benefit from normalization.  Good choices include quantile,
##  size-factor, tmm, etc.
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: not doing count filtering.
## Step 2: not normalizing the data.
## Step 3: converting the data with rpkm.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
## Loading required namespace: xCell

test_uninf$heatmap
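As the log notes, xCell prefers rpkm, which simple_xcell() recomputes from the cds_length annotation column. For reference, rpkm is counts divided by library size in millions of reads and feature length in kilobases; a minimal manual sketch under those assumptions is below, where cds_lengths is a placeholder vector (in bp) matching the rows of the count matrix.

## Hedged sketch of the rpkm conversion performed above; exprs() is used on an
## expt elsewhere in this document and is assumed to return the count matrix.
counts <- exprs(uninf)
lib_millions <- colSums(counts) / 1e6
length_kb <- cds_lengths / 1000
manual_rpkm <- sweep(counts, 2, lib_millions, "/") / length_kb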

macrophages_norm <- normalize_expt(macrophages, convert="rpkm", column="cds_length", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## rpkm(hpgl(data))
## It backs up the current data into a slot named:
##  expt$backup_expressionset. It will also save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
##  when invoking limma.  The appropriate libsize is the non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
##  some metrics are easier to see when the data is log2 transformed, but
##  EdgeR/DESeq do not accept transformed data.
## Leaving the data unnormalized.  This is necessary for DESeq, but
##  EdgeR/limma might benefit from normalization.  Good choices include quantile,
##  size-factor, tmm, etc.
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: hpgl
## Removing 5595 low-count genes (14034 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with rpkm.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
## Replace any NA rpkm values with 0 on the expression matrix, then write it
## back into the underlying ExpressionSet.
mtrx <- exprs(macrophages_norm)
na_idx <- is.na(mtrx)
mtrx[na_idx] <- 0
Biobase::exprs(macrophages_norm$expressionset) <- mtrx
test_macrophages <- simple_xcell(macrophages, column="cds_length", filter=TRUE, label_size=0.3, width=15)
## The biomart annotations file already exists, loading from it.
## xCell strongly perfers rpkm values, re-normalizing now.
## This function will replace the expt$expressionset slot with:
## rpkm(hpgl(data))
## It backs up the current data into a slot named:
##  expt$backup_expressionset. It will also save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
##  when invoking limma.  The appropriate libsize is the non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
##  some metrics are easier to see when the data is log2 transformed, but
##  EdgeR/DESeq do not accept transformed data.
## Leaving the data unnormalized.  This is necessary for DESeq, but
##  EdgeR/limma might benefit from normalization.  Good choices include quantile,
##  size-factor, tmm, etc.
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: hpgl
## Removing 5595 low-count genes (14034 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with rpkm.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.

pp(file="images/macrophage_xcell_heatmap.pdf", image=test_macrophages$heatmap)
## Writing the image to: images/macrophage_xcell_heatmap.pdf and calling dev.off().

1.7 Poke subsets

macr_infuninf <- set_expt_conditions(macrophages, fact="infectstate")
macr_norm <- normalize_expt(macr_infuninf, filter=TRUE, transform="log2",
                            convert="cpm", batch="svaseq")
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(hpgl(data))))
## It backs up the current data into a slot named:
##  expt$backup_expressionset. It will also save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
##  when invoking limma.  The appropriate libsize is the non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Leaving the data unnormalized.  This is necessary for DESeq, but
##  EdgeR/limma might benefit from normalization.  Good choices include quantile,
##  size-factor, tmm, etc.
## Step 1: performing count filter with option: hpgl
## Removing 5595 low-count genes (14034 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 193530 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with svaseq.
## Note to self:  If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## batch_counts: Before batch correction, 373431 entries 0<=x<1.
## batch_counts: Before batch correction, 193530 entries are >= 0.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 1 entries are 0<=x<1.
## batch_counts: Before batch/surrogate estimation, 193530 entries are x<=0.
## The be method chose 19 surrogate variable(s).
## Attempting svaseq estimation with 19 surrogates.
## The number of elements which are < 0 after batch correction is: 37156
## The variable low_to_zero sets whether to change <0 values to 0 and is: FALSE
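svaseq left 37156 adjusted values below zero and low_to_zero is FALSE, so they stay negative. If one wanted to clamp them before plotting, a hedged sketch working on a copy of the expt (using the same exprs() write-back pattern as above) is:

## Hedged sketch: clamp negative post-svaseq values to 0 on a copy.
clamped <- macr_norm
mtrx <- exprs(clamped)
sum(mtrx < 0)  ## compare with the count reported in the log above
mtrx[mtrx < 0] <- 0
Biobase::exprs(clamped$expressionset) <- mtrx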
macr_pca <- plot_pca(macr_norm, plot_label=FALSE)
## Not putting labels on the plot.
threed <- make_3d_pca(macr_pca)
threed$plot
pp(file="images/simple_macrophages.png", image=macr_pca$plot)
## Writing the image to: images/simple_macrophages.png and calling dev.off().

macr_tsne <- plot_tsne(macr_norm, plot_label=FALSE, iterations=5000)
## Not putting labels on the plot.
macr_tsne$plot

no_norm <- normalize_expt(no_biopsy, filter=TRUE, transform="log2",
                          convert="cpm", batch="svaseq")
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(hpgl(data))))
## It backs up the current data into a slot named:
##  expt$backup_expressionset. It will also save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
##  when invoking limma.  The appropriate libsize is the non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Leaving the data unnormalized.  This is necessary for DESeq, but
##  EdgeR/limma might benefit from normalization.  Good choices include quantile,
##  size-factor, tmm, etc.
## Step 1: performing count filter with option: hpgl
## Removing 5272 low-count genes (14357 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 234757 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with svaseq.
## Note to self:  If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## batch_counts: Before batch correction, 438636 entries 0<=x<1.
## batch_counts: Before batch correction, 234757 entries are >= 0.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 1 entries are 0<=x<1.
## batch_counts: Before batch/surrogate estimation, 234757 entries are x<=0.
## The be method chose 20 surrogate variable(s).
## Attempting svaseq estimation with 20 surrogates.
## The number of elements which are < 0 after batch correction is: 45136
## The variable low_to_zero sets whether to change <0 values to 0 and is: FALSE
no_pca <- plot_pca(no_norm, plot_label=FALSE, x_pc=1, y_pc=2, cis=FALSE)
## Not putting labels on the plot.
no_pca$plot

ggplt <- ggplotly(no_pca$plot)
## Error in ggplotly(no_pca$plot): could not find function "ggplotly"
widget <- htmlwidgets::saveWidget(as_widget(ggplt), "no_biopsy_svaseq_pca.html")
## Error in as_widget(ggplt): could not find function "as_widget"
## The next call does not select the top-1000 genes; subset_expt(coverage=1e7)
## drops samples with fewer than 10 million total reads. The top-1000 gene
## filter comes later, via semantic_expt_filter(topn=1000).
high_cov <- subset_expt(no_biopsy, coverage=10000000)
## There were 224, now there are 212 samples.
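The coverage cutoff dropped 12 samples. A quick sanity check of library totals against the 10-million-read threshold (assuming exprs() returns raw counts for this un-normalized expt) might be:

## Hedged sketch: how many libraries clear the coverage cutoff, and which fail?
raw_totals <- colSums(exprs(no_biopsy))
table(raw_totals >= 1e7)
sort(raw_totals[raw_totals < 1e7])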
topsva <- normalize_expt(high_cov, filter=TRUE, transform="log2", batch="svaseq", num_surrogates=4)
## This function will replace the expt$expressionset slot with:
## log2(svaseq(hpgl(data)))
## It backs up the current data into a slot named:
##  expt$backup_expressionset. It will also save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
##  when invoking limma.  The appropriate libsize is the non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Leaving the data unconverted.  It is often advisable to cpm/rpkm
##  the data to normalize for sampling differences, keep in mind though that rpkm
##  has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
##  will try to detect this).
## Leaving the data unnormalized.  This is necessary for DESeq, but
##  EdgeR/limma might benefit from normalization.  Good choices include quantile,
##  size-factor, tmm, etc.
## Step 1: performing count filter with option: hpgl
## Removing 5543 low-count genes (14086 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: transforming the data with log2.
## transform_counts: Found 168888 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with svaseq.
## Note to self:  If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## batch_counts: Before batch correction, 4813 entries 0<=x<1.
## batch_counts: Before batch correction, 168888 entries are >= 0.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 168888 entries are x<=0.
## The be method chose 20 surrogate variable(s).
## Attempting svaseq estimation with 20 surrogates.
## The number of elements which are < 0 after batch correction is: 27723
## The variable low_to_zero sets whether to change <0 values to 0 and is: FALSE
toppca <- plot_pca(topsva, plot_labels=FALSE, cis=FALSE)
## Not putting labels on the plot.
toppca$plot

topn_expt <- semantic_expt_filter(no_biopsy, topn=1000)
topn_norm <- normalize_expt(topn_expt, norm="quant", convert="cpm", transform="log2", batch="svaseq")
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(quant(data))))
## It backs up the current data into a slot named:
##  expt$backup_expressionset. It will also save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
##  when invoking limma.  The appropriate libsize is the non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Filter is false, this should likely be set to something, good
##  choices include cbcb, kofa, pofa (anything but FALSE).  If you want this to
##  stay FALSE, keep in mind that if other normalizations are performed, then the
##  resulting libsizes are likely to be strange (potentially negative!)
## Warning in normalize_expt(topn_expt, norm = "quant", convert = "cpm",
## transform = "log2", : Quantile normalization and sva do not always play
## well together.
## Step 1: not doing count filtering.
## Step 2: normalizing the data with quant.
## Using normalize.quantiles.robust due to a thread error in preprocessCore.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## Step 5: doing batch correction with svaseq.
## Note to self:  If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## Passing off to all_adjusters.
## The be method chose 14 surrogate variable(s).
## Attempting svaseq estimation with 14 surrogates.
plot_pca(topn_norm, plot_labels=FALSE)$plot
## Not putting labels on the plot.

tt <- plot_pca_genes(topn_norm, pc_method="tsne")
## Warning in if (plot_labels == FALSE) {: the condition has length > 1 and
## only the first element will be used
## Warning in if (plot_labels == "normal") {: the condition has length > 1 and
## only the first element will be used
## Warning in if (plot_labels == "repel") {: the condition has length > 1 and
## only the first element will be used
## Warning in if (plot_labels == "dlsmart") {: the condition has length > 1
## and only the first element will be used
tt <- plot_pca_genes(topn_norm, pc_method="uwot")$plot
## Warning in if (plot_labels == FALSE) {: the condition has length > 1 and
## only the first element will be used
## Warning in if (plot_labels == "normal") {: the condition has length > 1 and
## only the first element will be used
## Warning in if (plot_labels == "repel") {: the condition has length > 1 and
## only the first element will be used
## Warning in if (plot_labels == "dlsmart") {: the condition has length > 1
## and only the first element will be used
ggplotly(tt)
## Error in ggplotly(tt): could not find function "ggplotly"
tt <- plot_pca_genes(topn_norm)
## Warning in if (plot_labels == FALSE) {: the condition has length > 1 and
## only the first element will be used
## Warning in if (plot_labels == "normal") {: the condition has length > 1 and
## only the first element will be used
## Warning in if (plot_labels == "repel") {: the condition has length > 1 and
## only the first element will be used
## Warning in if (plot_labels == "dlsmart") {: the condition has length > 1
## and only the first element will be used
no_batch_norm <- normalize_expt(no_biopsy, filter=TRUE, convert="cpm",
                          transform="log2", batch="svaseq", num_surrogates=3)
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(hpgl(data))))
## It backs up the current data into a slot named:
##  expt$backup_expressionset. It will also save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
##  when invoking limma.  The appropriate libsize is the non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Leaving the data unnormalized.  This is necessary for DESeq, but
##  EdgeR/limma might benefit from normalization.  Good choices include quantile,
##  size-factor, tmm, etc.
## Step 1: performing count filter with option: hpgl
## Removing 5272 low-count genes (14357 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 234757 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with svaseq.
## Note to self:  If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## batch_counts: Before batch correction, 438636 entries 0<=x<1.
## batch_counts: Before batch correction, 234757 entries are >= 0.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 1 entries are 0<=x<1.
## batch_counts: Before batch/surrogate estimation, 234757 entries are x<=0.
## The be method chose 20 surrogate variable(s).
## Attempting svaseq estimation with 20 surrogates.
## The number of elements which are < 0 after batch correction is: 45136
## The variable low_to_zero sets whether to change <0 values to 0 and is: FALSE
noba <- plot_pca(no_batch_norm, plot_labels=FALSE)
## Not putting labels on the plot.
noba$plot

no_test <- pca_information(no_norm,
                           expt_factors=c("condition", "batch", "expttime"),
                           num_components=5, plot_pcas=TRUE)
## More shallow curves in these plots suggest more genes in this principle component.

no_test$anova_neglogp_heatmap

lots_tsne <- plot_tsne(lots_norm, plot_label=FALSE)
## Not putting labels on the plot.
lots_tsne$plot

##norm_met <- graph_metrics(lots_norm)
##norm_met$pcaplot

gsva_big <- simple_gsva(lots, current_id="ENSEMBL")
## gsva requires the annotation field to be filled in.
## Converting the rownames() of the expressionset to ENTREZID.
## Before conversion, the expressionset has 19629 entries.
## After conversion, the expressionset has 19054 entries.
## Warning in .local(expr, gset.idx.list, ...): 233 genes with constant
## expression values throuhgout the samples.
## Warning in .local(expr, gset.idx.list, ...): Since argument method!
## ="ssgsea", genes with constant expression values are discarded.
## Mapping identifiers between gene sets and feature names
## Estimating GSVA scores for 3000 gene sets.
## Computing observed enrichment scores
## Estimating ECDFs with Poisson kernels
## Using parallel with 8 cores
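The warnings above report 233 genes with constant expression being discarded. To see which genes those are (or pre-filter them before simple_gsva()), a hedged base-R check on the original expt is:

## Hedged sketch: identify constant-expression genes; the count may differ
## slightly from 233 since the warning applies after ID conversion to ENTREZID.
expr_mat <- exprs(lots)
constant_idx <- apply(expr_mat, 1, function(x) length(unique(x)) == 1)
sum(constant_idx)
head(rownames(expr_mat)[constant_idx])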
gsva_expt <- gsva_big$expt
gsva_cor <- plot_corheat(gsva_expt)

pp(file="images/gsva_correlation.pdf", image=gsva_cor$plot)
## Writing the image to: images/gsva_correlation.pdf and calling dev.off().
test_xcell <- simple_xcell(lots, column="cds_length")
## The biomart annotations file already exists, loading from it.
## xCell strongly perfers rpkm values, re-normalizing now.
## This function will replace the expt$expressionset slot with:
## rpkm(data)
## It backs up the current data into a slot named:
##  expt$backup_expressionset. It will also save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
##  when invoking limma.  The appropriate libsize is the non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Filter is false, this should likely be set to something, good
##  choices include cbcb, kofa, pofa (anything but FALSE).  If you want this to
##  stay FALSE, keep in mind that if other normalizations are performed, then the
##  resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data in its current base format, keep in mind that
##  some metrics are easier to see when the data is log2 transformed, but
##  EdgeR/DESeq do not accept transformed data.
## Leaving the data unnormalized.  This is necessary for DESeq, but
##  EdgeR/limma might benefit from normalization.  Good choices include quantile,
##  size-factor, tmm, etc.
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: not doing count filtering.
## Step 2: not normalizing the data.
## Step 3: converting the data with rpkm.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.

test_xcell$heatmap
if (!isTRUE(get0("skip_load"))) {
  pander::pander(sessionInfo())
  message(paste0("This is hpgltools commit: ", get_git_commit()))
  ## message(paste0("Saving to ", savefile))
  ## tmp <- sm(saveme(filename=savefile))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset b0e11455e9f02944597c1bb5027f8ecbeb14201b
## This is hpgltools commit: Fri Jan 18 13:42:11 2019 -0500: b0e11455e9f02944597c1bb5027f8ecbeb14201b
---
title: "Leishmania strains 20180828: Playing with samples."
author: "atb abelew@gmail.com"
date: "`r Sys.Date()`"
output:
 html_document:
  code_download: true
  code_folding: show
  fig_caption: true
  fig_height: 7
  fig_width: 7
  highlight: default
  keep_md: false
  mode: selfcontained
  number_sections: true
  self_contained: true
  theme: readable
  toc: true
  toc_float:
   collapsed: false
   smooth_scroll: false
---

<style>
  body .main-container {
    max-width: 1600px;
  }
</style>

```{r options, include=FALSE}
if (!isTRUE(get0("skip_load"))) {
  library(hpgltools)
  tt <- sm(devtools::load_all("~/hpgltools"))
  knitr::opts_knit$set(progress=TRUE,
                       verbose=TRUE,
                       width=90)
  knitr::opts_chunk$set(error=TRUE,
                        echo=TRUE,
                        fig.width=8,
                        fig.height=8,
                        dpi=96)
  old_options <- options(digits=4,
                         stringsAsFactors=FALSE,
                         knitr.duplicate.label="allow")
  ggplot2::theme_set(ggplot2::theme_bw(base_size=12))
  ver <- "20180828"
  previous_file <- paste0("01_annotation_v", ver, ".Rmd")

  tmp <- try(sm(loadme(filename=gsub(pattern="\\.Rmd", replace="\\.rda\\.xz", x=previous_file))))
  rmd_file <- paste0("02_sample_estimation_lots_v", ver, ".Rmd")
  savefile <- gsub(pattern="\\.Rmd", replace="\\.rda\\.xz", x=rmd_file)
}
```

# Lots of samples!

## Load all the data

```{r lots_of_samples}
hs_annot <- load_biomart_annotations()$annotation
rownames(hs_annot) <- make.names(
  paste0(hs_annot[["ensembl_transcript_id"]], ".",
         hs_annot[["transcript_version"]]),
  unique=TRUE)
hs_tx_gene <- hs_annot[, c("ensembl_gene_id", "ensembl_transcript_id")]
hs_tx_gene[["id"]] <- rownames(hs_tx_gene)
hs_tx_gene <- hs_tx_gene[, c("id", "ensembl_gene_id")]
new_hs_annot <- hs_annot
rownames(new_hs_annot) <- make.names(hs_annot[["ensembl_gene_id"]], unique=TRUE)

lots <- create_expt("sample_sheets/many_samples.xlsx",
                    gene_info=new_hs_annot,
                    tx_gene_map=hs_tx_gene)
```

## Queries from Najib:

* What are the characteristics of an infected macrophage?
* What is the 'signature' of an infected macrophage?
  * If one had to pick 10-20 marker genes which characterize an infected
    macrophage (up or down compared to uninfected), what would they be?
  * If given a new sample of unknown state, what can we use to tell whether it
    is infected or uninfected?

* What makes ADC light up?
* Can we make 'better signatures'?
  * Presumably signatures which distinguish uninfected from infected.

## Generate initial plots

```{r initial_plots, fig.show="hide"}
initial <- plot_libsize(lots)
initial$plot
lots_met <- graph_metrics(lots)
lots_norm <- normalize_expt(lots, norm="quant", filter=TRUE, convert="cpm",
                            transform="log2")
norm_met <- graph_metrics(lots_norm)
test_factors <- pca_information(lots_norm, num_components=4, plot_pcas=TRUE)
```

## Show initial plots

```{r show_initial}
lots_pca <- plot_pca(lots_norm, plot_label=FALSE)
lots_pca$plot
```

## Make subsets

```{r make_subsets}
no_biopsy <- subset_expt(lots, subset="celltype!='skin'")
no_biopsy <- set_expt_conditions(no_biopsy, fact="infectstate")

macrophages <- subset_expt(lots, subset="celltype=='macrophage'")

uninf <- subset_expt(macrophages, subset="state=='uninfected'")
```

## Run xCell

```{r xcell_uninf}
uninf_norm <- normalize_expt(uninf, convert="cpm", norm="quant", filter=TRUE)
test_uninf <- simple_xcell(uninf_norm, column="cds_length")
test_uninf$heatmap

macrophages_norm <- normalize_expt(macrophages, convert="rpkm", column="cds_length", filter=TRUE)
## Replace any NA rpkm values with 0 on the expression matrix, then write it
## back into the underlying ExpressionSet.
mtrx <- exprs(macrophages_norm)
na_idx <- is.na(mtrx)
mtrx[na_idx] <- 0
Biobase::exprs(macrophages_norm$expressionset) <- mtrx
test_macrophages <- simple_xcell(macrophages, column="cds_length", filter=TRUE, label_size=0.3, width=15)
pp(file="images/macrophage_xcell_heatmap.pdf", image=test_macrophages$heatmap)
```

## Poke subsets

```{r poke_subsets}
macr_infuninf <- set_expt_conditions(macrophages, fact="infectstate")
macr_norm <- normalize_expt(macr_infuninf, filter=TRUE, transform="log2",
                            convert="cpm", batch="svaseq")
macr_pca <- plot_pca(macr_norm, plot_label=FALSE)
threed <- make_3d_pca(macr_pca)
threed$plot

pp(file="images/simple_macrophages.png", image=macr_pca$plot)
macr_tsne <- plot_tsne(macr_norm, plot_label=FALSE, iterations=5000)
macr_tsne$plot

no_norm <- normalize_expt(no_biopsy, filter=TRUE, transform="log2",
                          convert="cpm", batch="svaseq")
no_pca <- plot_pca(no_norm, plot_label=FALSE, x_pc=1, y_pc=2, cis=FALSE)
no_pca$plot
## plotly is not attached in this document; call it via its namespace to avoid
## the 'could not find function' errors seen in the rendered output above.
ggplt <- plotly::ggplotly(no_pca$plot)
widget <- htmlwidgets::saveWidget(plotly::as_widget(ggplt), "no_biopsy_svaseq_pca.html")

## The next call does not select the top-1000 genes; subset_expt(coverage=1e7)
## drops samples with fewer than 10 million total reads. The top-1000 gene
## filter comes later, via semantic_expt_filter(topn=1000).
high_cov <- subset_expt(no_biopsy, coverage=10000000)
topsva <- normalize_expt(high_cov, filter=TRUE, transform="log2", batch="svaseq", num_surrogates=4)
toppca <- plot_pca(topsva, plot_labels=FALSE, cis=FALSE)
toppca$plot

topn_expt <- semantic_expt_filter(no_biopsy, topn=1000)
topn_norm <- normalize_expt(topn_expt, norm="quant", convert="cpm", transform="log2", batch="svaseq")
plot_pca(topn_norm, plot_labels=FALSE)$plot

tt <- plot_pca_genes(topn_norm, pc_method="tsne")
tt <- plot_pca_genes(topn_norm, pc_method="uwot")$plot
plotly::ggplotly(tt)
tt <- plot_pca_genes(topn_norm)

no_batch_norm <- normalize_expt(no_biopsy, filter=TRUE, convert="cpm",
                          transform="log2", batch="svaseq", num_surrogates=3)
noba <- plot_pca(no_batch_norm, plot_labels=FALSE)
noba$plot

no_test <- pca_information(no_norm,
                           expt_factors=c("condition", "batch", "expttime"),
                           num_components=5, plot_pcas=TRUE)
no_test$anova_neglogp_heatmap

lots_tsne <- plot_tsne(lots_norm, plot_label=FALSE)
lots_tsne$plot
##norm_met <- graph_metrics(lots_norm)
##norm_met$pcaplot

gsva_big <- simple_gsva(lots, current_id="ENSEMBL")
gsva_expt <- gsva_big$expt
gsva_cor <- plot_corheat(gsva_expt)
pp(file="images/gsva_correlation.pdf", image=gsva_cor$plot)

test_xcell <- simple_xcell(lots, column="cds_length")
test_xcell$heatmap
```


```{r saveme}
if (!isTRUE(get0("skip_load"))) {
  pander::pander(sessionInfo())
  message(paste0("This is hpgltools commit: ", get_git_commit()))
  ## message(paste0("Saving to ", savefile))
  ## tmp <- sm(saveme(filename=savefile))
}
```
