# Use the "infectstate" metadata factor as the condition and the "studypmid"
# factor as the batch.
hs_expt <- set_expt_conditions(hs_expt, fact = "infectstate")
hs_expt <- set_expt_batches(hs_expt, fact = "studypmid")
# Baseline normalization with no batch correction. Arguments are listed in the
# order the steps are applied: low-count filter, quantile normalization,
# cpm conversion, log2 transformation.
hs_norm <- normalize_expt(
  hs_expt,
  filter = "simple",
  norm = "quant",
  convert = "cpm",
  transform = "log2"
)
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(simple(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: simple
## Removing 212 low-count genes (19417 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 6752 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
## Potentially check over the experimental design, there appear to be missing values.
## Warning in plot_pca(hs_norm): There are NA values in the component data.
## This can lead to weird plotting errors.
## plot labels was not set and there are more than 100 samples, disabling it.
## Not putting labels on the plot.
## Warning: Removed 24 rows containing missing values (geom_point).
## Warning: Removed 24 rows containing missing values (geom_point).
# Same filter/quantile/cpm/log2 settings, but additionally request svaseq as
# the batch-correction method. normalize_expt() warns that quantile
# normalization and sva do not always play well together.
hs_nb <- normalize_expt(
  hs_expt,
  filter = "simple",
  norm = "quant",
  convert = "cpm",
  transform = "log2",
  batch = "svaseq"
)
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(quant(simple(data)))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Warning in normalize_expt(hs_expt, transform = "log2", convert = "cpm", :
## Quantile normalization and sva do not always play well together.
## Step 1: performing count filter with option: simple
## Removing 212 low-count genes (19417 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 6752 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with svaseq.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 1022588 entries are x>1: 51.6%.
## batch_counts: Before batch/surrogate estimation, 6752 entries are x==0: 0.341%.
## batch_counts: Before batch/surrogate estimation, 951194 entries are 0<x<1: 48.0%.
## The be method chose 10 surrogate variable(s).
## Attempting svaseq estimation with 10 surrogates.
## There are 30781 (1.55%) elements which are < 0 after batch correction.
## Potentially check over the experimental design, there appear to be missing values.
## Warning in plot_pca(hs_nb): There are NA values in the component data. This
## can lead to weird plotting errors.
## plot labels was not set and there are more than 100 samples, disabling it.
## Not putting labels on the plot.
## Warning: Removed 24 rows containing missing values (geom_point).
## Warning: Removed 24 rows containing missing values (geom_point).
# Switch the condition factor to "expttime".
hs_expt <- set_expt_conditions(hs_expt, fact = "expttime")
# NOTE: this reassigns hs_norm. No `convert` argument is given, so the data are
# left unconverted (no cpm) before the log2 transform and svaseq step.
hs_norm <- normalize_expt(
  hs_expt,
  filter = "simple",
  norm = "quant",
  transform = "log2",
  batch = "svaseq"
)
## This function will replace the expt$expressionset slot with:
## log2(svaseq(quant(simple(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Warning in normalize_expt(hs_expt, transform = "log2", norm = "quant",
## filter = "simple", : Quantile normalization and sva do not always play well
## together.
## Step 1: performing count filter with option: simple
## Removing 212 low-count genes (19417 remaining).
## Step 2: normalizing the data with quant.
## Step 3: not converting the data.
## Step 4: transforming the data with log2.
## transform_counts: Found 6752 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with svaseq.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 1473620 entries are x>1: 74.4%.
## batch_counts: Before batch/surrogate estimation, 6752 entries are x==0: 0.341%.
## batch_counts: Before batch/surrogate estimation, 500162 entries are 0<x<1: 25.3%.
## The be method chose 11 surrogate variable(s).
## Attempting svaseq estimation with 11 surrogates.
## There are 4732 (0.239%) elements which are < 0 after batch correction.
## Potentially check over the experimental design, there appear to be missing values.
## Warning in plot_pca(hs_norm): There are NA values in the component data.
## This can lead to weird plotting errors.
## plot labels was not set and there are more than 100 samples, disabling it.
## Not putting labels on the plot.
## Warning: Removed 24 rows containing missing values (geom_point).
## Warning: Removed 24 rows containing missing values (geom_point).
Najib asked about adding the various data provided by our work. The expressionset which contains this information lives in ‘../multiple_leishmania_2018’; more explicitly, the expressionset may be loaded via Hs_M0Lm4h.rda
# Load the saved objects from the sibling project into the current environment.
# NOTE(review): `expt`, used on the next line, is presumably defined by this
# file -- confirm, since load() silently overwrites any existing object of the
# same name.
load("../multiple_leishmania_2018/Hs_M0Lm4h.rda")
# Merge the two experiments (including their metadata) and use "infectstate"
# as the condition for the combined set.
all_expt <- combine_expts(hs_expt, expt, merge_meta = TRUE)
all_expt <- set_expt_conditions(all_expt, fact = "infectstate")
# Normalize the combined data: filter, quantile, cpm, log2, then svaseq.
all_norm <- normalize_expt(
  all_expt,
  filter = TRUE,
  norm = "quant",
  convert = "cpm",
  transform = "log2",
  batch = "svaseq"
)
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(quant(cbcb(data)))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Warning in normalize_expt(all_expt, filter = TRUE, norm = "quant", convert
## = "cpm", : Quantile normalization and sva do not always play well together.
## Step 1: performing count filter with option: cbcb
## Removing 0 low-count genes (19629 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 9876 values equal to 0, adding 1 to the matrix.
## Step 5: doing batch correction with svaseq.
## Note to self: If you get an error like 'x contains missing values' The data has too many 0's and needs a stronger low-count filter applied.
## Passing off to all_adjusters.
## batch_counts: Before batch/surrogate estimation, 4233025 entries are x>1: 58.4%.
## batch_counts: Before batch/surrogate estimation, 9876 entries are x==0: 0.136%.
## batch_counts: Before batch/surrogate estimation, 3000200 entries are 0<x<1: 41.4%.
## The be method chose 25 surrogate variable(s).
## Attempting svaseq estimation with 25 surrogates.
## There are 211468 (2.92%) elements which are < 0 after batch correction.
## Potentially check over the experimental design, there appear to be missing values.
## Warning in plot_pca(all_norm): There are NA values in the component data.
## This can lead to weird plotting errors.
## plot labels was not set and there are more than 100 samples, disabling it.
## Not putting labels on the plot.
## Warning: Removed 24 rows containing missing values (geom_point).