## The biomart annotations file already exists, loading from it.
We have an older revision of the sample sheet for this dataset. I added some samples from Dr. Mosser in order to compare against the M1/M2 activation states. These extra samples are unlikely to be the most appropriate comparison because they are not U937 samples.
# Build the hg38 data structure from the macrogen sample sheet, passing
# "hisat_hg38" as the file column and hg_df as the gene annotations.
# NOTE(review): "hisat_hg38" is presumably the sample-sheet column holding the
# per-sample count file paths -- confirm against the sheet.
hg38_se <- create_se("sample_sheets/macrogen_samples.xlsx",
file_column = "hisat_hg38", gene_info = hg_df)
## Reading the sample metadata.
## Checking the state of the condition column.
## Warning in extract_metadata(metadata, id_column = id_column, condition_column = condition_column, : There
## were NA values in the condition column, setting them to 'undefined'.
## Checking the state of the batch column.
## Warning in extract_metadata(metadata, id_column = id_column, condition_column = condition_column, : There
## were NA values in the condition column, setting them to 'undefined'.
## Checking the condition factor.
## The sample definitions comprises: 54 rows(samples) and 21 columns(metadata fields).
## Warning in create_se("sample_sheets/macrogen_samples.xlsx", file_column = "hisat_hg38", : Some samples
## were removed when cross referencing the samples against the count data.
## Matched 21405 annotations and counts.
## Some annotations were lost in merging, setting them to 'undefined'.
## The final summarized experiment has 21481 rows and 21 columns.
## The numbers of samples by condition are:
##
## Asymptomatic Chronic control Healthy
## 5 5 3 2
# Normalize hg38_sampletype with low-count filtering (filter = TRUE), CPM
# conversion, norm = "quant" (presumably quantile normalization), and a log2
# transform.
# NOTE(review): hg38_sampletype is not defined in the visible portion of this
# document; presumably it is a subset of hg38_se created in a chunk that is
# not echoed -- confirm.
hg38_norm <- normalize(hg38_sampletype, convert = "cpm", norm = "quant",
transform = "log2", filter = TRUE)
## Removing 9903 low-count genes (11578 remaining).
## transform_counts: Found 26 values equal to 0, adding 1 to the matrix.
## Library sizes of 15 samples,
## ranging from 1,872,219 to 7,805,747.
## The following samples have less than 13962.65 genes.
## [1] "m1" "m2" "a_20179" "c_10036" "a_20187" "c_10046" "a_20132" "c_10063" "a_20133" "c_10093"
## [11] "su1160" "a_20134" "c_10073"
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the hpgltools package.
## Please report the issue to the authors.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## A non-zero genes plot of 15 samples.
## These samples have an average 3.382 CPM coverage and 13550 genes observed, ranging from 12975 to
## 14728.
## A heatmap of pairwise sample correlations ranging from:
## 0.932974671519701 to 0.990409066934894.
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by Asymptomatic, Chronic, control, Healthy
## Shapes are defined by undefined.
# Second normalization of hg38_sampletype: low-count filtering, CPM
# conversion and log2 transform as before, but with batch = "svaseq" in place
# of norm = "quant".
# NOTE(review): the output below reports values less than 0 ahead of the log2
# transform; confirm how the transform handles them.
hg38_nb <- normalize(hg38_sampletype, convert = "cpm", batch = "svaseq",
filter = TRUE, transform = "log2")
## Removing 9903 low-count genes (11578 remaining).
## transform_counts: Found 169 values less than 0.
## transform_counts: Found 169 values equal to 0, adding 1 to the matrix.
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by Asymptomatic, Chronic, control, Healthy
## Shapes are defined by undefined.
# Pairs of condition names to compare; each element name abbreviates the two
# conditions in its pair (e.g. chr_asy = Chronic and Asymptomatic).
keepers <- list(
  chr_asy = c("Chronic", "Asymptomatic"),
  chr_hea = c("Chronic", "Healthy"),
  chr_con = c("Chronic", "control"),
  asy_hea = c("Asymptomatic", "Healthy"),
  asy_con = c("Asymptomatic", "control"),
  hea_con = c("Healthy", "control")
)
# Run the pairwise differential expression analysis on hg38_sampletype using
# the no-intercept model formula "~ 0 + condition", model_svs = "svaseq", and
# low-count filtering; the keepers list is passed through as-is.
hg38_de <- all_pairwise(hg38_sampletype, keepers = keepers, model_fstring = "~ 0 + condition",
model_svs = "svaseq", filter = TRUE)
## Asymptomatic Chronic control Healthy
## 5 5 3 2
## Removing 9903 low-count genes (11578 remaining).
## Basic step 0/3: Normalizing data.
## Basic step 0/3: Converting data.
## I think this is failing? SummarizedExperiment
## Basic step 0/3: Transforming data.
## Setting 4687 entries to zero.
## This received a matrix of SVs.
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## conditions
## Asymptomatic Chronic control Healthy
## 5 5 3 2
## conditions
## Asymptomatic Chronic control Healthy
## 5 5 3 2
## conditions
## Asymptomatic Chronic control Healthy
## 5 5 3 2
## A pairwise differential expression with results from: basic, deseq, ebseq, edger, limma, noiseq.
## This used a surrogate/batch estimate from: svaseq.
## The primary analysis performed 6 comparisons.
## Deleting the file excel/hg38_tables.xlsx before writing the tables.
## Looking for subscript invalid names, start of extract_keepers.
## Looking for subscript invalid names, end of extract_keepers.
## A set of combined differential expression results.
## table deseq_sigup deseq_sigdown edger_sigup edger_sigdown limma_sigup limma_sigdown
## 1 Chronic_vs_Asymptomatic 31 41 43 73 46 28
## 2 Chronic_vs_Healthy 59 43 98 77 34 46
## 3 Chronic_vs_control 680 458 748 551 611 554
## 4 Asymptomatic_vs_Healthy 112 83 183 127 92 113
## 5 Asymptomatic_vs_control 759 492 837 575 646 626
## 6 Healthy_vs_control 427 311 488 421 414 392
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the UpSetR package.
## Please report the issue to the authors.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the UpSetR package.
## Please report the issue to the authors.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Plot describing unique/shared genes in a differential expression table.
# Pull the significant genes out of hg38_tables according to the deseq
# results, writing them to excel/hg38_sig.xlsx.
# NOTE(review): hg38_tables is not defined in the visible portion of this
# document; presumably it comes from a chunk that is not echoed -- confirm.
hg38_sig <- extract_significant_genes(hg38_tables, excel = "excel/hg38_sig.xlsx",
according_to = "deseq")
## Deleting the file excel/hg38_sig.xlsx before writing the tables.
## A set of genes deemed significant according to deseq.
## The parameters defining significant were:
## LFC cutoff: 1 adj P cutoff: 0.05
## deseq_up deseq_down
## Chronic_vs_Asymptomatic 31 41
## Chronic_vs_Healthy 59 43
## Chronic_vs_control 680 458
## Asymptomatic_vs_Healthy 112 83
## Asymptomatic_vs_control 759 492
## Healthy_vs_control 427 311
## Warning in simple_clusterprofiler(sig_genes = structure(list(ensembl_transcript_id = c("ENST00000492807",
## : No genes were found between the significant genes and the universe.
## Error in testForValidKeytype(x, keytype) :
## 'keytype' must be a a single string
## Error in `simple_cl[["kegg_universe"]]`:
## ! subscript out of bounds