samplesheet <- "sample_sheets/tmrc3_samples_20210610.xlsx"
We take the annotation data from ensembl’s biomart instance. The genome which was used to map the data was hg38 revision 100. My default when using biomart is to load the data from 1 year before the current date.
hs_annot <- sm(load_biomart_annotations(year = "2020"))
hs_annot <- hs_annot[["annotation"]]
hs_annot[["transcript"]] <- paste0(rownames(hs_annot), ".", hs_annot[["version"]])
rownames(hs_annot) <- make.names(hs_annot[["ensembl_gene_id"]], unique = TRUE)
tx_gene_map <- hs_annot[, c("transcript", "ensembl_gene_id")]
summary(hs_annot)
## ensembl_transcript_id ensembl_gene_id version transcript_version
## Length:227921 Length:227921 Min. : 1.0 Min. : 1.00
## Class :character Class :character 1st Qu.: 6.0 1st Qu.: 1.00
## Mode :character Mode :character Median :12.0 Median : 1.00
## Mean :10.7 Mean : 3.08
## 3rd Qu.:16.0 3rd Qu.: 5.00
## Max. :29.0 Max. :17.00
##
## hgnc_symbol description gene_biotype cds_length
## Length:227921 Length:227921 Length:227921 Min. : 3
## Class :character Class :character Class :character 1st Qu.: 357
## Mode :character Mode :character Mode :character Median : 694
## Mean : 1139
## 3rd Qu.: 1446
## Max. :107976
## NA's :127343
## chromosome_name strand start_position end_position
## Length:227921 Length:227921 Min. :5.77e+02 Min. :6.47e+02
## Class :character Class :character 1st Qu.:3.11e+07 1st Qu.:3.12e+07
## Mode :character Mode :character Median :6.04e+07 Median :6.06e+07
## Mean :7.41e+07 Mean :7.42e+07
## 3rd Qu.:1.09e+08 3rd Qu.:1.09e+08
## Max. :2.49e+08 Max. :2.49e+08
##
## transcript
## Length:227921
## Class :character
## Mode :character
##
##
##
##
hs_go <- sm(load_biomart_go()[["go"]])
hs_length <- hs_annot[, c("ensembl_gene_id", "cds_length")]
colnames(hs_length) <- c("ID", "length")
This document is intended to provide an overview of TMRC3 samples which have been sequenced. It includes some plots and analyses showing the relationships among the samples as well as some differential analyses when possible.
The sample sheet is copied from our shared online sheet and updated with each release of sequencing data.
The first thing to note is the large range in coverage. There are multiple samples with coverage which is too low to use. These will be removed shortly.
In the following block I immediately exclude any non-coding reads as well.
## Create the expressionset and immediately pass it to a filter
## removing the non protein coding genes.
sanitize_columns <- c("visitnumber", "clinicaloutcome", "donor",
"typeofcells", "clinicalpresentation",
"condition", "batch")
hs_expt <- create_expt(samplesheet,
file_column = "hg38100hisatfile",
savefile = glue::glue("rda/hs_expt_all-v{ver}.rda"),
gene_info = hs_annot) %>%
exclude_genes_expt(column = "gene_biotype", method = "keep",
pattern = "protein_coding", meta_column = "ncrna_lost") %>%
sanitize_expt_metadata(columns = sanitize_columns) %>%
set_expt_factors(columns = sanitize_columns, class = "factor")
## Reading the sample metadata.
## Dropped 71 rows from the sample metadata because they were blank.
## The sample definitions comprises: 173 rows(samples) and 75 columns(metadata fields).
## Warning in create_expt(samplesheet, file_column = "hg38100hisatfile", savefile =
## glue::glue("rda/hs_expt_all-v{ver}.rda"), : Some samples were removed when cross
## referencing the samples against the count data.
## Matched 21452 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## The final expressionset has 21481 rows and 155 columns.
## Before removal, there were 21481 genes, now there are 19941.
## There are 17 samples which kept less than 90 percent counts.
## TMRC30015 TMRC30017 TMRC30019 TMRC30044 TMRC30045 TMRC30154 TMRC30097 TMRC30075
## 79.24 85.72 89.75 80.34 73.33 83.20 89.90 86.97
## TMRC30087 TMRC30101 TMRC30104 TMRC30114 TMRC30127 TMRC30120 TMRC30128 TMRC30131
## 83.63 88.41 80.29 87.62 89.49 79.16 82.53 86.82
## TMRC30073
## 89.26
levels(pData(hs_expt[["expressionset"]])[["visitnumber"]]) <- list(
'0' = "notapplicable", '1' = 1, '2' = 2, '3' = 3)
Split this data into CDS and lncRNA. Oh crap in order to do that I need to recount the data. Running now (20210518)
## lnc_expt <- create_expt(samplesheet,
## file_column = "hg38100lncfile",
## gene_info = hs_annot)
Once the data was loaded, there are a couple of metrics which may be plotted immediately.
nonzero <- plot_nonzero(hs_expt)
nonzero$plot
## Warning: ggrepel: 127 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ncrna_lost_df <- as.data.frame(pData(hs_expt)[["ncrna_lost"]])
rownames(ncrna_lost_df) <- rownames(pData(hs_expt))
colnames(ncrna_lost_df) <- "ncrna_lost"
tmpdf <- merge(nonzero$table, ncrna_lost_df, by = "row.names")
rownames(tmpdf) <- tmpdf[["Row.names"]]
tmpdf[["Row.names"]] <- NULL
ggplot(tmpdf, aes(x=ncrna_lost, y=nonzero_genes)) +
ggplot2::geom_point() +
ggplot2::ggtitle("Nonzero genes with respect to percent counts
lost when ncRNA was removed.")
Najib doesn’t want this plot, but I am using it to check new samples, so will hide it from general use.
libsize <- plot_libsize(hs_expt)
libsize$plot
I arbitrarily chose 11,000 non-zero genes as a minimum. We may want this to be higher.
hs_valid <- subset_expt(hs_expt, nonzero = 11000)
## The samples (and read coverage) removed when filtering 11000 non-zero genes are:
## TMRC30010 TMRC30050 TMRC30052
## 52471 808149 3087347
## subset_expt(): There were 155, now there are 152 samples.
valid_write <- sm(write_expt(hs_valid, excel = glue("excel/hs_valid-v{ver}.xlsx")))
The project seeks to determine the relationship of the innate immune response and inflammatory signaling to the clinical outcome of antileishmanial drug treatment. We will test the hypothesis that the profile of innate immune cell activation and their dynamics through the course of treatment differ between CL patients with prospectively determined therapeutic cure or failure.
This will be achieved through the characterization of the in vivo dynamics of blood-derived monocyte, neutrophil and eosinophil transcriptome before, during and at the end of treatment in CL patients. Cell-type specific transcriptomes, composite signatures and time-response expression profiles will be contrasted among patients with therapeutic cure or failure.
To address these, I added to the end of the sample sheet columns named ‘condition’, ‘batch’, ‘donor’, and ‘time’. These are filled in with shorthand values according to the above.
Before addressing the questions explicitly by subsetting the data, I want to get a look at the samples as they are.
new_names <- pData(hs_valid)[["samplename"]]
hs_valid <- hs_valid %>%
set_expt_batches(fact = "cellssource") %>%
set_expt_conditions(fact = "typeofcells") %>%
set_expt_samplenames(newnames = new_names)
all_norm <- sm(normalize_expt(hs_valid, transform = "log2", norm = "quant",
convert = "cpm", filter = TRUE))
all_pca <- plot_pca(all_norm, plot_labels = FALSE,
plot_title = "PCA - Cell type", size_column = "visitnumber")
pp(file = glue("images/tmrc3_pca_nolabels-v{ver}.png"), image = all_pca$plot)
write.csv(all_pca$table, file = "coords/hs_donor_pca_coords.csv")
plot_corheat(all_norm, plot_title = "Heirarchical clustering:
cell types")$plot
Now let us consider only the samples for which we have a clinical outcome. These fall primarily into either ‘cured’ or ‘failed’, but some people have not yet returned to the clinic after the first or second visit. These are deemed ‘lost’.
hs_clinical <- hs_valid %>%
set_expt_conditions(fact = "clinicaloutcome") %>%
set_expt_batches(fact = "typeofcells") %>%
subset_expt(subset = "typeofcells!='pbmcs'&typeofcells!='macrophages'")
## subset_expt(): There were 152, now there are 132 samples.
chosen_colors <- c("#D95F02", "#7570B3", "#1B9E77", "#FF0000", "#FF0000")
names(chosen_colors) <- c("cure", "failure", "lost", "null", "notapplicable")
hs_clinical <- set_expt_colors(hs_clinical, colors = chosen_colors)
newnames <- make.names(pData(hs_clinical)[["samplename"]], unique = TRUE)
hs_clinical <- set_expt_samplenames(hs_clinical, newnames = newnames)
## Error in names(new_expt[["colors"]]) <- newnames: 'names' attribute [132] must be the same length as the vector [131]
hs_clinical_norm <- sm(normalize_expt(hs_clinical, filter = TRUE, transform = "log2",
convert = "cpm", norm = "quant"))
clinical_pca <- plot_pca(hs_clinical_norm, plot_labels = FALSE,
size_column = "visitnumber", cis = NULL,
plot_title = "PCA - clinical samples")
## Error in data.frame(sampleid = as.character(design[["sampleid"]]), condition = design[[cond_column]], : arguments imply differing number of rows: 132, 131
pp(file = glue("images/all_clinical_nobatch_pca-v{ver}.png"), image = clinical_pca$plot,
height = 8, width = 20)
## Error in pp(file = glue("images/all_clinical_nobatch_pca-v{ver}.png"), : object 'clinical_pca' not found
hs_clinical_nobiop <- hs_clinical %>%
subset_expt(subset = "typeofcells!='biopsy'")
## subset_expt(): There were 132, now there are 83 samples.
hs_clinical_nobiop_norm <- sm(normalize_expt(hs_clinical_nobiop, filter = TRUE, transform = "log2",
convert = "cpm", norm = "quant"))
clinical_nobiop_pca <- plot_pca(hs_clinical_nobiop_norm, plot_labels = FALSE, cis = NULL,
plot_title = "PCA - clinical samples without biopsies")
## Potentially check over the experimental design, there appear to be missing values.
## Warning in plot_pca(hs_clinical_nobiop_norm, plot_labels = FALSE, cis = NULL, :
## There are NA values in the component data. This can lead to weird plotting
## errors.
pp(file = glue("images/all_clinical_nobiop_nobatch_pca-v{ver}.png"),
image = clinical_nobiop_pca$plot)
At this time we have two primary data structures of interest: hs_clinical and hs_clinical_nobiop
hs_clinical_nb <- normalize_expt(hs_clinical, filter = TRUE, batch = "svaseq",
transform = "log2", convert = "cpm")
## Removing 5271 low-count genes (14670 remaining).
## batch_counts: Before batch/surrogate estimation, 122582 entries are x==0: 6%.
## batch_counts: Before batch/surrogate estimation, 355614 entries are 0<x<1: 18%.
## Setting 23843 low elements to zero.
## transform_counts: Found 23843 values equal to 0, adding 1 to the matrix.
clinical_batch_pca <- plot_pca(hs_clinical_nb, plot_labels = FALSE, cis = NULL,
size_column = "visitnumber", plot_title = "PCA - clinical samples")
## Error in data.frame(sampleid = as.character(design[["sampleid"]]), condition = design[[cond_column]], : arguments imply differing number of rows: 132, 131
clinical_batch_pca$plot
## Error in eval(expr, envir, enclos): object 'clinical_batch_pca' not found
hs_clinical_nobiop_nb <- sm(normalize_expt(hs_clinical_nobiop, filter = TRUE, batch = "svaseq",
transform = "log2", convert = "cpm"))
clinical_nobiop_batch_pca <- plot_pca(hs_clinical_nobiop_nb,
plot_title = "PCA - clinical samples without biopsies",
plot_labels = FALSE)
## Potentially check over the experimental design, there appear to be missing values.
## Warning in plot_pca(hs_clinical_nobiop_nb, plot_title = "PCA - clinical samples
## without biopsies", : There are NA values in the component data. This can lead to
## weird plotting errors.
pp(file = "images/clinical_batch.png", image = clinical_nobiop_batch_pca$plot)
test <- plot_pca(hs_clinical_nobiop_nb, size_column = "visitnumber",
plot_title = "PCA - clinical samples without biopsies",
plot_labels = FALSE)
## Potentially check over the experimental design, there appear to be missing values.
## Warning in plot_pca(hs_clinical_nobiop_nb, size_column = "visitnumber", : There
## are NA values in the component data. This can lead to weird plotting errors.
test$plot
clinical_nobiop_batch_tsne <- plot_tsne(hs_clinical_nobiop_nb,
plot_title = "tSNE - clinical samples without biopsies",
plot_labels = FALSE)
## Potentially check over the experimental design, there appear to be missing values.
## Warning in plot_pca(..., pc_method = "tsne"): There are NA values in the
## component data. This can lead to weird plotting errors.
clinical_nobiop_batch_tsne$plot
test <- simple_varpart(hs_clinical_nobiop)
##
## Total:111 s
test$partition_plot
individual_celltypes <- subset_expt(hs_clinical_nobiop, subset="condition!='lost'")
## subset_expt(): There were 83, now there are 68 samples.
hs_clinic_de <- sm(all_pairwise(individual_celltypes, model_batch = "svaseq", filter = TRUE))
hs_clinic_table <- sm(combine_de_tables(
hs_clinic_de,
excel = glue::glue("excel/individual_celltypes_table-v{ver}.xlsx")))
hs_clinic_sig <- sm(extract_significant_genes(
hs_clinic_table,
excel = glue::glue("excel/individual_celltypes_sig-v{ver}.xlsx")))
hs_clinic_sig[["summary_df"]]
## limma_V1 limma_V2 edger_V1 edger_V2 deseq_V1 deseq_V2 ebseq_V1 ebseq_V2
## 1 183 181 263 213 249 232 96 166
## basic_V1 basic_V2
## 1 46 26
hs_clinic_de[["comparison"]][["heat"]]
## NULL
I am not sure if we have enough samples across the three visit to completely work as well as we would like, but there is only 1 way to find out! Now that I think about it, one thing which might be awesome is to use cell type as an interacting factor…
I figure this might be a place where the biopsy samples might prove useful.
clinical_nolost <- subset_expt(hs_clinical, subset="condition!='lost'")
## subset_expt(): There were 132, now there are 115 samples.
lrt_visit_clinical_test <- deseq_lrt(clinical_nolost, transform = "vst",
interactor_column = "visitnumber",
interest_column = "clinicaloutcome")
## converting counts to integer mode
## Error in checkFullRank(modelMatrix): the model matrix is not full rank, so the model cannot be fit as specified.
## Levels or combinations of levels without any samples have resulted in
## column(s) of zeros in the model matrix.
##
## Please read the vignette section 'Model matrix not full rank':
##
## vignette('DESeq2')
lrt_visit_clinical_test[["favorite_genes"]]
## Error in eval(expr, envir, enclos): object 'lrt_visit_clinical_test' not found
lrt_celltype_clinical_test <- deseq_lrt(clinical_nolost, transform = "vst",
interactor_column = "typeofcells",
interest_column = "clinicaloutcome")
## converting counts to integer mode
## Error in checkFullRank(modelMatrix): the model matrix is not full rank, so the model cannot be fit as specified.
## Levels or combinations of levels without any samples have resulted in
## column(s) of zeros in the model matrix.
##
## Please read the vignette section 'Model matrix not full rank':
##
## vignette('DESeq2')
lrt_celltype_clinical_test[["favorite_genes"]]
## Error in eval(expr, envir, enclos): object 'lrt_celltype_clinical_test' not found
A good suggestion from Theresa was to examine only the most variant genes from failure vs. cure and see how they change the clustering/etc results. This is my attempt to address this query.
hs_clinic_topn <- sm(extract_significant_genes(hs_clinic_table, n = 100))
table <- "failure_vs_cure"
wanted <- rbind(hs_clinic_topn[["deseq"]][["ups"]][[table]],
hs_clinic_topn[["deseq"]][["downs"]][[table]])
small_expt <- exclude_genes_expt(hs_clinical_nobiop, ids = rownames(wanted), method = "keep")
## Before removal, there were 19941 genes, now there are 200.
## There are 83 samples which kept less than 90 percent counts.
## 1017n1 1017m1 1034n1 1034n2 1034m2 1034m2- 2052e1 2052m2 2052n2 2052m3
## 0.4344 0.4209 2.3933 2.8302 1.3082 1.2597 0.6909 0.6058 1.3510 0.6899
## 2052n3 2065m1 2065n1 2066m1 2066n1 2065m2 2065n2 2065e2 2066m2 2066n2
## 1.1861 1.1063 3.1233 0.4222 0.7710 0.6235 0.8864 1.3413 0.5375 0.9750
## 2066e2 2068m1 2068n1 2068e1 2072m1 2072n1 2072e1 2071m1 2071n1 2073m1
## 1.1821 0.5670 0.8161 1.3970 0.5170 0.6085 0.8754 0.7574 1.7531 0.6883
## 2073n1 2073e1 2068m2 2068n2 2068e2 2072m2 2072n2 2072e2 2073m2 2073n2
## 1.8493 0.9073 0.3942 0.5760 0.8514 0.4248 0.5737 0.6222 1.0724 2.7964
## 2073e2 2066m3 2066n3 2066e3 2065e3 2068m3 2068n3 2068e3 2072m3 2072n3
## 0.8723 0.4794 0.6782 0.6991 0.6026 0.4552 0.7345 0.8428 0.7395 1.9132
## 2072e3 2073m3 2073n3 2073e3 2162m1 2162n1 2162e1 2162n2 2162e2 2162n3
## 0.8600 0.5342 0.7511 0.6015 0.4937 0.6971 1.0733 0.9258 1.1809 0.7148
## 2162e3 2167m1 2167n1 2167e1 2168m1 2168n1 2168e1 2168m2 2168n2 2168e2
## 0.7263 0.6428 0.9420 1.3436 1.0901 2.2668 1.1321 0.9822 2.2914 0.9692
## 2167m2 2167n3 2167e3 2168m3 2168n3 2168e3 2172n1 2172e1 1168m1 1168n1
## 0.4135 1.4135 1.4795 1.3873 3.6918 1.1862 0.3999 0.5349 0.7780 1.3948
## 1168m2 1168e2 1168n3
## 0.7665 0.5925 0.9169
small_norm <- sm(normalize_expt(small_expt, transform = "log2", convert = "cpm",
norm = "quant", filter = TRUE))
plot_pca(small_norm)$plot
## Potentially check over the experimental design, there appear to be missing values.
## Warning in plot_pca(small_norm): There are NA values in the component data. This
## can lead to weird plotting errors.
## Warning: ggrepel: 17 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
small_nb <- normalize_expt(small_expt, transform = "log2", convert = "cpm",
batch = "svaseq", norm = "quant", filter = TRUE)
## Warning in normalize_expt(small_expt, transform = "log2", convert = "cpm", :
## Quantile normalization and sva do not always play well together.
## Removing 0 low-count genes (200 remaining).
## batch_counts: Before batch/surrogate estimation, 1 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 894 entries are 0<x<1: 5%.
## Setting 801 low elements to zero.
## transform_counts: Found 801 values equal to 0, adding 1 to the matrix.
plot_pca(small_nb)$plot
## Potentially check over the experimental design, there appear to be missing values.
## Warning in plot_pca(small_nb): There are NA values in the component data. This
## can lead to weird plotting errors.
## Warning: ggrepel: 79 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## DESeq2 MA plot of failure / cure
hs_clinic_table[["plots"]][["failure_vs_cure"]][["deseq_ma_plots"]]$plot
## DESeq2 Volcano plot of failure / cure
hs_clinic_table[["plots"]][["failure_vs_cure"]][["deseq_vol_plots"]]$plot
ups <- hs_clinic_sig[["deseq"]][["ups"]][[1]]
downs <- hs_clinic_sig[["deseq"]][["downs"]][[1]]
hs_clinic_gprofiler_ups <- simple_gprofiler(ups)
## Performing gProfiler GO search of 249 genes against hsapiens.
## GO search found 106 hits.
## Performing gProfiler KEGG search of 249 genes against hsapiens.
## KEGG search found 10 hits.
## Performing gProfiler REAC search of 249 genes against hsapiens.
## REAC search found 10 hits.
## Performing gProfiler MI search of 249 genes against hsapiens.
## MI search found 0 hits.
## Performing gProfiler TF search of 249 genes against hsapiens.
## TF search found 55 hits.
## Performing gProfiler CORUM search of 249 genes against hsapiens.
## CORUM search found 1 hits.
## Performing gProfiler HP search of 249 genes against hsapiens.
## HP search found 0 hits.
hs_clinic_gprofiler_ups[["pvalue_plots"]][["bpp_plot_over"]]
hs_clinic_gprofiler_ups[["pvalue_plots"]][["mfp_plot_over"]]
hs_clinic_gprofiler_ups[["pvalue_plots"]][["reactome_plot_over"]]
##hs_try2 <- simple_gprofiler2(ups)
hs_clinic_gprofiler_downs <- simple_gprofiler(downs)
## Performing gProfiler GO search of 232 genes against hsapiens.
## GO search found 58 hits.
## Performing gProfiler KEGG search of 232 genes against hsapiens.
## KEGG search found 8 hits.
## Performing gProfiler REAC search of 232 genes against hsapiens.
## REAC search found 5 hits.
## Performing gProfiler MI search of 232 genes against hsapiens.
## MI search found 0 hits.
## Performing gProfiler TF search of 232 genes against hsapiens.
## TF search found 11 hits.
## Performing gProfiler CORUM search of 232 genes against hsapiens.
## CORUM search found 0 hits.
## Performing gProfiler HP search of 232 genes against hsapiens.
## HP search found 2 hits.
hs_clinic_gprofiler_downs[["pvalue_plots"]][["bpp_plot_over"]]
hs_clinic_gprofiler_downs[["pvalue_plots"]][["mfp_plot_over"]]
hs_clinic_gprofiler_downs[["pvalue_plots"]][["reactome_plot_over"]]
hs_celltype_gsva_c2 <- sm(simple_gsva(individual_celltypes))
hs_celltype_gsva_c2_sig <- sm(get_sig_gsva_categories(
hs_celltype_gsva_c2,
excel = "excel/individual_celltypes_gsva_c2.xlsx"))
broad_c7 <- GSEABase::getGmt("reference/msigdb/c7.all.v7.2.entrez.gmt",
collectionType = GSEABase::BroadCollection(category = "c7"),
geneIdType = GSEABase::EntrezIdentifier())
hs_celltype_gsva_c7 <- sm(simple_gsva(individual_celltypes, signatures = broad_c7,
msig_xml = "reference/msigdb_v7.2.xml", cores = 10))
hs_celltype_gsva_c7_sig <- sm(get_sig_gsva_categories(
hs_celltype_gsva_c7,
excel = "excel/individual_celltypes_gsva_c7.xlsx"))
## The raw heatmap of the C2 values
hs_celltype_gsva_c2_sig[["raw_plot"]]
## The 'significance' scores of the C2 values
hs_celltype_gsva_c2_sig[["score_plot"]]
## The subset of scores for categories deemed significantly different.
hs_celltype_gsva_c2_sig[["subset_plot"]]
## The raw heatmap of the C7 values
hs_celltype_gsva_c7_sig[["raw_plot"]]
## The 'significance' scores of the C7 values
hs_celltype_gsva_c7_sig[["score_plot"]]
## The subset of scores for categories deemed significantly different.
hs_celltype_gsva_c7_sig[["subset_plot"]]
The following blocks split the samples into a few groups by sample type and look at the distributions between them.
Get top/bottom n genes for each cell type, using clinical outcome as the factor of interest. For the moment, use sva for the DE analysis. Provide cpms for the top/bottom n genes.
Start with top/bottom 200. Perform default logFC and p-value as well.
mono <- subset_expt(hs_valid, subset = "typeofcells=='monocytes'") %>%
set_expt_conditions(fact = "clinicaloutcome") %>%
set_expt_batches(fact = "donor") %>%
set_expt_colors(colors = chosen_colors)
## subset_expt(): There were 152, now there are 28 samples.
## FIXME set_expt_colors should speak up if there are mismatches here!!!
save_result <- save(mono, file = "rda/monocyte_expt.rda")
mono_norm <- normalize_expt(mono, convert = "cpm", filter = TRUE,
transform = "log2", norm = "quant")
## Removing 8906 low-count genes (11035 remaining).
## transform_counts: Found 9 values equal to 0, adding 1 to the matrix.
plt <- plot_pca(mono_norm, plot_labels = FALSE)$plot
pp(file = glue("images/mono_pca_normalized-v{ver}.pdf"), image = plt)
mono_nb <- normalize_expt(mono, convert = "cpm", filter = TRUE,
transform = "log2", batch = "svaseq")
## Removing 8906 low-count genes (11035 remaining).
## batch_counts: Before batch/surrogate estimation, 1433 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 18423 entries are 0<x<1: 6%.
## Setting 512 low elements to zero.
## transform_counts: Found 512 values equal to 0, adding 1 to the matrix.
plt <- plot_pca(mono_nb, plot_labels = FALSE)$plot
pp(file = glue("images/mono_pca_normalized_batch-v{ver}.pdf"), image = plt)
mono_de <- sm(all_pairwise(mono, model_batch = FALSE, filter = TRUE))
mono_tables <- sm(combine_de_tables(
mono_de, keepers = keepers,
excel = glue::glue("excel/monocyte_clinical_all_tables-v{ver}.xlsx")))
written <- write_xlsx(data = mono_tables[["data"]][[1]],
excel = glue::glue("excel/monocyte_clinical_table-v{ver}.xlsx"))
mono_sig <- sm(extract_significant_genes(mono_tables, according_to = "deseq"))
written <- write_xlsx(data = mono_sig[["deseq"]][["ups"]][[1]],
excel = glue::glue("excel/monocyte_clinical_sigup-v{ver}.xlsx"))
written <- write_xlsx(data = mono_sig[["deseq"]][["downs"]][[1]],
excel = glue::glue("excel/monocyte_clinical_sigdown-v{ver}.xlsx"))
mono_pct_sig <- sm(extract_significant_genes(mono_tables, n = 200,
lfc = NULL, p = NULL, according_to = "deseq"))
written <- write_xlsx(data = mono_pct_sig[["deseq"]][["ups"]][[1]],
excel = glue::glue("excel/monocyte_clinical_sigup_pct-v{ver}.xlsx"))
written <- write_xlsx(data = mono_pct_sig[["deseq"]][["downs"]][[1]],
excel = glue::glue("excel/monocyte_clinical_sigdown_pct-v{ver}.xlsx"))
mono_sig$summary_df
## data frame with 0 columns and 1 row
## Print out a table of the cpm values for other explorations.
mono_cpm <- sm(normalize_expt(mono, convert = "cpm"))
written <- write_xlsx(data = exprs(mono_cpm),
excel = glue::glue("excel/monocyte_cpm_before_batch-v{ver}.xlsx"))
mono_bcpm <- sm(normalize_expt(mono, filter = TRUE, convert = "cpm", batch = "svaseq"))
written <- write_xlsx(data = exprs(mono_bcpm),
excel = glue::glue("excel/monocyte_cpm_after_batch-v{ver}.xlsx"))
mono_de_sva <- sm(all_pairwise(mono, model_batch = "svaseq", filter = TRUE))
mono_tables_sva <- sm(combine_de_tables(
mono_de_sva, keepers = keepers,
excel = glue::glue("excel/monocyte_clinical_all_tables_sva-v{ver}.xlsx")))
mono_sig_sva <- sm(extract_significant_genes(
mono_tables_sva,
excel = glue::glue("excel/monocyte_clinical_sig_tables_sva-v{ver}.xlsx"),
according_to = "deseq"))
First print out the DE plots without and then with sva estimates.
## DESeq2 MA plot of failure / cure
mono_tables[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot
## DESeq2 Volcano plot of failure / cure
mono_tables[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot
## DESeq2 MA plot of failure / cure with svaseq
mono_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot
## DESeq2 Volcano plot of failure / cure with svaseq
mono_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot
ups <- mono_sig[["deseq"]][["ups"]][["fail_vs_cure"]]
downs <- mono_sig[["deseq"]][["downs"]][["fail_vs_cure"]]
mono_up_gp <- simple_gprofiler(sig_genes = ups)
## Performing gProfiler GO search of 126 genes against hsapiens.
## GO search found 32 hits.
## Performing gProfiler KEGG search of 126 genes against hsapiens.
## KEGG search found 6 hits.
## Performing gProfiler REAC search of 126 genes against hsapiens.
## REAC search found 3 hits.
## Performing gProfiler MI search of 126 genes against hsapiens.
## MI search found 0 hits.
## Performing gProfiler TF search of 126 genes against hsapiens.
## TF search found 17 hits.
## Performing gProfiler CORUM search of 126 genes against hsapiens.
## CORUM search found 1 hits.
## Performing gProfiler HP search of 126 genes against hsapiens.
## HP search found 20 hits.
mono_up_gp[["pvalue_plots"]][["bpp_plot_over"]]
mono_up_gp[["pvalue_plots"]][["mfp_plot_over"]]
mono_up_gp[["pvalue_plots"]][["reactome_plot_over"]]
mono_down_gp <- simple_gprofiler(sig_genes = downs)
## Performing gProfiler GO search of 281 genes against hsapiens.
## GO search found 98 hits.
## Performing gProfiler KEGG search of 281 genes against hsapiens.
## KEGG search found 1 hits.
## Performing gProfiler REAC search of 281 genes against hsapiens.
## REAC search found 8 hits.
## Performing gProfiler MI search of 281 genes against hsapiens.
## MI search found 0 hits.
## Performing gProfiler TF search of 281 genes against hsapiens.
## TF search found 126 hits.
## Performing gProfiler CORUM search of 281 genes against hsapiens.
## CORUM search found 2 hits.
## Performing gProfiler HP search of 281 genes against hsapiens.
## HP search found 1 hits.
mono_down_gp[["pvalue_plots"]][["bpp_plot_over"]]
mono_down_gp[["pvalue_plots"]][["mfp_plot_over"]]
mono_down_gp[["pvalue_plots"]][["reactome_plot_over"]]
broad_c7 <- GSEABase::getGmt("reference/msigdb/c7.all.v7.2.entrez.gmt",
collectionType = GSEABase::BroadCollection(category = "c7"),
geneIdType = GSEABase::EntrezIdentifier())
mono_up_goseq_msig <- goseq_msigdb(sig_genes = ups, signatures = broad_c7,
signature_category = "c7", length_db = hs_length)
## Before conversion: 126, after conversion: 127.
## Before conversion: 227921, after conversion: 35341.
## Found 122 go_db genes and 127 length_db genes out of 127.
mono_down_goseq_msig <- goseq_msigdb(sig_genes = downs, signatures = broad_c7,
signature_category = "c7", length_db = hs_length)
## Before conversion: 281, after conversion: 280.
## Before conversion: 227921, after conversion: 35341.
## Found 269 go_db genes and 280 length_db genes out of 280.
## Monocyte genes with increased expression in the failed samples
## share genes with the following experiments
mono_up_goseq_msig[["pvalue_plots"]][["mfp_plot_over"]]
## Monocyte genes with increased expression in the cured samples
## share genes with the following experiments
mono_down_goseq_msig[["pvalue_plots"]][["mfp_plot_over"]]
neut <- subset_expt(hs_valid, subset = "typeofcells=='neutrophils'") %>%
set_expt_conditions(fact = "clinicaloutcome") %>%
set_expt_batches(fact = "donor") %>%
set_expt_colors(colors = chosen_colors)
## subset_expt(): There were 152, now there are 31 samples.
save_result <- save(neut, file = "rda/neutrophil_expt.rda")
neut_norm <- sm(normalize_expt(neut, convert = "cpm", filter = TRUE, transform = "log2"))
plt <- plot_pca(neut_norm, plot_labels = FALSE)$plot
pp(file = glue("images/neut_pca_normalized-v{ver}.pdf"), image = plt)
neut_nb <- sm(normalize_expt(neut, convert = "cpm", filter = TRUE,
transform = "log2", batch = "svaseq"))
plt <- plot_pca(neut_nb, plot_labels = FALSE)$plot
pp(file = glue("images/neut_pca_normalized_svaseq-v{ver}.pdf"), image = plt)
neut_de <- sm(all_pairwise(neut, model_batch = FALSE, filter = TRUE))
neut_tables <- sm(combine_de_tables(
neut_de, keepers = keepers,
excel = glue::glue("excel/neutrophil_clinical_all_tables-v{ver}.xlsx")))
written <- write_xlsx(data = neut_tables[["data"]][[1]],
excel = glue::glue("excel/neutrophil_clinical_table-v{ver}.xlsx"))
neut_sig <- sm(extract_significant_genes(neut_tables, according_to = "deseq"))
written <- write_xlsx(data = neut_sig[["deseq"]][["ups"]][[1]],
excel = glue::glue("excel/neutrophil_clinical_sigup-v{ver}.xlsx"))
written <- write_xlsx(data = neut_sig[["deseq"]][["downs"]][[1]],
excel = glue::glue("excel/neutrophil_clinical_sigdown-v{ver}.xlsx"))
neut_pct_sig <- sm(extract_significant_genes(neut_tables, n = 200, lfc = NULL,
p = NULL, according_to = "deseq"))
written <- write_xlsx(data = neut_sig[["deseq"]][["ups"]][[1]],
excel = glue::glue("excel/neutrophil_clinical_sigup_pct-v{ver}.xlsx"))
written <- write_xlsx(data = neut_sig[["deseq"]][["downs"]][[1]],
excel = glue::glue("excel/neutrophil_clinical_sigdown_pct-v{ver}.xlsx"))
neut_cpm <- sm(normalize_expt(neut, convert = "cpm"))
written <- write_xlsx(data = exprs(neut_cpm),
excel = glue::glue("excel/neutrophil_cpm_before_batch-v{ver}.xlsx"))
neut_bcpm <- sm(normalize_expt(neut, filter = TRUE, batch = "svaseq", convert = "cpm"))
written <- write_xlsx(data = exprs(neut_bcpm),
excel = glue::glue("excel/neutrophil_cpm_after_batch-v{ver}.xlsx"))
neut_de_sva <- sm(all_pairwise(neut, model_batch = "svaseq", filter = TRUE))
neut_tables_sva <- sm(combine_de_tables(
neut_de_sva, keepers = keepers,
excel = glue::glue("excel/neutrophil_clinical_all_tables_sva-v{ver}.xlsx")))
neut_sig_sva <- sm(extract_significant_genes(
neut_tables_sva,
excel = glue::glue("excel/neutrophil_clinical_sig_tables_sva-v{ver}.xlsx"),
according_to = "deseq"))
## DESeq2 MA plot of failure / cure
neut_tables[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot
## DESeq2 Volcano plot of failure / cure
neut_tables[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot
## DESeq2 MA plot of failure / cure with sva
neut_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot
## DESeq2 Volcano plot of failure / cure with sva
neut_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot
ups <- neut_sig[["deseq"]][["ups"]][["fail_vs_cure"]]
downs <- neut_sig[["deseq"]][["downs"]][["fail_vs_cure"]]
neut_up_gp <- simple_gprofiler(sig_genes = ups)
## Performing gProfiler GO search of 179 genes against hsapiens.
## GO search found 41 hits.
## Performing gProfiler KEGG search of 179 genes against hsapiens.
## KEGG search found 3 hits.
## Performing gProfiler REAC search of 179 genes against hsapiens.
## REAC search found 5 hits.
## Performing gProfiler MI search of 179 genes against hsapiens.
## MI search found 0 hits.
## Performing gProfiler TF search of 179 genes against hsapiens.
## TF search found 35 hits.
## Performing gProfiler CORUM search of 179 genes against hsapiens.
## CORUM search found 0 hits.
## Performing gProfiler HP search of 179 genes against hsapiens.
## HP search found 4 hits.
neut_up_gp[["pvalue_plots"]][["bpp_plot_over"]]
neut_up_gp[["pvalue_plots"]][["mfp_plot_over"]]
neut_up_gp[["pvalue_plots"]][["reactome_plot_over"]]
neut_down_gp <- simple_gprofiler(downs)
## Performing gProfiler GO search of 138 genes against hsapiens.
## GO search found 0 hits.
## Performing gProfiler KEGG search of 138 genes against hsapiens.
## KEGG search found 0 hits.
## Performing gProfiler REAC search of 138 genes against hsapiens.
## REAC search found 0 hits.
## Performing gProfiler MI search of 138 genes against hsapiens.
## MI search found 0 hits.
## Performing gProfiler TF search of 138 genes against hsapiens.
## TF search found 2 hits.
## Performing gProfiler CORUM search of 138 genes against hsapiens.
## CORUM search found 0 hits.
## Performing gProfiler HP search of 138 genes against hsapiens.
## HP search found 0 hits.
neut_down_gp[["pvalue_plots"]][["bpp_plot_over"]]
## NULL
neut_down_gp[["pvalue_plots"]][["mfp_plot_over"]]
## NULL
neut_down_gp[["pvalue_plots"]][["reactome_plot_over"]]
## NULL
neut_up_goseq_msig <- goseq_msigdb(sig_genes = ups, signatures = broad_c7,
signature_category = "c7", length_db = hs_length)
## Before conversion: 179, after conversion: 178.
## Before conversion: 227921, after conversion: 35341.
## Found 171 go_db genes and 178 length_db genes out of 178.
neut_down_goseq_msig <- goseq_msigdb(sig_genes = downs, signatures = broad_c7,
signature_category = "c7", length_db = hs_length)
## Before conversion: 138, after conversion: 133.
## Before conversion: 227921, after conversion: 35341.
## Found 128 go_db genes and 133 length_db genes out of 133.
## Neutrophil genes with increased expression in the failed samples
## share genes with the following experiments
neut_up_goseq_msig[["pvalue_plots"]][["mfp_plot_over"]]
## Neutrophil genes with increased expression in the cured samples
## share genes with the following experiments
neut_down_goseq_msig[["pvalue_plots"]][["mfp_plot_over"]]
eo <- subset_expt(hs_valid, subset = "typeofcells=='eosinophils'") %>%
set_expt_conditions(fact = "clinicaloutcome") %>%
set_expt_batches(fact = "donor") %>%
set_expt_colors(colors = chosen_colors)
## subset_expt(): There were 152, now there are 24 samples.
save_result <- save(eo, file = "rda/eosinophil_expt.rda")
eo_norm <- sm(normalize_expt(eo, convert = "cpm", transform = "log2",
norm = "quant", filter = TRUE))
plt <- plot_pca(eo_norm, plot_labels = FALSE)$plot
pp(file = glue("images/eo_pca_normalized-v{ver}.pdf"), image = plt)
eo_nb <- sm(normalize_expt(eo, convert = "cpm", transform = "log2",
filter = TRUE, batch = "svaseq"))
plot_pca(eo_nb)$plot
eo_de <- sm(all_pairwise(eo, model_batch = FALSE, filter = TRUE))
eo_tables <- sm(combine_de_tables(
eo_de, keepers = keepers,
excel = glue::glue("excel/eosinophil_clinical_all_tables-v{ver}.xlsx")))
written <- write_xlsx(data = eo_tables[["data"]][[1]],
excel = glue::glue("excel/eosinophil_clinical_table-v{ver}.xlsx"))
eo_sig <- sm(extract_significant_genes(eo_tables, according_to = "deseq"))
written <- write_xlsx(data = eo_sig[["deseq"]][["ups"]][[1]],
excel = glue::glue("excel/eosinophil_clinical_sigup-v{ver}.xlsx"))
written <- write_xlsx(data = eo_sig[["deseq"]][["downs"]][[1]],
excel = glue::glue("excel/eosinophil_clinical_sigdown-v{ver}.xlsx"))
eo_pct_sig <- sm(extract_significant_genes(eo_tables, n = 200,
lfc = NULL, p = NULL, according_to = "deseq"))
written <- write_xlsx(data = eo_pct_sig[["deseq"]][["ups"]][[1]],
excel = glue::glue("excel/eosinophil_clinical_sigup_pct-v{ver}.xlsx"))
written <- write_xlsx(data = eo_pct_sig[["deseq"]][["downs"]][[1]],
excel = glue::glue("excel/eosinophil_clinical_sigdown_pct-v{ver}.xlsx"))
eo_cpm <- sm(normalize_expt(eo, convert = "cpm"))
written <- write_xlsx(data = exprs(eo_cpm),
excel = glue::glue("excel/eosinophil_cpm_before_batch-v{ver}.xlsx"))
eo_bcpm <- sm(normalize_expt(eo, filter = TRUE, batch = "svaseq", convert = "cpm"))
written <- write_xlsx(data = exprs(eo_bcpm),
excel = glue::glue("excel/eosinophil_cpm_after_batch-v{ver}.xlsx"))
eo_de_sva <- sm(all_pairwise(eo, model_batch = "svaseq", filter = TRUE))
eo_tables_sva <- sm(combine_de_tables(
eo_de_sva, keepers = keepers,
excel = glue::glue("excel/eosinophil_clinical_all_tables_sva-v{ver}.xlsx")))
eo_sig_sva <- sm(extract_significant_genes(
eo_tables_sva,
excel = glue::glue("excel/eosinophil_clinical_sig_tables_sva-v{ver}.xlsx"),
according_to = "deseq"))
## DESeq2 MA plot of failure / cure
eo_tables[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot
## DESeq2 Volcano plot of failure / cure
eo_tables[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot
## DESeq2 MA plot of failure / cure with sva
eo_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot
## DESeq2 Volcano plot of failure / cure with sva
eo_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot
ups <- eo_sig[["deseq"]][["ups"]][["fail_vs_cure"]]
downs <- eo_sig[["deseq"]][["downs"]][["fail_vs_cure"]]
eo_up_gp <- simple_gprofiler(sig_genes = ups)
## Performing gProfiler GO search of 110 genes against hsapiens.
## GO search found 102 hits.
## Performing gProfiler KEGG search of 110 genes against hsapiens.
## KEGG search found 7 hits.
## Performing gProfiler REAC search of 110 genes against hsapiens.
## REAC search found 9 hits.
## Performing gProfiler MI search of 110 genes against hsapiens.
## MI search found 0 hits.
## Performing gProfiler TF search of 110 genes against hsapiens.
## TF search found 50 hits.
## Performing gProfiler CORUM search of 110 genes against hsapiens.
## CORUM search found 0 hits.
## Performing gProfiler HP search of 110 genes against hsapiens.
## HP search found 0 hits.
eo_up_gp[["pvalue_plots"]][["bpp_plot_over"]]
eo_up_gp[["pvalue_plots"]][["mfp_plot_over"]]
eo_up_gp[["pvalue_plots"]][["reactome_plot_over"]]
eo_down_gp <- simple_gprofiler(downs)
## Performing gProfiler GO search of 38 genes against hsapiens.
## GO search found 0 hits.
## Performing gProfiler KEGG search of 38 genes against hsapiens.
## KEGG search found 0 hits.
## Performing gProfiler REAC search of 38 genes against hsapiens.
## REAC search found 0 hits.
## Performing gProfiler MI search of 38 genes against hsapiens.
## MI search found 0 hits.
## Performing gProfiler TF search of 38 genes against hsapiens.
## TF search found 0 hits.
## Performing gProfiler CORUM search of 38 genes against hsapiens.
## CORUM search found 0 hits.
## Performing gProfiler HP search of 38 genes against hsapiens.
## HP search found 0 hits.
eo_down_gp[["pvalue_plots"]][["bpp_plot_over"]]
## NULL
eo_down_gp[["pvalue_plots"]][["mfp_plot_over"]]
## NULL
eo_down_gp[["pvalue_plots"]][["reactome_plot_over"]]
## NULL
eo_up_goseq_msig <- goseq_msigdb(sig_genes = ups, signatures = broad_c7,
signature_category = "c7", length_db = hs_length)
## Before conversion: 110, after conversion: 110.
## Before conversion: 227921, after conversion: 35341.
## Found 108 go_db genes and 110 length_db genes out of 110.
eo_down_goseq_msig <- goseq_msigdb(sig_genes = downs, signatures = broad_c7,
signature_category = "c7", length_db = hs_length)
## Before conversion: 38, after conversion: 37.
## Before conversion: 227921, after conversion: 35341.
## Found 33 go_db genes and 37 length_db genes out of 37.
## Eosinophil genes with increased expression in the failed samples
## share genes with the following experiments
eo_up_goseq_msig[["pvalue_plots"]][["mfp_plot_over"]]
## Eosinophil genes with increased expression in the cured samples
## share genes with the following experiments
eo_down_goseq_msig[["pvalue_plots"]][["mfp_plot_over"]]
biop <- subset_expt(hs_valid, subset = "typeofcells=='biopsy'") %>%
set_expt_conditions(fact = "clinicaloutcome") %>%
set_expt_batches(fact = "donor") %>%
set_expt_colors(colors = chosen_colors)
## subset_expt(): There were 152, now there are 49 samples.
save_result <- save(biop, file = "rda/biopsy_expt.rda")
biop_norm <- normalize_expt(biop, filter = TRUE, convert = "cpm",
transform = "log2", norm = "quant")
## Removing 5753 low-count genes (14188 remaining).
## transform_counts: Found 16 values equal to 0, adding 1 to the matrix.
plt <- plot_pca(biop_norm, plot_labels = FALSE)$plot
## Error in data.frame(sampleid = as.character(design[["sampleid"]]), condition = design[[cond_column]], : arguments imply differing number of rows: 49, 48
pp(file = glue("images/biop_pca_normalized-v{ver}.pdf"), image = plt)
biop_nb <- sm(normalize_expt(biop, convert = "cpm", filter = TRUE,
transform = "log2", batch = "svaseq"))
plt <- plot_pca(biop_nb, plot_labels = FALSE)$plot
## Error in data.frame(sampleid = as.character(design[["sampleid"]]), condition = design[[cond_column]], : arguments imply differing number of rows: 49, 48
pp(file = glue("images/biop_pca_normalized_svaseq-v{ver}.pdf"), image = plt)
biop_de <- sm(all_pairwise(biop, model_batch = FALSE, filter = TRUE))
## Error in data.frame(sampleid = as.character(design[["sampleid"]]), condition = design[[cond_column]], : arguments imply differing number of rows: 49, 48
biop_tables <- combine_de_tables(biop_de, keepers = keepers,
excel = glue::glue("excel/biopsy_clinical_all_tables-v{ver}.xlsx"))
## Deleting the file excel/biopsy_clinical_all_tables-v202106.xlsx before writing the tables.
## Error in combine_de_tables(biop_de, keepers = keepers, excel = glue::glue("excel/biopsy_clinical_all_tables-v{ver}.xlsx")): object 'biop_de' not found
written <- write_xlsx(data = biop_tables[["data"]][[1]],
excel = glue::glue("excel/biopsy_clinical_table-v{ver}.xlsx"))
## Error in write_xlsx(data = biop_tables[["data"]][[1]], excel = glue::glue("excel/biopsy_clinical_table-v{ver}.xlsx")): object 'biop_tables' not found
biop_sig <- extract_significant_genes(biop_tables, according_to = "deseq")
## Error in extract_significant_genes(biop_tables, according_to = "deseq"): object 'biop_tables' not found
##written <- write_xlsx(data = biop_sig[["deseq"]][["ups"]][[1]],
## excel = glue::glue("excel/biopsy_clinical_sigup-v{ver}.xlsx"))
written <- write_xlsx(data = biop_sig[["deseq"]][["downs"]][[1]],
excel = glue::glue("excel/biopsy_clinical_sigdown-v{ver}.xlsx"))
## Error in write_xlsx(data = biop_sig[["deseq"]][["downs"]][[1]], excel = glue::glue("excel/biopsy_clinical_sigdown-v{ver}.xlsx")): object 'biop_sig' not found
biop_pct_sig <- extract_significant_genes(biop_tables, n = 200, lfc = NULL, p = NULL, according_to = "deseq")
## Error in extract_significant_genes(biop_tables, n = 200, lfc = NULL, p = NULL, : object 'biop_tables' not found
written <- write_xlsx(data = biop_pct_sig[["deseq"]][["ups"]][[1]],
excel = glue::glue("excel/biopsy_clinical_sigup_pct-v{ver}.xlsx"))
## Error in write_xlsx(data = biop_pct_sig[["deseq"]][["ups"]][[1]], excel = glue::glue("excel/biopsy_clinical_sigup_pct-v{ver}.xlsx")): object 'biop_pct_sig' not found
written <- write_xlsx(data = biop_pct_sig[["deseq"]][["downs"]][[1]],
excel = glue::glue("excel/biopsy_clinical_sigdown_pct-v{ver}.xlsx"))
## Error in write_xlsx(data = biop_pct_sig[["deseq"]][["downs"]][[1]], excel = glue::glue("excel/biopsy_clinical_sigdown_pct-v{ver}.xlsx")): object 'biop_pct_sig' not found
biop_cpm <- sm(normalize_expt(biop, convert = "cpm"))
written <- write_xlsx(data = exprs(biop_cpm),
excel = glue::glue("excel/biopsy_cpm_before_batch-v{ver}.xlsx"))
biop_bcpm <- sm(normalize_expt(biop, filter = TRUE, batch = "svaseq", convert = "cpm"))
written <- write_xlsx(data = exprs(biop_bcpm),
excel = glue::glue("excel/biopsy_cpm_after_batch-v{ver}.xlsx"))
biop_de_sva <- sm(all_pairwise(biop, model_batch = "svaseq", filter = TRUE))
## Error in data.frame(sample = samples, factor = as.integer(as.factor(expt[["design"]][[batch_column]])), : arguments imply differing number of rows: 49, 48, 1
biop_tables_sva <- sm(combine_de_tables(
biop_de_sva, keepers = keepers,
excel = glue::glue("excel/biopsy_clinical_all_tables_sva-v{ver}.xlsx")))
## Error in combine_de_tables(biop_de_sva, keepers = keepers, excel = glue::glue("excel/biopsy_clinical_all_tables_sva-v{ver}.xlsx")): object 'biop_de_sva' not found
biop_sig_sva <- sm(extract_significant_genes(
biop_tables_sva,
excel = glue::glue("excel/biopsy_clinical_sig_tables_sva-v{ver}.xlsx"),
according_to = "deseq"))
## Error in extract_significant_genes(biop_tables_sva, excel = glue::glue("excel/biopsy_clinical_sig_tables_sva-v{ver}.xlsx"), : object 'biop_tables_sva' not found
## DESeq2 MA plot of failure / cure
biop_tables[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot
## Error in eval(expr, envir, enclos): object 'biop_tables' not found
## DESeq2 Volcano plot of failure / cure
biop_tables[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot
## Error in eval(expr, envir, enclos): object 'biop_tables' not found
## DESeq2 MA plot of failure / cure
biop_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot
## Error in eval(expr, envir, enclos): object 'biop_tables_sva' not found
## DESeq2 Volcano plot of failure / cure
biop_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot
## Error in eval(expr, envir, enclos): object 'biop_tables_sva' not found
visit_colors <- chosen_colors <- c("#D95F02", "#7570B3", "#1B9E77")
names(visit_colors) <- c(1, 2, 3)
mono_visit <- subset_expt(hs_valid, subset = "typeofcells=='monocytes'") %>%
set_expt_conditions(fact = "visitnumber") %>%
set_expt_batches(fact = "clinicaloutcome") %>%
set_expt_colors(colors = chosen_colors)
## subset_expt(): There were 152, now there are 28 samples.
mono_visit_norm <- normalize_expt(mono_visit, filter = TRUE, norm = "quant", convert = "cpm",
transform = "log2")
## Removing 8906 low-count genes (11035 remaining).
## transform_counts: Found 9 values equal to 0, adding 1 to the matrix.
mono_visit_pca <- plot_pca(mono_visit_norm)
pp(file = "images/monocyte_by_visit.png", image = mono_visit_pca$plot)
mono_visit_nb <- normalize_expt(mono_visit, filter = TRUE, convert = "cpm",
batch = "svaseq", transform = "log2")
## Removing 8906 low-count genes (11035 remaining).
## batch_counts: Before batch/surrogate estimation, 1433 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 18423 entries are 0<x<1: 6%.
## Setting 401 low elements to zero.
## transform_counts: Found 401 values equal to 0, adding 1 to the matrix.
mono_visit_nb_pca <- plot_pca(mono_visit_nb)
pp(file = "images/monocyte_by_visit_nb.png", image = mono_visit_nb_pca$plot)
table(pData(mono_visit_norm)$batch)
##
## cure failure lost
## 9 13 6
keepers <- list(
"second_vs_first" = c("c2", "c1"),
"third_vs_second" = c("c3", "c2"),
"third_vs_first" = c("c3", "c1"))
mono_visit_de <- all_pairwise(mono_visit, model_batch = "svaseq", filter = TRUE)
## batch_counts: Before batch/surrogate estimation, 1433 entries are x==0: 0%.
## Plotting a PCA before surrogate/batch inclusion.
## Using svaseq to visualize before/after batch inclusion.
## Performing a test normalization with: raw
## Removing 0 low-count genes (11035 remaining).
## batch_counts: Before batch/surrogate estimation, 1433 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 18423 entries are 0<x<1: 6%.
## Setting 401 low elements to zero.
## transform_counts: Found 401 values equal to 0, adding 1 to the matrix.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
mono_visit_tables <- combine_de_tables(
mono_visit_de,
keepers = keepers,
excel = glue::glue("excel/mono_visit_tables-v{ver}.xlsx"))
## Deleting the file excel/mono_visit_tables-v202106.xlsx before writing the tables.
new_factor <- as.character(pData(mono_visit)[["visitnumber"]])
not_one_idx <- new_factor != 1
new_factor[not_one_idx] <- "not_1"
mono_one_vs <- set_expt_conditions(mono_visit, new_factor)
mono_one_vs_de <- all_pairwise(mono_one_vs, model_batch = "svaseq", filter = TRUE)
## batch_counts: Before batch/surrogate estimation, 1433 entries are x==0: 0%.
## Plotting a PCA before surrogate/batch inclusion.
## Using svaseq to visualize before/after batch inclusion.
## Performing a test normalization with: raw
## Removing 0 low-count genes (11035 remaining).
## batch_counts: Before batch/surrogate estimation, 1433 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 18423 entries are 0<x<1: 6%.
## Setting 384 low elements to zero.
## transform_counts: Found 384 values equal to 0, adding 1 to the matrix.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
mono_one_vs_tables <- combine_de_tables(
mono_one_vs_de,
excel = glue::glue("excel/mono_one_vs_tables-v{ver}.xlsx"))
## Deleting the file excel/mono_one_vs_tables-v202106.xlsx before writing the tables.
In writing the following, I quickly realized that tspair was not joking when it said it is intended for small numbers of genes. For a full expressionset of human data it is struggling. I like the idea, it may prove worth while to spend some time optimizing the package so that it is more usable.
expt <- hs_clinical_nobiop
simple_tsp <- function(expt, column = "condition") {
facts <- levels(as.factor(pData(expt)[[column]]))
retlist <- list()
if (length(facts) < 2) {
stop("This requires factors with at least 2 levels.")
} else if (length(facts) == 2) {
retlist <- simple_tsp_pair(expt, column = column)
} else {
for (first in 1:(length(facts) - 1)) {
for (second in 2:(length(facts))) {
if (first < second) {
name <- glue::glue("{facts[first]}_vs_{facts[second]}")
message("Starting ", name, ".")
substring <- glue::glue("{column}=='{facts[first]}'|{column}=='{facts[second]}'")
subby <- subset_expt(expt, subset=as.character(substring))
retlist[[name]] <- simple_tsp_pair(subby, column = column)
}
}
}
}
}
simple_tsp_pair <- function(subby, column = "condition", repetitions = 50) {
tsp_input <- subby[["expressionset"]]
tsp_output <- tspcalc(tsp_input, column)
tsp_scores <- tspsig(tsp_input, column, B = repetitions)
}
tsp1 <- tspcalc(tsp_input, "condition")
if (!isTRUE(get0("skip_load"))) {
pander::pander(sessionInfo())
message(paste0("This is hpgltools commit: ", get_git_commit()))
message(paste0("Saving to ", savefile))
tmp <- sm(saveme(filename = savefile))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 72947fcc6afe09da22d71967059edd84e3063341
## This is hpgltools commit: Tue Jun 1 15:57:56 2021 -0400: 72947fcc6afe09da22d71967059edd84e3063341
## Saving to tmrc3_02sample_estimation_v202106.rda.xz
tmp <- loadme(filename = savefile)