1 Annotation

We take the annotation data from ensembl’s biomart instance. The genome which was used to map the data was hg38 revision 100. My default when using biomart is to load the data from 1 year before the current date.

hs_annot <- sm(load_biomart_annotations(year = "2020"))
hs_annot <- hs_annot[["annotation"]]
hs_annot[["transcript"]] <- paste0(rownames(hs_annot), ".", hs_annot[["version"]])
rownames(hs_annot) <- make.names(hs_annot[["ensembl_gene_id"]], unique = TRUE)
tx_gene_map <- hs_annot[, c("transcript", "ensembl_gene_id")]

summary(hs_annot)
##  ensembl_transcript_id ensembl_gene_id       version     transcript_version
##  Length:227921         Length:227921      Min.   : 1.0   Min.   : 1.00     
##  Class :character      Class :character   1st Qu.: 6.0   1st Qu.: 1.00     
##  Mode  :character      Mode  :character   Median :12.0   Median : 1.00     
##                                           Mean   :10.7   Mean   : 3.08     
##                                           3rd Qu.:16.0   3rd Qu.: 5.00     
##                                           Max.   :29.0   Max.   :17.00     
##                                                                            
##  hgnc_symbol        description        gene_biotype         cds_length    
##  Length:227921      Length:227921      Length:227921      Min.   :     3  
##  Class :character   Class :character   Class :character   1st Qu.:   357  
##  Mode  :character   Mode  :character   Mode  :character   Median :   694  
##                                                           Mean   :  1139  
##                                                           3rd Qu.:  1446  
##                                                           Max.   :107976  
##                                                           NA's   :127343  
##  chromosome_name       strand          start_position      end_position     
##  Length:227921      Length:227921      Min.   :5.77e+02   Min.   :6.47e+02  
##  Class :character   Class :character   1st Qu.:3.11e+07   1st Qu.:3.12e+07  
##  Mode  :character   Mode  :character   Median :6.04e+07   Median :6.06e+07  
##                                        Mean   :7.41e+07   Mean   :7.42e+07  
##                                        3rd Qu.:1.09e+08   3rd Qu.:1.09e+08  
##                                        Max.   :2.49e+08   Max.   :2.49e+08  
##                                                                             
##   transcript       
##  Length:227921     
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 
hs_go <- sm(load_biomart_go()[["go"]])
hs_length <- hs_annot[, c("ensembl_gene_id", "cds_length")]
colnames(hs_length) <- c("ID", "length")

2 Introduction

This document is intended to provide an overview of TMRC3 samples which have been sequenced. It includes some plots and analyses showing the relationships among the samples as well as some differential analyses when possible.

3 Sample Estimation

3.1 Generate expressionsets

The sample sheet is copied from our shared online sheet and updated with each release of sequencing data.

samplesheet <- "sample_sheets/tmrc3_samples_20210528.xlsx"

3.1.1 Hisat2 expressionsets

The first thing to note is the large range in coverage. There are multiple samples with coverage which is too low to use. These will be removed shortly.

In the following block I immediately exclude any non-coding reads as well.

## Create the expressionset and immediately pass it to a filter
## removing the non protein coding genes.
sanitize_columns <- c("visitnumber", "clinicaloutcome", "donor",
                      "typeofcells", "clinicalpresentation",
                      "condition", "batch")
hs_expt <- create_expt(samplesheet,
                       file_column = "hg38100hisatfile",
                       savefile = glue::glue("rda/hs_expt_all-v{ver}.rda"),
                       gene_info = hs_annot) %>%
  exclude_genes_expt(column = "gene_biotype", method = "keep",
                     pattern = "protein_coding", meta_column = "ncrna_lost") %>%
  sanitize_expt_metadata(columns = sanitize_columns) %>%
  set_expt_factors(columns = sanitize_columns, class = "factor")
## Reading the sample metadata.
## Dropped 98 rows from the sample metadata because they were blank.
## The sample definitions comprises: 146 rows(samples) and 74 columns(metadata fields).
## Warning in create_expt(samplesheet, file_column = "hg38100hisatfile", savefile =
## glue::glue("rda/hs_expt_all-v{ver}.rda"), : Some samples were removed when cross
## referencing the samples against the count data.
## Matched 21452 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## The final expressionset has 21481 rows and 121 columns.
## Before removal, there were 21481 genes, now there are 19941.
## There are 13 samples which kept less than 90 percent counts.
## TMRC30015 TMRC30017 TMRC30019 TMRC30044 TMRC30045 TMRC30097 TMRC30075 TMRC30087 
##     79.24     85.72     89.75     80.34     73.33     89.90     86.97     83.63 
## TMRC30101 TMRC30104 TMRC30114 TMRC30131 TMRC30073 
##     88.41     80.29     87.62     86.82     89.26
levels(pData(hs_expt[["expressionset"]])[["visitnumber"]]) <- list(
    '0' = "notapplicable", '1' = 1, '2' = 2, '3' = 3)

Split this data into CDS and lncRNA. Oh crap in order to do that I need to recount the data. Running now (20210518)

## lnc_expt <- create_expt(samplesheet,
##                         file_column = "hg38100lncfile",
##                         gene_info = hs_annot)

3.1.1.1 Initial metrics

Once the data was loaded, there are a couple of metrics which may be plotted immediately.

nonzero <- plot_nonzero(hs_expt)
nonzero$plot
## Warning: ggrepel: 92 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

ncrna_lost_df <- as.data.frame(pData(hs_expt)[["ncrna_lost"]])
rownames(ncrna_lost_df) <- rownames(pData(hs_expt))
colnames(ncrna_lost_df) <- "ncrna_lost"

tmpdf <- merge(nonzero$table, ncrna_lost_df, by = "row.names")
rownames(tmpdf) <- tmpdf[["Row.names"]]
tmpdf[["Row.names"]] <- NULL

ggplot(tmpdf, aes(x=ncrna_lost, y=nonzero_genes)) +
  ggplot2::geom_point() +
  ggplot2::ggtitle("Nonzero genes with respect to percent counts 
lost when ncRNA was removed.")

Najib doesn’t want this plot, but I am using it to check new samples, so will hide it from general use.

libsize <- plot_libsize(hs_expt)
libsize$plot

3.2 Minimum coverage sample filtering

I arbitrarily chose 11,000 non-zero genes as a minimum. We may want this to be higher.

hs_valid <- subset_expt(hs_expt, nonzero = 11000)
## The samples (and read coverage) removed when filtering 11000 non-zero genes are:
## TMRC30010 TMRC30050 TMRC30052 
##     52471    808149   3087347
## subset_expt(): There were 121, now there are 118 samples.
valid_write <- sm(write_expt(hs_valid, excel = glue("excel/hs_valid-v{ver}.xlsx")))

4 Project Aims

The project seeks to determine the relationship of the innate immune response and inflammatory signaling to the clinical outcome of antileishmanial drug treatment. We will test the hypothesis that the profile of innate immune cell activation and their dynamics through the course of treatment differ between CL patients with prospectively determined therapeutic cure or failure.

This will be achieved through the characterization of the in vivo dynamics of blood-derived monocyte, neutrophil and eosinophil transcriptome before, during and at the end of treatment in CL patients. Cell-type specific transcriptomes, composite signatures and time-response expression profiles will be contrasted among patients with therapeutic cure or failure.

4.1 Preparation

To address these, I added to the end of the sample sheet columns named ‘condition’, ‘batch’, ‘donor’, and ‘time’. These are filled in with shorthand values according to the above.

4.2 Global view

Before addressing the questions explicitly by subsetting the data, I want to get a look at the samples as they are.

new_names <- pData(hs_valid)[["samplename"]]
hs_valid <- hs_valid %>%
  set_expt_batches(fact = "cellssource") %>%
  set_expt_conditions(fact = "typeofcells") %>%
  set_expt_samplenames(newnames = new_names)

all_norm <- sm(normalize_expt(hs_valid, transform = "log2", norm = "quant",
                              convert = "cpm", filter = TRUE))

all_pca <- plot_pca(all_norm, plot_labels = FALSE,
                    plot_title = "PCA - Cell type", size_column = "visitnumber")
pp(file = glue("images/tmrc3_pca_nolabels-v{ver}.png"), image = all_pca$plot)

write.csv(all_pca$table, file = "coords/hs_donor_pca_coords.csv")
plot_corheat(all_norm, plot_title = "Heirarchical clustering:
         cell types")$plot

4.3 Examine samples relevant to clinical outcome

Now let us consider only the samples for which we have a clinical outcome. These fall primarily into either ‘cured’ or ‘failed’, but some people have not yet returned to the clinic after the first or second visit. These are deemed ‘lost’.

hs_clinical <- hs_valid %>%
  set_expt_conditions(fact = "clinicaloutcome") %>%
  set_expt_batches(fact = "typeofcells") %>%
  subset_expt(subset = "typeofcells!='pbmcs'&typeofcells!='macrophages'")
## subset_expt(): There were 118, now there are 100 samples.
chosen_colors <- c("#D95F02", "#7570B3", "#1B9E77", "#FF0000", "#FF0000")
names(chosen_colors) <- c("cure", "failure", "lost", "null", "notapplicable")
hs_clinical <- set_expt_colors(hs_clinical, colors = chosen_colors)

newnames <- make.names(pData(hs_clinical)[["samplename"]], unique = TRUE)
hs_clinical <- set_expt_samplenames(hs_clinical, newnames = newnames)

hs_clinical_norm <- sm(normalize_expt(hs_clinical, filter = TRUE, transform = "log2",
                                      convert = "cpm", norm = "quant"))
clinical_pca <- plot_pca(hs_clinical_norm, plot_labels = FALSE,
                         size_column = "visitnumber", cis = NULL,
                         plot_title = "PCA - clinical samples")
pp(file = glue("images/all_clinical_nobatch_pca-v{ver}.png"), image = clinical_pca$plot,
   height = 8, width = 20)

4.3.1 Repeat without the biopsy samples

hs_clinical_nobiop <- hs_clinical %>%
  subset_expt(subset = "typeofcells!='biopsy'")
## subset_expt(): There were 100, now there are 60 samples.
hs_clinical_nobiop_norm <- sm(normalize_expt(hs_clinical_nobiop, filter = TRUE, transform = "log2",
                                             convert = "cpm", norm = "quant"))
clinical_nobiop_pca <- plot_pca(hs_clinical_nobiop_norm, plot_labels = FALSE, cis = NULL,
                                plot_title = "PCA - clinical samples without biopsies")
pp(file = glue("images/all_clinical_nobiop_nobatch_pca-v{ver}.png"),
   image = clinical_nobiop_pca$plot)

4.3.2 Attempt to correct for the surrogate variables

At this time we have two primary data structures of interest: hs_clinical and hs_clinical_nobiop

hs_clinical_nb <- normalize_expt(hs_clinical, filter = TRUE, batch = "svaseq",
                                 transform = "log2", convert = "cpm")
## Removing 5346 low-count genes (14595 remaining).
## batch_counts: Before batch/surrogate estimation, 87151 entries are x==0: 6%.
## batch_counts: Before batch/surrogate estimation, 253186 entries are 0<x<1: 17%.
## Setting 17364 low elements to zero.
## transform_counts: Found 17364 values equal to 0, adding 1 to the matrix.
clinical_batch_pca <- plot_pca(hs_clinical_nb, plot_labels = FALSE, cis = NULL,
                               size_column = "visitnumber", plot_title = "PCA - clinical samples")
clinical_batch_pca$plot

hs_clinical_nobiop_nb <- sm(normalize_expt(hs_clinical_nobiop, filter = TRUE, batch = "svaseq",
                                           transform = "log2", convert = "cpm"))
clinical_nobiop_batch_pca <- plot_pca(hs_clinical_nobiop_nb,
                                      plot_title = "PCA - clinical samples without biopsies",
                                      plot_labels = FALSE)
pp(file = "images/clinical_batch.png", image = clinical_nobiop_batch_pca$plot)

test <- plot_pca(hs_clinical_nobiop_nb, size_column = "visitnumber",
                 plot_title = "PCA - clinical samples without biopsies",
                 plot_labels = FALSE)
test$plot

clinical_nobiop_batch_tsne <- plot_tsne(hs_clinical_nobiop_nb,
                                        plot_title = "tSNE - clinical samples without biopsies",
                                        plot_labels = FALSE)
clinical_nobiop_batch_tsne$plot

4.3.2.1 Look at remaining variance with variancePartition

test <- simple_varpart(hs_clinical_nobiop)
## 
## Total:102 s
test$partition_plot

4.4 Perform DE of the clinical samples cure vs. fail

individual_celltypes <- subset_expt(hs_clinical_nobiop, subset="condition!='lost'")
## subset_expt(): There were 60, now there are 47 samples.
hs_clinic_de <- sm(all_pairwise(individual_celltypes, model_batch = "svaseq", filter = TRUE))
hs_clinic_table <- sm(combine_de_tables(
    hs_clinic_de,
    excel = glue::glue("excel/individual_celltypes_table-v{ver}.xlsx")))

hs_clinic_sig <- sm(extract_significant_genes(
    hs_clinic_table,
    excel = glue::glue("excel/individual_celltypes_sig-v{ver}.xlsx")))

hs_clinic_sig[["summary_df"]]
##   limma_V1 limma_V2 edger_V1 edger_V2 deseq_V1 deseq_V2 ebseq_V1 ebseq_V2
## 1      241      252      309      340      302      369       75      222
## 2      104       32      405      458      419      456      101        9
## 3       42       15      226      302      291      312        5        0
##   basic_V1 basic_V2
## 1       53       30
## 2      145      118
## 3      146      119
hs_clinic_de[["comparison"]][["heat"]]

4.4.1 Perform LRT with the clinical samples

I am not sure if we have enough samples across the three visit to completely work as well as we would like, but there is only 1 way to find out! Now that I think about it, one thing which might be awesome is to use cell type as an interacting factor…

4.4.1.1 With biopsy samples

I figure this might be a place where the biopsy samples might prove useful.

clinical_nolost <- subset_expt(hs_clinical, subset="condition!='lost'")
## subset_expt(): There were 100, now there are 85 samples.
lrt_visit_clinical_test <- deseq_lrt(clinical_nolost, transform = "vst",
                                     interactor_column = "visitnumber",
                                     interest_column = "clinicaloutcome")
## converting counts to integer mode
## Error in checkFullRank(modelMatrix): the model matrix is not full rank, so the model cannot be fit as specified.
##   Levels or combinations of levels without any samples have resulted in
##   column(s) of zeros in the model matrix.
## 
##   Please read the vignette section 'Model matrix not full rank':
## 
##   vignette('DESeq2')
lrt_visit_clinical_test[["favorite_genes"]]
## Error in eval(expr, envir, enclos): object 'lrt_visit_clinical_test' not found
lrt_celltype_clinical_test <- deseq_lrt(clinical_nolost, transform = "vst",
                                        interactor_column = "typeofcells",
                                        interest_column = "clinicaloutcome")
## converting counts to integer mode
## Error in checkFullRank(modelMatrix): the model matrix is not full rank, so the model cannot be fit as specified.
##   Levels or combinations of levels without any samples have resulted in
##   column(s) of zeros in the model matrix.
## 
##   Please read the vignette section 'Model matrix not full rank':
## 
##   vignette('DESeq2')
lrt_celltype_clinical_test[["favorite_genes"]]
## Error in eval(expr, envir, enclos): object 'lrt_celltype_clinical_test' not found

4.4.2 Look at only the differential genes

A good suggestion from Theresa was to examine only the most variant genes from failure vs. cure and see how they change the clustering/etc results. This is my attempt to address this query.

hs_clinic_topn <- sm(extract_significant_genes(hs_clinic_table, n = 100))
table <- "failure_vs_cure"
wanted <- rbind(hs_clinic_topn[["deseq"]][["ups"]][[table]],
                hs_clinic_topn[["deseq"]][["downs"]][[table]])

small_expt <- exclude_genes_expt(hs_clinical_nobiop, ids = rownames(wanted), method = "keep")
## Before removal, there were 19941 genes, now there are 200.
## There are 60 samples which kept less than 90 percent counts.
##  X1017n1  X1017m1  X1034n1  X1034n2  X1034m2 X1034m2.  X2052e1  X2052m2 
##   0.3861   0.3697   2.1770   2.5482   1.1261   1.1226   0.2551   0.6082 
##  X2052n2  X2052m3  X2052n3  X2065m1  X2065n1  X2066m1  X2066n1  X2065m2 
##   1.2114   0.6975   0.9878   0.9096   2.7314   0.4047   0.5796   0.4889 
##  X2065n2  X2065e2  X2066m2  X2068m1  X2068n1  X2068e1  X2072m1  X2072n1 
##   0.5044   0.8718   0.4739   0.4401   0.5095   0.8828   0.4535   0.3987 
##  X2072e1  X2073m1  X2073n1  X2073e1  X2068m2  X2068n2  X2068e2  X2072m2 
##   0.6552   0.6323   1.6327   0.5696   0.3536   0.4292   0.5612   0.4004 
##  X2072n2  X2072e2  X2073m2  X2073n2  X2073e2  X2066m3  X2068m3  X2068n3 
##   0.4677   0.3823   1.0324   2.5885   0.6825   0.4042   0.3850   0.5127 
##  X2068e3  X2072m3  X2072n3  X2072e3  X2073m3  X2073n3  X2073e3  X2162m1 
##   0.4305   0.6791   1.7089   0.4629   0.5387   0.6157   0.3988   0.4238 
##  X2162n1  X2162e1  X2168n1  X2168e1  X2168m2  X2168n2  X2168e2  X2168m3 
##   0.3884   0.5535   2.0907   0.7178   0.9330   2.1435   0.5955   1.3081 
##  X2168n3  X2168e3  X2172n1  X2172e1 
##   3.4334   0.7999   0.3056   0.2949
small_norm <- sm(normalize_expt(small_expt, transform = "log2", convert = "cpm",
                                norm = "quant", filter = TRUE))
plot_pca(small_norm)$plot

small_nb <- normalize_expt(small_expt, transform = "log2", convert = "cpm",
                           batch = "svaseq", norm = "quant", filter = TRUE)
## Warning in normalize_expt(small_expt, transform = "log2", convert = "cpm", :
## Quantile normalization and sva do not always play well together.
## Removing 0 low-count genes (200 remaining).
## batch_counts: Before batch/surrogate estimation, 11 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 269 entries are 0<x<1: 2%.
## Setting 27 low elements to zero.
## transform_counts: Found 27 values equal to 0, adding 1 to the matrix.
plot_pca(small_nb)$plot

## DESeq2 MA plot of failure / cure
hs_clinic_table[["plots"]][["failure_vs_cure"]][["deseq_ma_plots"]]$plot

## DESeq2 Volcano plot of failure / cure
hs_clinic_table[["plots"]][["failure_vs_cure"]][["deseq_vol_plots"]]$plot

4.4.3 g:Profiler results using the significant up and down genes

ups <- hs_clinic_sig[["deseq"]][["ups"]][[1]]
downs <- hs_clinic_sig[["deseq"]][["downs"]][[1]]

hs_clinic_gprofiler_ups <- simple_gprofiler(ups)
## Performing gProfiler GO search of 302 genes against hsapiens.
## GO search found 94 hits.
## Performing gProfiler KEGG search of 302 genes against hsapiens.
## KEGG search found 10 hits.
## Performing gProfiler REAC search of 302 genes against hsapiens.
## REAC search found 8 hits.
## Performing gProfiler MI search of 302 genes against hsapiens.
## MI search found 0 hits.
## Performing gProfiler TF search of 302 genes against hsapiens.
## TF search found 43 hits.
## Performing gProfiler CORUM search of 302 genes against hsapiens.
## CORUM search found 1 hits.
## Performing gProfiler HP search of 302 genes against hsapiens.
## HP search found 0 hits.
hs_clinic_gprofiler_ups[["pvalue_plots"]][["bpp_plot_over"]]

hs_clinic_gprofiler_ups[["pvalue_plots"]][["mfp_plot_over"]]

hs_clinic_gprofiler_ups[["pvalue_plots"]][["reactome_plot_over"]]

##hs_try2 <- simple_gprofiler2(ups)

hs_clinic_gprofiler_downs <- simple_gprofiler(downs)
## Performing gProfiler GO search of 369 genes against hsapiens.
## GO search found 116 hits.
## Performing gProfiler KEGG search of 369 genes against hsapiens.
## KEGG search found 7 hits.
## Performing gProfiler REAC search of 369 genes against hsapiens.
## REAC search found 5 hits.
## Performing gProfiler MI search of 369 genes against hsapiens.
## MI search found 0 hits.
## Performing gProfiler TF search of 369 genes against hsapiens.
## TF search found 17 hits.
## Performing gProfiler CORUM search of 369 genes against hsapiens.
## CORUM search found 3 hits.
## Performing gProfiler HP search of 369 genes against hsapiens.
## HP search found 5 hits.
hs_clinic_gprofiler_downs[["pvalue_plots"]][["bpp_plot_over"]]

hs_clinic_gprofiler_downs[["pvalue_plots"]][["mfp_plot_over"]]

hs_clinic_gprofiler_downs[["pvalue_plots"]][["reactome_plot_over"]]

4.5 Perform GSVA on the clinical samples

hs_celltype_gsva_c2 <- sm(simple_gsva(individual_celltypes))
hs_celltype_gsva_c2_sig <- sm(get_sig_gsva_categories(
    hs_celltype_gsva_c2,
    excel = "excel/individual_celltypes_gsva_c2.xlsx"))

broad_c7 <- GSEABase::getGmt("reference/msigdb/c7.all.v7.2.entrez.gmt",
                             collectionType = GSEABase::BroadCollection(category = "c7"),
                             geneIdType = GSEABase::EntrezIdentifier())
hs_celltype_gsva_c7 <- sm(simple_gsva(individual_celltypes, signatures = broad_c7,
                                      msig_xml = "reference/msigdb_v7.2.xml", cores = 10))
hs_celltype_gsva_c7_sig <- sm(get_sig_gsva_categories(
    hs_celltype_gsva_c7,
    excel = "excel/individual_celltypes_gsva_c7.xlsx"))

5 Individual Cell types

The following blocks split the samples into a few groups by sample type and look at the distributions between them.

5.1 Implementation details

Get top/bottom n genes for each cell type, using clinical outcome as the factor of interest. For the moment, use sva for the DE analysis. Provide cpms for the top/bottom n genes.

Start with top/bottom 200. Perform default logFC and p-value as well.

5.1.1 Shared contrasts

Here is the contrast we will use throughput, I am leaving open the option to add more.

keepers <- list(
  "fail_vs_cure" = c("failure", "cure"))

5.2 Monocytes

5.2.1 Evaluate Monocyte samples

mono <- subset_expt(hs_valid, subset = "typeofcells=='monocytes'") %>%
  set_expt_conditions(fact = "clinicaloutcome") %>%
  set_expt_batches(fact = "donor") %>%
  set_expt_colors(colors = chosen_colors)
## subset_expt(): There were 118, now there are 22 samples.
## FIXME set_expt_colors should speak up if there are mismatches here!!!

save_result <- save(mono, file = "rda/monocyte_expt.rda")
mono_norm <- normalize_expt(mono, convert = "cpm", filter = TRUE,
                            transform = "log2", norm = "quant")
## Removing 8966 low-count genes (10975 remaining).
## transform_counts: Found 9 values equal to 0, adding 1 to the matrix.
plt <- plot_pca(mono_norm, plot_labels = FALSE)$plot
pp(file = glue("images/mono_pca_normalized-v{ver}.pdf"), image = plt)

mono_nb <- normalize_expt(mono, convert = "cpm", filter = TRUE,
                          transform = "log2", batch = "svaseq")
## Removing 8966 low-count genes (10975 remaining).
## batch_counts: Before batch/surrogate estimation, 1202 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 13643 entries are 0<x<1: 6%.
## Setting 382 low elements to zero.
## transform_counts: Found 382 values equal to 0, adding 1 to the matrix.
plt <- plot_pca(mono_nb, plot_labels = FALSE)$plot
pp(file = glue("images/mono_pca_normalized_batch-v{ver}.pdf"), image = plt)

5.2.2 DE of Monocyte samples

5.2.2.1 Without sva

mono_de <- sm(all_pairwise(mono, model_batch = FALSE, filter = TRUE))
mono_tables <- sm(combine_de_tables(
    mono_de, keepers = keepers,
    excel = glue::glue("excel/monocyte_clinical_all_tables-v{ver}.xlsx")))
written <- write_xlsx(data = mono_tables[["data"]][[1]],
                      excel = glue::glue("excel/monocyte_clinical_table-v{ver}.xlsx"))
mono_sig <- sm(extract_significant_genes(mono_tables, according_to = "deseq"))
written <- write_xlsx(data = mono_sig[["deseq"]][["ups"]][[1]],
                      excel = glue::glue("excel/monocyte_clinical_sigup-v{ver}.xlsx"))
written <- write_xlsx(data = mono_sig[["deseq"]][["downs"]][[1]],
                      excel = glue::glue("excel/monocyte_clinical_sigdown-v{ver}.xlsx"))

mono_pct_sig <- sm(extract_significant_genes(mono_tables, n = 200,
                                             lfc = NULL, p = NULL, according_to = "deseq"))
written <- write_xlsx(data = mono_pct_sig[["deseq"]][["ups"]][[1]],
                      excel = glue::glue("excel/monocyte_clinical_sigup_pct-v{ver}.xlsx"))
written <- write_xlsx(data = mono_pct_sig[["deseq"]][["downs"]][[1]],
                      excel = glue::glue("excel/monocyte_clinical_sigdown_pct-v{ver}.xlsx"))
mono_sig$summary_df
## data frame with 0 columns and 1 row
## Print out a table of the cpm values for other explorations.
mono_cpm <- sm(normalize_expt(mono, convert = "cpm"))
written <- write_xlsx(data = exprs(mono_cpm),
                      excel = glue::glue("excel/monocyte_cpm_before_batch-v{ver}.xlsx"))
mono_bcpm <- sm(normalize_expt(mono, filter = TRUE, convert = "cpm", batch = "svaseq"))
written <- write_xlsx(data = exprs(mono_bcpm),
                      excel = glue::glue("excel/monocyte_cpm_after_batch-v{ver}.xlsx"))

5.2.2.2 With sva

mono_de_sva <- sm(all_pairwise(mono, model_batch = "svaseq", filter = TRUE))
mono_tables_sva <- sm(combine_de_tables(
    mono_de_sva, keepers = keepers,
    excel = glue::glue("excel/monocyte_clinical_all_tables_sva-v{ver}.xlsx")))
mono_sig_sva <- sm(extract_significant_genes(
    mono_tables_sva,
    excel = glue::glue("excel/monocyte_clinical_sig_tables_sva-v{ver}.xlsx"),
    according_to = "deseq"))

5.2.2.3 Monocyte DE plots

First print out the DE plots without and then with sva estimates.

## DESeq2 MA plot of failure / cure
mono_tables[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot

## DESeq2 Volcano plot of failure / cure
mono_tables[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot

## DESeq2 MA plot of failure / cure with svaseq
mono_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot

## DESeq2 Volcano plot of failure / cure with svaseq
mono_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot

5.2.2.5 Monocyte MSigDB query

broad_c7 <- GSEABase::getGmt("reference/msigdb/c7.all.v7.2.entrez.gmt",
                             collectionType = GSEABase::BroadCollection(category = "c7"),
                             geneIdType = GSEABase::EntrezIdentifier())

mono_up_goseq_msig <- goseq_msigdb(sig_genes = ups, signatures = broad_c7,
                                   signature_category = "c7", length_db = hs_length)
## Before conversion: 150, after conversion: 151.
## Before conversion: 227921, after conversion: 35341.
## Found 134 go_db genes and 151 length_db genes out of 151.
mono_down_goseq_msig <- goseq_msigdb(sig_genes = downs, signatures = broad_c7,
                                     signature_category = "c7", length_db = hs_length)
## Before conversion: 296, after conversion: 293.
## Before conversion: 227921, after conversion: 35341.
## Found 281 go_db genes and 293 length_db genes out of 293.

5.2.2.6 Plot of similar experiments

## Monocyte genes with increased expression in the failed samples
## share genes with the following experiments
mono_up_goseq_msig[["pvalue_plots"]][["mfp_plot_over"]]

## Monocyte genes with increased expression in the cured samples
## share genes with the following experiments
mono_down_goseq_msig[["pvalue_plots"]][["mfp_plot_over"]]

5.2.3 Evaluate Neutrophil samples

neut <- subset_expt(hs_valid, subset = "typeofcells=='neutrophils'") %>%
  set_expt_conditions(fact = "clinicaloutcome") %>%
  set_expt_batches(fact = "donor") %>%
  set_expt_colors(colors = chosen_colors)
## subset_expt(): There were 118, now there are 22 samples.
save_result <- save(neut, file = "rda/neutrophil_expt.rda")
neut_norm <- sm(normalize_expt(neut, convert = "cpm", filter = TRUE, transform = "log2"))
plt <- plot_pca(neut_norm, plot_labels = FALSE)$plot
pp(file = glue("images/neut_pca_normalized-v{ver}.pdf"), image = plt)

neut_nb <- sm(normalize_expt(neut, convert = "cpm", filter = TRUE,
                             transform = "log2", batch = "svaseq"))
plt <- plot_pca(neut_nb, plot_labels = FALSE)$plot
pp(file = glue("images/neut_pca_normalized_svaseq-v{ver}.pdf"), image = plt)

5.2.4 DE of Netrophil samples

5.2.4.1 Without sva

neut_de <- sm(all_pairwise(neut, model_batch = FALSE, filter = TRUE))
neut_tables <- sm(combine_de_tables(
    neut_de, keepers = keepers,
    excel = glue::glue("excel/neutrophil_clinical_all_tables-v{ver}.xlsx")))
written <- write_xlsx(data = neut_tables[["data"]][[1]],
                      excel = glue::glue("excel/neutrophil_clinical_table-v{ver}.xlsx"))
neut_sig <- sm(extract_significant_genes(neut_tables, according_to = "deseq"))
written <- write_xlsx(data = neut_sig[["deseq"]][["ups"]][[1]],
                      excel = glue::glue("excel/neutrophil_clinical_sigup-v{ver}.xlsx"))
written <- write_xlsx(data = neut_sig[["deseq"]][["downs"]][[1]],
                      excel = glue::glue("excel/neutrophil_clinical_sigdown-v{ver}.xlsx"))

neut_pct_sig <- sm(extract_significant_genes(neut_tables, n = 200, lfc = NULL,
                                             p = NULL, according_to = "deseq"))
written <- write_xlsx(data = neut_sig[["deseq"]][["ups"]][[1]],
                      excel = glue::glue("excel/neutrophil_clinical_sigup_pct-v{ver}.xlsx"))
written <- write_xlsx(data = neut_sig[["deseq"]][["downs"]][[1]],
                      excel = glue::glue("excel/neutrophil_clinical_sigdown_pct-v{ver}.xlsx"))
neut_cpm <- sm(normalize_expt(neut, convert = "cpm"))
written <- write_xlsx(data = exprs(neut_cpm),
                      excel = glue::glue("excel/neutrophil_cpm_before_batch-v{ver}.xlsx"))
neut_bcpm <- sm(normalize_expt(neut, filter = TRUE, batch = "svaseq", convert = "cpm"))
written <- write_xlsx(data = exprs(neut_bcpm),
                      excel = glue::glue("excel/neutrophil_cpm_after_batch-v{ver}.xlsx"))

5.2.4.2 With sva

neut_de_sva <- sm(all_pairwise(neut, model_batch = "svaseq", filter = TRUE))
neut_tables_sva <- sm(combine_de_tables(
    neut_de_sva, keepers = keepers,
    excel = glue::glue("excel/neutrophil_clinical_all_tables_sva-v{ver}.xlsx")))
neut_sig_sva <- sm(extract_significant_genes(
    neut_tables_sva,
    excel = glue::glue("excel/neutrophil_clinical_sig_tables_sva-v{ver}.xlsx"),
    according_to = "deseq"))

5.2.4.3 Neutrophil DE plots

## DESeq2 MA plot of failure / cure
neut_tables[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot

## DESeq2 Volcano plot of failure / cure
neut_tables[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot

## DESeq2 MA plot of failure / cure with sva
neut_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot

## DESeq2 Volcano plot of failure / cure with sva
neut_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot

5.2.4.5 Neutrophil GSVA query

neut_up_goseq_msig <- goseq_msigdb(sig_genes = ups, signatures = broad_c7,
                                   signature_category = "c7", length_db = hs_length)
## Before conversion: 217, after conversion: 215.
## Before conversion: 227921, after conversion: 35341.
## Found 200 go_db genes and 215 length_db genes out of 215.
neut_down_goseq_msig <- goseq_msigdb(sig_genes = downs, signatures = broad_c7,
                                     signature_category = "c7", length_db = hs_length)
## Before conversion: 199, after conversion: 194.
## Before conversion: 227921, after conversion: 35341.
## Found 187 go_db genes and 194 length_db genes out of 194.

5.2.4.6 Plot of similar experiments

## Neutrophil genes with increased expression in the failed samples
## share genes with the following experiments
neut_up_goseq_msig[["pvalue_plots"]][["mfp_plot_over"]]

## Neutrophil genes with increased expression in the cured samples
## share genes with the following experiments
neut_down_goseq_msig[["pvalue_plots"]][["mfp_plot_over"]]

5.3 Eosinophils

5.3.1 Evaluate Eosinophil samples

eo <- subset_expt(hs_valid, subset = "typeofcells=='eosinophils'") %>%
  set_expt_conditions(fact = "clinicaloutcome") %>%
  set_expt_batches(fact = "donor") %>%
  set_expt_colors(colors = chosen_colors)
## subset_expt(): There were 118, now there are 16 samples.
save_result <- save(eo, file = "rda/eosinophil_expt.rda")
eo_norm <- sm(normalize_expt(eo, convert = "cpm", transform = "log2",
                             norm = "quant", filter = TRUE))
plt <- plot_pca(eo_norm, plot_labels = FALSE)$plot
pp(file = glue("images/eo_pca_normalized-v{ver}.pdf"), image = plt)

eo_nb <- sm(normalize_expt(eo, convert = "cpm", transform = "log2",
                           filter = TRUE, batch = "svaseq"))
plot_pca(eo_nb)$plot

5.3.2 DE of Eosinophil samples

5.3.2.1 Withouth sva

eo_de <- sm(all_pairwise(eo, model_batch = FALSE, filter = TRUE))
eo_tables <- sm(combine_de_tables(
    eo_de, keepers = keepers,
    excel = glue::glue("excel/eosinophil_clinical_all_tables-v{ver}.xlsx")))
written <- write_xlsx(data = eo_tables[["data"]][[1]],
                      excel = glue::glue("excel/eosinophil_clinical_table-v{ver}.xlsx"))
eo_sig <- sm(extract_significant_genes(eo_tables, according_to = "deseq"))
written <- write_xlsx(data = eo_sig[["deseq"]][["ups"]][[1]],
                      excel = glue::glue("excel/eosinophil_clinical_sigup-v{ver}.xlsx"))
written <- write_xlsx(data = eo_sig[["deseq"]][["downs"]][[1]],
                      excel = glue::glue("excel/eosinophil_clinical_sigdown-v{ver}.xlsx"))

eo_pct_sig <- sm(extract_significant_genes(eo_tables, n = 200,
                                           lfc = NULL, p = NULL, according_to = "deseq"))
written <- write_xlsx(data = eo_pct_sig[["deseq"]][["ups"]][[1]],
                      excel = glue::glue("excel/eosinophil_clinical_sigup_pct-v{ver}.xlsx"))
written <- write_xlsx(data = eo_pct_sig[["deseq"]][["downs"]][[1]],
                      excel = glue::glue("excel/eosinophil_clinical_sigdown_pct-v{ver}.xlsx"))

eo_cpm <- sm(normalize_expt(eo, convert = "cpm"))
written <- write_xlsx(data = exprs(eo_cpm),
                      excel = glue::glue("excel/eosinophil_cpm_before_batch-v{ver}.xlsx"))
eo_bcpm <- sm(normalize_expt(eo, filter = TRUE, batch = "svaseq", convert = "cpm"))
written <- write_xlsx(data = exprs(eo_bcpm),
                      excel = glue::glue("excel/eosinophil_cpm_after_batch-v{ver}.xlsx"))

5.3.2.2 With sva

eo_de_sva <- sm(all_pairwise(eo, model_batch = "svaseq", filter = TRUE))
eo_tables_sva <- sm(combine_de_tables(
    eo_de_sva, keepers = keepers,
    excel = glue::glue("excel/eosinophil_clinical_all_tables_sva-v{ver}.xlsx")))
eo_sig_sva <- sm(extract_significant_genes(
    eo_tables_sva,
    excel = glue::glue("excel/eosinophil_clinical_sig_tables_sva-v{ver}.xlsx"),
    according_to = "deseq"))

5.3.2.3 Eosinophil DE plots

## DESeq2 MA plot of failure / cure
eo_tables[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot

## DESeq2 Volcano plot of failure / cure
eo_tables[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot

## DESeq2 MA plot of failure / cure with sva
eo_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot

## DESeq2 Volcano plot of failure / cure with sva
eo_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot

5.3.2.5 Eosinophil MSigDB query

eo_up_goseq_msig <- goseq_msigdb(sig_genes = ups, signatures = broad_c7,
                                 signature_category = "c7", length_db = hs_length)
## Before conversion: 191, after conversion: 188.
## Before conversion: 227921, after conversion: 35341.
## Found 179 go_db genes and 188 length_db genes out of 188.
eo_down_goseq_msig <- goseq_msigdb(sig_genes = downs, signatures = broad_c7,
                                   signature_category = "c7", length_db = hs_length)
## Before conversion: 144, after conversion: 143.
## Before conversion: 227921, after conversion: 35341.
## Found 134 go_db genes and 143 length_db genes out of 143.

5.3.2.6 Plot of similar experiments

## Eosinophil genes with increased expression in the failed samples
## share genes with the following experiments
eo_up_goseq_msig[["pvalue_plots"]][["mfp_plot_over"]]

## Eosinophil genes with increased expression in the cured samples
## share genes with the following experiments
eo_down_goseq_msig[["pvalue_plots"]][["mfp_plot_over"]]

5.4 Biopsies

5.4.1 Evaluate Biopsy samples

biop <- subset_expt(hs_valid, subset = "typeofcells=='biopsy'") %>%
  set_expt_conditions(fact = "clinicaloutcome") %>%
  set_expt_batches(fact = "donor") %>%
  set_expt_colors(colors = chosen_colors)
## subset_expt(): There were 118, now there are 40 samples.
save_result <- save(biop, file = "rda/biopsy_expt.rda")
biop_norm <- normalize_expt(biop, filter = TRUE, convert = "cpm",
                            transform = "log2", norm = "quant")
## Removing 5816 low-count genes (14125 remaining).
## transform_counts: Found 5 values equal to 0, adding 1 to the matrix.
plt <- plot_pca(biop_norm, plot_labels = FALSE)$plot
pp(file = glue("images/biop_pca_normalized-v{ver}.pdf"), image = plt)

biop_nb <- sm(normalize_expt(biop, convert = "cpm", filter = TRUE,
                             transform = "log2", batch = "svaseq"))
plt <- plot_pca(biop_nb, plot_labels = FALSE)$plot
pp(file = glue("images/biop_pca_normalized_svaseq-v{ver}.pdf"), image = plt)

5.4.2 DE of Biopsy samples

5.4.2.1 Without sva

biop_de <- sm(all_pairwise(biop, model_batch = FALSE, filter = TRUE))
biop_tables <- combine_de_tables(biop_de, keepers = keepers,
                                 excel = glue::glue("excel/biopsy_clinical_all_tables-v{ver}.xlsx"))
## Deleting the file excel/biopsy_clinical_all_tables-v202105.xlsx before writing the tables.
written <- write_xlsx(data = biop_tables[["data"]][[1]],
                      excel = glue::glue("excel/biopsy_clinical_table-v{ver}.xlsx"))
biop_sig <- extract_significant_genes(biop_tables, according_to = "deseq")
##written <- write_xlsx(data = biop_sig[["deseq"]][["ups"]][[1]],
##                      excel = glue::glue("excel/biopsy_clinical_sigup-v{ver}.xlsx"))
written <- write_xlsx(data = biop_sig[["deseq"]][["downs"]][[1]],
                      excel = glue::glue("excel/biopsy_clinical_sigdown-v{ver}.xlsx"))
biop_pct_sig <- extract_significant_genes(biop_tables, n = 200, lfc = NULL, p = NULL, according_to = "deseq")
## Getting the top and bottom 200 genes.
written <- write_xlsx(data = biop_pct_sig[["deseq"]][["ups"]][[1]],
                      excel = glue::glue("excel/biopsy_clinical_sigup_pct-v{ver}.xlsx"))
written <- write_xlsx(data = biop_pct_sig[["deseq"]][["downs"]][[1]],
                      excel = glue::glue("excel/biopsy_clinical_sigdown_pct-v{ver}.xlsx"))

biop_cpm <- sm(normalize_expt(biop, convert = "cpm"))
written <- write_xlsx(data = exprs(biop_cpm),
                      excel = glue::glue("excel/biopsy_cpm_before_batch-v{ver}.xlsx"))
biop_bcpm <- sm(normalize_expt(biop, filter = TRUE, batch = "svaseq", convert = "cpm"))
written <- write_xlsx(data = exprs(biop_bcpm),
                      excel = glue::glue("excel/biopsy_cpm_after_batch-v{ver}.xlsx"))

5.4.2.2 with sva

biop_de_sva <- sm(all_pairwise(biop, model_batch = "svaseq", filter = TRUE))
biop_tables_sva <- sm(combine_de_tables(
    biop_de_sva, keepers = keepers,
    excel = glue::glue("excel/biopsy_clinical_all_tables_sva-v{ver}.xlsx")))
biop_sig_sva <- sm(extract_significant_genes(
    biop_tables_sva,
    excel = glue::glue("excel/biopsy_clinical_sig_tables_sva-v{ver}.xlsx"),
    according_to = "deseq"))

5.4.2.3 Biopsy DE plots

## DESeq2 MA plot of failure / cure
biop_tables[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot

## DESeq2 Volcano plot of failure / cure
biop_tables[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot

## DESeq2 MA plot of failure / cure
biop_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot

## DESeq2 Volcano plot of failure / cure
biop_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot

6 Look for shared genes among Monocytes/Neutrophils/Eosinophils

We have three variables containing the ‘significant’ DE genes for the three cell types. For this I am choosing (for the moment) to use the sva data.

## mono_sig_sva, neut_sig_sva, eo_sig_sva
sig_vectors <- list(
    "monocytes" = c(rownames(mono_sig_sva[["deseq"]][["ups"]][["fail_vs_cure"]]),
                    rownames(mono_sig_sva[["deseq"]][["downs"]][["fail_vs_cure"]])),
    "neutrophils" = c(rownames(neut_sig_sva[["deseq"]][["ups"]][["fail_vs_cure"]]),
                      rownames(neut_sig_sva[["deseq"]][["downs"]][["fail_vs_cure"]])),
    "eosinophils" =  c(rownames(eo_sig_sva[["deseq"]][["ups"]][["fail_vs_cure"]]),
                       rownames(eo_sig_sva[["deseq"]][["downs"]][["fail_vs_cure"]])))

shared_vector <- Vennerable::Venn(Sets = sig_vectors)
Vennerable::plot(shared_vector, doWeights = FALSE)

shared_ids <- shared_vector@IntersectionSets[["111"]]
shared_expt <- exclude_genes_expt(hs_expt, ids = shared_ids, method = "keep")
## Before removal, there were 19941 genes, now there are 36.
## There are 121 samples which kept less than 90 percent counts.
## TMRC30001 TMRC30002 TMRC30003 TMRC30004 TMRC30005 TMRC30006 TMRC30007 TMRC30008 
##   0.08177   0.06762   0.06471   0.07873   0.07563   0.06217   0.07219   0.06669 
## TMRC30009 TMRC30010 TMRC30015 TMRC30011 TMRC30012 TMRC30013 TMRC30016 TMRC30017 
##   0.10188   0.08386   0.24011   0.10816   0.07162   0.07298   0.20863   0.13210 
## TMRC30050 TMRC30052 TMRC30071 TMRC30056 TMRC30058 TMRC30105 TMRC30094 TMRC30080 
##   0.06323   0.04437   0.04255   0.08653   0.08791   0.09219   0.09530   0.13408 
## TMRC30103 TMRC30018 TMRC30107 TMRC30083 TMRC30019 TMRC30082 TMRC30093 TMRC30113 
##   0.07066   0.12386   0.03972   0.04390   0.12466   0.15406   0.09825   0.15463 
## TMRC30096 TMRC30014 TMRC30021 TMRC30029 TMRC30020 TMRC30038 TMRC30039 TMRC30023 
##   0.09395   0.09953   0.05239   0.06954   0.11810   0.09687   0.06104   0.07552 
## TMRC30025 TMRC30022 TMRC30046 TMRC30047 TMRC30048 TMRC30026 TMRC30030 TMRC30031 
##   0.13884   0.12129   0.04641   0.07020   0.04319   0.14056   0.05346   0.04636 
## TMRC30032 TMRC30024 TMRC30040 TMRC30033 TMRC30049 TMRC30053 TMRC30054 TMRC30115 
##   0.05441   0.06185   0.06554   0.04816   0.04721   0.08304   0.04960   0.03774 
## TMRC30037 TMRC30027 TMRC30028 TMRC30034 TMRC30035 TMRC30036 TMRC30044 TMRC30055 
##   0.06349   0.04673   0.04974   0.07119   0.10052   0.05484   0.13969   0.05329 
## TMRC30068 TMRC30070 TMRC30041 TMRC30042 TMRC30043 TMRC30045 TMRC30059 TMRC30060 
##   0.06077   0.04417   0.07084   0.05150   0.05177   0.10428   1.03130   1.13846 
## TMRC30061 TMRC30062 TMRC30063 TMRC30051 TMRC30064 TMRC30065 TMRC30066 TMRC30067 
##   0.67524   1.05412   0.70435   0.65118   0.71222   0.78794   0.56314   0.58736 
## TMRC30057 TMRC30069 TMRC30116 TMRC30074 TMRC30072 TMRC30076 TMRC30077 TMRC30078 
##   0.50930   0.47283   0.09468   0.06669   0.05467   0.08698   0.05958   0.05358 
## TMRC30088 TMRC30079 TMRC30134 TMRC30135 TMRC30097 TMRC30075 TMRC30085 TMRC30086 
##   0.08898   0.05834   0.04964   0.04720   0.13356   0.12563   0.08351   0.18174 
## TMRC30087 TMRC30101 TMRC30089 TMRC30090 TMRC30081 TMRC30092 TMRC30104 TMRC30106 
##   0.22070   0.10742   0.09324   0.09998   0.10374   0.11950   0.09434   0.10286 
## TMRC30114 TMRC30095 TMRC30108 TMRC30130 TMRC30124 TMRC30131 TMRC30109 TMRC30084 
##   0.13159   0.14593   0.13270   0.14117   0.14282   0.15597   0.10005   0.10111 
## TMRC30098 TMRC30099 TMRC30100 TMRC30110 TMRC30111 TMRC30102 TMRC30091 TMRC30112 
##   0.11361   0.10510   0.06507   0.07938   0.12262   0.11168   0.10842   0.08888 
## TMRC30073 
##   0.13284
shared_written <- sm(write_expt(shared_expt, excel="excel/genes_shared_across_celltypes.xlsx"))
## Error in if (any(abs(rowSums(as.matrix(varPart)) - 1) > 1e-04)) { : 
##   missing value where TRUE/FALSE needed
## Error in quantile.default(prop_median, p = c(1, 3)/4) : 
##   missing values and NaN's not allowed if 'na.rm' is FALSE
## Error in if (any(abs(rowSums(as.matrix(varPart)) - 1) > 1e-04)) { : 
##   missing value where TRUE/FALSE needed

7 Monocytes by visit

  1. Can you please share with us a PCA (SVA and non-SVA) of the monocytes of the TMRC3 project, but labeling them based on the visit (V1, V2, V3)?
  2. Can you please share DE lists of V1 vs V2, V1 vs V3, V1 vs. V2+V3 and V2 vs V3?
visit_colors <- chosen_colors <- c("#D95F02", "#7570B3", "#1B9E77")
names(visit_colors) <- c(1, 2, 3)
mono_visit <- subset_expt(hs_valid, subset = "typeofcells=='monocytes'") %>%
  set_expt_conditions(fact = "visitnumber") %>%
  set_expt_batches(fact = "clinicaloutcome") %>%
  set_expt_colors(colors = chosen_colors)
## subset_expt(): There were 118, now there are 22 samples.
mono_visit_norm <- normalize_expt(mono_visit, filter = TRUE, norm = "quant", convert = "cpm",
                                  transform = "log2")
## Removing 8966 low-count genes (10975 remaining).
## transform_counts: Found 9 values equal to 0, adding 1 to the matrix.
mono_visit_pca <- plot_pca(mono_visit_norm)
pp(file = "images/monocyte_by_visit.png", image = mono_visit_pca$plot)

mono_visit_nb <- normalize_expt(mono_visit, filter = TRUE, convert = "cpm",
                                batch = "svaseq", transform = "log2")
## Removing 8966 low-count genes (10975 remaining).
## batch_counts: Before batch/surrogate estimation, 1202 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 13643 entries are 0<x<1: 6%.
## Setting 333 low elements to zero.
## transform_counts: Found 333 values equal to 0, adding 1 to the matrix.
mono_visit_nb_pca <- plot_pca(mono_visit_nb)
pp(file = "images/monocyte_by_visit_nb.png", image = mono_visit_nb_pca$plot)

table(pData(mono_visit_norm)$batch)
## 
##          cure       failure          lost notapplicable 
##             6            10             5             1
keepers <- list(
    "second_vs_first" = c("c2", "c1"),
    "third_vs_second" = c("c3", "c2"),
    "third_vs_first" = c("c3", "c1"))
mono_visit_de <- all_pairwise(mono_visit, model_batch = "svaseq", filter = TRUE)
## batch_counts: Before batch/surrogate estimation, 1202 entries are x==0: 0%.
## Plotting a PCA before surrogate/batch inclusion.
## Using svaseq to visualize before/after batch inclusion.
## Performing a test normalization with: raw
## Removing 0 low-count genes (10975 remaining).
## batch_counts: Before batch/surrogate estimation, 1202 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 13643 entries are 0<x<1: 6%.
## Setting 333 low elements to zero.
## transform_counts: Found 333 values equal to 0, adding 1 to the matrix.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
mono_visit_tables <- combine_de_tables(
    mono_visit_de,
    keepers = keepers,
    excel = glue::glue("excel/mono_visit_tables-v{ver}.xlsx"))
## Deleting the file excel/mono_visit_tables-v202105.xlsx before writing the tables.
new_factor <- as.character(pData(mono_visit)[["visitnumber"]])
not_one_idx <- new_factor != 1
new_factor[not_one_idx] <- "not_1"
mono_one_vs <- set_expt_conditions(mono_visit, new_factor)

mono_one_vs_de <- all_pairwise(mono_one_vs, model_batch = "svaseq", filter = TRUE)
## batch_counts: Before batch/surrogate estimation, 1202 entries are x==0: 0%.
## Plotting a PCA before surrogate/batch inclusion.
## Using svaseq to visualize before/after batch inclusion.
## Performing a test normalization with: raw
## Removing 0 low-count genes (10975 remaining).
## batch_counts: Before batch/surrogate estimation, 1202 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 13643 entries are 0<x<1: 6%.
## Setting 333 low elements to zero.
## transform_counts: Found 333 values equal to 0, adding 1 to the matrix.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
mono_one_vs_tables <- combine_de_tables(
    mono_one_vs_de,
    excel = glue::glue("excel/mono_one_vs_tables-v{ver}.xlsx"))
## Deleting the file excel/mono_one_vs_tables-v202105.xlsx before writing the tables.

8 Test TSP

In writing the following, I quickly realized that tspair was not joking when it said it is intended for small numbers of genes. For a full expressionset of human data it is struggling. I like the idea, it may prove worth while to spend some time optimizing the package so that it is more usable.

expt <- hs_clinical_nobiop

simple_tsp <- function(expt, column = "condition") {
  facts <- levels(as.factor(pData(expt)[[column]]))
  retlist <- list()
  if (length(facts) < 2) {
    stop("This requires factors with at least 2 levels.")
  } else if (length(facts) == 2) {
    retlist <- simple_tsp_pair(expt, column = column)
  } else {
    for (first in 1:(length(facts) - 1)) {
      for (second in 2:(length(facts))) {
        if (first < second) {
          name <- glue::glue("{facts[first]}_vs_{facts[second]}")
          message("Starting ", name, ".")
          substring <- glue::glue("{column}=='{facts[first]}'|{column}=='{facts[second]}'")
          subby <- subset_expt(expt, subset=as.character(substring))
          retlist[[name]] <- simple_tsp_pair(subby, column = column)
        }
      }
    }
  }
}

simple_tsp_pair <- function(subby, column = "condition", repetitions = 50) {
  tsp_input <- subby[["expressionset"]]
  tsp_output <- tspcalc(tsp_input, column)
  tsp_scores <- tspsig(tsp_input, column, B = repetitions)
}

tsp1 <- tspcalc(tsp_input, "condition")
if (!isTRUE(get0("skip_load"))) {
  pander::pander(sessionInfo())
  message(paste0("This is hpgltools commit: ", get_git_commit()))
  message(paste0("Saving to ", savefile))
  tmp <- sm(saveme(filename = savefile))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 68b1ce610bf0c750d9a3ed2f6bd2a529b1744c29
## This is hpgltools commit: Thu May 27 17:01:01 2021 -0400: 68b1ce610bf0c750d9a3ed2f6bd2a529b1744c29
## Saving to tmrc3_02sample_estimation_v202105.rda.xz
tmp <- loadme(filename = savefile)
---
title: "TMRC3 Comprehensive Data Analysis: 202105"
author: "atb abelew@gmail.com"
date: "`r Sys.Date()`"
output:
 html_document:
  code_download: true
  code_folding: show
  fig_caption: true
  fig_height: 7
  fig_width: 7
  highlight: default
  keep_md: false
  mode: selfcontained
  number_sections: true
  self_contained: true
  theme: readable
  toc: true
  toc_float:
   collapsed: false
   smooth_scroll: false
---

<style>
  body .main-container {
    max-width: 1600px;
  }
</style>

```{r options, include = FALSE}
library(hpgltools)
tt <- sm(devtools::load_all("~/hpgltools"))
knitr::opts_knit$set(progress = TRUE,
                     verbose = TRUE,
                     width = 120,
                     echo = TRUE)
knitr::opts_chunk$set(error = TRUE,
                      fig.width = 12,
                      fig.height = 12,
                      dpi = 96)
old_options <- options(digits = 4,
                       stringsAsFactors = FALSE,
                       knitr.duplicate.label = "allow")
ggplot2::theme_set(ggplot2::theme_bw(base_size = 12))
ver <- "202105"
rundate <- format(Sys.Date(), format = "%Y%m%d")

rmd_file <- glue::glue("tmrc3_02sample_estimation_v{ver}.Rmd")
savefile <- gsub(pattern = "\\.Rmd", replace = "\\.rda\\.xz", x = rmd_file)
```

# Annotation

We take the annotation data from ensembl's biomart instance.  The genome which
was used to map the data was hg38 revision 100.  My default when using biomart is
to load the data from 1 year before the current date.

```{r hs_annot}
hs_annot <- sm(load_biomart_annotations(year = "2020"))
hs_annot <- hs_annot[["annotation"]]
hs_annot[["transcript"]] <- paste0(rownames(hs_annot), ".", hs_annot[["version"]])
rownames(hs_annot) <- make.names(hs_annot[["ensembl_gene_id"]], unique = TRUE)
tx_gene_map <- hs_annot[, c("transcript", "ensembl_gene_id")]

summary(hs_annot)
```

```{r hs_go}
hs_go <- sm(load_biomart_go()[["go"]])
hs_length <- hs_annot[, c("ensembl_gene_id", "cds_length")]
colnames(hs_length) <- c("ID", "length")
```

# Introduction

This document is intended to provide an overview of TMRC3 samples which have
been sequenced.  It includes some plots and analyses showing the relationships
among the samples as well as some differential analyses when possible.

# Sample Estimation

## Generate expressionsets

The sample sheet is copied from our shared online sheet and updated with each release
of sequencing data.

```{r samplesheet}
samplesheet <- "sample_sheets/tmrc3_samples_20210528.xlsx"
```

### Hisat2 expressionsets

The first thing to note is the large range in coverage.  There are multiple
samples with coverage which is too low to use.  These will be removed shortly.

In the following block I immediately exclude any non-coding reads as well.

```{r all_new_hisat2}
## Create the expressionset and immediately pass it to a filter
## removing the non protein coding genes.
sanitize_columns <- c("visitnumber", "clinicaloutcome", "donor",
                      "typeofcells", "clinicalpresentation",
                      "condition", "batch")
hs_expt <- create_expt(samplesheet,
                       file_column = "hg38100hisatfile",
                       savefile = glue::glue("rda/hs_expt_all-v{ver}.rda"),
                       gene_info = hs_annot) %>%
  exclude_genes_expt(column = "gene_biotype", method = "keep",
                     pattern = "protein_coding", meta_column = "ncrna_lost") %>%
  sanitize_expt_metadata(columns = sanitize_columns) %>%
  set_expt_factors(columns = sanitize_columns, class = "factor")

levels(pData(hs_expt[["expressionset"]])[["visitnumber"]]) <- list(
    '0' = "notapplicable", '1' = 1, '2' = 2, '3' = 3)
```

Split this data into CDS and lncRNA.  Oh crap in order to do that I need to recount the data.
Running now (20210518)

```{r lnc_cds}
## lnc_expt <- create_expt(samplesheet,
##                         file_column = "hg38100lncfile",
##                         gene_info = hs_annot)
```

#### Initial metrics

Once the data was loaded, there are a couple of metrics which may be plotted immediately.

```{r initial_metrics}
nonzero <- plot_nonzero(hs_expt)
nonzero$plot

ncrna_lost_df <- as.data.frame(pData(hs_expt)[["ncrna_lost"]])
rownames(ncrna_lost_df) <- rownames(pData(hs_expt))
colnames(ncrna_lost_df) <- "ncrna_lost"

tmpdf <- merge(nonzero$table, ncrna_lost_df, by = "row.names")
rownames(tmpdf) <- tmpdf[["Row.names"]]
tmpdf[["Row.names"]] <- NULL

ggplot(tmpdf, aes(x=ncrna_lost, y=nonzero_genes)) +
  ggplot2::geom_point() +
  ggplot2::ggtitle("Nonzero genes with respect to percent counts 
lost when ncRNA was removed.")
```

Najib doesn't want this plot, but I am using it to check new samples,
so will hide it from general use.

```{r libsize}
libsize <- plot_libsize(hs_expt)
libsize$plot
```

## Minimum coverage sample filtering

I arbitrarily chose 11,000 non-zero genes as a minimum.  We may
want this to be higher.

```{r hisat2_write, fig.show = "hide"}
hs_valid <- subset_expt(hs_expt, nonzero = 11000)

valid_write <- sm(write_expt(hs_valid, excel = glue("excel/hs_valid-v{ver}.xlsx")))
```

# Project Aims

The project seeks to determine the relationship of the innate immune response
and inflammatory signaling to the clinical outcome of antileishmanial drug
treatment. We will test the hypothesis that the profile of innate immune cell
activation and their dynamics through the course of treatment differ between CL
patients with prospectively determined therapeutic cure or failure.

This will be achieved through the characterization of the in vivo dynamics of
blood-derived monocyte, neutrophil and eosinophil transcriptome before, during
and at the end of treatment in CL patients. Cell-type specific transcriptomes,
composite signatures and time-response expression profiles will be contrasted
among patients with therapeutic cure or failure.

## Preparation

To address these, I added to the end of the sample sheet columns named
'condition', 'batch', 'donor', and 'time'.  These are filled in with shorthand
values according to the above.

## Global view

Before addressing the questions explicitly by subsetting the data, I want to get
a look at the samples as they are.

```{r pre_questions}
new_names <- pData(hs_valid)[["samplename"]]
hs_valid <- hs_valid %>%
  set_expt_batches(fact = "cellssource") %>%
  set_expt_conditions(fact = "typeofcells") %>%
  set_expt_samplenames(newnames = new_names)

all_norm <- sm(normalize_expt(hs_valid, transform = "log2", norm = "quant",
                              convert = "cpm", filter = TRUE))

all_pca <- plot_pca(all_norm, plot_labels = FALSE,
                    plot_title = "PCA - Cell type", size_column = "visitnumber")
pp(file = glue("images/tmrc3_pca_nolabels-v{ver}.png"), image = all_pca$plot)

write.csv(all_pca$table, file = "coords/hs_donor_pca_coords.csv")
plot_corheat(all_norm, plot_title = "Heirarchical clustering:
         cell types")$plot
```

## Examine samples relevant to clinical outcome

Now let us consider only the samples for which we have a clinical outcome.
These fall primarily into either 'cured' or 'failed', but some people have not
yet returned to the clinic after the first or second visit.  These are deemed
'lost'.

```{r all_clinical}
hs_clinical <- hs_valid %>%
  set_expt_conditions(fact = "clinicaloutcome") %>%
  set_expt_batches(fact = "typeofcells") %>%
  subset_expt(subset = "typeofcells!='pbmcs'&typeofcells!='macrophages'")

chosen_colors <- c("#D95F02", "#7570B3", "#1B9E77", "#FF0000", "#FF0000")
names(chosen_colors) <- c("cure", "failure", "lost", "null", "notapplicable")
hs_clinical <- set_expt_colors(hs_clinical, colors = chosen_colors)

newnames <- make.names(pData(hs_clinical)[["samplename"]], unique = TRUE)
hs_clinical <- set_expt_samplenames(hs_clinical, newnames = newnames)

hs_clinical_norm <- sm(normalize_expt(hs_clinical, filter = TRUE, transform = "log2",
                                      convert = "cpm", norm = "quant"))
clinical_pca <- plot_pca(hs_clinical_norm, plot_labels = FALSE,
                         size_column = "visitnumber", cis = NULL,
                         plot_title = "PCA - clinical samples")
pp(file = glue("images/all_clinical_nobatch_pca-v{ver}.png"), image = clinical_pca$plot,
   height = 8, width = 20)
```

### Repeat without the biopsy samples

```{r ibid_nobiopsy}
hs_clinical_nobiop <- hs_clinical %>%
  subset_expt(subset = "typeofcells!='biopsy'")

hs_clinical_nobiop_norm <- sm(normalize_expt(hs_clinical_nobiop, filter = TRUE, transform = "log2",
                                             convert = "cpm", norm = "quant"))
clinical_nobiop_pca <- plot_pca(hs_clinical_nobiop_norm, plot_labels = FALSE, cis = NULL,
                                plot_title = "PCA - clinical samples without biopsies")
pp(file = glue("images/all_clinical_nobiop_nobatch_pca-v{ver}.png"),
   image = clinical_nobiop_pca$plot)
```

### Attempt to correct for the surrogate variables

At this time we have two primary data structures of interest: hs_clinical and hs_clinical_nobiop

```{r clinical_sva}
hs_clinical_nb <- normalize_expt(hs_clinical, filter = TRUE, batch = "svaseq",
                                 transform = "log2", convert = "cpm")
clinical_batch_pca <- plot_pca(hs_clinical_nb, plot_labels = FALSE, cis = NULL,
                               size_column = "visitnumber", plot_title = "PCA - clinical samples")
clinical_batch_pca$plot

hs_clinical_nobiop_nb <- sm(normalize_expt(hs_clinical_nobiop, filter = TRUE, batch = "svaseq",
                                           transform = "log2", convert = "cpm"))
clinical_nobiop_batch_pca <- plot_pca(hs_clinical_nobiop_nb,
                                      plot_title = "PCA - clinical samples without biopsies",
                                      plot_labels = FALSE)
pp(file = "images/clinical_batch.png", image = clinical_nobiop_batch_pca$plot)
test <- plot_pca(hs_clinical_nobiop_nb, size_column = "visitnumber",
                 plot_title = "PCA - clinical samples without biopsies",
                 plot_labels = FALSE)
test$plot

clinical_nobiop_batch_tsne <- plot_tsne(hs_clinical_nobiop_nb,
                                        plot_title = "tSNE - clinical samples without biopsies",
                                        plot_labels = FALSE)
clinical_nobiop_batch_tsne$plot
```

#### Look at remaining variance with variancePartition

```{r variance_partition}
test <- simple_varpart(hs_clinical_nobiop)
test$partition_plot
```

## Perform DE of the clinical samples cure vs. fail

```{r clinical_de, fig.show="hide"}
individual_celltypes <- subset_expt(hs_clinical_nobiop, subset="condition!='lost'")
hs_clinic_de <- sm(all_pairwise(individual_celltypes, model_batch = "svaseq", filter = TRUE))

hs_clinic_table <- sm(combine_de_tables(
    hs_clinic_de,
    excel = glue::glue("excel/individual_celltypes_table-v{ver}.xlsx")))

hs_clinic_sig <- sm(extract_significant_genes(
    hs_clinic_table,
    excel = glue::glue("excel/individual_celltypes_sig-v{ver}.xlsx")))

hs_clinic_sig[["summary_df"]]
```

```{r de_heatmap}
hs_clinic_de[["comparison"]][["heat"]]
```

### Perform LRT with the clinical samples

I am not sure if we have enough samples across the three visit to
completely work as well as we would like, but there is only 1 way to
find out!  Now that I think about it, one thing which might be awesome
is to use cell type as an interacting factor...

#### With biopsy samples

I figure this might be a place where the biopsy samples might prove useful.

```{r lrt_test}
clinical_nolost <- subset_expt(hs_clinical, subset="condition!='lost'")
lrt_visit_clinical_test <- deseq_lrt(clinical_nolost, transform = "vst",
                                     interactor_column = "visitnumber",
                                     interest_column = "clinicaloutcome")
lrt_visit_clinical_test[["favorite_genes"]]

lrt_celltype_clinical_test <- deseq_lrt(clinical_nolost, transform = "vst",
                                        interactor_column = "typeofcells",
                                        interest_column = "clinicaloutcome")
lrt_celltype_clinical_test[["favorite_genes"]]
```

### Look at only the differential genes

A good suggestion from Theresa was to examine only the most variant
genes from failure vs. cure and see how they change the clustering/etc
results.  This is my attempt to address this query.

```{r small_explore}
hs_clinic_topn <- sm(extract_significant_genes(hs_clinic_table, n = 100))
table <- "failure_vs_cure"
wanted <- rbind(hs_clinic_topn[["deseq"]][["ups"]][[table]],
                hs_clinic_topn[["deseq"]][["downs"]][[table]])

small_expt <- exclude_genes_expt(hs_clinical_nobiop, ids = rownames(wanted), method = "keep")
small_norm <- sm(normalize_expt(small_expt, transform = "log2", convert = "cpm",
                                norm = "quant", filter = TRUE))
plot_pca(small_norm)$plot

small_nb <- normalize_expt(small_expt, transform = "log2", convert = "cpm",
                           batch = "svaseq", norm = "quant", filter = TRUE)
plot_pca(small_nb)$plot
```

```{r clinical_plot}
## DESeq2 MA plot of failure / cure
hs_clinic_table[["plots"]][["failure_vs_cure"]][["deseq_ma_plots"]]$plot

## DESeq2 Volcano plot of failure / cure
hs_clinic_table[["plots"]][["failure_vs_cure"]][["deseq_vol_plots"]]$plot
```

### g:Profiler results using the significant up and down genes

```{r perform_gprofiler}
ups <- hs_clinic_sig[["deseq"]][["ups"]][[1]]
downs <- hs_clinic_sig[["deseq"]][["downs"]][[1]]

hs_clinic_gprofiler_ups <- simple_gprofiler(ups)
hs_clinic_gprofiler_ups[["pvalue_plots"]][["bpp_plot_over"]]
hs_clinic_gprofiler_ups[["pvalue_plots"]][["mfp_plot_over"]]
hs_clinic_gprofiler_ups[["pvalue_plots"]][["reactome_plot_over"]]

##hs_try2 <- simple_gprofiler2(ups)

hs_clinic_gprofiler_downs <- simple_gprofiler(downs)
hs_clinic_gprofiler_downs[["pvalue_plots"]][["bpp_plot_over"]]
hs_clinic_gprofiler_downs[["pvalue_plots"]][["mfp_plot_over"]]
hs_clinic_gprofiler_downs[["pvalue_plots"]][["reactome_plot_over"]]
```

## Perform GSVA on the clinical samples

```{r gsva, fig.show = "hide"}
hs_celltype_gsva_c2 <- sm(simple_gsva(individual_celltypes))
hs_celltype_gsva_c2_sig <- sm(get_sig_gsva_categories(
    hs_celltype_gsva_c2,
    excel = "excel/individual_celltypes_gsva_c2.xlsx"))

broad_c7 <- GSEABase::getGmt("reference/msigdb/c7.all.v7.2.entrez.gmt",
                             collectionType = GSEABase::BroadCollection(category = "c7"),
                             geneIdType = GSEABase::EntrezIdentifier())
hs_celltype_gsva_c7 <- sm(simple_gsva(individual_celltypes, signatures = broad_c7,
                                      msig_xml = "reference/msigdb_v7.2.xml", cores = 10))
hs_celltype_gsva_c7_sig <- sm(get_sig_gsva_categories(
    hs_celltype_gsva_c7,
    excel = "excel/individual_celltypes_gsva_c7.xlsx"))
```

### Print some plots of the GSVA outputs

```{r gsva_plots}
## The raw heatmap of the C2 values
hs_celltype_gsva_c2_sig[["raw_plot"]]
## The 'significance' scores of the C2 values
hs_celltype_gsva_c2_sig[["score_plot"]]
## The subset of scores for categories deemed significantly different.
hs_celltype_gsva_c2_sig[["subset_plot"]]

## The raw heatmap of the C7 values
hs_celltype_gsva_c7_sig[["raw_plot"]]
## The 'significance' scores of the C7 values
hs_celltype_gsva_c7_sig[["score_plot"]]
## The subset of scores for categories deemed significantly different.
hs_celltype_gsva_c7_sig[["subset_plot"]]
```

# Individual Cell types

The following blocks split the samples into a few groups by sample type and look
at the distributions between them.

## Implementation details

Get top/bottom n genes for each cell type, using clinical outcome as the factor of interest.
For the moment, use sva for the DE analysis.
Provide cpms for the top/bottom n genes.

Start with top/bottom 200.
Perform default logFC and p-value as well.

### Shared contrasts

Here is the contrast we will use throughput, I am leaving open the option to add more.

```{r keepers}
keepers <- list(
  "fail_vs_cure" = c("failure", "cure"))
```

## Monocytes

### Evaluate Monocyte samples

```{r monocytes}
mono <- subset_expt(hs_valid, subset = "typeofcells=='monocytes'") %>%
  set_expt_conditions(fact = "clinicaloutcome") %>%
  set_expt_batches(fact = "donor") %>%
  set_expt_colors(colors = chosen_colors)
## FIXME set_expt_colors should speak up if there are mismatches here!!!

save_result <- save(mono, file = "rda/monocyte_expt.rda")
mono_norm <- normalize_expt(mono, convert = "cpm", filter = TRUE,
                            transform = "log2", norm = "quant")
plt <- plot_pca(mono_norm, plot_labels = FALSE)$plot
pp(file = glue("images/mono_pca_normalized-v{ver}.pdf"), image = plt)

mono_nb <- normalize_expt(mono, convert = "cpm", filter = TRUE,
                          transform = "log2", batch = "svaseq")
plt <- plot_pca(mono_nb, plot_labels = FALSE)$plot
pp(file = glue("images/mono_pca_normalized_batch-v{ver}.pdf"), image = plt)
```

### DE of Monocyte samples

#### Without sva

```{r de_monocyte, fig.show = "hide"}
mono_de <- sm(all_pairwise(mono, model_batch = FALSE, filter = TRUE))
mono_tables <- sm(combine_de_tables(
    mono_de, keepers = keepers,
    excel = glue::glue("excel/monocyte_clinical_all_tables-v{ver}.xlsx")))
written <- write_xlsx(data = mono_tables[["data"]][[1]],
                      excel = glue::glue("excel/monocyte_clinical_table-v{ver}.xlsx"))
mono_sig <- sm(extract_significant_genes(mono_tables, according_to = "deseq"))
written <- write_xlsx(data = mono_sig[["deseq"]][["ups"]][[1]],
                      excel = glue::glue("excel/monocyte_clinical_sigup-v{ver}.xlsx"))
written <- write_xlsx(data = mono_sig[["deseq"]][["downs"]][[1]],
                      excel = glue::glue("excel/monocyte_clinical_sigdown-v{ver}.xlsx"))

mono_pct_sig <- sm(extract_significant_genes(mono_tables, n = 200,
                                             lfc = NULL, p = NULL, according_to = "deseq"))
written <- write_xlsx(data = mono_pct_sig[["deseq"]][["ups"]][[1]],
                      excel = glue::glue("excel/monocyte_clinical_sigup_pct-v{ver}.xlsx"))
written <- write_xlsx(data = mono_pct_sig[["deseq"]][["downs"]][[1]],
                      excel = glue::glue("excel/monocyte_clinical_sigdown_pct-v{ver}.xlsx"))
mono_sig$summary_df

## Print out a table of the cpm values for other explorations.
mono_cpm <- sm(normalize_expt(mono, convert = "cpm"))
written <- write_xlsx(data = exprs(mono_cpm),
                      excel = glue::glue("excel/monocyte_cpm_before_batch-v{ver}.xlsx"))
mono_bcpm <- sm(normalize_expt(mono, filter = TRUE, convert = "cpm", batch = "svaseq"))
written <- write_xlsx(data = exprs(mono_bcpm),
                      excel = glue::glue("excel/monocyte_cpm_after_batch-v{ver}.xlsx"))
```

#### With sva

```{r de_mono_sva, fig.show = "hide"}
mono_de_sva <- sm(all_pairwise(mono, model_batch = "svaseq", filter = TRUE))
mono_tables_sva <- sm(combine_de_tables(
    mono_de_sva, keepers = keepers,
    excel = glue::glue("excel/monocyte_clinical_all_tables_sva-v{ver}.xlsx")))
mono_sig_sva <- sm(extract_significant_genes(
    mono_tables_sva,
    excel = glue::glue("excel/monocyte_clinical_sig_tables_sva-v{ver}.xlsx"),
    according_to = "deseq"))
```

#### Monocyte DE plots

First print out the DE plots without and then with sva estimates.

```{r mono_de_plots}
## DESeq2 MA plot of failure / cure
mono_tables[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot

## DESeq2 Volcano plot of failure / cure
mono_tables[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot

## DESeq2 MA plot of failure / cure with svaseq
mono_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot

## DESeq2 Volcano plot of failure / cure with svaseq
mono_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot
```

#### Monocyte ontology search

```{r mono_gprofiler}
ups <- mono_sig[["deseq"]][["ups"]][["fail_vs_cure"]]
downs <- mono_sig[["deseq"]][["downs"]][["fail_vs_cure"]]

mono_up_gp <- simple_gprofiler(sig_genes = ups)
mono_up_gp[["pvalue_plots"]][["bpp_plot_over"]]
mono_up_gp[["pvalue_plots"]][["mfp_plot_over"]]
mono_up_gp[["pvalue_plots"]][["reactome_plot_over"]]

mono_down_gp <- simple_gprofiler(sig_genes = downs)
mono_down_gp[["pvalue_plots"]][["bpp_plot_over"]]
mono_down_gp[["pvalue_plots"]][["mfp_plot_over"]]
mono_down_gp[["pvalue_plots"]][["reactome_plot_over"]]
```

#### Monocyte MSigDB query

```{r msig_mono_goseq, fig.show = "hide"}
broad_c7 <- GSEABase::getGmt("reference/msigdb/c7.all.v7.2.entrez.gmt",
                             collectionType = GSEABase::BroadCollection(category = "c7"),
                             geneIdType = GSEABase::EntrezIdentifier())

mono_up_goseq_msig <- goseq_msigdb(sig_genes = ups, signatures = broad_c7,
                                   signature_category = "c7", length_db = hs_length)

mono_down_goseq_msig <- goseq_msigdb(sig_genes = downs, signatures = broad_c7,
                                     signature_category = "c7", length_db = hs_length)
```

#### Plot of similar experiments

```{r msig_plots}
## Monocyte genes with increased expression in the failed samples
## share genes with the following experiments
mono_up_goseq_msig[["pvalue_plots"]][["mfp_plot_over"]]

## Monocyte genes with increased expression in the cured samples
## share genes with the following experiments
mono_down_goseq_msig[["pvalue_plots"]][["mfp_plot_over"]]
```

### Evaluate Neutrophil samples

```{r neutrophils}
neut <- subset_expt(hs_valid, subset = "typeofcells=='neutrophils'") %>%
  set_expt_conditions(fact = "clinicaloutcome") %>%
  set_expt_batches(fact = "donor") %>%
  set_expt_colors(colors = chosen_colors)

save_result <- save(neut, file = "rda/neutrophil_expt.rda")
neut_norm <- sm(normalize_expt(neut, convert = "cpm", filter = TRUE, transform = "log2"))
plt <- plot_pca(neut_norm, plot_labels = FALSE)$plot
pp(file = glue("images/neut_pca_normalized-v{ver}.pdf"), image = plt)

neut_nb <- sm(normalize_expt(neut, convert = "cpm", filter = TRUE,
                             transform = "log2", batch = "svaseq"))
plt <- plot_pca(neut_nb, plot_labels = FALSE)$plot
pp(file = glue("images/neut_pca_normalized_svaseq-v{ver}.pdf"), image = plt)
```

### DE of Netrophil samples

#### Without sva

```{r neutrophil_de, fig.show = "hide"}
neut_de <- sm(all_pairwise(neut, model_batch = FALSE, filter = TRUE))
neut_tables <- sm(combine_de_tables(
    neut_de, keepers = keepers,
    excel = glue::glue("excel/neutrophil_clinical_all_tables-v{ver}.xlsx")))
written <- write_xlsx(data = neut_tables[["data"]][[1]],
                      excel = glue::glue("excel/neutrophil_clinical_table-v{ver}.xlsx"))
neut_sig <- sm(extract_significant_genes(neut_tables, according_to = "deseq"))
written <- write_xlsx(data = neut_sig[["deseq"]][["ups"]][[1]],
                      excel = glue::glue("excel/neutrophil_clinical_sigup-v{ver}.xlsx"))
written <- write_xlsx(data = neut_sig[["deseq"]][["downs"]][[1]],
                      excel = glue::glue("excel/neutrophil_clinical_sigdown-v{ver}.xlsx"))

neut_pct_sig <- sm(extract_significant_genes(neut_tables, n = 200, lfc = NULL,
                                             p = NULL, according_to = "deseq"))
written <- write_xlsx(data = neut_sig[["deseq"]][["ups"]][[1]],
                      excel = glue::glue("excel/neutrophil_clinical_sigup_pct-v{ver}.xlsx"))
written <- write_xlsx(data = neut_sig[["deseq"]][["downs"]][[1]],
                      excel = glue::glue("excel/neutrophil_clinical_sigdown_pct-v{ver}.xlsx"))
neut_cpm <- sm(normalize_expt(neut, convert = "cpm"))
written <- write_xlsx(data = exprs(neut_cpm),
                      excel = glue::glue("excel/neutrophil_cpm_before_batch-v{ver}.xlsx"))
neut_bcpm <- sm(normalize_expt(neut, filter = TRUE, batch = "svaseq", convert = "cpm"))
written <- write_xlsx(data = exprs(neut_bcpm),
                      excel = glue::glue("excel/neutrophil_cpm_after_batch-v{ver}.xlsx"))
```

#### With sva

```{r de_neut_sva, fig.show = "hide"}
neut_de_sva <- sm(all_pairwise(neut, model_batch = "svaseq", filter = TRUE))
neut_tables_sva <- sm(combine_de_tables(
    neut_de_sva, keepers = keepers,
    excel = glue::glue("excel/neutrophil_clinical_all_tables_sva-v{ver}.xlsx")))
neut_sig_sva <- sm(extract_significant_genes(
    neut_tables_sva,
    excel = glue::glue("excel/neutrophil_clinical_sig_tables_sva-v{ver}.xlsx"),
    according_to = "deseq"))
```

#### Neutrophil DE plots

```{r neut_de_plots}
## DESeq2 MA plot of failure / cure
neut_tables[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot

## DESeq2 Volcano plot of failure / cure
neut_tables[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot

## DESeq2 MA plot of failure / cure with sva
neut_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot

## DESeq2 Volcano plot of failure / cure with sva
neut_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot
```

#### Neutrophil ontology search

```{r neut_gp}
ups <- neut_sig[["deseq"]][["ups"]][["fail_vs_cure"]]
downs <- neut_sig[["deseq"]][["downs"]][["fail_vs_cure"]]

neut_up_gp <- simple_gprofiler(sig_genes = ups)
neut_up_gp[["pvalue_plots"]][["bpp_plot_over"]]
neut_up_gp[["pvalue_plots"]][["mfp_plot_over"]]
neut_up_gp[["pvalue_plots"]][["reactome_plot_over"]]

neut_down_gp <- simple_gprofiler(downs)
neut_down_gp[["pvalue_plots"]][["bpp_plot_over"]]
neut_down_gp[["pvalue_plots"]][["mfp_plot_over"]]
neut_down_gp[["pvalue_plots"]][["reactome_plot_over"]]
```

#### Neutrophil GSVA query

```{r msig_neut_goseq, fig.show = "hide"}
neut_up_goseq_msig <- goseq_msigdb(sig_genes = ups, signatures = broad_c7,
                                   signature_category = "c7", length_db = hs_length)

neut_down_goseq_msig <- goseq_msigdb(sig_genes = downs, signatures = broad_c7,
                                     signature_category = "c7", length_db = hs_length)
```

#### Plot of similar experiments

```{r msig_plots_neut}
## Neutrophil genes with increased expression in the failed samples
## share genes with the following experiments
neut_up_goseq_msig[["pvalue_plots"]][["mfp_plot_over"]]

## Neutrophil genes with increased expression in the cured samples
## share genes with the following experiments
neut_down_goseq_msig[["pvalue_plots"]][["mfp_plot_over"]]
```

## Eosinophils

### Evaluate Eosinophil samples

```{r eosinophils}
eo <- subset_expt(hs_valid, subset = "typeofcells=='eosinophils'") %>%
  set_expt_conditions(fact = "clinicaloutcome") %>%
  set_expt_batches(fact = "donor") %>%
  set_expt_colors(colors = chosen_colors)

save_result <- save(eo, file = "rda/eosinophil_expt.rda")
eo_norm <- sm(normalize_expt(eo, convert = "cpm", transform = "log2",
                             norm = "quant", filter = TRUE))
plt <- plot_pca(eo_norm, plot_labels = FALSE)$plot
pp(file = glue("images/eo_pca_normalized-v{ver}.pdf"), image = plt)

eo_nb <- sm(normalize_expt(eo, convert = "cpm", transform = "log2",
                           filter = TRUE, batch = "svaseq"))
plot_pca(eo_nb)$plot
```

### DE of Eosinophil samples

#### Withouth sva

```{r eosinophil_de, fig.show = "hide"}
eo_de <- sm(all_pairwise(eo, model_batch = FALSE, filter = TRUE))
eo_tables <- sm(combine_de_tables(
    eo_de, keepers = keepers,
    excel = glue::glue("excel/eosinophil_clinical_all_tables-v{ver}.xlsx")))
written <- write_xlsx(data = eo_tables[["data"]][[1]],
                      excel = glue::glue("excel/eosinophil_clinical_table-v{ver}.xlsx"))
eo_sig <- sm(extract_significant_genes(eo_tables, according_to = "deseq"))
written <- write_xlsx(data = eo_sig[["deseq"]][["ups"]][[1]],
                      excel = glue::glue("excel/eosinophil_clinical_sigup-v{ver}.xlsx"))
written <- write_xlsx(data = eo_sig[["deseq"]][["downs"]][[1]],
                      excel = glue::glue("excel/eosinophil_clinical_sigdown-v{ver}.xlsx"))

eo_pct_sig <- sm(extract_significant_genes(eo_tables, n = 200,
                                           lfc = NULL, p = NULL, according_to = "deseq"))
written <- write_xlsx(data = eo_pct_sig[["deseq"]][["ups"]][[1]],
                      excel = glue::glue("excel/eosinophil_clinical_sigup_pct-v{ver}.xlsx"))
written <- write_xlsx(data = eo_pct_sig[["deseq"]][["downs"]][[1]],
                      excel = glue::glue("excel/eosinophil_clinical_sigdown_pct-v{ver}.xlsx"))

eo_cpm <- sm(normalize_expt(eo, convert = "cpm"))
written <- write_xlsx(data = exprs(eo_cpm),
                      excel = glue::glue("excel/eosinophil_cpm_before_batch-v{ver}.xlsx"))
eo_bcpm <- sm(normalize_expt(eo, filter = TRUE, batch = "svaseq", convert = "cpm"))
written <- write_xlsx(data = exprs(eo_bcpm),
                      excel = glue::glue("excel/eosinophil_cpm_after_batch-v{ver}.xlsx"))
```

#### With sva

```{r de_eo_sva, fig.show = "hide"}
eo_de_sva <- sm(all_pairwise(eo, model_batch = "svaseq", filter = TRUE))
eo_tables_sva <- sm(combine_de_tables(
    eo_de_sva, keepers = keepers,
    excel = glue::glue("excel/eosinophil_clinical_all_tables_sva-v{ver}.xlsx")))
eo_sig_sva <- sm(extract_significant_genes(
    eo_tables_sva,
    excel = glue::glue("excel/eosinophil_clinical_sig_tables_sva-v{ver}.xlsx"),
    according_to = "deseq"))
```

#### Eosinophil DE plots

```{r eo_de_plots}
## DESeq2 MA plot of failure / cure
eo_tables[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot

## DESeq2 Volcano plot of failure / cure
eo_tables[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot

## DESeq2 MA plot of failure / cure with sva
eo_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot

## DESeq2 Volcano plot of failure / cure with sva
eo_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot
```

#### Eosinophil ontology search

```{r eo_gprofiler}
ups <- eo_sig[["deseq"]][["ups"]][["fail_vs_cure"]]
downs <- eo_sig[["deseq"]][["downs"]][["fail_vs_cure"]]

eo_up_gp <- simple_gprofiler(sig_genes = ups)
eo_up_gp[["pvalue_plots"]][["bpp_plot_over"]]
eo_up_gp[["pvalue_plots"]][["mfp_plot_over"]]
eo_up_gp[["pvalue_plots"]][["reactome_plot_over"]]

eo_down_gp <- simple_gprofiler(downs)
eo_down_gp[["pvalue_plots"]][["bpp_plot_over"]]
eo_down_gp[["pvalue_plots"]][["mfp_plot_over"]]
eo_down_gp[["pvalue_plots"]][["reactome_plot_over"]]
```

#### Eosinophil MSigDB query

```{r msig_eo_goseq, fig.show = "hide"}
eo_up_goseq_msig <- goseq_msigdb(sig_genes = ups, signatures = broad_c7,
                                 signature_category = "c7", length_db = hs_length)

eo_down_goseq_msig <- goseq_msigdb(sig_genes = downs, signatures = broad_c7,
                                   signature_category = "c7", length_db = hs_length)
```

#### Plot of similar experiments

```{r msig_plots_eo}
## Eosinophil genes with increased expression in the failed samples
## share genes with the following experiments
eo_up_goseq_msig[["pvalue_plots"]][["mfp_plot_over"]]

## Eosinophil genes with increased expression in the cured samples
## share genes with the following experiments
eo_down_goseq_msig[["pvalue_plots"]][["mfp_plot_over"]]
```

## Biopsies

### Evaluate Biopsy samples

```{r biopsies}
biop <- subset_expt(hs_valid, subset = "typeofcells=='biopsy'") %>%
  set_expt_conditions(fact = "clinicaloutcome") %>%
  set_expt_batches(fact = "donor") %>%
  set_expt_colors(colors = chosen_colors)

save_result <- save(biop, file = "rda/biopsy_expt.rda")
biop_norm <- normalize_expt(biop, filter = TRUE, convert = "cpm",
                            transform = "log2", norm = "quant")
plt <- plot_pca(biop_norm, plot_labels = FALSE)$plot
pp(file = glue("images/biop_pca_normalized-v{ver}.pdf"), image = plt)

biop_nb <- sm(normalize_expt(biop, convert = "cpm", filter = TRUE,
                             transform = "log2", batch = "svaseq"))
plt <- plot_pca(biop_nb, plot_labels = FALSE)$plot
pp(file = glue("images/biop_pca_normalized_svaseq-v{ver}.pdf"), image = plt)
```

### DE of Biopsy samples

#### Without sva

```{r de_biopsy, fig.show = "hide"}
biop_de <- sm(all_pairwise(biop, model_batch = FALSE, filter = TRUE))
biop_tables <- combine_de_tables(biop_de, keepers = keepers,
                                 excel = glue::glue("excel/biopsy_clinical_all_tables-v{ver}.xlsx"))
written <- write_xlsx(data = biop_tables[["data"]][[1]],
                      excel = glue::glue("excel/biopsy_clinical_table-v{ver}.xlsx"))
biop_sig <- extract_significant_genes(biop_tables, according_to = "deseq")
##written <- write_xlsx(data = biop_sig[["deseq"]][["ups"]][[1]],
##                      excel = glue::glue("excel/biopsy_clinical_sigup-v{ver}.xlsx"))
written <- write_xlsx(data = biop_sig[["deseq"]][["downs"]][[1]],
                      excel = glue::glue("excel/biopsy_clinical_sigdown-v{ver}.xlsx"))
biop_pct_sig <- extract_significant_genes(biop_tables, n = 200, lfc = NULL, p = NULL, according_to = "deseq")
written <- write_xlsx(data = biop_pct_sig[["deseq"]][["ups"]][[1]],
                      excel = glue::glue("excel/biopsy_clinical_sigup_pct-v{ver}.xlsx"))
written <- write_xlsx(data = biop_pct_sig[["deseq"]][["downs"]][[1]],
                      excel = glue::glue("excel/biopsy_clinical_sigdown_pct-v{ver}.xlsx"))

biop_cpm <- sm(normalize_expt(biop, convert = "cpm"))
written <- write_xlsx(data = exprs(biop_cpm),
                      excel = glue::glue("excel/biopsy_cpm_before_batch-v{ver}.xlsx"))
biop_bcpm <- sm(normalize_expt(biop, filter = TRUE, batch = "svaseq", convert = "cpm"))
written <- write_xlsx(data = exprs(biop_bcpm),
                      excel = glue::glue("excel/biopsy_cpm_after_batch-v{ver}.xlsx"))
```

#### with sva

```{r de_biopsy_sva, fig.show = "hide"}
biop_de_sva <- sm(all_pairwise(biop, model_batch = "svaseq", filter = TRUE))
biop_tables_sva <- sm(combine_de_tables(
    biop_de_sva, keepers = keepers,
    excel = glue::glue("excel/biopsy_clinical_all_tables_sva-v{ver}.xlsx")))
biop_sig_sva <- sm(extract_significant_genes(
    biop_tables_sva,
    excel = glue::glue("excel/biopsy_clinical_sig_tables_sva-v{ver}.xlsx"),
    according_to = "deseq"))
```

#### Biopsy DE plots

```{r biop_de_plots}
## DESeq2 MA plot of failure / cure
biop_tables[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot

## DESeq2 Volcano plot of failure / cure
biop_tables[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot

## DESeq2 MA plot of failure / cure
biop_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]]$plot

## DESeq2 Volcano plot of failure / cure
biop_tables_sva[["plots"]][["fail_vs_cure"]][["deseq_vol_plots"]]$plot
```

# Look for shared genes among Monocytes/Neutrophils/Eosinophils

We have three variables containing the 'significant' DE genes for the
three cell types.  For this I am choosing (for the moment) to use the
sva data.

```{r shared_by_type}
## mono_sig_sva, neut_sig_sva, eo_sig_sva
sig_vectors <- list(
    "monocytes" = c(rownames(mono_sig_sva[["deseq"]][["ups"]][["fail_vs_cure"]]),
                    rownames(mono_sig_sva[["deseq"]][["downs"]][["fail_vs_cure"]])),
    "neutrophils" = c(rownames(neut_sig_sva[["deseq"]][["ups"]][["fail_vs_cure"]]),
                      rownames(neut_sig_sva[["deseq"]][["downs"]][["fail_vs_cure"]])),
    "eosinophils" =  c(rownames(eo_sig_sva[["deseq"]][["ups"]][["fail_vs_cure"]]),
                       rownames(eo_sig_sva[["deseq"]][["downs"]][["fail_vs_cure"]])))

shared_vector <- Vennerable::Venn(Sets = sig_vectors)
Vennerable::plot(shared_vector, doWeights = FALSE)

shared_ids <- shared_vector@IntersectionSets[["111"]]
shared_expt <- exclude_genes_expt(hs_expt, ids = shared_ids, method = "keep")
shared_written <- sm(write_expt(shared_expt, excel="excel/genes_shared_across_celltypes.xlsx"))
```

# Monocytes by visit

 1. Can you please share with us a PCA (SVA and non-SVA) of the
    monocytes of the TMRC3 project, but labeling them based on the visit
    (V1, V2, V3)?
 2. Can you please share DE lists of V1 vs V2, V1 vs V3, V1 vs. V2+V3
    and V2 vs V3?

```{r monocytes_by_visit}
visit_colors <- chosen_colors <- c("#D95F02", "#7570B3", "#1B9E77")
names(visit_colors) <- c(1, 2, 3)
mono_visit <- subset_expt(hs_valid, subset = "typeofcells=='monocytes'") %>%
  set_expt_conditions(fact = "visitnumber") %>%
  set_expt_batches(fact = "clinicaloutcome") %>%
  set_expt_colors(colors = chosen_colors)

mono_visit_norm <- normalize_expt(mono_visit, filter = TRUE, norm = "quant", convert = "cpm",
                                  transform = "log2")
mono_visit_pca <- plot_pca(mono_visit_norm)
pp(file = "images/monocyte_by_visit.png", image = mono_visit_pca$plot)

mono_visit_nb <- normalize_expt(mono_visit, filter = TRUE, convert = "cpm",
                                batch = "svaseq", transform = "log2")
mono_visit_nb_pca <- plot_pca(mono_visit_nb)
pp(file = "images/monocyte_by_visit_nb.png", image = mono_visit_nb_pca$plot)

table(pData(mono_visit_norm)$batch)
```

```{r mono_visit_de, fig.show = "hide"}
keepers <- list(
    "second_vs_first" = c("c2", "c1"),
    "third_vs_second" = c("c3", "c2"),
    "third_vs_first" = c("c3", "c1"))
mono_visit_de <- all_pairwise(mono_visit, model_batch = "svaseq", filter = TRUE)

mono_visit_tables <- combine_de_tables(
    mono_visit_de,
    keepers = keepers,
    excel = glue::glue("excel/mono_visit_tables-v{ver}.xlsx"))
```

```{r v1_vs_all}
new_factor <- as.character(pData(mono_visit)[["visitnumber"]])
not_one_idx <- new_factor != 1
new_factor[not_one_idx] <- "not_1"
mono_one_vs <- set_expt_conditions(mono_visit, new_factor)

mono_one_vs_de <- all_pairwise(mono_one_vs, model_batch = "svaseq", filter = TRUE)

mono_one_vs_tables <- combine_de_tables(
    mono_one_vs_de,
    excel = glue::glue("excel/mono_one_vs_tables-v{ver}.xlsx"))
```

# Test TSP

In writing the following, I quickly realized that tspair was not
joking when it said it is intended for small numbers of genes.  For a
full expressionset of human data it is struggling.  I like the idea,
it may prove worth while to spend some time optimizing the package so
that it is more usable.

```{r tsp, eval = FALSE}
expt <- hs_clinical_nobiop

simple_tsp <- function(expt, column = "condition") {
  facts <- levels(as.factor(pData(expt)[[column]]))
  retlist <- list()
  if (length(facts) < 2) {
    stop("This requires factors with at least 2 levels.")
  } else if (length(facts) == 2) {
    retlist <- simple_tsp_pair(expt, column = column)
  } else {
    for (first in 1:(length(facts) - 1)) {
      for (second in 2:(length(facts))) {
        if (first < second) {
          name <- glue::glue("{facts[first]}_vs_{facts[second]}")
          message("Starting ", name, ".")
          substring <- glue::glue("{column}=='{facts[first]}'|{column}=='{facts[second]}'")
          subby <- subset_expt(expt, subset=as.character(substring))
          retlist[[name]] <- simple_tsp_pair(subby, column = column)
        }
      }
    }
  }
}

simple_tsp_pair <- function(subby, column = "condition", repetitions = 50) {
  tsp_input <- subby[["expressionset"]]
  tsp_output <- tspcalc(tsp_input, column)
  tsp_scores <- tspsig(tsp_input, column, B = repetitions)
}

tsp1 <- tspcalc(tsp_input, "condition")

```

```{r saveme}
if (!isTRUE(get0("skip_load"))) {
  pander::pander(sessionInfo())
  message(paste0("This is hpgltools commit: ", get_git_commit()))
  message(paste0("Saving to ", savefile))
  tmp <- sm(saveme(filename = savefile))
}
```

```{r loadme_after, eval = FALSE}
tmp <- loadme(filename = savefile)
```
