starting_sheet <- "sample_sheets/cs_samples_from_persistence_v2.1_202510.xlsx"

1 Introduction

I want to use this document to examine our first round of persistence samples. I checked my email from Najib and did not find a sample sheet but did find an explanation of the three sample types we expect.

In preparation for this, I downloaded a new hg38 genome. Since the panamensis asembly has not significantly changed (excepting the putative long read genome which I have not yet seen), I am just using the same one.

2 Loading annotation

The hg38 genome I got is brand new (202405), so do not use the archive for a while.

## Ok, so useast.ensembl is failing today, let us use the jan2024 archive?
#hs_annot <- load_biomart_annotations(archive = FALSE, species = "hsapiens")
## Seems like the 202401 archive is a good choice, it is explicitly the hg38_111 release.
## and it is waaaaay faster (like 100x) than useast right now.
hs_annot <- load_biomart_annotations(archive = TRUE, species = "hsapiens",
                                     year = 2024, month = "01")

## The biomart annotations file already exists, loading from it.

panamensis_orgdb_idx <- grep(pattern = "^org.+panamen.+MHOM.+db$",
                             x = rownames(installed.packages()))
panamensis_orgdb <- tail(rownames(installed.packages())[panamensis_orgdb_idx], n = 1)
lp_annot <- load_orgdb_annotations(panamensis_orgdb, keytype = "gid")

## Loading required package: AnnotationDbi

## Loading required package: stats4

## Loading required package: BiocGenerics

## Loading required package: generics

## 
## Attaching package: 'generics'

## The following objects are masked from 'package:base':
## 
##     as.difftime, as.factor, as.ordered, intersect, is.element, setdiff, setequal, union

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:hpgltools':
## 
##     annotation<-, conditions, conditions<-

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind, colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, saveRDS, table, tapply, unique, unsplit, which.max, which.min

## Loading required package: Biobase

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with 'browseVignettes()'. To cite Bioconductor, see 'citation("Biobase")', and for
##     packages 'citation("pkgname")'.

## 
## Attaching package: 'Biobase'

## The following object is masked from 'package:hpgltools':
## 
##     notes

## Loading required package: IRanges

## Loading required package: S4Vectors

## 
## Attaching package: 'S4Vectors'

## The following object is masked from 'package:utils':
## 
##     findMatches

## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname

## 
## Attaching package: 'IRanges'

## The following object is masked from 'package:hpgltools':
## 
##     trim

##

## Unable to find CDSNAME, setting it to ANNOT_EXTERNAL_DB_NAME.

## Unable to find CDSCHROM in the db, removing it.

## Unable to find CDSSTRAND in the db, removing it.

## Unable to find CDSSTART in the db, removing it.

## Unable to find CDSEND in the db, removing it.

## Extracted all gene ids.

## Attempting to select: ANNOT_EXTERNAL_DB_NAME, GENE_TYPE

## 'select()' returned 1:1 mapping between keys and columns

This is a little silly, but I am going to reload the annotations using the previous invocation to extract the annotation table without having to think. The previous block loads the orgdb for me, so I can just use that to get the fun annotations.

lp_annot <- load_orgdb_annotations(panamensis_orgdb, keytype = "gid", fields = "^annot")

## Unable to find CDSNAME, setting it to ANNOT_EXTERNAL_DB_NAME.

## Unable to find CDSCHROM in the db, removing it.

## Unable to find CDSSTRAND in the db, removing it.

## Unable to find CDSSTART in the db, removing it.

## Unable to find CDSEND in the db, removing it.

## Extracted all gene ids.

## Attempting to select: ANNOT_EXTERNAL_DB_NAME, GENE_TYPE, ANNOT_AA_SEQUENCE_ID, ANNOT_ANNOTATED_GO_COMPONENT, ANNOT_ANNOTATED_GO_FUNCTION, ANNOT_ANNOTATED_GO_ID_COMPONENT, ANNOT_ANNOTATED_GO_ID_FUNCTION, ANNOT_ANNOTATED_GO_ID_PROCESS, ANNOT_ANNOTATED_GO_PROCESS, ANNOT_ANTICODON, ANNOT_APOLLO_LINK_OUT, ANNOT_APOLLO_TRANSCRIPT_DESCRIPTION, ANNOT_CDS, ANNOT_CDS_LENGTH, ANNOT_CHROMOSOME, ANNOT_CODING_END, ANNOT_CODING_START, ANNOT_EC_NUMBERS, ANNOT_EC_NUMBERS_DERIVED, ANNOT_END_MAX, ANNOT_EXON_COUNT, ANNOT_EXTERNAL_DB_NAME, ANNOT_EXTERNAL_DB_VERSION, ANNOT_FIVE_PRIME_UTR_LENGTH, ANNOT_GENE_CONTEXT_END, ANNOT_GENE_CONTEXT_START, ANNOT_GENE_END_MAX, ANNOT_GENE_END_MAX_TEXT, ANNOT_GENE_ENTREZ_ID, ANNOT_GENE_ENTREZ_LINK, ANNOT_GENE_EXON_COUNT, ANNOT_GENE_HTS_NONCODING_SNPS, ANNOT_GENE_HTS_NONSYN_SYN_RATIO, ANNOT_GENE_HTS_NONSYNONYMOUS_SNPS, ANNOT_GENE_HTS_STOP_CODON_SNPS, ANNOT_GENE_HTS_SYNONYMOUS_SNPS, ANNOT_GENE_LOCATION_TEXT, ANNOT_GENE_NAME, ANNOT_GENE_ORTHOLOG_NUMBER, ANNOT_GENE_ORTHOMCL_NAME, ANNOT_GENE_PARALOG_NUMBER, ANNOT_GENE_PREVIOUS_IDS, ANNOT_GENE_PRODUCT, ANNOT_GENE_START_MIN, ANNOT_GENE_START_MIN_TEXT, ANNOT_GENE_TOTAL_HTS_SNPS, ANNOT_GENE_TRANSCRIPT_COUNT, ANNOT_GENE_TYPE, ANNOT_GENOMIC_SEQUENCE_LENGTH, ANNOT_GENUS_SPECIES, ANNOT_HAS_MISSING_TRANSCRIPTS, ANNOT_INTERPRO_DESCRIPTION, ANNOT_INTERPRO_ID, ANNOT_IS_DEPRECATED, ANNOT_IS_PSEUDO, ANNOT_ISOELECTRIC_POINT, ANNOT_LOCATION_TEXT, ANNOT_MAP_LOCATION, ANNOT_MCMC_LOCATION, ANNOT_MOLECULAR_WEIGHT, ANNOT_NCBI_TAX_ID, ANNOT_ORTHOMCL_LINK, ANNOT_OVERVIEW, ANNOT_PFAM_DESCRIPTION, ANNOT_PFAM_ID, ANNOT_PIRSF_DESCRIPTION, ANNOT_PIRSF_ID, ANNOT_PREDICTED_GO_COMPONENT, ANNOT_PREDICTED_GO_FUNCTION, ANNOT_PREDICTED_GO_ID_COMPONENT, ANNOT_PREDICTED_GO_ID_FUNCTION, ANNOT_PREDICTED_GO_ID_PROCESS, ANNOT_PREDICTED_GO_PROCESS, ANNOT_PRIMARY_KEY, ANNOT_PROB_MAP, ANNOT_PROB_MCMC, ANNOT_PROSITEPROFILES_DESCRIPTION, ANNOT_PROSITEPROFILES_ID, ANNOT_PROTEIN_LENGTH, ANNOT_PROTEIN_SEQUENCE, ANNOT_PROTEIN_SOURCE_ID, ANNOT_PSEUDO_STRING, ANNOT_SEQUENCE_DATABASE_NAME, ANNOT_SEQUENCE_ID, ANNOT_SIGNALP_PEPTIDE, ANNOT_SMART_DESCRIPTION, ANNOT_SMART_ID, ANNOT_SNPOVERVIEW, ANNOT_SO_ID, ANNOT_SO_TERM_DEFINITION, ANNOT_SO_TERM_NAME, ANNOT_SO_VERSION, ANNOT_START_MIN, ANNOT_STRAND, ANNOT_STRAND_PLUS_MINUS, ANNOT_SUPERFAMILY_DESCRIPTION, ANNOT_SUPERFAMILY_ID, ANNOT_THREE_PRIME_UTR_LENGTH, ANNOT_TIGRFAM_DESCRIPTION, ANNOT_TIGRFAM_ID, ANNOT_TM_COUNT, ANNOT_TRANS_FOUND_PER_GENE_INTERNAL, ANNOT_TRANSCRIPT_INDEX_PER_GENE, ANNOT_TRANSCRIPT_LENGTH, ANNOT_TRANSCRIPT_LINK, ANNOT_TRANSCRIPT_PRODUCT, ANNOT_TRANSCRIPT_SEQUENCE, ANNOT_TRANSCRIPTS_FOUND_PER_GENE, ANNOT_UNIPROT_IDS, ANNOT_UNIPROT_LINKS

## 'select()' returned 1:1 mapping between keys and columns

3 Collect preprocessed metadata

first_spec <- make_rnaseq_spec()
second_spec <- make_dnaseq_spec()
pre_meta <- gather_preprocessing_metadata(
  starting_metadata = starting_sheet, id_column = "hpgl_identifier",
  specification = first_spec,
  basedir = "preprocessing", species = "lpanamensis_v68",
  new_metadata = "sample_sheets/cs_samples_lpanamensis.xlsx")

## Did not find the condition column in the sample sheet.

## Filling it in as undefined.

## Did not find the batch column in the sample sheet.

## Filling it in as undefined.

## Checking the state of the condition column.

## Checking the state of the batch column.

## Checking the condition factor.

## Writing new metadata to: sample_sheets/cs_samples_lpanamensis.xlsx

## Deleting the file sample_sheets/cs_samples_lpanamensis.xlsx before writing the tables.

post_meta <- gather_preprocessing_metadata(
  starting_metadata = pre_meta[["new_meta"]], id_column = "hpgl_identifier",
  specification = second_spec, basedir = "preprocessing", species = "lpanamensis_v68")

## Warning in gather_preprocessing_metadata(starting_metadata = pre_meta[["new_meta"]], : Column: trimomatic_input already exists, replacing it.

## Warning in gather_preprocessing_metadata(starting_metadata = pre_meta[["new_meta"]], : Column: trimomatic_output already exists, replacing it.

## Warning in gather_preprocessing_metadata(starting_metadata = pre_meta[["new_meta"]], : Column: trimomatic_percent already exists, replacing it.

## Warning in gather_preprocessing_metadata(starting_metadata = pre_meta[["new_meta"]], : Column: fastqc_pct_gc already exists, replacing it.

## Warning in gather_preprocessing_metadata(starting_metadata = pre_meta[["new_meta"]], : Column: hisat_genome_single_concordant already exists,
## replacing it.

## Warning in gather_preprocessing_metadata(starting_metadata = pre_meta[["new_meta"]], : Column: hisat_genome_multi_concordant already exists,
## replacing it.

## Warning in gather_preprocessing_metadata(starting_metadata = pre_meta[["new_meta"]], : Column: hisat_genome_single_all already exists, replacing it.

## Warning in gather_preprocessing_metadata(starting_metadata = pre_meta[["new_meta"]], : Column: hisat_genome_multi_all already exists, replacing it.

## Warning in gather_preprocessing_metadata(starting_metadata = pre_meta[["new_meta"]], : Column: hisat_genome_percent_log already exists, replacing it.
## Warning in gather_preprocessing_metadata(starting_metadata = pre_meta[["new_meta"]], : Column: hisat_genome_percent_log already exists, replacing it.

## Warning in gather_preprocessing_metadata(starting_metadata = pre_meta[["new_meta"]], : Column: hisat_alignment already exists, replacing it.

## Warning in gather_preprocessing_metadata(starting_metadata = pre_meta[["new_meta"]], : Column: input_r1 already exists, replacing it.

## Warning in gather_preprocessing_metadata(starting_metadata = pre_meta[["new_meta"]], : Column: input_r2 already exists, replacing it.

## Warning in gather_preprocessing_metadata(starting_metadata = pre_meta[["new_meta"]], : Column: hisat_count_table already exists, replacing it.

## Not writing new metadata.

4 Collect gene annotations

I should have all my load_xyz_annotation functions return some of the same elements in their retlists.

lp_genes <- lp_annot[["genes"]]
hg_genes <- hs_annot[["gene_annotations"]]

5 Quick peek at the CS samples, lpanamensis v68

cs_se <- create_se(post_meta[["new_meta"]], gene_info = lp_genes,
                   species = NULL, file_column = "hisat_count_table") %>%
  set_conditions(fact = "initial_recurrent") %>%
  set_batches(fact = "participant_code")

## Reading the sample metadata.

## Checking the state of the condition column.

## Checking the state of the batch column.

## Checking the condition factor.

## The sample definitions comprises: 23 rows(samples) and 111 columns(metadata fields).

## Matched 8665 annotations and counts.

## Saving the summarized experiment to 'se.rda'.

## The final summarized experiment has 8665 rows and 111 columns.

## The numbers of samples by condition are:

## 
##  I  R 
## 16  7

## The number of samples by batch are:

## 
## PP1002 PP1003 PP1004 PP1007 PP1012 PP1013 PP1014 PP1015 PP1017 PP1018 PP2004 PP2008 PP2012 PP2015 PP2016 PP2019 
##      2      2      2      2      2      2      2      1      1      1      1      1      1      1      1      1

6 nonzero/libsize/etc

plot_legend(cs_se)

## Warning in ggplot2::guide_legend(overwrite.aes = list(size = plot_size)): Arguments in `...` must be used.
## ✖ Problematic argument:
## • overwrite.aes = list(size = plot_size)
## ℹ Did you misspell an argument name?

## The colors used in the expressionset are: #1B9E77, #7570B3.

plot_libsize(cs_se)

## Library sizes of 23 samples, 
## ranging from 3,530,715 to 7,324,639.

plot_nonzero(cs_se, y_intercept = 0.99)

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the hpgltools package.
##   Please report the issue to the authors.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

## A non-zero genes plot of 23 samples.
## These samples have an average 5.022 CPM coverage and 8558 genes observed, ranging from 8534 to
## 8587.

7 Paired initial/recurrent samples transcriptome

tmp_cs <- set_conditions(cs_se, fact = "batch")

## The numbers of samples by condition are:

## 
## PP1002 PP1003 PP1004 PP1007 PP1012 PP1013 PP1014 PP1015 PP1017 PP1018 PP2004 PP2008 PP2012 PP2015 PP2016 PP2019 
##      2      2      2      2      2      2      2      1      1      1      1      1      1      1      1      1

cs_paired <- subset_se(tmp_cs, min_replicates = 2) %>%
  set_conditions(fact = "initial_recurrent") %>%
  subset_se(subset = "batch!='PP1012'")

## Removing samples with less than 2 replicates.

## Removed: PRCS0015, PRCS0016, PRCS0017, PRCS0018, PRCS0019, PRCS0020, PRCS0021, PRCS0022, PRCS0023.

## The numbers of samples by condition are:

## 
## I R 
## 7 7

cs_paired_norm <- normalize(cs_paired, transform = "log2", convert = "cpm",
                            norm = "quant", filter = TRUE)

## Removing 145 low-count genes (8520 remaining).

## transform_counts: Found 12 values equal to 0, adding 1 to the matrix.

plot_pca(cs_paired_norm)

## Warning in ggplot2::guide_legend(overwrite.aes = list(size = plot_size)): Arguments in `...` must be used.
## ✖ Problematic argument:
## • overwrite.aes = list(size = plot_size)
## ℹ Did you misspell an argument name?

## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by I, R
## Shapes are defined by PP1002, PP1003, PP1004, PP1007, PP1013, PP1014.

cs_paired_combat <- normalize(cs_paired, transform = "log2", convert = "cpm",
                              filter = TRUE, batch = "combat")

## Removing 145 low-count genes (8520 remaining).

## transform_counts: Found 45 values less than 0.

## transform_counts: Found 100 values equal to 0, adding 1 to the matrix.

plot_pca(cs_paired_combat)

## Warning in ggplot2::guide_legend(overwrite.aes = list(size = plot_size)): Arguments in `...` must be used.
## ✖ Problematic argument:
## • overwrite.aes = list(size = plot_size)
## ℹ Did you misspell an argument name?

## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by I, R
## Shapes are defined by PP1002, PP1003, PP1004, PP1007, PP1013, PP1014.

cs_paired_nb <- normalize(cs_paired, transform = "log2", convert = "cpm",
                          filter = TRUE, batch = "sva")

## Removing 145 low-count genes (8520 remaining).

## transform_counts: Found 10 values less than 0.

## transform_counts: Found 10 values equal to 0, adding 1 to the matrix.

plot_pca(cs_paired_nb)

## Warning in ggplot2::guide_legend(overwrite.aes = list(size = plot_size)): Arguments in `...` must be used.
## ✖ Problematic argument:
## • overwrite.aes = list(size = plot_size)
## ℹ Did you misspell an argument name?

## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by I, R
## Shapes are defined by PP1002, PP1003, PP1004, PP1007, PP1013, PP1014.

8 Try DE nonetheless

cs_paired_counts <- write_se(cs_paired, excel = "excel/lp_cs_paired.xlsx")

## Writing the first sheet, containing a legend and some summary data.

## Warning in as.data.frame.DataFrame(pData(se), strinsAsFactors = FALSE): arguments in '...' ignored

## 1344 entries are 0.  We are on a log scale, adding 1 to the data.

## Warning in ggplot2::guide_legend(overwrite.aes = list(size = plot_size)): Arguments in `...` must be used.
## ✖ Problematic argument:
## • overwrite.aes = list(size = plot_size)
## ℹ Did you misspell an argument name?

## Warning in ggplot2::guide_legend(overwrite.aes = list(size = plot_size)): Arguments in `...` must be used.
## ✖ Problematic argument:
## • overwrite.aes = list(size = plot_size)
## ℹ Did you misspell an argument name?

## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the directlabels package.
##   Please report the issue at <https://github.com/tdhock/directlabels/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

## Naively calculating coefficient of variation/dispersion with respect to condition.

## Finished calculating dispersion estimates.

## Plot describing the gene distribution from a dataset.

## `geom_smooth()` using formula = 'y ~ x'

## Warning in ggplot2::guide_legend(overwrite.aes = list(size = plot_size)): Arguments in `...` must be used.
## ✖ Problematic argument:
## • overwrite.aes = list(size = plot_size)
## ℹ Did you misspell an argument name?

## Warning in ggplot2::guide_legend(overwrite.aes = list(size = plot_size)): Arguments in `...` must be used.
## ✖ Problematic argument:
## • overwrite.aes = list(size = plot_size)
## ℹ Did you misspell an argument name?

## Removing 145 low-count genes (8520 remaining).
## transform_counts: Found 78 values equal to 0, adding 1 to the matrix.
## Plot describing the gene distribution from a dataset.
## `geom_smooth()` using formula = 'y ~ x'The factor I has 6 rows.
## The factor R has 6 rows.

cs_paired_de <- all_pairwise(cs_paired, model_svs = "sva", model_fstring = "~ 0 + condition",
                             filter = TRUE)

## I R 
## 6 6

## Removing 145 low-count genes (8520 remaining).

## Warning in ggplot2::guide_legend(overwrite.aes = list(size = plot_size)): Arguments in `...` must be used.
## ✖ Problematic argument:
## • overwrite.aes = list(size = plot_size)
## ℹ Did you misspell an argument name?
## Arguments in `...` must be used.
## ✖ Problematic argument:
## • overwrite.aes = list(size = plot_size)
## ℹ Did you misspell an argument name?

## Basic step 0/3: Normalizing data.
## Basic step 0/3: Converting data.
## I think this is failing? SummarizedExperiment
## Basic step 0/3: Transforming data.
## Setting 238 entries to zero.
## This received a matrix of SVs.
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates

## conditions
## I R 
## 6 6

## conditions
## I R 
## 6 6

## conditions
## I R 
## 6 6

cs_paired_de

## A pairwise differential expression with results from: basic, deseq, ebseq, edger, limma, noiseq.
## This used a surrogate/batch estimate from: sva.
## The primary analysis performed 1 comparisons.
## The logFC agreement among the methods follows:

##                 R_vs_I
## basic_vs_deseq  0.5107
## basic_vs_dream  0.4496
## basic_vs_ebseq  0.8951
## basic_vs_edger  0.5122
## basic_vs_limma  0.4956
## basic_vs_noiseq 0.9554
## deseq_vs_dream  0.8761
## deseq_vs_ebseq  0.5691
## deseq_vs_edger  0.9982
## deseq_vs_limma  0.8689
## deseq_vs_noiseq 0.5606
## dream_vs_ebseq  0.4742
## dream_vs_edger  0.8845
## dream_vs_limma  0.9741
## dream_vs_noiseq 0.4768
## ebseq_vs_edger  0.5722
## ebseq_vs_limma  0.4933
## ebseq_vs_noiseq 0.9343
## edger_vs_limma  0.8756
## edger_vs_noiseq 0.5620
## limma_vs_noiseq 0.4942

cs_paired_table <- combine_de_tables(
  cs_paired_de,
  excel = "excel/lp_cs_paired_initial_vs_recurrent_table.xlsx")

## Looking for subscript invalid names, start of extract_keepers.
## Looking for subscript invalid names, end of extract_keepers.

cs_paired_table

## A set of combined differential expression results.

##    table deseq_sigup deseq_sigdown edger_sigup edger_sigdown limma_sigup limma_sigdown
## 1 R_vs_I           0             0           0             0           0             0

## Only  has information, cannot create an UpSet.
## Plot describing unique/shared genes in a differential expression table.

## NULL

cs_paired_sig <- extract_significant_genes(
  cs_paired_table,
  excel = "excel/cs_paired_initial_vs_recurrent_sig.xlsx")

## Deleting the file excel/cs_paired_initial_vs_recurrent_sig.xlsx before writing the tables.

cs_paired_sig

## A set of genes deemed significant according to limma, edger, deseq, ebseq, basic.
## The parameters defining significant were:
## LFC cutoff: 1 adj P cutoff: 0.05

##        limma_up limma_down edger_up edger_down deseq_up deseq_down ebseq_up ebseq_down basic_up basic_down
## R_vs_I        0          0        0          0        0          0        0          0        0          0

cs_bim_de <- all_pairwise(cs_paired, model_fstring = "~ 0 + condition + batch", filter = TRUE)

## I R 
## 6 6 
## PP1002 PP1003 PP1004 PP1007 PP1013 PP1014 
##      2      2      2      2      2      2

## Warning: attributes are not identical across measure variables; they will be dropped

## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the ggsankey package.
##   Please report the issue at <https://github.com/davidsjoberg/ggsankey/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

## Removing 145 low-count genes (8520 remaining).

## Warning in ggplot2::guide_legend(overwrite.aes = list(size = plot_size)): Arguments in `...` must be used.
## ✖ Problematic argument:
## • overwrite.aes = list(size = plot_size)
## ℹ Did you misspell an argument name?

## Warning in ggplot2::guide_legend(overwrite.aes = list(size = plot_size)): Arguments in `...` must be used.
## ✖ Problematic argument:
## • overwrite.aes = list(size = plot_size)
## ℹ Did you misspell an argument name?

## Basic step 0/3: Normalizing data.
## Basic step 0/3: Converting data.
## I think this is failing? SummarizedExperiment
## Basic step 0/3: Transforming data.
## Setting 238 entries to zero.
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates

## conditions
## I R 
## 6 6

## conditions
## I R 
## 6 6

## conditions
## I R 
## 6 6

cs_bim_de

## A pairwise differential expression with results from: basic, deseq, ebseq, edger, limma, noiseq.
## This used a surrogate/batch estimate from: Existing surrogate matrix.
## The primary analysis performed 1 comparisons.
## The logFC agreement among the methods follows:

##                 R_vs_I
## basic_vs_deseq  0.9575
## basic_vs_dream  0.9472
## basic_vs_ebseq  0.8951
## basic_vs_edger  0.9614
## basic_vs_limma  0.9664
## basic_vs_noiseq 0.9554
## deseq_vs_dream  0.9682
## deseq_vs_ebseq  0.9280
## deseq_vs_edger  0.9994
## deseq_vs_limma  0.9465
## deseq_vs_noiseq 0.9782
## dream_vs_ebseq  0.9313
## dream_vs_edger  0.9703
## dream_vs_limma  0.9751
## dream_vs_noiseq 0.9554
## ebseq_vs_edger  0.9320
## ebseq_vs_limma  0.9220
## ebseq_vs_noiseq 0.9343
## edger_vs_limma  0.9496
## edger_vs_noiseq 0.9799
## limma_vs_noiseq 0.9385

cs_bim_table <- combine_de_tables(cs_bim_de, excel = "excel/lp_cs_paired_de_batch_in_model.xlsx")

## Deleting the file excel/lp_cs_paired_de_batch_in_model.xlsx before writing the tables.
## Looking for subscript invalid names, start of extract_keepers.
## Looking for subscript invalid names, end of extract_keepers.

cs_bim_table

## A set of combined differential expression results.

##    table deseq_sigup deseq_sigdown edger_sigup edger_sigdown limma_sigup limma_sigdown
## 1 R_vs_I           0             0           0             0           0             0

## Only  has information, cannot create an UpSet.
## Plot describing unique/shared genes in a differential expression table.

## NULL

9 Pull out the variants

## classifications <- classify_variants(colData(cs_se), coverage_column = NULL)
round1_variant_se <- count_snps(cs_se, annot_column = "freebayes_variants_table",
                                snp_column = "PAIRED")

## Using the snp column: PAIRED from the sample annotations.

## Warning: NAs introduced by coercion

colData(round1_variant_se)[["variant_number"]] <- "low"
high_variant_samples <- colData(round1_variant_se)[["freebayes_observed"]] >= 20000
colData(round1_variant_se)[high_variant_samples, "variant_number"] <- "high"
colData(round1_variant_se)[["person_time"]] <- paste0(
  colData(round1_variant_se)[["participant_code"]], "_",
  colData(round1_variant_se)[["initial_recurrent"]])
round1_variant_se <- set_batches(round1_variant_se, fact = "initial_recurrent") %>%
  set_conditions(fact = "participant_code")

## The number of samples by batch are:

## 
##  I  R 
## 16  7

## The numbers of samples by condition are:

## 
## PP1002 PP1003 PP1004 PP1007 PP1012 PP1013 PP1014 PP1015 PP1017 PP1018 PP2004 PP2008 PP2012 PP2015 PP2016 PP2019 
##      2      2      2      2      2      2      2      1      1      1      1      1      1      1      1      1

variant_norm <- normalize(round1_variant_se, transform = "log2",
                          convert = "cpm", filter = TRUE)

## Removing 0 low-count genes (146420 remaining).

## transform_counts: Found 2390744 values equal to 0, adding 1 to the matrix.

variant_pca <- plot_pca(variant_norm)
## variant_pca <- plot_pca(variant_norm, plot_labels = TRUE, max_overlaps = 1000)
pp(file = "images/variants_pca.png")
variant_pca$plot
dev.off()

## png 
##   2

variant_pca

## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by PP1002, PP1003, PP1004, PP1007, PP1012, PP1013, PP1014, PP1015, PP1017, PP1018, PP2004, PP2008, PP2012, PP2015, PP2016, PP2019
## Shapes are defined by I, R.

9.0.1 Repeat with only the paired samples

variant_paired <- subset_se(round1_variant_se, min_replicates = 2)

## Removing samples with less than 2 replicates.

## Removed: PRCS0015, PRCS0016, PRCS0017, PRCS0018, PRCS0019, PRCS0020, PRCS0021, PRCS0022, PRCS0023.

paired_norm <- normalize(variant_paired, transform = "log2", convert = "cpm", filter = TRUE)

## Removing 0 low-count genes (146420 remaining).

## transform_counts: Found 1532229 values equal to 0, adding 1 to the matrix.

paired_pca <- plot_pca(paired_norm, plot_labels = TRUE)
pp(file = "images/paired_pca.png")
paired_pca$plot
dev.off()

## png 
##   2

paried_pca

## Error:
## ! object 'paried_pca' not found

9.1 Groups by zymodeme

variants_by_person <- set_conditions(round1_variant_se, fact = "participant_code")

## The numbers of samples by condition are:

## 
## PP1002 PP1003 PP1004 PP1007 PP1012 PP1013 PP1014 PP1015 PP1017 PP1018 PP2004 PP2008 PP2012 PP2015 PP2016 PP2019 
##      2      2      2      2      2      2      2      1      1      1      1      1      1      1      1      1

paired <- c("PP1002", "PP1003", "PP1007", "PP1012", "PP1013", "PP1014")
paired_z23 <- c("PP1013", "PP1007")
paired_z22 <- c("PP1014", "PP1003", "PP1004")
paired_z21 <- c("PP1002")
paired_z23_samples <- colData(round1_variant_se)[["participant_code"]] %in% paired_z23
paired_z23_variant <- round1_variant_se[, paired_z23_samples]
paired_z22_samples <- colData(round1_variant_se)[["participant_code"]] %in% paired_z22
paired_z22_variant <- round1_variant_se[, paired_z22_samples]
paired_z21_samples <- colData(round1_variant_se)[["participant_code"]] %in% paired_z21
paired_z21_variant <- round1_variant_se[, paired_z21_samples]

9.2 z2.3 Sets

paired_z23_snp_sets <- get_snp_sets(paired_z23_variant, factor = "person_time")

## The samples represent the following categories:

## 
## PP1007_I PP1007_R PP1013_I PP1013_R 
##        1        1        1        1

## Using a proportion of observed variants, converting the data to binary observations.

## The factor PP1007_I has only 1 row.

## The factor PP1007_R has only 1 row.

## The factor PP1013_I has only 1 row.

## The factor PP1013_R has only 1 row.

## Finished iterating over the chromosomes.

paired_z23_snp_sets[["set_names"]]

## $`0000`
## [1] ""
## 
## $`1000`
## [1] "PP1007_I"
## 
## $`0100`
## [1] "PP1007_R"
## 
## $`1100`
## [1] "PP1007_I, PP1007_R"
## 
## $`0010`
## [1] "PP1013_I"
## 
## $`1010`
## [1] "PP1007_I, PP1013_I"
## 
## $`0110`
## [1] "PP1007_R, PP1013_I"
## 
## $`1110`
## [1] "PP1007_I, PP1007_R, PP1013_I"
## 
## $`0001`
## [1] "PP1013_R"
## 
## $`1001`
## [1] "PP1007_I, PP1013_R"
## 
## $`0101`
## [1] "PP1007_R, PP1013_R"
## 
## $`1101`
## [1] "PP1007_I, PP1007_R, PP1013_R"
## 
## $`0011`
## [1] "PP1013_I, PP1013_R"
## 
## $`1011`
## [1] "PP1007_I, PP1013_I, PP1013_R"
## 
## $`0111`
## [1] "PP1007_R, PP1013_I, PP1013_R"
## 
## $`1111`
## [1] "PP1007_I, PP1007_R, PP1013_I, PP1013_R"

## 1010 are the two initial samples and 0101 the recurrent
z23_shared_initial <- paired_z23_snp_sets[["intersections"]][["1010"]]
length(z23_shared_initial)

## [1] 1252

z23_shared_recurrent <- paired_z23_snp_sets[["intersections"]][["0101"]]
length(z23_shared_recurrent)

## [1] 1060

person_1107_vars <- paired_z23_snp_sets[["intersections"]][["1100"]]
person_1013_vars <- paired_z23_snp_sets[["intersections"]][["0011"]]

z23_proportion <- get_proportion_snp_sets(paired_z23_variant, factor = "person_time")

## The samples represent the following categories:

## 
## PP1007_I PP1007_R PP1013_I PP1013_R 
##        1        1        1        1

## Using a proportion column to recast each position as a categorical, 0 (ref), 1 (heter), 2 (homo)

## The factor PP1007_I has only 1 row.

## The factor PP1007_R has only 1 row.

## The factor PP1013_I has only 1 row.

## The factor PP1013_R has only 1 row.

## Gathering the set of exclusive homozygous and heterozygous positions.

## Counting observed positions across 690 chromosomes/contigs.

## Counting homozygous positions by chromosome/contig.

## Counting heterozygous positions by chromosome/contig.

## Counting exclusive homozygous positions by chromosome/contig.

## Counting exclusive heterozygous positions by chromosome/contig.

## Counting exclusive notzygous positions by chromosome/contig.

z23_intersections <- snps_intersections(cs_se, paired_z23_snp_sets, start_column = "annot_coding_start",
                                        end_column = "annot_coding_end", chr_column = "annot_sequence_id")
## The top few genes with variants shared exclusively among the initial samples:
head(z23_intersections$gene_summaries[["PP1007_I, PP1013_I"]])

## LPAL13_270011800 LPAL13_090013100 LPAL13_250017600 LPAL13_270011600 LPAL13_310018100 LPAL13_060013800 
##               11                6                6                6                5                4

head(z23_intersections$gene_summaries[["PP1007_R, PP1013_R"]])

## LPAL13_000024200 LPAL13_100010100 LPAL13_260025500 LPAL13_260026900 LPAL13_330040200 LPAL13_340010300 
##                3                2                2                2                2                2

z23_vs_genes <- snps_vs_genes(cs_se, paired_z23_snp_sets, start_column = "annot_coding_start",
                              end_column = "annot_coding_end", chr_column = "annot_sequence_id")

## The snp grange data has 146420 elements.

## The first few snp chromosomes are: LPAL13_SCAF000001, LPAL13_SCAF000002, LPAL13_SCAF000003, LPAL13_SCAF000005, LPAL13_SCAF000007, LPAL13_SCAF000009

## The first few exp chromosomes are: LPAL13_SCAF000001, LPAL13_SCAF000010, LPAL13_SCAF000011, LPAL13_SCAF000017, LPAL13_SCAF000021, LPAL13_SCAF000025

## There are 52510 overlapping variants and genes.

head(z23_vs_genes[["count_by_gene"]])

## LPAL13_090013100 LPAL13_340060500 LPAL13_200008400 LPAL13_350020500 LPAL13_330040200 LPAL13_270011800 
##               97               86               85               73               71               70

9.3 z2.2 Sets

paired_z22_snp_sets <- get_snp_sets(paired_z22_variant, factor = "person_time")

## The samples represent the following categories:

## 
## PP1003_I PP1003_R PP1004_I PP1004_R PP1014_I PP1014_R 
##        1        1        1        1        1        1

## Using a proportion of observed variants, converting the data to binary observations.

## The factor PP1003_I has only 1 row.

## The factor PP1003_R has only 1 row.

## The factor PP1004_I has only 1 row.

## The factor PP1004_R has only 1 row.

## The factor PP1014_I has only 1 row.

## The factor PP1014_R has only 1 row.

## Finished iterating over the chromosomes.

paired_z22_snp_sets[["set_names"]]

## $`000000`
## [1] ""
## 
## $`100000`
## [1] "PP1003_I"
## 
## $`010000`
## [1] "PP1003_R"
## 
## $`110000`
## [1] "PP1003_I, PP1003_R"
## 
## $`001000`
## [1] "PP1004_I"
## 
## $`101000`
## [1] "PP1003_I, PP1004_I"
## 
## $`011000`
## [1] "PP1003_R, PP1004_I"
## 
## $`111000`
## [1] "PP1003_I, PP1003_R, PP1004_I"
## 
## $`000100`
## [1] "PP1004_R"
## 
## $`100100`
## [1] "PP1003_I, PP1004_R"
## 
## $`010100`
## [1] "PP1003_R, PP1004_R"
## 
## $`110100`
## [1] "PP1003_I, PP1003_R, PP1004_R"
## 
## $`001100`
## [1] "PP1004_I, PP1004_R"
## 
## $`101100`
## [1] "PP1003_I, PP1004_I, PP1004_R"
## 
## $`011100`
## [1] "PP1003_R, PP1004_I, PP1004_R"
## 
## $`111100`
## [1] "PP1003_I, PP1003_R, PP1004_I, PP1004_R"
## 
## $`000010`
## [1] "PP1014_I"
## 
## $`100010`
## [1] "PP1003_I, PP1014_I"
## 
## $`010010`
## [1] "PP1003_R, PP1014_I"
## 
## $`110010`
## [1] "PP1003_I, PP1003_R, PP1014_I"
## 
## $`001010`
## [1] "PP1004_I, PP1014_I"
## 
## $`101010`
## [1] "PP1003_I, PP1004_I, PP1014_I"
## 
## $`011010`
## [1] "PP1003_R, PP1004_I, PP1014_I"
## 
## $`111010`
## [1] "PP1003_I, PP1003_R, PP1004_I, PP1014_I"
## 
## $`000110`
## [1] "PP1004_R, PP1014_I"
## 
## $`100110`
## [1] "PP1003_I, PP1004_R, PP1014_I"
## 
## $`010110`
## [1] "PP1003_R, PP1004_R, PP1014_I"
## 
## $`110110`
## [1] "PP1003_I, PP1003_R, PP1004_R, PP1014_I"
## 
## $`001110`
## [1] "PP1004_I, PP1004_R, PP1014_I"
## 
## $`101110`
## [1] "PP1003_I, PP1004_I, PP1004_R, PP1014_I"
## 
## $`011110`
## [1] "PP1003_R, PP1004_I, PP1004_R, PP1014_I"
## 
## $`111110`
## [1] "PP1003_I, PP1003_R, PP1004_I, PP1004_R, PP1014_I"
## 
## $`000001`
## [1] "PP1014_R"
## 
## $`100001`
## [1] "PP1003_I, PP1014_R"
## 
## $`010001`
## [1] "PP1003_R, PP1014_R"
## 
## $`110001`
## [1] "PP1003_I, PP1003_R, PP1014_R"
## 
## $`001001`
## [1] "PP1004_I, PP1014_R"
## 
## $`101001`
## [1] "PP1003_I, PP1004_I, PP1014_R"
## 
## $`011001`
## [1] "PP1003_R, PP1004_I, PP1014_R"
## 
## $`111001`
## [1] "PP1003_I, PP1003_R, PP1004_I, PP1014_R"
## 
## $`000101`
## [1] "PP1004_R, PP1014_R"
## 
## $`100101`
## [1] "PP1003_I, PP1004_R, PP1014_R"
## 
## $`010101`
## [1] "PP1003_R, PP1004_R, PP1014_R"
## 
## $`110101`
## [1] "PP1003_I, PP1003_R, PP1004_R, PP1014_R"
## 
## $`001101`
## [1] "PP1004_I, PP1004_R, PP1014_R"
## 
## $`101101`
## [1] "PP1003_I, PP1004_I, PP1004_R, PP1014_R"
## 
## $`011101`
## [1] "PP1003_R, PP1004_I, PP1004_R, PP1014_R"
## 
## $`111101`
## [1] "PP1003_I, PP1003_R, PP1004_I, PP1004_R, PP1014_R"
## 
## $`000011`
## [1] "PP1014_I, PP1014_R"
## 
## $`100011`
## [1] "PP1003_I, PP1014_I, PP1014_R"
## 
## $`010011`
## [1] "PP1003_R, PP1014_I, PP1014_R"
## 
## $`110011`
## [1] "PP1003_I, PP1003_R, PP1014_I, PP1014_R"
## 
## $`001011`
## [1] "PP1004_I, PP1014_I, PP1014_R"
## 
## $`101011`
## [1] "PP1003_I, PP1004_I, PP1014_I, PP1014_R"
## 
## $`011011`
## [1] "PP1003_R, PP1004_I, PP1014_I, PP1014_R"
## 
## $`111011`
## [1] "PP1003_I, PP1003_R, PP1004_I, PP1014_I, PP1014_R"
## 
## $`000111`
## [1] "PP1004_R, PP1014_I, PP1014_R"
## 
## $`100111`
## [1] "PP1003_I, PP1004_R, PP1014_I, PP1014_R"
## 
## $`010111`
## [1] "PP1003_R, PP1004_R, PP1014_I, PP1014_R"
## 
## $`110111`
## [1] "PP1003_I, PP1003_R, PP1004_R, PP1014_I, PP1014_R"
## 
## $`001111`
## [1] "PP1004_I, PP1004_R, PP1014_I, PP1014_R"
## 
## $`101111`
## [1] "PP1003_I, PP1004_I, PP1004_R, PP1014_I, PP1014_R"
## 
## $`011111`
## [1] "PP1003_R, PP1004_I, PP1004_R, PP1014_I, PP1014_R"
## 
## $`111111`
## [1] "PP1003_I, PP1003_R, PP1004_I, PP1004_R, PP1014_I, PP1014_R"

## 101010 are the initials once again
## 010101 are the recurrent samples once again
z22_shared_initial <- paired_z22_snp_sets[["intersections"]][["101010"]]
length(z22_shared_initial)

## [1] 25

z22_shared_recurrent <- paired_z22_snp_sets[["intersections"]][["010101"]]
length(z22_shared_recurrent)

## [1] 28

z22_proportion <- get_proportion_snp_sets(paired_z22_variant, factor = "person_time")

## The samples represent the following categories:

## 
## PP1003_I PP1003_R PP1004_I PP1004_R PP1014_I PP1014_R 
##        1        1        1        1        1        1

## Using a proportion column to recast each position as a categorical, 0 (ref), 1 (heter), 2 (homo)

## The factor PP1003_I has only 1 row.

## The factor PP1003_R has only 1 row.

## The factor PP1004_I has only 1 row.

## The factor PP1004_R has only 1 row.

## The factor PP1014_I has only 1 row.

## The factor PP1014_R has only 1 row.

## Gathering the set of exclusive homozygous and heterozygous positions.

## Counting observed positions across 690 chromosomes/contigs.

## Counting homozygous positions by chromosome/contig.

## Counting heterozygous positions by chromosome/contig.

## Counting exclusive homozygous positions by chromosome/contig.

## Counting exclusive heterozygous positions by chromosome/contig.

## Counting exclusive notzygous positions by chromosome/contig.

z22_intersections <- snps_intersections(cs_se, paired_z22_snp_sets, start_column = "annot_coding_start",
                                        end_column = "annot_coding_end", chr_column = "annot_sequence_id")
z22_vs_genes <- snps_vs_genes(cs_se, paired_z22_snp_sets, start_column = "annot_coding_start",
                              end_column = "annot_coding_end", chr_column = "annot_sequence_id")

## The snp grange data has 146420 elements.

## The first few snp chromosomes are: LPAL13_SCAF000001, LPAL13_SCAF000002, LPAL13_SCAF000003, LPAL13_SCAF000005, LPAL13_SCAF000007, LPAL13_SCAF000009

## The first few exp chromosomes are: LPAL13_SCAF000001, LPAL13_SCAF000010, LPAL13_SCAF000011, LPAL13_SCAF000017, LPAL13_SCAF000021, LPAL13_SCAF000025

## There are 52510 overlapping variants and genes.

9.4 z2.1 Sets

z21_proportion <- get_proportion_snp_sets(paired_z21_variant, factor = "person_time")

## The samples represent the following categories:

## 
## PP1002_I PP1002_R 
##        1        1

## Using a proportion column to recast each position as a categorical, 0 (ref), 1 (heter), 2 (homo)

## The factor PP1002_I has only 1 row.

## The factor PP1002_R has only 1 row.

## Gathering the set of exclusive homozygous and heterozygous positions.

## Counting observed positions across 690 chromosomes/contigs.

## Counting homozygous positions by chromosome/contig.

## Counting heterozygous positions by chromosome/contig.

## Counting exclusive homozygous positions by chromosome/contig.

## Counting exclusive heterozygous positions by chromosome/contig.

## Counting exclusive notzygous positions by chromosome/contig.

10 Explicit I/R for individuals using their individual genomes

In the following I hope to perform comparisons of the initial->recurrent comparisons of each individual against his/her own initial strain. I therefore created a series of files with high/low stringency filters for the AB tag produced by freebayes.

The relevant columns are ‘controlled_strain_high_filter’ and ‘controlled_strain_low_filter’.

## Note to self, 'high_filter' is the low stringency.
round2_high_variants <- count_snps(cs_se, annot_column = "controlled_strain_high_filter",
                                   snp_column = "AB", scale = c(1,0)) %>%
  set_batches(fact = "pca_zymodeme") %>%
  set_conditions(fact = "initial_recurrent")

## Using the snp column: AB from the sample annotations.

## The number of samples by batch are:

## 
## Z2.1 Z2.2 Z2.3 
##    3    6    5

## The numbers of samples by condition are:

## 
## I R 
## 7 7

round2_norm <- normalize(round2_high_variants, convert = "cpm", filter = TRUE)

## Removing 0 low-count genes (34504 remaining).

plot_pca(round2_norm)

## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by I, R
## Shapes are defined by Z2.1, Z2.2, Z2.3.

## Warning in MASS::cov.trob(data[, vars], wt = weight * nrow(data)): Probable convergence failure
## Warning in MASS::cov.trob(data[, vars], wt = weight * nrow(data)): Probable convergence failure

high_sets <- get_snp_sets(round2_high_variants, factor = "initial_recurrent")

## The samples represent the following categories:

## 
## I R 
## 7 7

## Using a proportion of observed variants, converting the data to binary observations.

## The factor I has 7 rows.

## The factor R has 7 rows.

## Finished iterating over the chromosomes.

Ok, I have a problem, the AB column is from 0->1 where 0 is a high-confidence hit. I think, therefore, that I must do a 1-x to the result of count_snps.

round2_low_variants <- count_snps(cs_se, annot_column = "controlled_strain_low_filter",
                                  snp_column = "AB", scale = c(1,0)) %>%
  set_batches(fact = "pca_zymodeme")

## Using the snp column: AB from the sample annotations.

## The number of samples by batch are:

## 
## Z2.1 Z2.2 Z2.3 
##    3    6    5

plot_libsize(round2_low_variants)

## Library sizes of 14 samples, 
## ranging from 335.2 to 6,007.

round2_norm <- normalize(round2_low_variants, convert = "cpm", filter = TRUE, plot_labels = TRUE,
                         expt_names = "participant_code", max_overlaps = 1000)

## Removing 0 low-count genes (18764 remaining).

plot_pca(round2_norm)

## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by I, R
## Shapes are defined by Z2.1, Z2.2, Z2.3.

## Warning in MASS::cov.trob(data[, vars], wt = weight * nrow(data)): Probable convergence failure
## Warning in MASS::cov.trob(data[, vars], wt = weight * nrow(data)): Probable convergence failure
## Warning in MASS::cov.trob(data[, vars], wt = weight * nrow(data)): Probable convergence failure
## Warning in MASS::cov.trob(data[, vars], wt = weight * nrow(data)): Probable convergence failure

Collect the variants by initial/recurrent

low_sets <- get_snp_sets(round2_low_variants, factor = "initial_recurrent")

## The samples represent the following categories:

## 
## I R 
## 7 7

## Using a proportion of observed variants, converting the data to binary observations.

## The factor I has 7 rows.

## The factor R has 7 rows.

## Finished iterating over the chromosomes.

low_sets[["set_names"]]

## $`00`
## [1] ""
## 
## $`10`
## [1] "I"
## 
## $`01`
## [1] "R"
## 
## $`11`
## [1] "I, R"

head(low_sets[["intersections"]][["10"]], n = 20)

## [1] "chr_LpaL13-20.1_pos_347915_ref_G_alt_T" "chr_LpaL13-20.1_pos_347924_ref_G_alt_T" "chr_LpaL13-34_pos_1237188_ref_T_alt_G"

low_sets[["intersections"]][["01"]]

## NULL

low_sets[["intersections"]][["11"]]

## [1] "chr_LpaL13-26_pos_938857_ref_C_alt_A"

low_sets provides a set of coordinates and their state with respect to if they were observed in only initial/recurrent/both.

get_proportion_snp_sets() should give us a catalog of variants by chromosome/contig with respect to sample type. There is a bit of a problem here: usually the tag I use is higher for more confident values, but AB is lower for more confident values; I need to do something to fix this…

low_sets_prop <- get_proportion_snp_sets(round2_low_variants, factor = "initial_recurrent")

## The samples represent the following categories:

## 
## I R 
## 7 7

## Using a proportion column to recast each position as a categorical, 0 (ref), 1 (heter), 2 (homo)

## The factor I has 7 rows.

## The factor R has 7 rows.

## Gathering the set of exclusive homozygous and heterozygous positions.

## Counting observed positions across 468 chromosomes/contigs.

## Counting homozygous positions by chromosome/contig.

## Counting heterozygous positions by chromosome/contig.

## Counting exclusive homozygous positions by chromosome/contig.

## Counting exclusive heterozygous positions by chromosome/contig.

## Counting exclusive notzygous positions by chromosome/contig.

low_intersection <- snps_intersections(
  cs_se, low_sets, start_column = "annot_coding_start",
  end_column = "annot_coding_end", chr_column = "annot_sequence_id")
low_intersection

## The combinations of variants, chromosomes, and genes which are unique to every factor
## and combination of factors in the data.

low_vs_genes <- snps_vs_genes(cs_se, low_sets, start_column = "annot_coding_start",
                              end_column = "annot_coding_end", chr_column = "annot_sequence_id")

## The snp grange data has 18764 elements.

## The first few snp chromosomes are: LPAL13_SCAF000002, LPAL13_SCAF000003, LPAL13_SCAF000005, LPAL13_SCAF000009, LPAL13_SCAF000010, LPAL13_SCAF000011

## The first few exp chromosomes are: LPAL13_SCAF000001, LPAL13_SCAF000010, LPAL13_SCAF000011, LPAL13_SCAF000017, LPAL13_SCAF000021, LPAL13_SCAF000025

## There are 5598 overlapping variants and genes.

LS0tCnRpdGxlOiAiRXhhbWluaW5nIHBlcnNpc3RlbmNlIGNsaW5pY2FsIHNhbXBsZXM6IDIwMjUxMCIKYXV0aG9yOiAiYXRiIGFiZWxld0BnbWFpbC5jb20iCmRhdGU6ICJgciBTeXMuRGF0ZSgpYCIKYmlibGlvZ3JhcGh5OiBhdGIuYmliCm91dHB1dDoKICBodG1sX2RvY3VtZW50OgogICAgY29kZV9kb3dubG9hZDogdHJ1ZQogICAgY29kZV9mb2xkaW5nOiBzaG93CiAgICBmaWdfY2FwdGlvbjogdHJ1ZQogICAgZmlnX2hlaWdodDogNwogICAgZmlnX3dpZHRoOiA3CiAgICBoaWdobGlnaHQ6IHplbmJ1cm4KICAgIGtlZXBfbWQ6IGZhbHNlCiAgICBtb2RlOiBzZWxmY29udGFpbmVkCiAgICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICAgIHNlbGZfY29udGFpbmVkOiB0cnVlCiAgICB0aGVtZTogcmVhZGFibGUKICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OgogICAgICBjb2xsYXBzZWQ6IGZhbHNlCiAgICAgIHNtb290aF9zY3JvbGw6IGZhbHNlCi0tLQoKPHN0eWxlIHR5cGU9InRleHQvY3NzIj4KYm9keSwgdGQgewogIGZvbnQtc2l6ZTogMTZweDsKfQpjb2RlLnJ7CiAgZm9udC1zaXplOiAxNnB4Owp9CnByZSB7CiAgZm9udC1zaXplOiAxNnB4Cn0KYm9keSAubWFpbi1jb250YWluZXIgewogIG1heC13aWR0aDogMTYwMHB4Owp9Cjwvc3R5bGU+CgpgYGB7ciBvcHRpb25zLCBpbmNsdWRlPUZBTFNFfQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkoaHBnbHRvb2xzKQpsaWJyYXJ5KHJldGljdWxhdGUpCnR0IDwtIGRldnRvb2xzOjpsb2FkX2FsbCgifi9ocGdsdG9vbHMiKQprbml0cjo6b3B0c19rbml0JHNldCgKICBwcm9ncmVzcyA9IFRSVUUsIHZlcmJvc2UgPSBUUlVFLCB3aWR0aCA9IDkwLCBlY2hvID0gVFJVRSkKa25pdHI6Om9wdHNfY2h1bmskc2V0KAogIGVycm9yID0gVFJVRSwgZmlnLndpZHRoID0gOCwgZmlnLmhlaWdodCA9IDgsIGZpZy5yZXRpbmEgPSAyLAogIG91dC53aWR0aCA9ICIxMDAlIiwgZGV2ID0gInBuZyIsCiAgZGV2LmFyZ3MgPSBsaXN0KHBuZyA9IGxpc3QodHlwZSA9ICJjYWlyby1wbmciKSkpCm9sZF9vcHRpb25zIDwtIG9wdGlvbnMoZGlnaXRzID0gNCwgc3RyaW5nc0FzRmFjdG9ycyA9IEZBTFNFLCBrbml0ci5kdXBsaWNhdGUubGFiZWwgPSAiYWxsb3ciKQpnZ3Bsb3QyOjp0aGVtZV9zZXQoZ2dwbG90Mjo6dGhlbWVfYncoYmFzZV9zaXplID0gMTIpKQp2ZXIgPC0gIjIwMjQxMCIKcHJldmlvdXNfZmlsZSA8LSAiIgp2ZXIgPC0gZm9ybWF0KFN5cy5EYXRlKCksICIlWSVtJWQiKQoKIyN0bXAgPC0gc20obG9hZG1lKGZpbGVuYW1lPXBhc3RlMChnc3ViKHBhdHRlcm49IlxcLlJtZCIsIHJlcGxhY2U9IiIsIHg9cHJldmlvdXNfZmlsZSksICItdiIsIHZlciwgIi5yZGEueHoiKSkpCnJtZF9maWxlIDwtICJjc19zYW1wbGVzXzIwMjUxMC5SbWQiCmBgYAoKYGBge3J9CnN0YXJ0aW5nX3NoZWV0IDwtICJzYW1wbGVfc2hlZXRzL2NzX3NhbXBsZXNfZnJvbV9wZXJzaXN0ZW5jZV92Mi4xXzIwMjUxMC54bHN4IgpgYGAKCiMgSW50cm9kdWN0aW9uCgpJIHdhbnQgdG8gdXNlIHRoaXMgZG9jdW1lbnQgdG8gZXhhbWluZSBvdXIgZmlyc3Qgcm91bmQgb2YgcGVyc2lzdGVuY2UKc2FtcGxlcy4gIEkgY2hlY2tlZCBteSBlbWFpbCBmcm9tIE5hamliIGFuZCBkaWQgbm90IGZpbmQgYSBzYW1wbGUKc2hlZXQgYnV0IGRpZCBmaW5kIGFuIGV4cGxhbmF0aW9uIG9mIHRoZSB0aHJlZSBzYW1wbGUgdHlwZXMgd2UgZXhwZWN0LgoKSW4gcHJlcGFyYXRpb24gZm9yIHRoaXMsIEkgZG93bmxvYWRlZCBhIG5ldyBoZzM4IGdlbm9tZS4gIFNpbmNlIHRoZQpwYW5hbWVuc2lzIGFzZW1ibHkgaGFzIG5vdCBzaWduaWZpY2FudGx5IGNoYW5nZWQgKGV4Y2VwdGluZyB0aGUKcHV0YXRpdmUgbG9uZyByZWFkIGdlbm9tZSB3aGljaCBJIGhhdmUgbm90IHlldCBzZWVuKSwgSSBhbSBqdXN0IHVzaW5nCnRoZSBzYW1lIG9uZS4KCiMgTG9hZGluZyBhbm5vdGF0aW9uCgpUaGUgaGczOCBnZW5vbWUgSSBnb3QgaXMgYnJhbmQgbmV3ICgyMDI0MDUpLCBzbyBkbyBub3QgdXNlIHRoZSBhcmNoaXZlCmZvciBhIHdoaWxlLgoKYGBge3J9CiMjIE9rLCBzbyB1c2Vhc3QuZW5zZW1ibCBpcyBmYWlsaW5nIHRvZGF5LCBsZXQgdXMgdXNlIHRoZSBqYW4yMDI0IGFyY2hpdmU/CiNoc19hbm5vdCA8LSBsb2FkX2Jpb21hcnRfYW5ub3RhdGlvbnMoYXJjaGl2ZSA9IEZBTFNFLCBzcGVjaWVzID0gImhzYXBpZW5zIikKIyMgU2VlbXMgbGlrZSB0aGUgMjAyNDAxIGFyY2hpdmUgaXMgYSBnb29kIGNob2ljZSwgaXQgaXMgZXhwbGljaXRseSB0aGUgaGczOF8xMTEgcmVsZWFzZS4KIyMgYW5kIGl0IGlzIHdhYWFhYXkgZmFzdGVyIChsaWtlIDEwMHgpIHRoYW4gdXNlYXN0IHJpZ2h0IG5vdy4KaHNfYW5ub3QgPC0gbG9hZF9iaW9tYXJ0X2Fubm90YXRpb25zKGFyY2hpdmUgPSBUUlVFLCBzcGVjaWVzID0gImhzYXBpZW5zIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHllYXIgPSAyMDI0LCBtb250aCA9ICIwMSIpCgpwYW5hbWVuc2lzX29yZ2RiX2lkeCA8LSBncmVwKHBhdHRlcm4gPSAiXm9yZy4rcGFuYW1lbi4rTUhPTS4rZGIkIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICB4ID0gcm93bmFtZXMoaW5zdGFsbGVkLnBhY2thZ2VzKCkpKQpwYW5hbWVuc2lzX29yZ2RiIDwtIHRhaWwocm93bmFtZXMoaW5zdGFsbGVkLnBhY2thZ2VzKCkpW3BhbmFtZW5zaXNfb3JnZGJfaWR4XSwgbiA9IDEpCmxwX2Fubm90IDwtIGxvYWRfb3JnZGJfYW5ub3RhdGlvbnMocGFuYW1lbnNpc19vcmdkYiwga2V5dHlwZSA9ICJnaWQiKQpgYGAKClRoaXMgaXMgYSBsaXR0bGUgc2lsbHksIGJ1dCBJIGFtIGdvaW5nIHRvIHJlbG9hZCB0aGUgYW5ub3RhdGlvbnMgdXNpbmcKdGhlIHByZXZpb3VzIGludm9jYXRpb24gdG8gZXh0cmFjdCB0aGUgYW5ub3RhdGlvbiB0YWJsZSB3aXRob3V0IGhhdmluZwp0byB0aGluay4gIFRoZSBwcmV2aW91cyBibG9jayBsb2FkcyB0aGUgb3JnZGIgZm9yIG1lLCBzbyBJIGNhbiBqdXN0CnVzZSB0aGF0IHRvIGdldCB0aGUgZnVuIGFubm90YXRpb25zLgoKYGBge3J9CmxwX2Fubm90IDwtIGxvYWRfb3JnZGJfYW5ub3RhdGlvbnMocGFuYW1lbnNpc19vcmdkYiwga2V5dHlwZSA9ICJnaWQiLCBmaWVsZHMgPSAiXmFubm90IikKYGBgCgojIENvbGxlY3QgcHJlcHJvY2Vzc2VkIG1ldGFkYXRhCgpgYGB7cn0KZmlyc3Rfc3BlYyA8LSBtYWtlX3JuYXNlcV9zcGVjKCkKc2Vjb25kX3NwZWMgPC0gbWFrZV9kbmFzZXFfc3BlYygpCnByZV9tZXRhIDwtIGdhdGhlcl9wcmVwcm9jZXNzaW5nX21ldGFkYXRhKAogIHN0YXJ0aW5nX21ldGFkYXRhID0gc3RhcnRpbmdfc2hlZXQsIGlkX2NvbHVtbiA9ICJocGdsX2lkZW50aWZpZXIiLAogIHNwZWNpZmljYXRpb24gPSBmaXJzdF9zcGVjLAogIGJhc2VkaXIgPSAicHJlcHJvY2Vzc2luZyIsIHNwZWNpZXMgPSAibHBhbmFtZW5zaXNfdjY4IiwKICBuZXdfbWV0YWRhdGEgPSAic2FtcGxlX3NoZWV0cy9jc19zYW1wbGVzX2xwYW5hbWVuc2lzLnhsc3giKQpwb3N0X21ldGEgPC0gZ2F0aGVyX3ByZXByb2Nlc3NpbmdfbWV0YWRhdGEoCiAgc3RhcnRpbmdfbWV0YWRhdGEgPSBwcmVfbWV0YVtbIm5ld19tZXRhIl1dLCBpZF9jb2x1bW4gPSAiaHBnbF9pZGVudGlmaWVyIiwKICBzcGVjaWZpY2F0aW9uID0gc2Vjb25kX3NwZWMsIGJhc2VkaXIgPSAicHJlcHJvY2Vzc2luZyIsIHNwZWNpZXMgPSAibHBhbmFtZW5zaXNfdjY4IikKYGBgCgojIENvbGxlY3QgZ2VuZSBhbm5vdGF0aW9ucwoKSSBzaG91bGQgaGF2ZSBhbGwgbXkgbG9hZF94eXpfYW5ub3RhdGlvbiBmdW5jdGlvbnMgcmV0dXJuIHNvbWUgb2YgdGhlCnNhbWUgZWxlbWVudHMgaW4gdGhlaXIgcmV0bGlzdHMuCgpgYGB7cn0KbHBfZ2VuZXMgPC0gbHBfYW5ub3RbWyJnZW5lcyJdXQpoZ19nZW5lcyA8LSBoc19hbm5vdFtbImdlbmVfYW5ub3RhdGlvbnMiXV0KYGBgCgojIFF1aWNrIHBlZWsgYXQgdGhlIENTIHNhbXBsZXMsIGxwYW5hbWVuc2lzIHY2OAoKYGBge3J9CmNzX3NlIDwtIGNyZWF0ZV9zZShwb3N0X21ldGFbWyJuZXdfbWV0YSJdXSwgZ2VuZV9pbmZvID0gbHBfZ2VuZXMsCiAgICAgICAgICAgICAgICAgICBzcGVjaWVzID0gTlVMTCwgZmlsZV9jb2x1bW4gPSAiaGlzYXRfY291bnRfdGFibGUiKSAlPiUKICBzZXRfY29uZGl0aW9ucyhmYWN0ID0gImluaXRpYWxfcmVjdXJyZW50IikgJT4lCiAgc2V0X2JhdGNoZXMoZmFjdCA9ICJwYXJ0aWNpcGFudF9jb2RlIikKYGBgCgojIG5vbnplcm8vbGlic2l6ZS9ldGMKCmBgYHtyfQpwbG90X2xlZ2VuZChjc19zZSkKcGxvdF9saWJzaXplKGNzX3NlKQpwbG90X25vbnplcm8oY3Nfc2UsIHlfaW50ZXJjZXB0ID0gMC45OSkKYGBgCgojIFBhaXJlZCBpbml0aWFsL3JlY3VycmVudCBzYW1wbGVzIHRyYW5zY3JpcHRvbWUKCmBgYHtyfQp0bXBfY3MgPC0gc2V0X2NvbmRpdGlvbnMoY3Nfc2UsIGZhY3QgPSAiYmF0Y2giKQpjc19wYWlyZWQgPC0gc3Vic2V0X3NlKHRtcF9jcywgbWluX3JlcGxpY2F0ZXMgPSAyKSAlPiUKICBzZXRfY29uZGl0aW9ucyhmYWN0ID0gImluaXRpYWxfcmVjdXJyZW50IikgJT4lCiAgc3Vic2V0X3NlKHN1YnNldCA9ICJiYXRjaCE9J1BQMTAxMiciKQoKY3NfcGFpcmVkX25vcm0gPC0gbm9ybWFsaXplKGNzX3BhaXJlZCwgdHJhbnNmb3JtID0gImxvZzIiLCBjb252ZXJ0ID0gImNwbSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBub3JtID0gInF1YW50IiwgZmlsdGVyID0gVFJVRSkKcGxvdF9wY2EoY3NfcGFpcmVkX25vcm0pCmNzX3BhaXJlZF9jb21iYXQgPC0gbm9ybWFsaXplKGNzX3BhaXJlZCwgdHJhbnNmb3JtID0gImxvZzIiLCBjb252ZXJ0ID0gImNwbSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGZpbHRlciA9IFRSVUUsIGJhdGNoID0gImNvbWJhdCIpCnBsb3RfcGNhKGNzX3BhaXJlZF9jb21iYXQpCgpjc19wYWlyZWRfbmIgPC0gbm9ybWFsaXplKGNzX3BhaXJlZCwgdHJhbnNmb3JtID0gImxvZzIiLCBjb252ZXJ0ID0gImNwbSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgZmlsdGVyID0gVFJVRSwgYmF0Y2ggPSAic3ZhIikKcGxvdF9wY2EoY3NfcGFpcmVkX25iKQpgYGAKCiMgVHJ5IERFIG5vbmV0aGVsZXNzCgpgYGB7cn0KY3NfcGFpcmVkX2NvdW50cyA8LSB3cml0ZV9zZShjc19wYWlyZWQsIGV4Y2VsID0gImV4Y2VsL2xwX2NzX3BhaXJlZC54bHN4IikKY3NfcGFpcmVkX2RlIDwtIGFsbF9wYWlyd2lzZShjc19wYWlyZWQsIG1vZGVsX3N2cyA9ICJzdmEiLCBtb2RlbF9mc3RyaW5nID0gIn4gMCArIGNvbmRpdGlvbiIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZmlsdGVyID0gVFJVRSkKY3NfcGFpcmVkX2RlCmNzX3BhaXJlZF90YWJsZSA8LSBjb21iaW5lX2RlX3RhYmxlcygKICBjc19wYWlyZWRfZGUsCiAgZXhjZWwgPSAiZXhjZWwvbHBfY3NfcGFpcmVkX2luaXRpYWxfdnNfcmVjdXJyZW50X3RhYmxlLnhsc3giKQpjc19wYWlyZWRfdGFibGUKY3NfcGFpcmVkX3NpZyA8LSBleHRyYWN0X3NpZ25pZmljYW50X2dlbmVzKAogIGNzX3BhaXJlZF90YWJsZSwKICBleGNlbCA9ICJleGNlbC9jc19wYWlyZWRfaW5pdGlhbF92c19yZWN1cnJlbnRfc2lnLnhsc3giKQpjc19wYWlyZWRfc2lnCgpjc19iaW1fZGUgPC0gYWxsX3BhaXJ3aXNlKGNzX3BhaXJlZCwgbW9kZWxfZnN0cmluZyA9ICJ+IDAgKyBjb25kaXRpb24gKyBiYXRjaCIsIGZpbHRlciA9IFRSVUUpCmNzX2JpbV9kZQpjc19iaW1fdGFibGUgPC0gY29tYmluZV9kZV90YWJsZXMoY3NfYmltX2RlLCBleGNlbCA9ICJleGNlbC9scF9jc19wYWlyZWRfZGVfYmF0Y2hfaW5fbW9kZWwueGxzeCIpCmNzX2JpbV90YWJsZQpgYGAKCiMgUHVsbCBvdXQgdGhlIHZhcmlhbnRzCgpgYGB7cn0KIyMgY2xhc3NpZmljYXRpb25zIDwtIGNsYXNzaWZ5X3ZhcmlhbnRzKGNvbERhdGEoY3Nfc2UpLCBjb3ZlcmFnZV9jb2x1bW4gPSBOVUxMKQpyb3VuZDFfdmFyaWFudF9zZSA8LSBjb3VudF9zbnBzKGNzX3NlLCBhbm5vdF9jb2x1bW4gPSAiZnJlZWJheWVzX3ZhcmlhbnRzX3RhYmxlIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzbnBfY29sdW1uID0gIlBBSVJFRCIpCmNvbERhdGEocm91bmQxX3ZhcmlhbnRfc2UpW1sidmFyaWFudF9udW1iZXIiXV0gPC0gImxvdyIKaGlnaF92YXJpYW50X3NhbXBsZXMgPC0gY29sRGF0YShyb3VuZDFfdmFyaWFudF9zZSlbWyJmcmVlYmF5ZXNfb2JzZXJ2ZWQiXV0gPj0gMjAwMDAKY29sRGF0YShyb3VuZDFfdmFyaWFudF9zZSlbaGlnaF92YXJpYW50X3NhbXBsZXMsICJ2YXJpYW50X251bWJlciJdIDwtICJoaWdoIgpjb2xEYXRhKHJvdW5kMV92YXJpYW50X3NlKVtbInBlcnNvbl90aW1lIl1dIDwtIHBhc3RlMCgKICBjb2xEYXRhKHJvdW5kMV92YXJpYW50X3NlKVtbInBhcnRpY2lwYW50X2NvZGUiXV0sICJfIiwKICBjb2xEYXRhKHJvdW5kMV92YXJpYW50X3NlKVtbImluaXRpYWxfcmVjdXJyZW50Il1dKQpyb3VuZDFfdmFyaWFudF9zZSA8LSBzZXRfYmF0Y2hlcyhyb3VuZDFfdmFyaWFudF9zZSwgZmFjdCA9ICJpbml0aWFsX3JlY3VycmVudCIpICU+JQogIHNldF9jb25kaXRpb25zKGZhY3QgPSAicGFydGljaXBhbnRfY29kZSIpCmBgYAoKYGBge3J9CnZhcmlhbnRfbm9ybSA8LSBub3JtYWxpemUocm91bmQxX3ZhcmlhbnRfc2UsIHRyYW5zZm9ybSA9ICJsb2cyIiwKICAgICAgICAgICAgICAgICAgICAgICAgICBjb252ZXJ0ID0gImNwbSIsIGZpbHRlciA9IFRSVUUpCnZhcmlhbnRfcGNhIDwtIHBsb3RfcGNhKHZhcmlhbnRfbm9ybSkKIyMgdmFyaWFudF9wY2EgPC0gcGxvdF9wY2EodmFyaWFudF9ub3JtLCBwbG90X2xhYmVscyA9IFRSVUUsIG1heF9vdmVybGFwcyA9IDEwMDApCnBwKGZpbGUgPSAiaW1hZ2VzL3ZhcmlhbnRzX3BjYS5wbmciKQp2YXJpYW50X3BjYSRwbG90CmRldi5vZmYoKQp2YXJpYW50X3BjYQpgYGAKCiMjIyBSZXBlYXQgd2l0aCBvbmx5IHRoZSBwYWlyZWQgc2FtcGxlcwoKYGBge3J9CnZhcmlhbnRfcGFpcmVkIDwtIHN1YnNldF9zZShyb3VuZDFfdmFyaWFudF9zZSwgbWluX3JlcGxpY2F0ZXMgPSAyKQpwYWlyZWRfbm9ybSA8LSBub3JtYWxpemUodmFyaWFudF9wYWlyZWQsIHRyYW5zZm9ybSA9ICJsb2cyIiwgY29udmVydCA9ICJjcG0iLCBmaWx0ZXIgPSBUUlVFKQpwYWlyZWRfcGNhIDwtIHBsb3RfcGNhKHBhaXJlZF9ub3JtLCBwbG90X2xhYmVscyA9IFRSVUUpCnBwKGZpbGUgPSAiaW1hZ2VzL3BhaXJlZF9wY2EucG5nIikKcGFpcmVkX3BjYSRwbG90CmRldi5vZmYoKQpwYXJpZWRfcGNhCmBgYAoKIyMgR3JvdXBzIGJ5IHp5bW9kZW1lCgpgYGB7cn0KdmFyaWFudHNfYnlfcGVyc29uIDwtIHNldF9jb25kaXRpb25zKHJvdW5kMV92YXJpYW50X3NlLCBmYWN0ID0gInBhcnRpY2lwYW50X2NvZGUiKQpwYWlyZWQgPC0gYygiUFAxMDAyIiwgIlBQMTAwMyIsICJQUDEwMDciLCAiUFAxMDEyIiwgIlBQMTAxMyIsICJQUDEwMTQiKQpwYWlyZWRfejIzIDwtIGMoIlBQMTAxMyIsICJQUDEwMDciKQpwYWlyZWRfejIyIDwtIGMoIlBQMTAxNCIsICJQUDEwMDMiLCAiUFAxMDA0IikKcGFpcmVkX3oyMSA8LSBjKCJQUDEwMDIiKQpwYWlyZWRfejIzX3NhbXBsZXMgPC0gY29sRGF0YShyb3VuZDFfdmFyaWFudF9zZSlbWyJwYXJ0aWNpcGFudF9jb2RlIl1dICVpbiUgcGFpcmVkX3oyMwpwYWlyZWRfejIzX3ZhcmlhbnQgPC0gcm91bmQxX3ZhcmlhbnRfc2VbLCBwYWlyZWRfejIzX3NhbXBsZXNdCnBhaXJlZF96MjJfc2FtcGxlcyA8LSBjb2xEYXRhKHJvdW5kMV92YXJpYW50X3NlKVtbInBhcnRpY2lwYW50X2NvZGUiXV0gJWluJSBwYWlyZWRfejIyCnBhaXJlZF96MjJfdmFyaWFudCA8LSByb3VuZDFfdmFyaWFudF9zZVssIHBhaXJlZF96MjJfc2FtcGxlc10KcGFpcmVkX3oyMV9zYW1wbGVzIDwtIGNvbERhdGEocm91bmQxX3ZhcmlhbnRfc2UpW1sicGFydGljaXBhbnRfY29kZSJdXSAlaW4lIHBhaXJlZF96MjEKcGFpcmVkX3oyMV92YXJpYW50IDwtIHJvdW5kMV92YXJpYW50X3NlWywgcGFpcmVkX3oyMV9zYW1wbGVzXQpgYGAKCiMjIHoyLjMgU2V0cwoKYGBge3J9CnBhaXJlZF96MjNfc25wX3NldHMgPC0gZ2V0X3NucF9zZXRzKHBhaXJlZF96MjNfdmFyaWFudCwgZmFjdG9yID0gInBlcnNvbl90aW1lIikKcGFpcmVkX3oyM19zbnBfc2V0c1tbInNldF9uYW1lcyJdXQojIyAxMDEwIGFyZSB0aGUgdHdvIGluaXRpYWwgc2FtcGxlcyBhbmQgMDEwMSB0aGUgcmVjdXJyZW50CnoyM19zaGFyZWRfaW5pdGlhbCA8LSBwYWlyZWRfejIzX3NucF9zZXRzW1siaW50ZXJzZWN0aW9ucyJdXVtbIjEwMTAiXV0KbGVuZ3RoKHoyM19zaGFyZWRfaW5pdGlhbCkKejIzX3NoYXJlZF9yZWN1cnJlbnQgPC0gcGFpcmVkX3oyM19zbnBfc2V0c1tbImludGVyc2VjdGlvbnMiXV1bWyIwMTAxIl1dCmxlbmd0aCh6MjNfc2hhcmVkX3JlY3VycmVudCkKcGVyc29uXzExMDdfdmFycyA8LSBwYWlyZWRfejIzX3NucF9zZXRzW1siaW50ZXJzZWN0aW9ucyJdXVtbIjExMDAiXV0KcGVyc29uXzEwMTNfdmFycyA8LSBwYWlyZWRfejIzX3NucF9zZXRzW1siaW50ZXJzZWN0aW9ucyJdXVtbIjAwMTEiXV0KCnoyM19wcm9wb3J0aW9uIDwtIGdldF9wcm9wb3J0aW9uX3NucF9zZXRzKHBhaXJlZF96MjNfdmFyaWFudCwgZmFjdG9yID0gInBlcnNvbl90aW1lIikKejIzX2ludGVyc2VjdGlvbnMgPC0gc25wc19pbnRlcnNlY3Rpb25zKGNzX3NlLCBwYWlyZWRfejIzX3NucF9zZXRzLCBzdGFydF9jb2x1bW4gPSAiYW5ub3RfY29kaW5nX3N0YXJ0IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGVuZF9jb2x1bW4gPSAiYW5ub3RfY29kaW5nX2VuZCIsIGNocl9jb2x1bW4gPSAiYW5ub3Rfc2VxdWVuY2VfaWQiKQojIyBUaGUgdG9wIGZldyBnZW5lcyB3aXRoIHZhcmlhbnRzIHNoYXJlZCBleGNsdXNpdmVseSBhbW9uZyB0aGUgaW5pdGlhbCBzYW1wbGVzOgpoZWFkKHoyM19pbnRlcnNlY3Rpb25zJGdlbmVfc3VtbWFyaWVzW1siUFAxMDA3X0ksIFBQMTAxM19JIl1dKQpoZWFkKHoyM19pbnRlcnNlY3Rpb25zJGdlbmVfc3VtbWFyaWVzW1siUFAxMDA3X1IsIFBQMTAxM19SIl1dKQp6MjNfdnNfZ2VuZXMgPC0gc25wc192c19nZW5lcyhjc19zZSwgcGFpcmVkX3oyM19zbnBfc2V0cywgc3RhcnRfY29sdW1uID0gImFubm90X2NvZGluZ19zdGFydCIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGVuZF9jb2x1bW4gPSAiYW5ub3RfY29kaW5nX2VuZCIsIGNocl9jb2x1bW4gPSAiYW5ub3Rfc2VxdWVuY2VfaWQiKQpoZWFkKHoyM192c19nZW5lc1tbImNvdW50X2J5X2dlbmUiXV0pCmBgYAoKIyMgejIuMiBTZXRzCgpgYGB7cn0KcGFpcmVkX3oyMl9zbnBfc2V0cyA8LSBnZXRfc25wX3NldHMocGFpcmVkX3oyMl92YXJpYW50LCBmYWN0b3IgPSAicGVyc29uX3RpbWUiKQpwYWlyZWRfejIyX3NucF9zZXRzW1sic2V0X25hbWVzIl1dCiMjIDEwMTAxMCBhcmUgdGhlIGluaXRpYWxzIG9uY2UgYWdhaW4KIyMgMDEwMTAxIGFyZSB0aGUgcmVjdXJyZW50IHNhbXBsZXMgb25jZSBhZ2Fpbgp6MjJfc2hhcmVkX2luaXRpYWwgPC0gcGFpcmVkX3oyMl9zbnBfc2V0c1tbImludGVyc2VjdGlvbnMiXV1bWyIxMDEwMTAiXV0KbGVuZ3RoKHoyMl9zaGFyZWRfaW5pdGlhbCkKejIyX3NoYXJlZF9yZWN1cnJlbnQgPC0gcGFpcmVkX3oyMl9zbnBfc2V0c1tbImludGVyc2VjdGlvbnMiXV1bWyIwMTAxMDEiXV0KbGVuZ3RoKHoyMl9zaGFyZWRfcmVjdXJyZW50KQoKejIyX3Byb3BvcnRpb24gPC0gZ2V0X3Byb3BvcnRpb25fc25wX3NldHMocGFpcmVkX3oyMl92YXJpYW50LCBmYWN0b3IgPSAicGVyc29uX3RpbWUiKQp6MjJfaW50ZXJzZWN0aW9ucyA8LSBzbnBzX2ludGVyc2VjdGlvbnMoY3Nfc2UsIHBhaXJlZF96MjJfc25wX3NldHMsIHN0YXJ0X2NvbHVtbiA9ICJhbm5vdF9jb2Rpbmdfc3RhcnQiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZW5kX2NvbHVtbiA9ICJhbm5vdF9jb2RpbmdfZW5kIiwgY2hyX2NvbHVtbiA9ICJhbm5vdF9zZXF1ZW5jZV9pZCIpCnoyMl92c19nZW5lcyA8LSBzbnBzX3ZzX2dlbmVzKGNzX3NlLCBwYWlyZWRfejIyX3NucF9zZXRzLCBzdGFydF9jb2x1bW4gPSAiYW5ub3RfY29kaW5nX3N0YXJ0IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZW5kX2NvbHVtbiA9ICJhbm5vdF9jb2RpbmdfZW5kIiwgY2hyX2NvbHVtbiA9ICJhbm5vdF9zZXF1ZW5jZV9pZCIpCmBgYAoKIyMgejIuMSBTZXRzCgpgYGB7cn0KejIxX3Byb3BvcnRpb24gPC0gZ2V0X3Byb3BvcnRpb25fc25wX3NldHMocGFpcmVkX3oyMV92YXJpYW50LCBmYWN0b3IgPSAicGVyc29uX3RpbWUiKQpgYGAKCiMgRXhwbGljaXQgSS9SIGZvciBpbmRpdmlkdWFscyB1c2luZyB0aGVpciBpbmRpdmlkdWFsIGdlbm9tZXMKCkluIHRoZSBmb2xsb3dpbmcgSSBob3BlIHRvIHBlcmZvcm0gY29tcGFyaXNvbnMgb2YgdGhlCmluaXRpYWwtPnJlY3VycmVudCBjb21wYXJpc29ucyBvZiBlYWNoIGluZGl2aWR1YWwgYWdhaW5zdCBoaXMvaGVyIG93bgppbml0aWFsIHN0cmFpbi4gIEkgdGhlcmVmb3JlIGNyZWF0ZWQgYSBzZXJpZXMgb2YgZmlsZXMgd2l0aCBoaWdoL2xvdwpzdHJpbmdlbmN5IGZpbHRlcnMgZm9yIHRoZSBBQiB0YWcgcHJvZHVjZWQgYnkgZnJlZWJheWVzLgoKVGhlIHJlbGV2YW50IGNvbHVtbnMgYXJlICdjb250cm9sbGVkX3N0cmFpbl9oaWdoX2ZpbHRlcicgYW5kCidjb250cm9sbGVkX3N0cmFpbl9sb3dfZmlsdGVyJy4KCmBgYHtyfQojIyBOb3RlIHRvIHNlbGYsICdoaWdoX2ZpbHRlcicgaXMgdGhlIGxvdyBzdHJpbmdlbmN5Lgpyb3VuZDJfaGlnaF92YXJpYW50cyA8LSBjb3VudF9zbnBzKGNzX3NlLCBhbm5vdF9jb2x1bW4gPSAiY29udHJvbGxlZF9zdHJhaW5faGlnaF9maWx0ZXIiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHNucF9jb2x1bW4gPSAiQUIiLCBzY2FsZSA9IGMoMSwwKSkgJT4lCiAgc2V0X2JhdGNoZXMoZmFjdCA9ICJwY2Ffenltb2RlbWUiKSAlPiUKICBzZXRfY29uZGl0aW9ucyhmYWN0ID0gImluaXRpYWxfcmVjdXJyZW50IikKCnJvdW5kMl9ub3JtIDwtIG5vcm1hbGl6ZShyb3VuZDJfaGlnaF92YXJpYW50cywgY29udmVydCA9ICJjcG0iLCBmaWx0ZXIgPSBUUlVFKQpwbG90X3BjYShyb3VuZDJfbm9ybSkKCmhpZ2hfc2V0cyA8LSBnZXRfc25wX3NldHMocm91bmQyX2hpZ2hfdmFyaWFudHMsIGZhY3RvciA9ICJpbml0aWFsX3JlY3VycmVudCIpCmBgYAoKT2ssIEkgaGF2ZSBhIHByb2JsZW0sIHRoZSBBQiBjb2x1bW4gaXMgZnJvbSAwLT4xIHdoZXJlIDAgaXMgYQpoaWdoLWNvbmZpZGVuY2UgaGl0LiAgSSB0aGluaywgdGhlcmVmb3JlLCB0aGF0IEkgbXVzdCBkbyBhIDEteCB0byB0aGUKcmVzdWx0IG9mIGNvdW50X3NucHMuCgpgYGB7cn0Kcm91bmQyX2xvd192YXJpYW50cyA8LSBjb3VudF9zbnBzKGNzX3NlLCBhbm5vdF9jb2x1bW4gPSAiY29udHJvbGxlZF9zdHJhaW5fbG93X2ZpbHRlciIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzbnBfY29sdW1uID0gIkFCIiwgc2NhbGUgPSBjKDEsMCkpICU+JQogIHNldF9iYXRjaGVzKGZhY3QgPSAicGNhX3p5bW9kZW1lIikKcGxvdF9saWJzaXplKHJvdW5kMl9sb3dfdmFyaWFudHMpCgpyb3VuZDJfbm9ybSA8LSBub3JtYWxpemUocm91bmQyX2xvd192YXJpYW50cywgY29udmVydCA9ICJjcG0iLCBmaWx0ZXIgPSBUUlVFLCBwbG90X2xhYmVscyA9IFRSVUUsCiAgICAgICAgICAgICAgICAgICAgICAgICBleHB0X25hbWVzID0gInBhcnRpY2lwYW50X2NvZGUiLCBtYXhfb3ZlcmxhcHMgPSAxMDAwKQpwbG90X3BjYShyb3VuZDJfbm9ybSkKYGBgCgpDb2xsZWN0IHRoZSB2YXJpYW50cyBieSBpbml0aWFsL3JlY3VycmVudAoKYGBge3J9Cmxvd19zZXRzIDwtIGdldF9zbnBfc2V0cyhyb3VuZDJfbG93X3ZhcmlhbnRzLCBmYWN0b3IgPSAiaW5pdGlhbF9yZWN1cnJlbnQiKQpsb3dfc2V0c1tbInNldF9uYW1lcyJdXQpoZWFkKGxvd19zZXRzW1siaW50ZXJzZWN0aW9ucyJdXVtbIjEwIl1dLCBuID0gMjApCmxvd19zZXRzW1siaW50ZXJzZWN0aW9ucyJdXVtbIjAxIl1dCmxvd19zZXRzW1siaW50ZXJzZWN0aW9ucyJdXVtbIjExIl1dCmBgYAoKbG93X3NldHMgcHJvdmlkZXMgYSBzZXQgb2YgY29vcmRpbmF0ZXMgYW5kIHRoZWlyIHN0YXRlIHdpdGggcmVzcGVjdCB0bwppZiB0aGV5IHdlcmUgb2JzZXJ2ZWQgaW4gb25seSBpbml0aWFsL3JlY3VycmVudC9ib3RoLgoKZ2V0X3Byb3BvcnRpb25fc25wX3NldHMoKSBzaG91bGQgZ2l2ZSB1cyBhIGNhdGFsb2cgb2YgdmFyaWFudHMgYnkKY2hyb21vc29tZS9jb250aWcgd2l0aCByZXNwZWN0IHRvIHNhbXBsZSB0eXBlLiAgVGhlcmUgaXMgYSBiaXQgb2YgYQpwcm9ibGVtIGhlcmU6IHVzdWFsbHkgdGhlIHRhZyBJIHVzZSBpcyBoaWdoZXIgZm9yIG1vcmUgY29uZmlkZW50CnZhbHVlcywgYnV0IEFCIGlzIGxvd2VyIGZvciBtb3JlIGNvbmZpZGVudCB2YWx1ZXM7IEkgbmVlZCB0byBkbwpzb21ldGhpbmcgdG8gZml4IHRoaXMuLi4KCmBgYHtyfQpsb3dfc2V0c19wcm9wIDwtIGdldF9wcm9wb3J0aW9uX3NucF9zZXRzKHJvdW5kMl9sb3dfdmFyaWFudHMsIGZhY3RvciA9ICJpbml0aWFsX3JlY3VycmVudCIpCmBgYAoKYGBge3J9Cmxvd19pbnRlcnNlY3Rpb24gPC0gc25wc19pbnRlcnNlY3Rpb25zKAogIGNzX3NlLCBsb3dfc2V0cywgc3RhcnRfY29sdW1uID0gImFubm90X2NvZGluZ19zdGFydCIsCiAgZW5kX2NvbHVtbiA9ICJhbm5vdF9jb2RpbmdfZW5kIiwgY2hyX2NvbHVtbiA9ICJhbm5vdF9zZXF1ZW5jZV9pZCIpCmxvd19pbnRlcnNlY3Rpb24KYGBgCgpgYGB7cn0KbG93X3ZzX2dlbmVzIDwtIHNucHNfdnNfZ2VuZXMoY3Nfc2UsIGxvd19zZXRzLCBzdGFydF9jb2x1bW4gPSAiYW5ub3RfY29kaW5nX3N0YXJ0IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZW5kX2NvbHVtbiA9ICJhbm5vdF9jb2RpbmdfZW5kIiwgY2hyX2NvbHVtbiA9ICJhbm5vdF9zZXF1ZW5jZV9pZCIpCmBgYAo=

Examining persistence clinical samples: 202510

atb abelew@gmail.com

2026-01-29