This document is intended to provide a general overview of the TMRC2 samples which have thus far been sequenced. In some cases, this includes only those samples starting in 2019; in other instances I am including our previous (2015-2016) samples.
In all cases the processing performed was:
The analyses in this document use the matrices of counts/gene from #3 and variants/position from #4 in order to provide some images and metrics describing the samples we have sequenced so far.
Everything which follows depends on the existing TriTrypDB annotations, revision 46, circa 2019. The following block loads a database of these annotations and turns it into a matrix where the rows are genes and the columns are all the annotation types provided by TriTrypDB.
The same database was used to create a matrix of orthologous genes between L.panamensis and all of the other species in the TriTrypDB.
## Quietly attach EuPathDB and the L. panamensis OrgDb package; sm() keeps
## the package startup chatter out of the report.
tt <- sm(library(EuPathDB))
tt <- sm(library(org.Lpanamensis.MHOMCOL81L13.v46.eg.db))
pan_db <- org.Lpanamensis.MHOMCOL81L13.v46.eg.db
## Every column the OrgDb can return, kept for reference.
all_fields <- columns(pan_db)
## Per-gene annotations keyed by 'gid'; only the 'genes' element is retained.
all_lp_annot <- sm(load_orgdb_annotations(
  pan_db,
  fields = c("annot_gene_entrez_id", "annot_gene_name",
             "annot_strand", "annot_chromosome", "annot_cds_length",
             "annot_gene_product"),
  keytype = "gid"
))$genes
## GO annotations; used below as the go_db argument of simple_goseq().
lp_go <- sm(load_orgdb_go(pan_db))
## Two-column ID/length table; used below as the length_db argument of
## simple_goseq().
lp_lengths <- all_lp_annot[, c("gid", "annot_cds_length")]
colnames(lp_lengths) <- c("ID", "length")
## Ortholog table extracted from the same OrgDb.
orthos <- sm(EuPathDB::extract_eupath_orthologs(db = pan_db))
## The hisat count tables are annotated with the unmodified gene table; the
## exon-style renaming below is intentionally disabled.
hisat_annot <- all_lp_annot
## rownames(hisat_annot) <- paste0("exon_", rownames(hisat_annot), ".E1")
Resequence samples: TMRC20002, TMRC20006, TMRC20004 (maybe TMRC20008 and TMRC20029)
The first lines of the following block create the Expressionset. All of the following lines perform various normalizations and generate plots from it.
## Build the expressionset from the sample sheet, use the zymodeme column as
## the experimental condition, then apply the 8550 non-zero gene filter.
sample_sheet <- glue::glue("sample_sheets/tmrc2_samples_20210512.xlsx")
lp_expt <- sm(create_expt(sample_sheet,
                          file_column = "lpanamensisv36hisatfile",
                          id_column = "hpglidentifier",
                          gene_info = hisat_annot))
lp_expt <- set_expt_conditions(lp_expt, fact = "zymodemecategorical")
lp_expt <- subset_expt(lp_expt, nonzero = 8550)
## The samples (and read coverage) removed when filtering 8550 non-zero genes are:
## TMRC20002 TMRC20006
## 11681227 6670348
## There were 36, now there are 34 samples.
## Library sizes of the remaining samples.
libsizes <- plot_libsize(lp_expt)
libsizes$plot
## I think samples 7,10 should be removed at minimum, probably also 9,11
## Non-zero gene plot for the remaining samples.
nonzero <- plot_nonzero(lp_expt)
nonzero$plot
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Per-sample distribution of counts.
plot_boxplot(lp_expt)
## 2520 entries are 0. We are on a log scale, adding 1 to the data.
Najib’s favorite plots are of course the PCA/TSNE. These are nice to look at in order to get a sense of the relationships between samples. They also provide a good opportunity to see what happens when one applies different normalizations, surrogate analyses, filters, etc. In addition, one may set different experimental factors as the primary ‘condition’ (usually the color of plots) and surrogate ‘batches’.
Column ‘Q’ in the sample sheet, make a categorical version of it with these parameters:
## Turn the historical susceptibility measurement into a categorical factor:
##   NA             -> "unknown"
##   <= 0.35        -> "resistant"
##   (0.35, 0.49)   -> "ambiguous"
##   >= 0.49        -> "sensitive"
## The ambiguous band is written as the open interval between the other two
## cutoffs so that every non-missing value lands in exactly one category.
## Previously values such as 0.355 or 0.485 matched none of the rules and
## kept their numeric string as a 'category'.
starting <- as.numeric(pData(lp_expt)[["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]])
na_idx <- is.na(starting)
## Start everything as "unknown"; only measured samples are reclassified.
sus_categorical <- rep("unknown", length(starting))
## Each mask is combined with !na_idx so the index vectors are NA-free.
resist_idx <- !na_idx & starting <= 0.35
sus_categorical[resist_idx] <- "resistant"
indeterminant_idx <- !na_idx & starting > 0.35 & starting < 0.49
sus_categorical[indeterminant_idx] <- "ambiguous"
susceptible_idx <- !na_idx & starting >= 0.49
sus_categorical[susceptible_idx] <- "sensitive"
## Store the category in the sample metadata so it can be used as a factor.
pData(lp_expt$expressionset)[["sus_category"]] <- sus_categorical
## Use the susceptibility category as the 'batch' factor while keeping the
## existing condition.
clinical_samples <- set_expt_batches(lp_expt, fact = sus_categorical)
## Filtered, cpm-converted, quantile-normalized, log2 data without batch
## adjustment.
clinical_norm <- sm(normalize_expt(clinical_samples, filter = TRUE,
                                   convert = "cpm", norm = "quant",
                                   transform = "log2", batch = FALSE))
zymo_pca <- plot_pca(clinical_norm,
                     plot_title = "PCA of parasite expression values")
pp(image = zymo_pca$plot, file = "images/zymo_pca_sus_shape.png")
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
## Three-dimensional view built from the PCA result above.
zymo_3dpca <- plot_3d_pca(zymo_pca)
zymo_3dpca$plot
## TSNE of the same normalized data.
zymo_tsne <- plot_tsne(clinical_norm,
                       plot_title = "TSNE of parasite expression values")
zymo_tsne$plot
## Same samples, with batch = "svaseq" and without quantile normalization.
clinical_nb <- normalize_expt(clinical_samples, filter = TRUE,
                              convert = "cpm", transform = "log2",
                              batch = "svaseq")
## Removing 142 low-count genes (8636 remaining).
## batch_counts: Before batch/surrogate estimation, 616 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 1614 entries are 0<x<1: 1%.
## Setting 158 low elements to zero.
## transform_counts: Found 158 values equal to 0, adding 1 to the matrix.
clinical_nb_pca <- plot_pca(clinical_nb, plot_title = TRUE)
pp(image = clinical_nb_pca$plot,
   file = "images/clinical_nb_pca_sus_shape.png")
clinical_nb_tsne <- plot_tsne(clinical_nb)
clinical_nb_tsne$plot
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Correlation heatmap of the normalized (not batch-adjusted) data. The title
## is a single string literal that continues onto the next line, so the line
## break is part of the title text.
corheat <- plot_corheat(clinical_norm, title = "Correlation heatmap of parasite expression values
(Same legend as above)")
corheat$plot
## plot_sm() on the same data; judging by the message below it is
## correlation based -- NOTE(review): confirm the exact metric in hpgltools.
plot_sm(clinical_norm)$plot
## Performing correlation.
## Recolour by the clinical outcome column and use the susceptibility
## category as the batch factor.
cf_expt <- set_expt_batches(
  set_expt_conditions(lp_expt, fact = "clinicalcategorical"),
  fact = sus_categorical)
cf_norm <- normalize_expt(cf_expt, filter = TRUE, convert = "cpm",
                          norm = "quant", transform = "log2")
## Removing 142 low-count genes (8636 remaining).
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
start_cf <- plot_pca(cf_norm)
pp(image = start_cf$plot, file = "images/cf_sus_shape.png")
## Same normalization with batch = "svaseq" added.
cf_nb <- normalize_expt(cf_expt, filter = TRUE, convert = "cpm",
                        norm = "quant", transform = "log2",
                        batch = "svaseq")
## Warning in normalize_expt(cf_expt, convert = "cpm", transform = "log2", :
## Quantile normalization and sva do not always play well together.
## Removing 142 low-count genes (8636 remaining).
## batch_counts: Before batch/surrogate estimation, 2 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 2074 entries are 0<x<1: 1%.
## Setting 48 low elements to zero.
## transform_counts: Found 48 values equal to 0, adding 1 to the matrix.
cf_nb_pca <- plot_pca(cf_nb)
## NOTE(review): 'share' in this file name looks like a typo for 'shape'
## (compare images/cf_sus_shape.png); left unchanged in case it is referenced
## elsewhere.
pp(image = cf_nb_pca$plot, file = "images/cf_sus_share_nb.png")
## Normalize the clinical-outcome expressionset and check the first six
## principal components against several metadata factors.
cf_norm <- normalize_expt(cf_expt, filter = TRUE, convert = "cpm",
                          norm = "quant", transform = "log2")
## Removing 142 low-count genes (8636 remaining).
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
test <- pca_information(
  cf_norm,
  num_components = 6,
  plot_pcas = TRUE,
  expt_factors = c("clinicalcategorical", "zymodemecategorical",
                   "pathogenstrain", "passagenumber"))
## More shallow curves in these plots suggest more genes in this principle component.
## Swap the roles: susceptibility category is the condition, zymodeme is the
## batch.
sus_expt <- set_expt_conditions(lp_expt, fact = "sus_category")
sus_expt <- set_expt_batches(sus_expt, fact = "zymodemecategorical")
sus_norm <- normalize_expt(sus_expt, filter = TRUE, convert = "cpm",
                           norm = "quant", transform = "log2")
## Removing 142 low-count genes (8636 remaining).
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
sus_pca <- plot_pca(sus_norm)
sus_pca$plot
## batch = "svaseq" variant (no quantile normalization here).
sus_nb <- normalize_expt(sus_expt, filter = TRUE, convert = "cpm",
                         transform = "log2", batch = "svaseq")
## Removing 142 low-count genes (8636 remaining).
## batch_counts: Before batch/surrogate estimation, 616 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 1614 entries are 0<x<1: 1%.
## Setting 103 low elements to zero.
## transform_counts: Found 103 values equal to 0, adding 1 to the matrix.
sus_nb_pca <- plot_pca(sus_nb)
pp(image = sus_nb_pca$plot, file = "images/sus_nb_pca.png")
The following samples are much lower coverage:
At this time, we do not have very many samples, so the set of metrics/plots is fairly limited. There is really only one factor in the metadata which we can use for performing differential expression analyses, the ‘zymodeme’.
The process of sample estimation takes two primary inputs:
An expressionset is the primary data structure used in R to examine RNASeq data. It comprises annotations, metadata, and expression data. In the case of our processing pipeline, the location of the expression data is provided by the filenames in the metadata.
The following sections perform a series of analyses which seek to elucidate differences between the zymodemes 2.2 and 2.3 either through differential expression or variant profiles.
TODO: Do this with and without sva and compare the results.
## Keep only the samples annotated as zymodeme 2.2 or 2.3.
zy_expt <- subset_expt(lp_expt, subset = "condition=='z2.2'|condition=='z2.3'")
## There were 34, now there are 19 samples.
## Normalized copy used for the heatmaps further down; the differential
## expression calls below start from zy_expt itself.
zy_norm <- normalize_expt(zy_expt, filter = TRUE, convert = "cpm", norm = "quant")
## Removing 168 low-count genes (8610 remaining).
## Run the pairwise contrasts twice so the results can be compared with and
## without surrogate variables. The 'nobatch' run previously also passed
## model_batch = "svaseq", which made it a duplicate of zy_de.
zy_de_nobatch <- sm(all_pairwise(zy_expt, filter = TRUE, model_batch = FALSE))
zy_de <- sm(all_pairwise(zy_expt, filter = TRUE, model_batch = "svaseq"))
## Combined and significant-gene tables are built from the svaseq run and
## written to versioned xlsx files ('ver' must already be defined).
zy_table <- sm(combine_de_tables(zy_de, excel = glue::glue("excel/zy_tables-v{ver}.xlsx")))
zy_sig <- sm(extract_significant_genes(zy_table, excel = glue::glue("excel/zy_sig-v{ver}.xlsx")))
## DESeq2 MA plot for the 2.3 vs 2.2 contrast.
zy_table[["plots"]][["z23_vs_z22"]][["deseq_ma_plots"]][["plot"]]
## Pairwise contrasts by clinical outcome, with the svaseq batch model, and
## the corresponding xlsx exports.
cf_de <- sm(all_pairwise(cf_expt, model_batch = "svaseq", filter = TRUE))
cf_table <- sm(combine_de_tables(
  cf_de,
  excel = glue::glue("excel/cf_tables-v{ver}.xlsx")))
cf_sig <- sm(extract_significant_genes(
  cf_table,
  excel = glue::glue("excel/cf_sig-v{ver}.xlsx")))
## The same three steps with the susceptibility category as the condition.
sus_de <- sm(all_pairwise(sus_expt, model_batch = "svaseq", filter = TRUE))
sus_table <- sm(combine_de_tables(
  sus_de,
  excel = glue::glue("excel/sus_tables-v{ver}.xlsx")))
sus_sig <- sm(extract_significant_genes(
  sus_table,
  excel = glue::glue("excel/sus_sig-v{ver}.xlsx")))
## Error: Sheet 'down_limma_sensitive_vs_ambiguous' does not exist.
## Gene categories more represented in the 2.3 group.
zy_go_up <- sm(simple_goseq(
  sig_genes = zy_sig[["deseq"]][["ups"]][[1]],
  length_db = lp_lengths,
  go_db = lp_go))
## Gene categories more represented in the 2.2 group.
zy_go_down <- sm(simple_goseq(
  sig_genes = zy_sig[["deseq"]][["downs"]][[1]],
  length_db = lp_lengths,
  go_db = lp_go))
## Venn diagrams of the up/down genes for the first contrast.
zy_table[["venns"]][[1]][["p_lfc1"]][["up_noweight"]]
zy_table[["venns"]][[1]][["p_lfc1"]][["down_noweight"]]
## DESeq2 MA plot of the first contrast.
zy_table[["plots"]][[1]][["deseq_ma_plots"]][["plot"]]
## Over-represented biological-process p-value plots, up then down.
zy_go_up$pvalue_plots$bpp_plot_over
zy_go_down$pvalue_plots$bpp_plot_over
Najib read me an email listing off the gene names associated with the zymodeme classification. I took those names and cross referenced them against the Leishmania panamensis gene annotations and found the following:
They are:
Given these 6 gene IDs (NH has two gene IDs associated with it), I can do some looking for specific differences among the various samples.
The following creates a colorspace (red to green) heatmap showing the observed expression of these genes in every sample.
## Gene IDs for the zymodeme-associated enzymes and, position by position,
## their short names. The trailing "other" entry matches no gene ID.
my_genes <- c("LPAL13_120010900", "LPAL13_340013000", "LPAL13_000054100",
"LPAL13_140006100", "LPAL13_180018500", "LPAL13_320022300",
"other")
my_names <- c("ALAT", "ASAT", "G6PD", "NHv1", "NHv2", "MPI", "other")
## Keep only those genes in the normalized zymodeme expressionset.
zymo_expt <- exclude_genes_expt(zy_norm, ids = my_genes, method = "keep")
## Before removal, there were 8610 genes, now there are 6.
## There are 19 samples which kept less than 90 percent counts.
## TMRC20001 TMRC20004 TMRC20005 TMRC20009 TMRC20010 TMRC20011 TMRC20012 TMRC20013
## 0.1284 0.1179 0.1289 0.1106 0.1078 0.1076 0.1180 0.1181
## TMRC20014 TMRC20015 TMRC20016 TMRC20017 TMRC20018 TMRC20021 TMRC20022 TMRC20037
## 0.1065 0.1124 0.1040 0.1039 0.1119 0.1043 0.1273 0.1077
## TMRC20038 TMRC20039 TMRC20041
## 0.1104 0.1265 0.1149
## Look each retained row up in my_genes so every label belongs to the row it
## is drawn next to. Passing my_names directly supplied seven labels for six
## rows and relied on the rows being in my_genes order.
zymo_labels <- my_names[match(rownames(exprs(zymo_expt)), my_genes)]
test <- plot_sample_heatmap(zymo_expt, row_label = zymo_labels)
In contrast, the following plots take the set of genes which are shared among all differential expression methods (|lfc| >= 1.0 and adjp <= 0.05) and use them to make categories of genes which are increased in 2.3 or 2.2.
## Genes called significant by every method in the zymodeme comparison.
shared_zymo <- intersect_significant(zy_table)
## Deleting the file excel/intersect_significant.xlsx before writing the tables.
## The png file name did not exist: /tmp/RtmpluwWvL/figureImage2686111d05598.png
## The png file name did not exist: /tmp/RtmpluwWvL/figureImage268611e04d22c.png
## Shared 'up' genes of the first contrast.
up_shared <- shared_zymo[["ups"]][[1]][["data"]][["all"]]
rownames(up_shared)
## [1] "LPAL13_000033300" "LPAL13_000012000" "LPAL13_310031300" "LPAL13_000038400"
## [5] "LPAL13_000038500" "LPAL13_340039600" "LPAL13_000012100" "LPAL13_050005000"
## [9] "LPAL13_310039200" "LPAL13_310031000" "LPAL13_210015500" "LPAL13_270034100"
## [13] "LPAL13_250006300" "LPAL13_200013000" "LPAL13_180013900" "LPAL13_340039700"
## [17] "LPAL13_240009700" "LPAL13_000041000" "LPAL13_170015400" "LPAL13_330021800"
## [21] "LPAL13_140019300" "LPAL13_000052700" "LPAL13_350044000" "LPAL13_140019100"
## [25] "LPAL13_230011200" "LPAL13_210005000" "LPAL13_350073200" "LPAL13_320038700"
## [29] "LPAL13_000045100" "LPAL13_140019200" "LPAL13_250025700" "LPAL13_110015700"
## [33] "LPAL13_310028500" "LPAL13_230011500" "LPAL13_000010600" "LPAL13_300031600"
## [37] "LPAL13_230011400" "LPAL13_290016200" "LPAL13_230011300" "LPAL13_160014500"
upshared_expt <- exclude_genes_expt(zy_norm, ids = rownames(up_shared), method = "keep")
## Before removal, there were 8610 genes, now there are 40.
## There are 19 samples which kept less than 90 percent counts.
## TMRC20001 TMRC20004 TMRC20005 TMRC20009 TMRC20010 TMRC20011 TMRC20012 TMRC20013
## 0.5356 0.1920 0.1769 0.2296 0.5546 0.2145 0.1687 0.5566
## TMRC20014 TMRC20015 TMRC20016 TMRC20017 TMRC20018 TMRC20021 TMRC20022 TMRC20037
## 0.2280 0.6536 0.4918 0.2996 0.6599 0.6270 0.1976 0.7428
## TMRC20038 TMRC20039 TMRC20041
## 0.8350 0.2498 0.2058
## Label rows with the subset expressionset's own row names: the DE table is
## not guaranteed to list the genes in the same order as the expression
## matrix, so rownames(up_shared) could attach IDs to the wrong rows.
test <- plot_sample_heatmap(upshared_expt, row_label = rownames(exprs(upshared_expt)))
## Shared 'down' genes of the first contrast, handled the same way.
down_shared <- shared_zymo[["downs"]][[1]][["data"]][["all"]]
downshared_expt <- exclude_genes_expt(zy_norm, ids = rownames(down_shared), method = "keep")
## Before removal, there were 8610 genes, now there are 80.
## There are 19 samples which kept less than 90 percent counts.
## TMRC20001 TMRC20004 TMRC20005 TMRC20009 TMRC20010 TMRC20011 TMRC20012 TMRC20013
## 0.3529 1.3024 1.3099 1.4313 0.2267 1.2342 1.0394 0.2470
## TMRC20014 TMRC20015 TMRC20016 TMRC20017 TMRC20018 TMRC20021 TMRC20022 TMRC20037
## 1.3079 0.2921 0.2820 1.3420 0.3103 0.2909 1.5385 0.3659
## TMRC20038 TMRC20039 TMRC20041
## 0.3413 1.4869 1.4842
test <- plot_sample_heatmap(downshared_expt, row_label = rownames(exprs(downshared_expt)))
In this block, I am combining our previous samples and our new samples in the hopes of finding variant positions which help elucidate aspects of either the new or old samples. In other words, we do not know the zymodeme annotations for the old samples nor the strain identities (or the shortcut ‘chronic vs. self-healing’) for the new samples. We may be able to make educated guesses given the variant profiles. There are some differences in how the previous and current data sets were analyzed (though I have since redone the old samples so it should be trivial to remove those differences now).
## Load the earlier sample sheet; its counts come from the 'tophat2file'
## column.
old_expt <- sm(create_expt("sample_sheets/tmrc2_samples_20191203.xlsx",
                           file_column = "tophat2file"))
## Strip any 'exon_' prefix and '.E1' suffix from the current data's row names.
tt <- lp_expt$expressionset
rownames(tt) <- gsub(
  pattern = "\\.E1$", replacement = "",
  x = gsub(pattern = "^exon_", replacement = "", x = rownames(tt)))
lp_expt$expressionset <- tt
## Same idea for the old data, whose suffix is '.1' rather than '.E1'.
tt <- old_expt$expressionset
rownames(tt) <- gsub(
  pattern = "\\.1$", replacement = "",
  x = gsub(pattern = "^exon_", replacement = "", x = rownames(tt)))
old_expt$expressionset <- tt
## Only samples with a 'bcftable' entry can contribute variants.
lp_snp <- subset_expt(lp_expt, subset="!is.na(pData(lp_expt)[['bcftable']])")
## There were 34, now there are 28 samples.
## Count variants for the new and old samples, then merge the two sets.
new_snps <- sm(count_expt_snps(lp_snp, annot_column = "bcftable"))
old_snps <- sm(count_expt_snps(old_expt, snp_column = 2,
                               annot_column = "bcftable"))
both_snps <- combine_expts(new_snps, old_snps)
both_norm <- sm(normalize_expt(both_snps, filter = TRUE, convert = "cpm",
                               transform = "log2"))
## strains <- both_norm[["design"]][["strain"]]
both_norm <- set_expt_conditions(both_norm, fact = "strain")
The following plot shows the SNP profiles of all samples (old and new) where the colors at the top show either the 2.2 strains (orange), 2.3 strains (green), the previous samples (purple), or the various lab strains (pink etc).
## Distance heatmap of the normalized variant profiles, written to disk.
tt <- plot_disheat(both_norm)
pp(image = tt, file = "images/raw_snp_disheat.png", width = 12, height = 12)
## Group the (un-normalized) variant counts by condition.
snp_sets <- get_snp_sets(both_snps, factor = "condition")
## The factor z2.3 has 7 rows.
## The factor z2.2 has 8 rows.
## The factor unknown has 13 rows.
## The factor sh has 13 rows.
## The factor chr has 14 rows.
## The factor inf has 6 rows.
## Iterating over 727 elements.
## Combine the expression data of both cohorts and relate the variant sets to
## genes, using the 'chromosome' annotation column.
both_expt <- combine_expts(lp_expt, old_expt)
snp_genes <- sm(snps_vs_genes(both_expt, snp_sets,
                              expt_name_col = "chromosome"))
## Column-wise summary of the per-condition medians table.
summary(snp_sets[["medians"]])
## z2.3 z2.2 unknown sh
## Min. : 0 Min. : 0 Min. : 0 Min. : 0.0
## 1st Qu.: 0 1st Qu.: 0 1st Qu.: 0 1st Qu.: 0.0
## Median : 0 Median : 0 Median : 0 Median : 0.0
## Mean : 15 Mean : 0 Mean : 0 Mean : 0.1
## 3rd Qu.: 0 3rd Qu.: 0 3rd Qu.: 0 3rd Qu.: 0.0
## Max. :6407 Max. :4868 Max. :6539 Max. :1229.0
## chr inf
## Length:635506 Min. : 0.00
## Class :character 1st Qu.: 0.00
## Mode :character Median : 0.00
## Mean : 0.01
## 3rd Qu.: 0.00
## Max. :151.00
## First 100 variant positions of the per-condition medians table.
head(snp_sets[["medians"]], n = 100)
## z2.3 z2.2 unknown sh
## chr_LPAL13-SCAF000001_pos_1019_ref_G_alt_A 95 0 0 0
## chr_LPAL13-SCAF000001_pos_106_ref_A_alt_G 0 0 0 0
## chr_LPAL13-SCAF000001_pos_1092_ref_A_alt_G 93 0 0 0
## chr_LPAL13-SCAF000001_pos_111_ref_A_alt_G 0 0 0 0
## chr_LPAL13-SCAF000001_pos_1138_ref_C_alt_A 0 0 0 0
## chr_LPAL13-SCAF000001_pos_1147_ref_C_alt_A 0 0 0 0
## chr_LPAL13-SCAF000001_pos_1290_ref_A_alt_G 92 0 0 0
## chr_LPAL13-SCAF000001_pos_1394_ref_G_alt_A 0 0 0 0
## chr_LPAL13-SCAF000001_pos_1424_ref_A_alt_C 0 0 0 0
## chr_LPAL13-SCAF000001_pos_1477_ref_T_alt_C 0 0 0 0
## chr_LPAL13-SCAF000001_pos_148_ref_T_alt_A 0 0 0 0
## chr_LPAL13-SCAF000001_pos_1502_ref_G_alt_A 0 0 0 0
## chr_LPAL13-SCAF000001_pos_1507_ref_G_alt_A 0 0 0 0
## chr_LPAL13-SCAF000001_pos_1535_ref_A_alt_G 0 0 0 0
## chr_LPAL13-SCAF000001_pos_1622_ref_C_alt_T 0 0 0 0
## chr_LPAL13-SCAF000001_pos_1647_ref_G_alt_A 0 0 0 0
## chr_LPAL13-SCAF000001_pos_1672_ref_A_alt_G 0 0 0 0
## chr_LPAL13-SCAF000001_pos_179_ref_G_alt_A 0 0 0 0
## chr_LPAL13-SCAF000001_pos_188_ref_T_alt_C 0 0 0 0
## chr_LPAL13-SCAF000001_pos_261_ref_G_alt_A 14 0 0 0
## chr_LPAL13-SCAF000001_pos_56_ref_T_alt_C 0 0 0 0
## chr_LPAL13-SCAF000001_pos_583_ref_C_alt_T 0 0 0 0
## chr_LPAL13-SCAF000001_pos_81_ref_A_alt_T 0 0 0 0
## chr_LPAL13-SCAF000001_pos_870_ref_T_alt_C 0 0 0 0
## chr_LPAL13-SCAF000001_pos_874_ref_T_alt_C 169 0 0 0
## chr_LPAL13-SCAF000001_pos_887_ref_G_alt_C 0 0 0 0
## chr_LPAL13-SCAF000001_pos_931_ref_T_alt_C 0 0 0 0
## chr_LPAL13-SCAF000002_pos_1125_ref_G_alt_A 0 0 0 0
## chr_LPAL13-SCAF000002_pos_1135_ref_A_alt_G 0 0 0 0
## chr_LPAL13-SCAF000002_pos_1159_ref_C_alt_T 0 0 0 0
## chr_LPAL13-SCAF000002_pos_1189_ref_C_alt_T 0 0 0 0
## chr_LPAL13-SCAF000002_pos_133_ref_C_alt_T 0 0 0 0
## chr_LPAL13-SCAF000002_pos_1504_ref_A_alt_G 0 0 0 0
## chr_LPAL13-SCAF000002_pos_1549_ref_C_alt_T 0 0 0 0
## chr_LPAL13-SCAF000002_pos_157_ref_G_alt_A 0 0 0 0
## chr_LPAL13-SCAF000002_pos_1596_ref_G_alt_A 0 0 0 0
## chr_LPAL13-SCAF000002_pos_1630_ref_C_alt_T 0 0 0 0
## chr_LPAL13-SCAF000002_pos_1660_ref_C_alt_T 0 0 0 0
## chr_LPAL13-SCAF000002_pos_175_ref_C_alt_T 0 0 0 0
## chr_LPAL13-SCAF000002_pos_1803_ref_C_alt_T 0 0 0 0
## chr_LPAL13-SCAF000002_pos_1837_ref_A_alt_G 0 0 0 0
## chr_LPAL13-SCAF000002_pos_231_ref_G_alt_A 0 0 0 0
## chr_LPAL13-SCAF000002_pos_275_ref_A_alt_G 0 0 0 0
## chr_LPAL13-SCAF000002_pos_297_ref_A_alt_G 0 0 0 0
## chr_LPAL13-SCAF000002_pos_302_ref_T_alt_C 0 0 0 0
## chr_LPAL13-SCAF000002_pos_389_ref_T_alt_A 0 0 0 0
## chr_LPAL13-SCAF000002_pos_415_ref_A_alt_G 0 0 0 0
## chr_LPAL13-SCAF000002_pos_422_ref_C_alt_T 0 0 0 0
## chr_LPAL13-SCAF000002_pos_521_ref_C_alt_G 0 0 0 0
## chr_LPAL13-SCAF000002_pos_62_ref_A_alt_G 0 0 0 0
## chr_LPAL13-SCAF000002_pos_762_ref_A_alt_C 0 0 0 0
## chr_LPAL13-SCAF000002_pos_977_ref_T_alt_C 11 0 0 0
## chr_LPAL13-SCAF000003_pos_10002_ref_G_alt_A 15 0 0 0
## chr_LPAL13-SCAF000003_pos_1132_ref_T_alt_C 0 0 0 0
## chr_LPAL13-SCAF000003_pos_1170_ref_C_alt_T 0 0 0 0
## chr_LPAL13-SCAF000003_pos_123_ref_T_alt_C 30 0 0 0
## chr_LPAL13-SCAF000003_pos_124_ref_T_alt_G 30 0 0 0
## chr_LPAL13-SCAF000003_pos_1310_ref_C_alt_T 0 0 0 0
## chr_LPAL13-SCAF000003_pos_1392_ref_C_alt_G 0 0 0 0
## chr_LPAL13-SCAF000003_pos_1488_ref_A_alt_G 0 0 0 0
## chr_LPAL13-SCAF000003_pos_1501_ref_C_alt_G 0 0 0 0
## chr_LPAL13-SCAF000003_pos_1518_ref_G_alt_A 0 0 0 0
## chr_LPAL13-SCAF000003_pos_177_ref_G_alt_A 0 0 0 0
## chr_LPAL13-SCAF000003_pos_19_ref_T_alt_C 0 0 0 0
## chr_LPAL13-SCAF000003_pos_2560_ref_T_alt_C 0 0 0 0
## chr_LPAL13-SCAF000003_pos_281_ref_G_alt_A 0 0 0 0
## chr_LPAL13-SCAF000003_pos_2964_ref_T_alt_C 6061 4095 6045 453
## chr_LPAL13-SCAF000003_pos_3409_ref_A_alt_G 0 0 0 0
## chr_LPAL13-SCAF000003_pos_4517_ref_T_alt_C 0 0 0 0
## chr_LPAL13-SCAF000003_pos_530_ref_C_alt_T 0 0 0 0
## chr_LPAL13-SCAF000003_pos_5637_ref_A_alt_G 0 0 0 0
## chr_LPAL13-SCAF000003_pos_5646_ref_A_alt_G 343 0 0 0
## chr_LPAL13-SCAF000003_pos_5653_ref_A_alt_G 0 0 0 0
## chr_LPAL13-SCAF000003_pos_5810_ref_T_alt_C 0 0 0 0
## chr_LPAL13-SCAF000003_pos_5882_ref_C_alt_T 0 0 0 0
## chr_LPAL13-SCAF000003_pos_6037_ref_G_alt_T 0 0 0 0
## chr_LPAL13-SCAF000003_pos_6360_ref_C_alt_T 0 0 0 0
## chr_LPAL13-SCAF000003_pos_8678_ref_G_alt_A 0 0 0 0
## chr_LPAL13-SCAF000003_pos_8776_ref_A_alt_C 0 0 0 0
## chr_LPAL13-SCAF000003_pos_9085_ref_G_alt_A 0 0 0 0
## chr_LPAL13-SCAF000003_pos_9096_ref_G_alt_C 0 0 0 0
## chr_LPAL13-SCAF000003_pos_9189_ref_C_alt_A 0 0 0 0
## chr_LPAL13-SCAF000003_pos_9313_ref_G_alt_A 0 0 0 0
## chr_LPAL13-SCAF000003_pos_9512_ref_C_alt_T 1115 0 0 0
## chr_LPAL13-SCAF000003_pos_9562_ref_T_alt_C 0 0 0 0
## chr_LPAL13-SCAF000003_pos_9563_ref_A_alt_C 791 0 0 0
## chr_LPAL13-SCAF000003_pos_9589_ref_T_alt_C 0 0 0 0
## chr_LPAL13-SCAF000003_pos_9618_ref_G_alt_T 0 0 0 0
## chr_LPAL13-SCAF000003_pos_9633_ref_C_alt_T 0 0 0 0
## chr_LPAL13-SCAF000003_pos_9644_ref_T_alt_C 134 0 0 0
## chr_LPAL13-SCAF000003_pos_9697_ref_G_alt_A 0 0 0 0
## chr_LPAL13-SCAF000003_pos_9703_ref_A_alt_G 17 0 0 0
## chr_LPAL13-SCAF000003_pos_9732_ref_T_alt_A 12 0 0 0
## chr_LPAL13-SCAF000003_pos_9779_ref_T_alt_C 0 0 0 0
## chr_LPAL13-SCAF000003_pos_9781_ref_A_alt_G 0 0 0 0
## chr_LPAL13-SCAF000003_pos_9880_ref_G_alt_A 0 0 0 0
## chr_LPAL13-SCAF000003_pos_9903_ref_G_alt_A 0 0 0 0
## chr_LPAL13-SCAF000003_pos_9919_ref_A_alt_C 0 0 0 0
## chr_LPAL13-SCAF000003_pos_9964_ref_G_alt_A 0 0 0 0
## chr_LPAL13-SCAF000003_pos_9980_ref_A_alt_C 0 0 0 0
## chr inf
## chr_LPAL13-SCAF000001_pos_1019_ref_G_alt_A LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_106_ref_A_alt_G LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_1092_ref_A_alt_G LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_111_ref_A_alt_G LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_1138_ref_C_alt_A LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_1147_ref_C_alt_A LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_1290_ref_A_alt_G LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_1394_ref_G_alt_A LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_1424_ref_A_alt_C LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_1477_ref_T_alt_C LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_148_ref_T_alt_A LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_1502_ref_G_alt_A LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_1507_ref_G_alt_A LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_1535_ref_A_alt_G LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_1622_ref_C_alt_T LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_1647_ref_G_alt_A LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_1672_ref_A_alt_G LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_179_ref_G_alt_A LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_188_ref_T_alt_C LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_261_ref_G_alt_A LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_56_ref_T_alt_C LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_583_ref_C_alt_T LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_81_ref_A_alt_T LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_870_ref_T_alt_C LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_874_ref_T_alt_C LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_887_ref_G_alt_C LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000001_pos_931_ref_T_alt_C LPAL13-SCAF000001 0
## chr_LPAL13-SCAF000002_pos_1125_ref_G_alt_A LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_1135_ref_A_alt_G LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_1159_ref_C_alt_T LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_1189_ref_C_alt_T LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_133_ref_C_alt_T LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_1504_ref_A_alt_G LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_1549_ref_C_alt_T LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_157_ref_G_alt_A LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_1596_ref_G_alt_A LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_1630_ref_C_alt_T LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_1660_ref_C_alt_T LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_175_ref_C_alt_T LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_1803_ref_C_alt_T LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_1837_ref_A_alt_G LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_231_ref_G_alt_A LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_275_ref_A_alt_G LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_297_ref_A_alt_G LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_302_ref_T_alt_C LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_389_ref_T_alt_A LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_415_ref_A_alt_G LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_422_ref_C_alt_T LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_521_ref_C_alt_G LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_62_ref_A_alt_G LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_762_ref_A_alt_C LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000002_pos_977_ref_T_alt_C LPAL13-SCAF000002 0
## chr_LPAL13-SCAF000003_pos_10002_ref_G_alt_A LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_1132_ref_T_alt_C LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_1170_ref_C_alt_T LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_123_ref_T_alt_C LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_124_ref_T_alt_G LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_1310_ref_C_alt_T LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_1392_ref_C_alt_G LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_1488_ref_A_alt_G LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_1501_ref_C_alt_G LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_1518_ref_G_alt_A LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_177_ref_G_alt_A LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_19_ref_T_alt_C LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_2560_ref_T_alt_C LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_281_ref_G_alt_A LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_2964_ref_T_alt_C LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_3409_ref_A_alt_G LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_4517_ref_T_alt_C LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_530_ref_C_alt_T LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_5637_ref_A_alt_G LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_5646_ref_A_alt_G LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_5653_ref_A_alt_G LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_5810_ref_T_alt_C LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_5882_ref_C_alt_T LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_6037_ref_G_alt_T LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_6360_ref_C_alt_T LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_8678_ref_G_alt_A LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_8776_ref_A_alt_C LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9085_ref_G_alt_A LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9096_ref_G_alt_C LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9189_ref_C_alt_A LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9313_ref_G_alt_A LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9512_ref_C_alt_T LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9562_ref_T_alt_C LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9563_ref_A_alt_C LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9589_ref_T_alt_C LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9618_ref_G_alt_T LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9633_ref_C_alt_T LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9644_ref_T_alt_C LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9697_ref_G_alt_A LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9703_ref_A_alt_G LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9732_ref_T_alt_A LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9779_ref_T_alt_C LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9781_ref_A_alt_G LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9880_ref_G_alt_A LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9903_ref_G_alt_A LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9919_ref_A_alt_C LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9964_ref_G_alt_A LPAL13-SCAF000003 0
## chr_LPAL13-SCAF000003_pos_9980_ref_A_alt_C LPAL13-SCAF000003 0
## Restrict the combined variant data to the six zymodeme-associated genes.
snp_subset <- sm(snp_subset_genes(
  both_expt,
  both_snps,
  genes = c("LPAL13_120010900", "LPAL13_340013000", "LPAL13_000054100",
            "LPAL13_140006100", "LPAL13_180018500", "LPAL13_320022300")
))
## zymo_heat <- plot_sample_heatmap(snp_subset, row_label = rownames(exprs(snp_subset)))
## Group the new samples' variants by clinical response.
clinical_sets <- get_snp_sets(new_snps, factor = "clinicalresponse")
## The factor Cure has 9 rows.
## The factor Failure has 12 rows.
## The factor Laboratory line has 2 rows.
## The factor Laboratory line miltefosine resistant has only 1 row.
## The factor ND has only 1 row.
## The factor Reference strain has 3 rows.
## Iterating over 686 elements.
## Relate those sets to genes and compute the per-group intersections, both
## keyed on the 'chromosome' annotation column.
clinical_genes <- sm(snps_vs_genes(lp_expt, clinical_sets,
                                   expt_name_col = "chromosome"))
clinical_snps <- snps_intersections(lp_expt, clinical_sets,
                                    chr_column = "chromosome")
## First few positions in the 'Failure' intersection.
head(as.data.frame(clinical_snps$inters[["Failure"]]))
## seqnames start end width strand
## chr_LpaL13-02_pos_205839_ref_C_alt_T LpaL13-02 205839 205840 2 +
## chr_LpaL13-03_pos_107522_ref_T_alt_C LpaL13-03 107522 107523 2 +
## chr_LpaL13-05_pos_161416_ref_T_alt_C LpaL13-05 161416 161417 2 +
## chr_LpaL13-06_pos_342394_ref_G_alt_C LpaL13-06 342394 342395 2 +
## chr_LpaL13-07_pos_280944_ref_A_alt_G LpaL13-07 280944 280945 2 +
## chr_LpaL13-07_pos_387049_ref_C_alt_T LpaL13-07 387049 387050 2 +
## First few positions in the 'Cure' intersection.
head(as.data.frame(clinical_snps$inters[["Cure"]]))
## seqnames start end width strand
## chr_LpaL13-03_pos_189630_ref_C_alt_A LpaL13-03 189630 189631 2 +
## chr_LpaL13-04_pos_37865_ref_G_alt_A LpaL13-04 37865 37866 2 +
## chr_LpaL13-04_pos_37867_ref_A_alt_G LpaL13-04 37867 37868 2 +
## chr_LpaL13-05_pos_340999_ref_G_alt_A LpaL13-05 340999 341000 2 +
## chr_LpaL13-06_pos_288177_ref_C_alt_G LpaL13-06 288177 288178 2 +
## chr_LpaL13-10_pos_203841_ref_C_alt_T LpaL13-10 203841 203842 2 +
## Per-gene counts for each outcome.
head(clinical_snps[["gene_summaries"]][["Failure"]])
## LPAL13_300019900 LPAL13_000017900 LPAL13_100008800 LPAL13_200008500
## 3 2 2 2
## LPAL13_200014300 LPAL13_200021200
## 2 2
head(clinical_snps[["gene_summaries"]][["Cure"]])
## LPAL13_040006400 LPAL13_200013000 LPAL13_200014600 LPAL13_200015100
## 2 2 2 2
## LPAL13_200017900 LPAL13_200019500
## 2 2
## Attach the per-gene Cure/Failure SNP counts to the gene annotations.
##
## Both merges used to be inner joins. That (a) discarded genes seen in only
## one outcome group, and (b) shrank and re-sorted the annotation table before
## it was written back, so it no longer lined up with the rows of the
## expressionset. Outer/left joins plus an explicit re-ordering keep exactly
## one annotation row per original gene, in the original order.
annot <- fData(lp_expt)
gene_order <- rownames(annot)
cure_counts <- as.data.frame(clinical_snps[["gene_summaries"]][["Cure"]])
fail_counts <- as.data.frame(clinical_snps[["gene_summaries"]][["Failure"]])
## all = TRUE keeps genes present in either summary; the missing side is NA.
clinical_interest <- merge(cure_counts, fail_counts, by = "row.names", all = TRUE)
rownames(clinical_interest) <- clinical_interest[["Row.names"]]
clinical_interest[["Row.names"]] <- NULL
colnames(clinical_interest) <- c("cure_snps","fail_snps")
## all.x = TRUE keeps every annotated gene; genes absent from both summaries
## get NA in the two new columns.
annot <- merge(annot, clinical_interest, by = "row.names", all.x = TRUE)
rownames(annot) <- annot[["Row.names"]]
annot[["Row.names"]] <- NULL
## merge() sorts by key, so restore the expressionset's row order.
annot <- annot[gene_order, , drop = FALSE]
fData(lp_expt$expressionset) <- annot
The heatmap produced here should show the variants only for the zymodeme genes.
My thinking is that if we find clusters of closely spaced variant positions, they might provide candidate targets for PCR-based testing.
## Group the variants in new_snps by the "phenotypiccharacteristics" factor.
new_sets <- get_snp_sets(new_snps, factor = "phenotypiccharacteristics")
## The factor 2.2 has 8 rows.
## The factor 2.3 has 7 rows.
## The factor Laboratory line has 2 rows.
## The factor Laboratory line miltefosine resistant has only 1 row.
## The factor Reference strain has 3 rows.
## The factor unknown has 7 rows.
## Iterating over 686 elements.
## List the components of the result and their sizes.
summary(new_sets)
## Length Class Mode
## medians 7 data.frame list
## possibilities 6 -none- character
## intersections 61 -none- list
## chr_data 686 -none- list
## set_names 64 -none- list
## invert_names 64 -none- list
## density 686 -none- numeric
## 1000000: 2.2
## 0100000: 2.3
## NOTE(review): the two keys above have seven digits, while six factor levels
## are reported and the lookups below use the six-digit key "100000" -- confirm.
## Size and type of the set stored under the key "100000".
summary(new_sets[["intersections"]][["100000"]])
## Length Class Mode
## 562 character character
## dim() is NULL here because the element is a plain character vector, as the
## summary above shows.
dim(new_sets$intersections[["100000"]])
## NULL
## Find runs of closely spaced variant positions within one intersection set.
##
## Variant names are expected to look like "chr_<chromosome>_pos_<position>_ref_...".
## The positions of the chosen set are sorted by chromosome and position, and the
## gap to the previous variant on the same chromosome is computed.  A run is a
## stretch of consecutive gaps which are all <= maximum_separation, so a run of
## k gaps spans k + 1 variants.
##
## snp_sets: a list with the elements "intersections" (named list of character
##   vectors of variant names) and "set_names" (named list mapping the same keys
##   to human-readable set names).
## conditions: either a numeric index into "intersections" or the set name to
##   look up in "set_names"; NULL selects the first intersection.
## minimum: the minimum number of consecutive small gaps required for a run.
## maximum_separation: the largest gap (in bases) which still extends a run.
##
## Returns a data frame with the columns "chr" and "pos" (row names are the
## variant names) containing the last variant of every qualifying run.  An
## empty or missing intersection yields a data frame with zero rows.  The first
## variant on a chromosome has no predecessor and therefore never extends a run,
## whatever its coordinate.
sequential_variants <- function(snp_sets, conditions = NULL, minimum = 3, maximum_separation = 3) {
  if (is.null(conditions)) {
    conditions <- 1
  }
  intersection_sets <- snp_sets[["intersections"]]
  intersection_names <- snp_sets[["set_names"]]
  if (is.numeric(conditions)) {
    chosen_intersection <- conditions
  } else {
    intersection_idx <- intersection_names == conditions
    chosen_intersection <- names(intersection_names)[which(intersection_idx)]
    ## Fail with a readable message instead of an obscure subscript error.
    if (length(chosen_intersection) != 1) {
      stop("Expected exactly one set named '", toString(conditions),
           "', found ", length(chosen_intersection), ".", call. = FALSE)
    }
  }

  ## A set name without a matching intersection gives NULL, i.e. zero rows.
  possible_positions <- intersection_sets[[chosen_intersection]]
  pat <- "^chr_(.+)_pos_(.+)_ref_.*$"
  position_table <- data.frame(row.names = possible_positions)
  position_table[["chr"]] <- gsub(pattern = pat, replacement = "\\1",
                                  x = rownames(position_table))
  position_table[["pos"]] <- as.numeric(gsub(pattern = pat, replacement = "\\2",
                                             x = rownames(position_table)))
  n_positions <- nrow(position_table)
  if (n_positions == 0) {
    return(position_table)
  }

  position_idx <- order(position_table[["chr"]], position_table[["pos"]])
  position_table <- position_table[position_idx, , drop = FALSE]

  ## Distance to the previous variant; infinite when there is no previous
  ## variant on the same chromosome, so runs never cross chromosomes.
  chromosomes <- position_table[["chr"]]
  same_chr <- c(FALSE, chromosomes[-1] == chromosomes[-n_positions])
  gap <- c(Inf, diff(position_table[["pos"]]))
  gap[!same_chr] <- Inf
  sequentials <- gap <= maximum_separation

  ## Run length encoding gives the length of every stretch of small gaps; the
  ## cumulative lengths give the row at which each stretch ends.
  rle_result <- rle(sequentials)
  true_runs <- which(rle_result[["values"]])
  run_ends <- cumsum(rle_result[["lengths"]])[true_runs]
  last_sequential <- numeric(n_positions)
  last_sequential[run_ends] <- rle_result[["lengths"]][true_runs]

  position_table[last_sequential >= minimum, c("chr", "pos")]
}
## Look for runs of closely spaced variants in the sets named "2.2" and "2.3".
zymo22_sequentials <- sequential_variants(new_sets, conditions = "2.2")
zymo23_sequentials <- sequential_variants(new_sets, conditions = "2.3")
snp_genes <- sm(snps_vs_genes(lp_expt, new_sets, expt_name_col = "chromosome"))
## Normalize the variant table: low-count filter, cpm conversion, "quant"
## normalization and a transformation.
## NOTE(review): presumably transform = TRUE selects the default (log2)
## transformation -- confirm.
new_zymo_norm <- normalize_expt(new_snps, filter = TRUE, convert = "cpm", norm = "quant", transform = TRUE)
## Removing 0 low-count genes (517279 remaining).
## transform_counts: Found 3658406 values equal to 0, adding 1 to the matrix.
## Use the "phenotypiccharacteristics" column as the condition of each sample.
new_zymo_norm <- set_expt_conditions(new_zymo_norm, fact = "phenotypiccharacteristics")
zymo_heat <- plot_disheat(new_zymo_norm)
## Restrict the variant data to the six genes listed here.
## NOTE(review): presumably these are the zymodeme genes mentioned in the text
## above -- confirm.
zymo_subset <- snp_subset_genes(lp_expt, new_snps,
genes = c("LPAL13_120010900", "LPAL13_340013000", "LPAL13_000054100",
"LPAL13_140006100", "LPAL13_180018500", "LPAL13_320022300"))
## Warning in .Seqinfo.mergexy(x, y): Each of the 2 combined objects has sequence levels not in the other:
## - in 'x': LPAL13-SCAF000002, LPAL13-SCAF000003, LPAL13-SCAF000004, LPAL13-SCAF000005, LPAL13-SCAF000009, LPAL13-SCAF000013, LPAL13-SCAF000014, LPAL13-SCAF000015, LPAL13-SCAF000018, LPAL13-SCAF000019, LPAL13-SCAF000020, LPAL13-SCAF000022, LPAL13-SCAF000023, LPAL13-SCAF000026, LPAL13-SCAF000029, LPAL13-SCAF000030, LPAL13-SCAF000031, LPAL13-SCAF000032, LPAL13-SCAF000035, LPAL13-SCAF000036, LPAL13-SCAF000037, LPAL13-SCAF000038, LPAL13-SCAF000042, LPAL13-SCAF000043, LPAL13-SCAF000045, LPAL13-SCAF000047, LPAL13-SCAF000049, LPAL13-SCAF000050, LPAL13-SCAF000052, LPAL13-SCAF000054, LPAL13-SCAF000056, LPAL13-SCAF000057, LPAL13-SCAF000058, LPAL13-SCAF000060, LPAL13-SCAF000066, LPAL13-SCAF000067, LPAL13-SCAF000069, LPAL13-SCAF000070, LPAL13-SCAF000073, LPAL13-SCAF000081, LPAL13-SCAF000082, LPAL13-SCAF000083, LPAL13-SCAF000085, LPAL13-SCAF000086, LPAL13-SCAF000088, LPAL13-SCAF000090, LPAL13-SCAF000091, LPAL13-SCAF000092, LPAL13-SCAF000095, LPAL13-SCAF000098, LPAL13-SCAF000101, LPAL13-SCAF000103, LPAL13-SCAF000106, LPAL13-SCAF000109, LPAL13-SCAF000111, LPAL13-SCAF000112, LPAL13-SCAF000113, LPAL13-SCAF000118, LPAL13-SCAF000125, LPAL13-SCAF000126, LPAL13-SCAF000138, LPAL13-SCAF000139, LPAL13-SCAF000140, LPAL13-SCAF000141, LPAL13-SCAF000144, LPAL13-SCAF000145, LPAL13-SCAF000147, LPAL13-SCAF000148, LPAL13-SCAF000150, LPAL13-SCAF000151, LPAL13-SCAF000152, LPAL13-SCAF000154, LPAL13-SCAF000155, LPAL13-SCAF000156, LPAL13-SCAF000157, LPAL13-SCAF000158, LPAL13-SCAF000159, LPAL13-SCAF000160, LPAL13-SCAF000161, LPAL13-SCAF000163, LPAL13-SCAF000164, LPAL13-SCAF000167, LPAL13-SCAF000168, LPAL13-SCAF000169, LPAL13-SCAF000170, LPAL13-SCAF000175, LPAL13-SCAF000177, LPAL13-SCAF000178, LPAL13-SCAF000179, LPAL13-SCAF000180, LPAL13-SCAF000183, LPAL13-SCAF000184, LPAL13-SCAF000185, LPAL13-SCAF000189, LPAL13-SCAF000190, LPAL13-SCAF000192, LPAL13-SCAF000195, LPAL13-SCAF000196, LPAL13-SCAF000198, LPAL13-SCAF000199, LPAL13-SCAF000204, LPAL13-SCAF000207, LPAL13-SCAF000208, LPAL13-SCAF000210, 
LPAL13-SCAF000212, LPAL13-SCAF000213, LPAL13-SCAF000214, LPAL13-SCAF000215, LPAL13-SCAF000216, LPAL13-SCAF000218, LPAL13-SCAF000219, LPAL13-SCAF000221, LPAL13-SCAF000222, LPAL13-SCAF000223, LPAL13-SCAF000224, LPAL13-SCAF000225, LPAL13-SCAF000226, LPAL13-SCAF000228, LPAL13-SCAF000234, LPAL13-SCAF000236, LPAL13-SCAF000238, LPAL13-SCAF000240, LPAL13-SCAF000241, LPAL13-SCAF000242, LPAL13-SCAF000243, LPAL13-SCAF000244, LPAL13-SCAF000246, LPAL13-SCAF000247, LPAL13-SCAF000251, LPAL13-SCAF000252, LPAL13-SCAF000254, LPAL13-SCAF000255, LPAL13-SCAF000257, LPAL13-SCAF000258, LPAL13-SCAF000260, LPAL13-SCAF000262, LPAL13-SCAF000263, LPAL13-SCAF000268, LPAL13-SCAF000269, LPAL13-SCAF000270, LPAL13-SCAF000272, LPAL13-SCAF000273, LPAL13-SCAF000274, LPAL13-SCAF000275, LPAL13-SCAF000276, LPAL13-SCAF000277, LPAL13-SCAF000278, LPAL13-SCAF000279, LPAL13-SCAF000280, LPAL13-SCAF000282, LPAL13-SCAF000283, LPAL13-SCAF000284, LPAL13-SCAF000289, LPAL13-SCAF000290, LPAL13-SCAF000293, LPAL13-SCAF000294, LPAL13-SCAF000297, LPAL13-SCAF000298, LPAL13-SCAF000299, LPAL13-SCAF000304, LPAL13-SCAF000305, LPAL13-SCAF000306, LPAL13-SCAF000307, LPAL13-SCAF000308, LPAL13-SCAF000311, LPAL13-SCAF000312, LPAL13-SCAF000315, LPAL13-SCAF000318, LPAL13-SCAF000323, LPAL13-SCAF000324, LPAL13-SCAF000325, LPAL13-SCAF000327, LPAL13-SCAF000329, LPAL13-SCAF000331, LPAL13-SCAF000332, LPAL13-SCAF000333, LPAL13-SCAF000334, LPAL13-SCAF000336, LPAL13-SCAF000341, LPAL13-SCAF000342, LPAL13-SCAF000343, LPAL13-SCAF000344, LPAL13-SCAF000345, LPAL13-SCAF000346, LPAL13-SCAF000348, LPAL13-SCAF000349, LPAL13-SCAF000350, LPAL13-SCAF000351, LPAL13-SCAF000352, LPAL13-SCAF000353, LPAL13-SCAF000354, LPAL13-SCAF000355, LPAL13-SCAF000356, LPAL13-SCAF000357, LPAL13-SCAF000359, LPAL13-SCAF000360, LPAL13-SCAF000361, LPAL13-SCAF000362, LPAL13-SCAF000365, LPAL13-SCAF000366, LPAL13-SCAF000369, LPAL13-SCAF000371, LPAL13-SCAF000372, LPAL13-SCAF000373, LPAL13-SCAF000375, LPAL13-SCAF000376, LPAL13-SCAF000377, LPAL13-SCAF000378, LPAL13-SCAF000379, 
LPAL13-SCAF000380, LPAL13-SCAF000381, LPAL13-SCAF000382, LPAL13-SCAF000383, LPAL13-SCAF000384, LPAL13-SCAF000385, LPAL13-SCAF000386, LPAL13-SCAF000387, LPAL13-SCAF000389, LPAL13-SCAF000390, LPAL13-SCAF000392, LPAL13-SCAF000393, LPAL13-SCAF000394, LPAL13-SCAF000395, LPAL13-SCAF000396, LPAL13-SCAF000398, LPAL13-SCAF000399, LPAL13-SCAF000402, LPAL13-SCAF000404, LPAL13-SCAF000406, LPAL13-SCAF000407, LPAL13-SCAF000408, LPAL13-SCAF000409, LPAL13-SCAF000410, LPAL13-SCAF000411, LPAL13-SCAF000412, LPAL13-SCAF000413, LPAL13-SCAF000414, LPAL13-SCAF000416, LPAL13-SCAF000418, LPAL13-SCAF000422, LPAL13-SCAF000423, LPAL13-SCAF000425, LPAL13-SCAF000427, LPAL13-SCAF000428, LPAL13-SCAF000429, LPAL13-SCAF000431, LPAL13-SCAF000433, LPAL13-SCAF000435, LPAL13-SCAF000437, LPAL13-SCAF000438, LPAL13-SCAF000439, LPAL13-SCAF000441, LPAL13-SCAF000442, LPAL13-SCAF000443, LPAL13-SCAF000444, LPAL13-SCAF000445, LPAL13-SCAF000449, LPAL13-SCAF000450, LPAL13-SCAF000451, LPAL13-SCAF000452, LPAL13-SCAF000454, LPAL13-SCAF000455, LPAL13-SCAF000457, LPAL13-SCAF000458, LPAL13-SCAF000462, LPAL13-SCAF000464, LPAL13-SCAF000466, LPAL13-SCAF000467, LPAL13-SCAF000472, LPAL13-SCAF000473, LPAL13-SCAF000474, LPAL13-SCAF000475, LPAL13-SCAF000476, LPAL13-SCAF000478, LPAL13-SCAF000479, LPAL13-SCAF000480, LPAL13-SCAF000481, LPAL13-SCAF000482, LPAL13-SCAF000485, LPAL13-SCAF000487, LPAL13-SCAF000489, LPAL13-SCAF000493, LPAL13-SCAF000494, LPAL13-SCAF000497, LPAL13-SCAF000498, LPAL13-SCAF000499, LPAL13-SCAF000501, LPAL13-SCAF000502, LPAL13-SCAF000504, LPAL13-SCAF000506, LPAL13-SCAF000509, LPAL13-SCAF000510, LPAL13-SCAF000513, LPAL13-SCAF000514, LPAL13-SCAF000516, LPAL13-SCAF000517, LPAL13-SCAF000518, LPAL13-SCAF000519, LPAL13-SCAF000520, LPAL13-SCAF000521, LPAL13-SCAF000523, LPAL13-SCAF000524, LPAL13-SCAF000525, LPAL13-SCAF000526, LPAL13-SCAF000530, LPAL13-SCAF000531, LPAL13-SCAF000534, LPAL13-SCAF000545, LPAL13-SCAF000546, LPAL13-SCAF000550, LPAL13-SCAF000551, LPAL13-SCAF000557, LPAL13-SCAF000561, LPAL13-SCAF000565, 
LPAL13-SCAF000571, LPAL13-SCAF000579, LPAL13-SCAF000581, LPAL13-SCAF000584, LPAL13-SCAF000589, LPAL13-SCAF000592, LPAL13-SCAF000594, LPAL13-SCAF000595, LPAL13-SCAF000596, LPAL13-SCAF000597, LPAL13-SCAF000602, LPAL13-SCAF000604, LPAL13-SCAF000606, LPAL13-SCAF000608, LPAL13-SCAF000609, LPAL13-SCAF000612, LPAL13-SCAF000613, LPAL13-SCAF000615, LPAL13-SCAF000620, LPAL13-SCAF000621, LPAL13-SCAF000623, LPAL13-SCAF000624, LPAL13-SCAF000629, LPAL13-SCAF000630, LPAL13-SCAF000631, LPAL13-SCAF000632, LPAL13-SCAF000633, LPAL13-SCAF000634, LPAL13-SCAF000635, LPAL13-SCAF000638, LPAL13-SCAF000640, LPAL13-SCAF000642, LPAL13-SCAF000647, LPAL13-SCAF000648, LPAL13-SCAF000657, LPAL13-SCAF000658, LPAL13-SCAF000660, LPAL13-SCAF000662, LPAL13-SCAF000663, LPAL13-SCAF000664, LPAL13-SCAF000665, LPAL13-SCAF000667, LPAL13-SCAF000669, LPAL13-SCAF000670, LPAL13-SCAF000671, LPAL13-SCAF000674, LPAL13-SCAF000675, LPAL13-SCAF000676, LPAL13-SCAF000677, LPAL13-SCAF000678, LPAL13-SCAF000683, LPAL13-SCAF000684, LPAL13-SCAF000685, LPAL13-SCAF000686, LPAL13-SCAF000687, LPAL13-SCAF000689, LPAL13-SCAF000690, LPAL13-SCAF000691, LPAL13-SCAF000692, LPAL13-SCAF000693, LPAL13-SCAF000694, LPAL13-SCAF000699, LPAL13-SCAF000701, LPAL13-SCAF000702, LPAL13-SCAF000703, LPAL13-SCAF000705, LPAL13-SCAF000706, LPAL13-SCAF000708, LPAL13-SCAF000709, LPAL13-SCAF000710, LPAL13-SCAF000712, LPAL13-SCAF000715, LPAL13-SCAF000718, LPAL13-SCAF000721, LPAL13-SCAF000725, LPAL13-SCAF000728, LPAL13-SCAF000729, LPAL13-SCAF000730, LPAL13-SCAF000731, LPAL13-SCAF000733, LPAL13-SCAF000736, LPAL13-SCAF000739, LPAL13-SCAF000740, LPAL13-SCAF000741, LPAL13-SCAF000742, LPAL13-SCAF000743, LPAL13-SCAF000745, LPAL13-SCAF000746, LPAL13-SCAF000747, LPAL13-SCAF000749, LPAL13-SCAF000750, LPAL13-SCAF000751, LPAL13-SCAF000752, LPAL13-SCAF000753, LPAL13-SCAF000754, LPAL13-SCAF000755, LPAL13-SCAF000756, LPAL13-SCAF000757, LPAL13-SCAF000758, LPAL13-SCAF000759, LPAL13-SCAF000763, LPAL13-SCAF000764, LPAL13-SCAF000765, LPAL13-SCAF000766, LPAL13-SCAF000767, 
LPAL13-SCAF000768, LPAL13-SCAF000769, LPAL13-SCAF000770, LPAL13-SCAF000771, LPAL13-SCAF000773, LPAL13-SCAF000774, LPAL13-SCAF000775, LPAL13-SCAF0007
## Before removal, there were 517279 genes, now there are 82.
## There are 28 samples which kept less than 90 percent counts.
## tmrc20001 tmrc20004 tmrc20005 tmrc20007 tmrc20008 tmrc20009 tmrc20010 tmrc20011
## 0.03704 0.00000 0.04172 0.05308 0.04589 0.00000 0.02772 0.02499
## tmrc20012 tmrc20013 tmrc20014 tmrc20015 tmrc20016 tmrc20017 tmrc20018 tmrc20019
## 0.00000 0.02938 0.01836 0.02622 0.02636 0.02029 0.03281 0.07991
## tmrc20020 tmrc20021 tmrc20022 tmrc20024 tmrc20025 tmrc20026 tmrc20027 tmrc20028
## 0.07243 0.03243 0.00000 0.04054 0.06334 0.08188 0.05991 0.07737
## tmrc20029 tmrc20031 tmrc20032 tmrc20033
## 0.00000 0.04589 0.03713 0.00000
## Use the "phenotypiccharacteristics" column as the condition of each sample
## in the gene-restricted variant subset.
zymo_subset <- set_expt_conditions(zymo_subset, fact = "phenotypiccharacteristics")
## NOTE(review): the disabled call below refers to snp_subset, which is not
## defined in this section; presumably zymo_subset was meant -- confirm before
## re-enabling it.
## zymo_heat <- plot_sample_heatmap(zymo_subset, row_label = rownames(exprs(snp_subset)))
## Annotated correlation heatmap of the samples in both_norm: columns are
## labelled with zymodeme and clinical outcome, rows with strain.
correlations <- hpgl_cor(exprs(both_norm))
##hmcols <- colorRampPalette(c("yellow","black","darkblue"))(256)
hmcols <- colorRampPalette(c("darkblue", "beige"))(240)

## Sample metadata, with missing strain/zymodeme values given an explicit label.
des <- both_norm$design
undef_idx <- is.na(des[["strain"]])
zymo_missing_idx <- is.na(des[["phenotypiccharacteristics"]])
des[undef_idx, "strain"] <- "unknown"
des[zymo_missing_idx, "phenotypiccharacteristics"] <- "unknown"

## Column annotations: zymodeme and outcome; a missing outcome becomes "undefined".
col_data <- as.data.frame(des[, c("phenotypiccharacteristics", "clinicalcategorical")])
unknown_clinical <- is.na(col_data[["clinicalcategorical"]])
colnames(col_data) <- c("zymodeme", "outcome")
col_data[unknown_clinical, "outcome"] <- "undefined"

## Row annotations: strain.
row_data <- as.data.frame(des[, "strain"])
colnames(row_data) <- "strain"

## Settings handed to annHeatmap2().
mydendro <- list(clustfun = hclust, lwd = 2.0)
myannot <- list(
  Col = list(data = col_data),
  Row = list(data = row_data))
myclust <- list(cuth = 1.0, col = BrewerClusterCol)
mylabs <- list(
  Row = list(nrow = 4),
  Col = list(nrow = 4))

map1 <- annHeatmap2(
  correlations,
  dendrogram = mydendro, annotation = myannot,
  cluster = myclust, labels = mylabs,
  ## scale controls if the picture is symmetric
  scale = "none",
  col = hmcols)
## Warning in breakColors(breaks, col): more colors than classes: ignoring 27 last
## colors
plot(map1)
The following uses the same information to make some guesses about the strains used in the new samples.
## Annotated correlation heatmap of the samples in both_norm: columns are
## labelled with the condition, rows with strain.
correlations <- hpgl_cor(exprs(both_norm))
##hmcols <- colorRampPalette(c("yellow","black","darkblue"))(256)
hmcols <- colorRampPalette(c("darkblue", "beige"))(170)

## Sample metadata, with a missing strain given an explicit label.
des <- both_norm$design
undef_idx <- is.na(des[["strain"]])
des[undef_idx, "strain"] <- "unknown"

## One annotation column for each axis.
col_data <- as.data.frame(des[, "condition"])
colnames(col_data) <- "condition"
row_data <- as.data.frame(des[, "strain"])
colnames(row_data) <- "strain"

## Settings handed to annHeatmap2().
mydendro <- list(clustfun = hclust, lwd = 2.0)
myannot <- list(
  Col = list(data = col_data),
  Row = list(data = row_data))
myclust <- list(cuth = 1.0, col = BrewerClusterCol)
mylabs <- list(
  Row = list(nrow = 4),
  Col = list(nrow = 4))

## The col = hmcols argument is disabled, so hmcols is defined but not passed.
map1 <- annHeatmap2(
  correlations,
  dendrogram = mydendro, annotation = myannot,
  cluster = myclust, labels = mylabs)
## col = hmcols)
plot(map1)
## Keep only the samples whose condition is z2.2 or z2.3.
pheno <- subset_expt(lp_expt, subset = "condition=='z2.2'|condition=='z2.3'")
## There were 34, now there are 19 samples.
## Count variants per sample from the files named in the "bcftable" column.
## NOTE(review): the recorded errors below report a file named 'NA' and 15 names
## for 19 samples, so presumably some samples have no bcftable entry.  As a
## result pheno_snps is never created and every later statement in this section
## fails -- confirm, and drop those samples before counting.
pheno_snps <- sm(count_expt_snps(pheno, annot_column = "bcftable"))
## Error : 'NA' does not exist in current working directory ('/fs01/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_tmrc_2019').
## Error : 'NA' does not exist in current working directory ('/fs01/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_tmrc_2019').
## Error : 'NA' does not exist in current working directory ('/fs01/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_tmrc_2019').
## Error : 'NA' does not exist in current working directory ('/fs01/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_tmrc_2019').
## Error : 'NA' does not exist in current working directory ('/fs01/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_tmrc_2019').
## Error : 'NA' does not exist in current working directory ('/fs01/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_tmrc_2019').
## Error : 'NA' does not exist in current working directory ('/fs01/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_tmrc_2019').
## Error : 'NA' does not exist in current working directory ('/fs01/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_tmrc_2019').
## Error in Biobase::`sampleNames<-`(`*tmp*`, value = colnames(snp_exprs)): number of new names (15) should equal number of rows in AnnotatedDataFrame (19)
## Number of samples in each condition.
xref_prop <- table(pheno_snps$conditions)
## Error in eval(quote(list(...)), env): object 'pheno_snps' not found
pheno_snps$conditions
## Error in eval(expr, envir, enclos): object 'pheno_snps' not found
## TRUE wherever a position has a value above 5 in a sample.
idx_tbl <- exprs(pheno_snps) > 5
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'exprs': object 'pheno_snps' not found
## Empty table with one row per variant position; the loop which follows adds
## one column per condition.
new_tbl <- data.frame(row.names = rownames(exprs(pheno_snps)))
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'rownames': error in evaluating the argument 'object' in selecting a method for function 'exprs': object 'pheno_snps' not found
## For every condition, record the proportion of its samples in which each
## variant position passes the threshold encoded in idx_tbl.
for (n in names(xref_prop)) {
  idx_cols <- which(pheno_snps[["conditions"]] == n)
  ## drop = FALSE keeps a condition with a single sample as a one-column matrix;
  ## without it the subset collapses to a vector and rowSums() fails.
  prop_col <- rowSums(idx_tbl[, idx_cols, drop = FALSE]) / xref_prop[[n]]
  new_tbl[[n]] <- prop_col
}
## Error in eval(expr, envir, enclos): object 'xref_prop' not found
## NOTE(review): despite its name, "ratio" holds the difference between the
## z2.2 and z2.3 columns, not their quotient.
new_tbl[["ratio"]] <- (new_tbl[["z2.2"]] - new_tbl[["z2.3"]])
## Error in eval(expr, envir, enclos): object 'new_tbl' not found
## Keep only the rows whose name contains "LpaL13".
keepers <- grepl(x = rownames(new_tbl), pattern = "LpaL13")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'grepl': error in evaluating the argument 'x' in selecting a method for function 'rownames': object 'new_tbl' not found
new_tbl <- new_tbl[keepers, ]
## Error in eval(expr, envir, enclos): object 'new_tbl' not found
## Split each variant name into the SNP, Chromosome and Position columns which
## are handed to CMplot() together with "ratio".
new_tbl[["SNP"]] <- rownames(new_tbl)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'rownames': object 'new_tbl' not found
new_tbl[["Chromosome"]] <- gsub(x = new_tbl[["SNP"]], pattern = "chr_(.*)_pos_.*", replacement = "\\1")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'gsub': object 'new_tbl' not found
## NOTE(review): gsub() returns character, so Position is not numeric here;
## confirm that CMplot() accepts that.
new_tbl[["Position"]] <- gsub(x = new_tbl[["SNP"]], pattern = ".*_pos_(\\d+)_.*", replacement = "\\1")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'gsub': object 'new_tbl' not found
new_tbl <- new_tbl[, c("SNP", "Chromosome", "Position", "ratio")]
## Error in eval(expr, envir, enclos): object 'new_tbl' not found
library(CMplot)
## Much appreciate for using CMplot.
## Full description, Bug report, Suggestion and the latest codes:
## https://github.com/YinLiLin/CMplot
CMplot(new_tbl)
## Error in is.data.frame(x): object 'new_tbl' not found
## Unless skip_load is set to TRUE, print the session information, report the
## hpgltools commit in use, and save the current state to savefile.
if (!isTRUE(get0("skip_load"))) {
  pander::pander(sessionInfo())
  message("This is hpgltools commit: ", get_git_commit())
  message("Saving to ", savefile)
  tmp <- sm(saveme(filename = savefile))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 53433f808ad055552025c90161db331405085a9e
## This is hpgltools commit: Tue May 4 12:44:03 2021 -0400: 53433f808ad055552025c90161db331405085a9e
## Saving to tmrc2_02sample_estimation_v202104.rda.xz
## Reload the state stored in savefile.
## NOTE(review): presumably kept for interactive use rather than evaluated on
## every build -- confirm.
tmp <- loadme(filename = savefile)