This document intends to provide finalized analyses of the TMRC2 samples.
These are sequences from an array of Leishmania strains collected over time by CIDEIM. My primary goal in these analyses is to look for differences among them which may help to explain differing patient outcomes, drug sensitivity, and any other aspects which may prove helpful.
The metadata factors of most interest in this group are:
The samples used in these analyses were all collected, purified, and libraries generated by the scientists/doctors at CIDEIM. The sequencing libraries were generated via the TruSeq non-stranded library kit and sequenced either at JHU or UMD; earlier samples were single-ended, but most were paired.
All samples were trimmed with Trimmomatic using the same set of parameters. All mapping was performed with hisat2 version 2.2.1. Quantifications were also performed with salmon version 1.2.0. The reference genome was L. panamensis (MHOM/COL/81/L13), taken from TriTrypDB version 36.
This document is limited primarily to a series of RNAseq analyses which look for variance among the metadata factors of primary interest. It also includes a series of pseudo-variant searches which seek to provide some clarification vis-à-vis the relative lineages of these strains.
The primary metadata source for this experiment comes from our shared online sample sheet, which has been copied to sample_sheets/tmrc2_samples.xlsx. In our working repository we have multiple versions of this which change over time.
## Path to the local copy of the shared sample sheet; it is the first argument
## handed to create_expt() when the expressionset is built.
sample_sheet <- "sample_sheets/tmrc2_samples.xlsx"
Everything which follows depends on the existing TriTrypDB annotations, revision 46, circa 2019. The following block loads a database of these annotations and turns it into a matrix where the rows are genes and columns are all the annotation types provided by TriTrypDB.
The same database was used to create a matrix of orthologous genes between L.panamensis and all of the other species in the TriTrypDB.
## Attach EuPathDB and the L. panamensis OrgDb package, then pull the gene
## annotations, GO data, CDS lengths and ortholog table used by later sections.
## sm() is presumably the hpgltools wrapper which silences loading chatter;
## 'tt' only captures its return value.
tt <- sm(library(EuPathDB))
## The package name is kept as a string because create_expt() later receives it
## as its 'annotation' argument.
orgdb <- "org.Lpanamensis.MHOMCOL81L13.v46.eg.db"
tt <- sm(library(orgdb, character.only=TRUE))
## The OrgDb object exported by the package attached above.
pan_db <- org.Lpanamensis.MHOMCOL81L13.v46.eg.db
## Every column the OrgDb offers; not used again in this section.
all_fields <- columns(pan_db)
## Keep only the '$genes' element of the result, keyed by gene ID ("gid") and
## limited to the six listed annotation fields.
all_lp_annot <- sm(load_orgdb_annotations(
pan_db,
keytype = "gid",
fields = c("annot_gene_entrez_id", "annot_gene_name",
"annot_strand", "annot_chromosome", "annot_cds_length",
"annot_gene_product")))$genes
lp_go <- sm(load_orgdb_go(pan_db))
## Two-column ID/length table built from the CDS lengths.
lp_lengths <- all_lp_annot[, c("gid", "annot_cds_length")]
colnames(lp_lengths) <- c("ID", "length")
## Lower-case the product descriptions; presumably so that the lower-case terms
## given to semantic_expt_filter() match regardless of the original casing.
all_lp_annot[["annot_gene_product"]] <- tolower(all_lp_annot[["annot_gene_product"]])
orthos <- sm(EuPathDB::extract_eupath_orthologs(db = pan_db))
## The annotation table handed to create_expt() as gene_info.
hisat_annot <- all_lp_annot
## rownames(hisat_annot) <- paste0("exon_", rownames(hisat_annot), ".E1")
## Fetch the current TriTrypDB metadata table (one row per available organism,
## judging by the get_eupath_entry() lookup which consumes it).
meta <- EuPathDB::download_eupath_metadata(webservice="tritrypdb")
## Unable to find species names for 2 species.
## Leishmania sp. Ghana MHOM/GH/2012/GH5, Leishmania sp. Namibia MPRO/NA/1975/252/LV425
## Appending to an existing file: EuPathDB/metadata/biocv3.14_tritrypdbv56_metadata.csv
## Appending to an existing file: EuPathDB/metadata/GRanges_biocv3.14_tritrypdbv56_metadata.csv
## Appending to an existing file: EuPathDB/metadata/OrgDb_biocv3.14_tritrypdbv56_metadata.csv
## Appending to an existing file: EuPathDB/metadata/TxDb_biocv3.14_tritrypdbv56_metadata.csv
## Appending to an existing file: EuPathDB/metadata/OrganismDbi_biocv3.14_tritrypdbv56_metadata.csv
## Appending to an existing file: EuPathDB/metadata/BSgenome_biocv3.14_tritrypdbv56_metadata.csv
## Appending to an existing file: EuPathDB/metadata/biocv3.14_tritrypdbv56_invalid_metadata.csv
## Appending to an existing file: EuPathDB/metadata/GRanges_biocv3.14_tritrypdbv56_invalid_metadata.csv
## Appending to an existing file: EuPathDB/metadata/OrgDb_biocv3.14_tritrypdbv56_invalid_metadata.csv
## Appending to an existing file: EuPathDB/metadata/TxDb_biocv3.14_tritrypdbv56_invalid_metadata.csv
## Appending to an existing file: EuPathDB/metadata/OrganismDbi_biocv3.14_tritrypdbv56_invalid_metadata.csv
## Appending to an existing file: EuPathDB/metadata/BSgenome_biocv3.14_tritrypdbv56_invalid_metadata.csv
## Look up the L. panamensis entry in the downloaded metadata. Two strains match
## and, per the message below, the first (MHOM/COL/81/L13) is chosen.
lp_entry <- EuPathDB::get_eupath_entry(species="Leishmania panamensis", metadata=meta)
## Found the following hits: Leishmania panamensis MHOM/COL/81/L13, Leishmania panamensis strain MHOM/PA/94/PSC-1, choosing the first.
## Using: Leishmania panamensis MHOM/COL/81/L13.
## List the metadata fields available for the chosen entry.
colnames(lp_entry)
## [1] "AnnotationVersion" "AnnotationSource" "BiocVersion"
## [4] "DataProvider" "Genome" "GenomeSource"
## [7] "GenomeVersion" "NumArrayGene" "NumChipChipGene"
## [10] "NumChromosome" "NumCodingGene" "NumCommunity"
## [13] "NumContig" "NumEC" "NumEST"
## [16] "NumGene" "NumGO" "NumOrtholog"
## [19] "NumOtherGene" "NumPopSet" "NumProteomics"
## [22] "NumPseudogene" "NumRNASeq" "NumRTPCR"
## [25] "NumSNP" "NumTFBS" "Organellar"
## [28] "ReferenceStrain" "MegaBP" "PrimaryKey"
## [31] "ProjectID" "RecordClassName" "SourceID"
## [34] "SourceVersion" "TaxonomyID" "TaxonomyName"
## [37] "URLGenome" "URLGFF" "URLProtein"
## [40] "Coordinate_1_based" "Maintainer" "SourceUrl"
## [43] "Tags" "BsgenomePkg" "GrangesPkg"
## [46] "OrganismdbiPkg" "OrgdbPkg" "TxdbPkg"
## [49] "Taxon" "Genus" "Species"
## [52] "Strain" "BsgenomeFile" "GrangesFile"
## [55] "OrganismdbiFile" "OrgdbFile" "TxdbFile"
## [58] "GenusSpecies" "TaxonUnmodified" "TaxonCanonical"
## [61] "TaxonXref"
## Name of the BSgenome package carrying the L. panamensis genome sequence.
## NOTE(review): this name says v53 while the OrgDb above and the commented-out
## builder below say v46 -- confirm that mixing releases is intended.
testing_panamensis <- "BSGenome.Leishmania.panamensis.MHOMCOL81L13.v53"
## testing_panamensis <- EuPathDB::make_eupath_bsgenome(entry=lp_entry, eu_version="v46")
## Attach the package by name (character.only because the name is in a variable).
library(as.character(testing_panamensis), character.only=TRUE)
## Loading required package: BSgenome
## Loading required package: Biostrings
## Loading required package: XVector
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
## Loading required package: rtracklayer
## Fetch the object named like the package; get0() returns NULL instead of
## raising an error if no such object exists.
genome <- get0(as.character(testing_panamensis))
Resequence samples: TMRC20002, TMRC20006, TMRC20004 (maybe TMRC20008 and TMRC20029)
The process of sample estimation takes two primary inputs:
An expressionset is a data structure used in R to examine RNASeq data. It is comprised of annotations, metadata, and expression data. In the case of our processing pipeline, the location of the expression data is provided by the filenames in the metadata.
The first lines of the following block create the Expressionset. All of the following lines perform various normalizations and generate plots from it.
The following samples are much lower coverage:
20210610: I made some manual changes to the sample sheet which I downloaded, filling in some zymodeme with ‘unknown’
## Metadata columns to be cleaned up and converted to factors at the end of the
## pipeline below.
## NOTE(review): "zymodemecategorical" is listed twice; check whether the second
## entry was meant to be a different column.
sanitize_columns <- c("passagenumber", "clinicalresponse", "clinicalcategorical",
"zymodemecategorical", "zymodemecategorical")
## Build the expressionset from the sample sheet: sample IDs come from the
## "hpglidentifier" column and the count files from "lpanamensisv36hisatfile".
lp_expt <- sm(create_expt(sample_sheet,
gene_info = hisat_annot,
annotation = orgdb,
id_column = "hpglidentifier",
file_column = "lpanamensisv36hisatfile")) %>%
## Use the zymodeme as the experimental condition.
set_expt_conditions(fact = "zymodemecategorical") %>%
## Drop samples below the nonzero-gene and read-coverage thresholds; the
## messages printed after this block list which samples were removed.
subset_expt(nonzero = 8550) %>%
subset_expt(coverage = 5000000) %>%
## Remove genes whose product description matches these terms (68 genes
## according to the message below).
semantic_expt_filter(semantic = c("amastin", "gp63", "leishmanolysin"),
semantic_column = "annot_gene_product") %>%
## Clean the listed metadata columns, then store them as factors.
sanitize_expt_metadata(columns = sanitize_columns) %>%
set_expt_factors(columns = sanitize_columns, class = "factor")
## The samples (and read coverage) removed when filtering 8550 non-zero genes are:
## TMRC20002 TMRC20006
## 11681227 6670348
## subset_expt(): There were 75, now there are 73 samples.
## The samples removed (and read coverage) when filtering samples with less than 5e+06 reads are:
## TMRC20004 TMRC20029
## 564812 1658096
## subset_expt(): There were 73, now there are 71 samples.
## semantic_expt_filter(): Removed 68 genes.
## Library sizes per sample, written to images/ via pp().
libsizes <- plot_libsize(lp_expt)
pp(file = "images/lp_expt_libsizes.png", image = libsizes$plot, width = 14, height = 9)
## Warning in pp(file = "images/lp_expt_libsizes.png", image = libsizes$plot, :
## There is no device to shut down.
## I think samples 7,10 should be removed at minimum, probably also 9,11
## Nonzero-gene plot per sample.
nonzero <- plot_nonzero(lp_expt)
pp(file = "images/lp_nonzero.png", image = nonzero$plot, width = 9, height = 9)
## Warning: ggrepel: 50 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning in pp(file = "images/lp_nonzero.png", image = nonzero$plot, width = 9, :
## There is no device to shut down.
## Warning: ggrepel: 50 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Boxplot of the per-sample distributions; per the message below it is drawn on
## a log scale after adding 1.
lp_box <- plot_boxplot(lp_expt)
## 5364 entries are 0. We are on a log scale, adding 1 to the data.
pp(file = "images/lp_expt_boxplot.png", image = lp_box, width = 12, height = 9)
## Warning in pp(file = "images/lp_expt_boxplot.png", image = lp_box, width = 12, :
## There is no device to shut down.
## Pre/post-filter comparison; both returned plots are displayed inline.
filter_plot <- plot_libsize_prepost(lp_expt)
filter_plot$lowgene_plot
## Warning: Using alpha for a discrete variable is not advised.
filter_plot$count_plot
Najib’s favorite plots are of course the PCA/TSNE. These are nice to look at in order to get a sense of the relationships between samples. They also provide a good opportunity to see what happens when one applies different normalizations, surrogate analyses, filters, etc. In addition, one may set different experimental factors as the primary ‘condition’ (usually the color of plots) and surrogate ‘batches’.
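Column ‘Q’ in the sample sheet (the susceptibility column read below) is numeric; make a categorical version of it with these parameters: values <= 0.35 are ‘resistant’, values from 0.36 to 0.48 are ‘ambiguous’, values >= 0.49 are ‘sensitive’, and missing values are ‘unknown’.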
## Turn the numeric susceptibility column into a category stored in the
## "sus_category" metadata column:
##   NA (missing or non-numeric)  -> "unknown"
##   <= 0.35                      -> "resistant"
##   >= 0.49                      -> "sensitive"
##   anything in between          -> "ambiguous"
## The previous version only labelled 0.36-0.48 as ambiguous, so a value such as
## 0.355 or 0.485 fell through every test and kept its number (as a string) as
## its category. The masks below are NA-free and cover the whole range.
starting <- as.numeric(pData(lp_expt)[["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]])
## Warning: NAs introduced by coercion
na_idx <- is.na(starting)
resist_idx <- !na_idx & starting <= 0.35
susceptible_idx <- !na_idx & starting >= 0.49
indeterminant_idx <- !na_idx & !resist_idx & !susceptible_idx
## Start from "unknown" so every sample receives exactly one label.
sus_categorical <- rep("unknown", length(starting))
sus_categorical[resist_idx] <- "resistant"
sus_categorical[indeterminant_idx] <- "ambiguous"
sus_categorical[susceptible_idx] <- "sensitive"
pData(lp_expt)[["sus_category"]] <- sus_categorical
## Colour assigned to each zymodeme condition. The commented-out entries are an
## earlier assignment of the same four colours.
clinical_colors <- list(
## "z2.1" = "#0000cc",
## "z2.3" = "#874400",
## "z2.2" = "#df7000",
## "z2.4" = "#cc0000",
"z2.1" = "#874400",
"z2.2" = "#0000cc",
"z2.3" = "#cc0000",
"z2.4" = "#df7000",
"unknown" = "#cbcbcb",
"null" = "#000000")
## Same expressionset with the susceptibility category as the batch and the
## zymodeme colours applied. The batch is passed as the vector itself rather
## than as a metadata column name.
clinical_samples <- lp_expt %>%
set_expt_batches(fact = sus_categorical) %>%
set_expt_colors(clinical_colors)
## Quantile-normalized, cpm-converted, log2-transformed, filtered copy used for
## the PCA, correlation heatmap and plot_sm() below.
clinical_norm <- sm(normalize_expt(clinical_samples, norm = "quant", transform = "log2",
convert = "cpm", batch = FALSE, filter = TRUE))
zymo_pca <- plot_pca(clinical_norm, plot_title = "PCA of parasite expression values",
plot_labels = FALSE)
pp(file = "images/zymo_pca_sus_shape.png", image = zymo_pca$plot)
## Warning in pp(file = "images/zymo_pca_sus_shape.png", image = zymo_pca$plot):
## There is no device to shut down.
## Repeat the PCA with only the z2.2 and z2.3 samples (59 of 71 per the message).
only_two_types <- subset_expt(clinical_samples, subset = "condition=='z2.3'|condition=='z2.2'")
## subset_expt(): There were 71, now there are 59 samples.
only_two_norm <- sm(normalize_expt(only_two_types, norm = "quant", transform = "log2",
convert = "cpm", batch = FALSE, filter = TRUE))
onlytwo_pca <- plot_pca(only_two_norm, plot_title = "PCA of z2.2 and z2.3 parasite expression values",
plot_labels = FALSE)
## Note: this plot is written as a pdf while the others in this section are png.
pp(file = "images/zymo_z2.2_z2.3_pca_sus_shape.pdf", image = onlytwo_pca$plot)
## 3D rendering built from the all-sample PCA result.
zymo_3dpca <- plot_3d_pca(zymo_pca)
zymo_3dpca$plot
## TSNE uses a copy normalized without the quantile step.
clinical_n <- sm(normalize_expt(clinical_samples, transform = "log2",
convert = "cpm", batch = FALSE, filter = TRUE))
zymo_tsne <- plot_tsne(clinical_n, plot_title = "TSNE of parasite expression values")
zymo_tsne$plot
## Same views after svaseq surrogate estimation (batch = "svaseq").
clinical_nb <- normalize_expt(clinical_samples, convert = "cpm", transform = "log2",
filter = TRUE, batch = "svaseq")
## Removing 142 low-count genes (8568 remaining).
## batch_counts: Before batch/surrogate estimation, 1008 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 3380 entries are 0<x<1: 1%.
## Setting 370 low elements to zero.
## transform_counts: Found 370 values equal to 0, adding 1 to the matrix.
clinical_nb_pca <- plot_pca(clinical_nb, plot_title = "PCA of parasite expression values",
plot_labels = FALSE)
pp(file = "images/clinical_nb_pca_sus_shape.png", image = clinical_nb_pca$plot)
## Warning in pp(file = "images/clinical_nb_pca_sus_shape.png", image =
## clinical_nb_pca$plot): There is no device to shut down.
clinical_nb_tsne <- plot_tsne(clinical_nb, plot_title = "TSNE of parasite expression values")
clinical_nb_tsne$plot
## Warning: ggrepel: 43 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## NOTE(review): the title string below contains literal line breaks; confirm
## that the multi-line title is intended.
corheat <- plot_corheat(clinical_norm, plot_title = "Correlation heatmap of parasite
expression values
")
corheat$plot
plot_sm(clinical_norm)$plot
## Performing correlation.
## Colours for the cure/fail clinical categories. Per the warning below,
## "notapplicable" does not occur in the data and its colour goes unused.
cf_colors <- list(
"cure" = "#006f00",
"fail" = "#9dffa0",
"unknown" = "#cbcbcb",
"notapplicable" = "#000000")
## Cure/fail as the condition, susceptibility category as the batch.
cf_expt <- set_expt_conditions(lp_expt, fact = "clinicalcategorical") %>%
set_expt_batches(fact = sus_categorical) %>%
set_expt_colors(cf_colors)
## Warning in set_expt_colors(., cf_colors): Colors for the following categories
## are not being used: notapplicable.
cf_norm <- normalize_expt(cf_expt, convert = "cpm", transform = "log2",
norm = "quant", filter = TRUE)
## Removing 142 low-count genes (8568 remaining).
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
start_cf <- plot_pca(cf_norm, plot_title = "PCA of parasite expression values",
plot_labels = FALSE)
pp(file = "images/cf_sus_shape.png", image = start_cf$plot)
## Warning in pp(file = "images/cf_sus_shape.png", image = start_cf$plot): There is
## no device to shut down.
## Same normalization plus svaseq; normalize_expt() itself warns below that
## quantile normalization and sva may not combine well.
cf_nb <- normalize_expt(cf_expt, convert = "cpm", transform = "log2",
norm = "quant", filter = TRUE, batch = "svaseq")
## Warning in normalize_expt(cf_expt, convert = "cpm", transform = "log2", :
## Quantile normalization and sva do not always play well together.
## Removing 142 low-count genes (8568 remaining).
## batch_counts: Before batch/surrogate estimation, 2 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 4130 entries are 0<x<1: 1%.
## Setting 154 low elements to zero.
## transform_counts: Found 154 values equal to 0, adding 1 to the matrix.
cf_nb_pca <- plot_pca(cf_nb, plot_title = "PCA of parasite expression values",
plot_labels = FALSE)
## NOTE(review): "share" in this file name may be a typo for "shape" (compare
## the other *_sus_shape outputs); left as is because it is an output path.
pp(file = "images/cf_sus_share_nb.png", image = cf_nb_pca$plot)
## Warning in pp(file = "images/cf_sus_share_nb.png", image = cf_nb_pca$plot):
## There is no device to shut down.
## Recomputes cf_norm with the same arguments as above (only their order differs).
cf_norm <- normalize_expt(cf_expt, transform = "log2", convert = "cpm",
filter = TRUE, norm = "quant")
## Removing 142 low-count genes (8568 remaining).
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
## Relate the first six principal components to four metadata factors; anova_p
## prints one p-value per factor and component (table below).
test <- pca_information(cf_norm,
expt_factors = c("clinicalcategorical", "zymodemecategorical",
"pathogenstrain", "passagenumber"),
num_components = 6, plot_pcas = TRUE)
test$anova_p
## PC1 PC2 PC3 PC4 PC5 PC6
## clinicalcategorical 2.850e-01 0.3311343 0.47800 1.728e-03 0.6934731 0.4209
## zymodemecategorical 4.608e-08 0.0009469 0.33206 1.487e-02 0.0009543 0.2642
## pathogenstrain 7.092e-01 0.7763033 0.84356 4.512e-06 0.0181051 0.5619
## passagenumber 8.896e-01 0.2377294 0.04096 2.795e-02 0.2229372 0.4130
test$cor_heatmap
## Colours for the susceptibility categories.
sus_colors <- list(
"resistant" = "#8563a7",
"sensitive" = "#8d0000",
"ambiguous" = "#cbcbcb",
"unknown" = "#000000")
## Susceptibility category as the condition, zymodeme as the batch, then drop
## two zymodeme batches (1 and 3 samples removed per the messages below).
## The batch labels are written without punctuation ('z24', 'z21'), presumably
## because sanitize_expt_metadata() stripped it from the column -- TODO confirm.
sus_expt <- set_expt_conditions(lp_expt, fact = "sus_category") %>%
set_expt_batches(fact = "zymodemecategorical") %>%
set_expt_colors(colors = sus_colors) %>%
subset_expt(subset = "batch!='z24'") %>%
subset_expt(subset = "batch!='z21'")
## subset_expt(): There were 71, now there are 70 samples.
## subset_expt(): There were 70, now there are 67 samples.
sus_norm <- normalize_expt(sus_expt, transform = "log2", convert = "cpm",
norm = "quant", filter = TRUE)
## Removing 143 low-count genes (8567 remaining).
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
sus_pca <- plot_pca(sus_norm, plot_title = "PCA of parasite expression values",
plot_labels = FALSE)
pp(file = "images/sus_norm_pca.png", image = sus_pca[["plot"]])
## Warning in pp(file = "images/sus_norm_pca.png", image = sus_pca[["plot"]]):
## There is no device to shut down.
## svaseq-adjusted version of the same PCA (no quantile normalization here).
sus_nb <- normalize_expt(sus_expt, transform = "log2", convert = "cpm",
batch = "svaseq", filter = TRUE)
## Removing 143 low-count genes (8567 remaining).
## batch_counts: Before batch/surrogate estimation, 939 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 3146 entries are 0<x<1: 1%.
## Setting 208 low elements to zero.
## transform_counts: Found 208 values equal to 0, adding 1 to the matrix.
sus_nb_pca <- plot_pca(sus_nb, plot_title = "PCA of parasite expression values",
plot_labels = FALSE)
pp(file = "images/sus_nb_pca.png", image = sus_nb_pca[["plot"]])
## Warning in pp(file = "images/sus_nb_pca.png", image = sus_nb_pca[["plot"]]):
## There is no device to shut down.
The following sections perform a series of analyses which seek to elucidate differences between the zymodemes 2.2 and 2.3 either through differential expression or variant profiles.
TODO: Do this with and without sva and compare the results.
## Differential expression between zymodemes z2.2 and z2.3, run twice so the
## effect of surrogate-variable estimation can be compared:
##   zy_de_nobatch -- no batch/surrogate term in the model
##   zy_de         -- svaseq surrogates included
## Both runs previously used model_batch = "svaseq", so the "nobatch" tables
## were identical to the sva ones. 'ver' is assumed to be defined earlier in
## the document.
zy_expt <- subset_expt(lp_expt, subset = "condition=='z2.2'|condition=='z2.3'")
## subset_expt(): There were 71, now there are 59 samples.
zy_norm <- normalize_expt(zy_expt, filter = TRUE, convert = "cpm", norm = "quant")
## Removing 159 low-count genes (8551 remaining).
zy_de_nobatch <- sm(all_pairwise(zy_expt, filter = TRUE, model_batch = FALSE))
zy_de <- sm(all_pairwise(zy_expt, filter = TRUE, model_batch = "svaseq"))
## Write the combined tables, the significant subsets and a gmt file per run.
zy_table_nobatch <- sm(combine_de_tables(
  zy_de_nobatch, excel = glue::glue("excel/zy_tables_nobatch-v{ver}.xlsx"),
  sig_excel = glue::glue("excel/zy_sig_nobatch-v{ver}.xlsx"),
  gmt = glue::glue("gmt/zymodeme_nobatch-v{ver}.gmt")))
zy_table <- sm(combine_de_tables(
  zy_de, excel = glue::glue("excel/zy_tables-v{ver}.xlsx"),
  sig_excel = glue::glue("excel/zy_sig-v{ver}.xlsx"),
  gmt = glue::glue("gmt/zymodeme-v{ver}.gmt")))
I want to add a function to extract gmt files from our DE results. In order for this to work best, I will likely need to change my expressionset creator in order to include in it the lab/experiment/etc, as I already did for the annotation source.
In addition, some work will need to be done in order to ensure we get the gene IDs in the expected format – this is work which has already mostly been done for the main ensembl reference species (humans/mouse), but may prove more challenging for things like Leishmania. So, let us test that for this data.
With the above in mind, I made a couple of changes to expt.r to add:
## List the elements of the significance results: one entry per DE method plus
## the bar plots and a summary data frame (see the printed table).
summary(zy_table_nobatch[["significant"]])
## Length Class Mode
## limma 7 -none- list
## edger 7 -none- list
## deseq 7 -none- list
## ebseq 7 -none- list
## basic 7 -none- list
## sig_bar_plots 17 -none- list
## summary_df 10 data.frame list
## For the moment, fill in the parameter with the dataset of interest
##
## Build one gene set collection per contrast from the significant up/down
## genes of a combine_de_tables() result.
##
## @param zy_table_nobatch Result of combine_de_tables(). Its "significant"
##   element is read for the gene IDs and its input expressionset for the name
##   of the annotation package.
## @param according_to Which method's significance tables to use ("deseq" by
##   default; the names of the "significant" element are the choices).
## @return A list named by contrast, each element being the value returned by
##   make_gsc_from_ids() for that contrast's up and down gene IDs. The earlier
##   version overwrote its result on every iteration and returned NULL.
gmts_from_signicant <- function(zy_table_nobatch, according_to = "deseq") {
  researcher_name <- "elsayed"
  ## Was misspelled "lpamanensis".
  study_name <- "lpanamensis"
  ## The expressionset stores the annotation package name; fetch that object and
  ## read the organism from its metadata table.
  annotation_name <- annotation(zy_table_nobatch[["input"]][["input"]][["expressionset"]])
  annotation_data <- get0(annotation_name)
  annot_meta <- metadata(annotation_data)
  organism_name_idx <- annot_meta[["name"]] == "ORGANISM"
  organism <- annot_meta[organism_name_idx, "value"]

  sig_tables <- zy_table_nobatch[["significant"]][[according_to]]
  if (is.null(sig_tables)) {
    ## An unknown method name used to fall through and silently do nothing.
    stop("No significance tables found for method '", according_to, "'.",
         call. = FALSE)
  }

  categories <- names(sig_tables[["ups"]])
  gene_sets <- lapply(categories, function(category) {
    make_gsc_from_ids(first_ids = rownames(sig_tables[["ups"]][[category]]),
                      second_ids = rownames(sig_tables[["downs"]][[category]]),
                      annotation_name = annotation_name,
                      researcher_name = researcher_name,
                      study_name = study_name, organism = organism,
                      category_name = category, pair_names = "up",
                      current_id = "ID", required_id = "ENTREZID")
  })
  names(gene_sets) <- categories
  gene_sets
}
## Save the DESeq2 MA plot of the z2.3 vs z2.2 contrast from the sva-modelled tables.
pp(file = "images/zymo_ma.png", image = zy_table[["plots"]][["z23_vs_z22"]][["deseq_ma_plots"]][["plot"]])
## Warning in pp(file = "images/zymo_ma.png", image = zy_table[["plots"]]
## [["z23_vs_z22"]][["deseq_ma_plots"]][["plot"]]): There is no device to shut
## down.
In contrast, we can search for genes which are differentially expressed with respect to cure/failure status.
## Pairwise DE across the cure/fail conditions with svaseq surrogates in the
## model; combined and significant tables go to versioned xlsx files ('ver' is
## assumed to be defined earlier in the document).
cf_de <- sm(all_pairwise(cf_expt, filter = TRUE, model_batch = "svaseq"))
cf_table <- sm(combine_de_tables(
cf_de,
excel = glue::glue("excel/cf_tables-v{ver}.xlsx"),
sig_excel = glue::glue("excel/cf_sig-v{ver}.xlsx")))
Finally, we can use our category of susceptibility and look for genes which change from sensitive to resistant. Keep in mind, though, that for the moment we have a lot of ambiguous and unknown strains.
## Pairwise DE across the susceptibility categories with svaseq surrogates in
## the model; tables are written to versioned xlsx files ('ver' is assumed to
## be defined earlier in the document).
sus_de <- sm(all_pairwise(sus_expt, filter = TRUE, model_batch = "svaseq"))
sus_table <- sm(combine_de_tables(
sus_de,
excel = glue::glue("excel/sus_tables-v{ver}.xlsx"),
sig_excel = glue::glue("excel/sus_sig-v{ver}.xlsx")))
## First 20 rows of the DESeq2 'ups' table for sensitive vs resistant.
knitr::kable(head(sus_table[["significant"]][["deseq"]][["ups"]][["sensitive_vs_resistant"]], n = 20))
gid | annotgeneproduct | annotgenetype | chromosome | start | end | strand | annotgeneentrezid | annotgenename | annotstrand | annotchromosome | annotcdslength | length | deseq_logfc | deseq_adjp | edger_logfc | edger_adjp | limma_logfc | limma_adjp | basic_nummed | basic_denmed | basic_numvar | basic_denvar | basic_logfc | basic_t | basic_p | basic_adjp | deseq_basemean | deseq_lfcse | deseq_stat | deseq_p | ebseq_fc | ebseq_logfc | ebseq_c1mean | ebseq_c2mean | ebseq_mean | ebseq_var | ebseq_postfc | ebseq_ppee | ebseq_ppde | ebseq_adjp | edger_logcpm | edger_lr | edger_p | limma_ave | limma_t | limma_b | limma_p | limma_adjp_ihw | deseq_adjp_ihw | edger_adjp_ihw | ebseq_adjp_ihw | basic_adjp_ihw | lfc_meta | lfc_var | lfc_varbymed | p_meta | p_var | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
LPAL13_000017600 | LPAL13_000017600 | hypothetical protein, conserved | protein coding | LPAL13_SCAF000146 | 359 | 586 | + | forward | Not Assigned | 228.0 | 227 | 6.564 | 0 | 6.548 | 0e+00 | 6.287 | 0.0429 | 4.1180 | -1.1470 | 5.268 | 2.5720 | 5.266 | 8.300 | 0e+00 | 0.0000 | 613.10 | 0.6613 | 9.927 | 0 | 72.705 | 6.184 | 13.210 | 961.12 | 676.75 | 4.516e+05 | 58.224 | 0.0000 | 1.0000 | 0.0000 | 4.4880 | 59.91 | 0 | 2.2600 | 2.665 | -2.6780 | 0.0097 | 5.346e-02 | 6.951e-20 | 9.352e-12 | 8.934e-01 | 3.762e-07 | 6.754 | 2.432e+00 | 3.600e-01 | 3.238e-03 | 3.146e-05 | ||
LPAL13_000053200 | LPAL13_000053200 | hypothetical protein | protein coding | LPAL13_SCAF000804 | 5037 | 5249 | - | reverse | Not Assigned | 213.0 | 212 | 8.925 | 0 | 10.330 | 0e+00 | 6.031 | 0.0185 | 0.8570 | -4.1700 | 9.600 | 0.1233 | 5.027 | 8.459 | 0e+00 | 0.0000 | 77.23 | 1.0440 | 8.550 | 0 | 13478.050 | 13.718 | 0.000 | 134.77 | 94.34 | 9.270e+03 | 36.806 | 1.0000 | 0.0000 | 1.0000 | 1.5470 | 58.05 | 0 | -0.8949 | 3.084 | -2.2650 | 0.0030 | 2.308e-02 | 1.372e-14 | 1.926e-11 | 0.000e+00 | 3.411e-07 | 8.406 | 3.528e-02 | 4.197e-03 | 1.003e-03 | 3.018e-06 | ||
LPAL13_200050100 | LPAL13_200050100 | hypothetical protein | protein coding | LpaL13_20.1 | 1627529 | 1627717 | + | forward | 20.1 | 189.0 | 188 | 5.525 | 0 | 5.473 | 0e+00 | 4.659 | 0.0009 | 2.3090 | -1.8430 | 1.742 | 2.3634 | 4.152 | 8.155 | 0e+00 | 0.0000 | 121.50 | 0.5792 | 9.539 | 0 | 24.986 | 4.643 | 8.318 | 208.07 | 148.14 | 2.549e+04 | 17.788 | 0.0000 | 1.0000 | 0.0000 | 2.1720 | 62.54 | 0 | 0.7885 | 4.367 | 0.9945 | 0.0000 | 1.122e-03 | 2.478e-18 | 4.481e-12 | 9.791e-01 | 8.497e-06 | 5.208 | 3.918e-01 | 7.523e-02 | 1.551e-05 | 7.217e-10 | ||
LPAL13_300029400 | LPAL13_300029400 | hypothetical protein, conserved | protein coding | LpaL13_30 | 853953 | 854150 | - | reverse | 30 | 198.0 | 197 | 6.390 | 0 | 6.323 | 0e+00 | 5.128 | 0.0011 | 1.5080 | -2.5360 | 1.854 | 1.9106 | 4.045 | 8.519 | 0e+00 | 0.0000 | 84.63 | 0.7095 | 9.007 | 0 | 59.752 | 5.901 | 2.119 | 127.20 | 89.67 | 1.056e+04 | 22.395 | 0.0000 | 0.0000 | 0.0000 | 1.6260 | 58.02 | 0 | -0.0869 | 4.290 | 0.4427 | 0.0001 | 1.350e-03 | 2.648e-16 | 1.926e-11 | 0.000e+00 | 2.140e-06 | 5.953 | 3.312e-01 | 5.564e-02 | 2.036e-05 | 1.244e-09 | ||
LPAL13_350011800 | LPAL13_350011800 | hypothetical protein, conserved | protein coding | LpaL13_35 | 171009 | 171242 | + | forward | 35 | 234.0 | 233 | 5.289 | 0 | 5.278 | 0e+00 | 4.720 | 0.0033 | 2.6120 | -1.1520 | 2.957 | 1.1319 | 3.764 | 8.418 | 0e+00 | 0.0000 | 178.20 | 0.5669 | 9.329 | 0 | 31.856 | 4.994 | 9.539 | 304.18 | 215.79 | 6.840e+04 | 23.769 | 0.0000 | 1.0000 | 0.0000 | 2.6990 | 59.94 | 0 | 1.0920 | 3.836 | -0.0887 | 0.0003 | 4.173e-03 | 1.649e-17 | 9.352e-12 | 9.791e-01 | 1.578e-07 | 5.163 | 8.144e-01 | 1.577e-01 | 9.547e-05 | 2.734e-08 | ||
LPAL13_000035500 | LPAL13_000035500 | hypothetical protein, conserved | protein coding | LPAL13_SCAF000492 | 7045 | 7410 | + | forward | Not Assigned | 366.0 | 365 | 4.506 | 0 | 4.504 | 0e+00 | 4.085 | 0.0261 | 4.2390 | 0.8008 | 4.573 | 0.5655 | 3.439 | 7.496 | 0e+00 | 0.0000 | 529.10 | 0.5721 | 7.875 | 0 | 18.868 | 4.238 | 51.072 | 963.80 | 689.98 | 4.309e+05 | 18.173 | 0.0000 | 1.0000 | 0.0000 | 4.2760 | 43.65 | 0 | 2.7960 | 2.914 | -2.1340 | 0.0049 | 3.282e-02 | 1.711e-12 | 7.000e-09 | 8.934e-01 | 5.314e-07 | 4.559 | 7.298e-01 | 1.601e-01 | 1.633e-03 | 7.997e-06 | ||
LPAL13_000014000 | LPAL13_000014000 | hypothetical protein | protein coding | LPAL13_SCAF000119 | 655 | 942 | + | forward | Not Assigned | 288.0 | 287 | 4.336 | 0 | 4.322 | 0e+00 | 4.000 | 0.0071 | 2.3130 | -1.0970 | 1.833 | 1.9220 | 3.410 | 7.178 | 0e+00 | 0.0000 | 129.60 | 0.5067 | 8.557 | 0 | 16.737 | 4.065 | 12.133 | 203.23 | 145.90 | 1.498e+04 | 13.093 | 0.0000 | 1.0000 | 0.0000 | 2.2670 | 54.00 | 0 | 1.0460 | 3.522 | -0.8407 | 0.0008 | 8.896e-03 | 1.192e-14 | 1.052e-10 | 9.791e-01 | 1.809e-05 | 4.374 | 6.912e-01 | 1.580e-01 | 2.644e-04 | 2.098e-07 | ||
LPAL13_220019500 | LPAL13_220019500 | hypothetical protein | protein coding | LpaL13_22 | 578260 | 578538 | + | forward | 22 | 279.0 | 278 | 3.869 | 0 | 3.866 | 0e+00 | 3.186 | 0.0322 | 3.3350 | 0.3472 | 2.695 | 0.7936 | 2.988 | 7.415 | 0e+00 | 0.0000 | 287.50 | 0.4908 | 7.882 | 0 | 13.625 | 3.768 | 32.794 | 446.95 | 322.70 | 8.664e+04 | 12.589 | 0.0000 | 1.0000 | 0.0000 | 3.4060 | 45.98 | 0 | 2.3050 | 2.810 | -2.3530 | 0.0065 | 3.956e-02 | 1.711e-12 | 2.772e-09 | 9.791e-01 | 9.724e-07 | 3.704 | 9.768e-02 | 2.637e-02 | 2.181e-03 | 1.427e-05 | ||
LPAL13_040019400 | LPAL13_040019400 | hypothetical protein | protein coding | LpaL13_04 | 440768 | 441127 | - | reverse | 4 | 360.0 | 359 | 5.679 | 0 | 5.727 | 0e+00 | 3.639 | 0.0059 | -0.4572 | -3.3380 | 1.891 | 1.1727 | 2.881 | 7.087 | 0e+00 | 0.0000 | 28.83 | 0.8258 | 6.877 | 0 | 43.769 | 5.452 | 0.845 | 37.41 | 26.44 | 2.057e+03 | 8.868 | 0.0000 | 0.0000 | 0.0000 | 0.1553 | 36.77 | 0 | -1.8060 | 3.603 | -1.3140 | 0.0006 | 7.378e-03 | 1.373e-09 | 1.607e-07 | 0.000e+00 | 6.646e-06 | 5.066 | 6.063e-02 | 1.197e-02 | 2.041e-04 | 1.250e-07 | ||
LPAL13_080009800 | LPAL13_080009800 | tuzin, putative | protein coding | LpaL13_08 | 184254 | 185207 | + | forward | 8 | 954.0 | 953 | 3.312 | 0 | 3.314 | 0e+00 | 3.191 | 0.0000 | 7.1310 | 4.2880 | 1.763 | 0.4548 | 2.843 | 8.952 | 0e+00 | 0.0000 | 3419.00 | 0.3957 | 8.370 | 0 | 8.908 | 3.155 | 647.196 | 5765.41 | 4229.95 | 1.806e+07 | 9.131 | 0.0001 | 0.9999 | 0.0001 | 6.9600 | 56.60 | 0 | 6.1730 | 5.792 | 6.8160 | 0.0000 | 2.024e-05 | 5.422e-14 | 2.857e-11 | 1.000e+00 | 3.138e-08 | 3.587 | 6.434e-01 | 1.794e-01 | 7.470e-08 | 1.674e-14 | ||
LPAL13_170012200 | LPAL13_170012200 | hypothetical protein | protein coding | LpaL13_17 | 289959 | 290222 | - | reverse | 17 | 264.0 | 263 | 3.862 | 0 | 3.861 | 0e+00 | 3.345 | 0.0084 | 3.2290 | 0.4204 | 3.867 | 0.2272 | 2.809 | 7.088 | 0e+00 | 0.0000 | 285.70 | 0.5427 | 7.116 | 0 | 14.361 | 3.844 | 32.795 | 471.10 | 339.61 | 1.078e+05 | 13.286 | 0.0000 | 1.0000 | 0.0000 | 3.3940 | 37.77 | 0 | 2.0090 | 3.452 | -0.7961 | 0.0010 | 1.050e-02 | 3.311e-10 | 1.088e-07 | 9.791e-01 | 2.258e-06 | 3.897 | 4.859e-01 | 1.247e-01 | 3.286e-04 | 3.239e-07 | ||
LPAL13_000011700 | LPAL13_000011700 | hypothetical protein | protein coding | LPAL13_SCAF000076 | 101 | 364 | - | reverse | Not Assigned | 264.0 | 263 | 6.200 | 0 | 7.639 | 0e+00 | 3.206 | 0.0185 | -1.4720 | -4.1700 | 6.800 | 0.1233 | 2.698 | 5.362 | 0e+00 | 0.0002 | 14.87 | 1.1620 | 5.337 | 0 | 2690.500 | 11.394 | 0.000 | 26.89 | 18.83 | 5.831e+02 | 7.965 | 1.0000 | 0.0000 | 1.0000 | -0.5642 | 26.82 | 0 | -2.8600 | 3.083 | -2.3420 | 0.0030 | 2.262e-02 | 5.009e-06 | 9.045e-06 | 0.000e+00 | 1.999e-04 | 5.654 | 9.394e-01 | 1.661e-01 | 1.003e-03 | 3.018e-06 | ||
LPAL13_130006100 | LPAL13_130006100 | hypothetical protein | protein coding | LpaL13_13 | 26114 | 26335 | - | reverse | 13 | 222.0 | 221 | 3.094 | 0 | 3.095 | 0e+00 | 2.608 | 0.0059 | 3.4420 | 0.9663 | 1.020 | 0.2916 | 2.476 | 10.050 | 0e+00 | 0.0000 | 267.70 | 0.3652 | 8.472 | 0 | 7.926 | 2.986 | 50.143 | 397.49 | 293.28 | 5.813e+04 | 7.591 | 0.0000 | 1.0000 | 0.0000 | 3.3090 | 59.37 | 0 | 2.6680 | 3.601 | -0.3859 | 0.0006 | 7.393e-03 | 1.719e-14 | 9.352e-12 | 9.791e-01 | 5.005e-09 | 2.932 | 0.000e+00 | 0.000e+00 | 2.055e-04 | 1.266e-07 | ||
LPAL13_170014500 | LPAL13_170014500 | hypothetical protein, conserved | protein coding | LpaL13_17 | 361708 | 362040 | + | forward | 17 | 333.0 | 332 | 5.267 | 0 | 5.133 | 1e-04 | 3.102 | 0.0171 | -0.8103 | -3.2750 | 6.452 | 1.5010 | 2.465 | 4.134 | 2e-04 | 0.0020 | 22.51 | 1.0160 | 5.184 | 0 | 42.994 | 5.426 | 1.034 | 44.87 | 31.72 | 1.873e+03 | 10.109 | 0.0000 | 0.0000 | 0.0000 | -0.2222 | 20.80 | 0 | -2.3740 | 3.121 | -2.1820 | 0.0027 | 2.110e-02 | 8.269e-06 | 1.362e-04 | 0.000e+00 | 1.681e-03 | 4.355 | 1.129e-01 | 2.591e-02 | 9.014e-04 | 2.423e-06 | ||
LPAL13_190012600 | LPAL13_190012600 | hypothetical protein, conserved | protein coding | LpaL13_19 | 254293 | 254577 | - | reverse | 19 | 285.0 | 284 | 3.451 | 0 | 3.446 | 0e+00 | 2.745 | 0.0056 | 1.9270 | -0.3928 | 1.639 | 0.3257 | 2.319 | 7.924 | 0e+00 | 0.0000 | 94.05 | 0.4065 | 8.491 | 0 | 9.125 | 3.190 | 16.947 | 154.72 | 113.39 | 8.550e+03 | 7.723 | 0.0000 | 1.0000 | 0.0000 | 1.8280 | 53.36 | 0 | 1.0340 | 3.622 | -0.4766 | 0.0006 | 6.875e-03 | 1.594e-14 | 1.132e-10 | 9.791e-01 | 2.568e-07 | 3.199 | 6.800e-04 | 2.126e-04 | 1.917e-04 | 1.103e-07 | ||
LPAL13_240030500 | LPAL13_240030500 | hypothetical protein | protein coding | LpaL13_24 | 828719 | 828913 | + | forward | 24 | 195.0 | 194 | 2.310 | 0 | 2.312 | 0e+00 | 2.762 | 0.0066 | 3.2430 | 0.9338 | 1.225 | 3.9317 | 2.309 | 3.789 | 2e-03 | 0.0112 | 249.20 | 0.4205 | 5.493 | 0 | 4.360 | 2.124 | 81.147 | 353.80 | 272.00 | 4.014e+04 | 4.263 | 0.0022 | 0.9978 | 0.0022 | 3.1870 | 28.15 | 0 | 2.4700 | 3.552 | -0.5192 | 0.0007 | 8.277e-03 | 1.991e-06 | 5.296e-06 | 8.933e-01 | 1.131e-02 | 2.621 | 5.723e-01 | 2.183e-01 | 2.399e-04 | 1.725e-07 | ||
LPAL13_000025200 | LPAL13_000025200 | hypothetical protein | protein coding | LPAL13_SCAF000287 | 104 | 373 | + | forward | Not Assigned | 270.0 | 269 | 3.105 | 0 | 3.104 | 0e+00 | 2.745 | 0.0174 | 3.6680 | 1.3670 | 2.562 | 0.4839 | 2.301 | 6.339 | 0e+00 | 0.0000 | 305.20 | 0.5342 | 5.812 | 0 | 7.158 | 2.840 | 76.274 | 546.02 | 405.10 | 1.255e+05 | 7.032 | 0.0000 | 1.0000 | 0.0000 | 3.4860 | 27.26 | 0 | 2.3770 | 3.111 | -1.6810 | 0.0028 | 2.184e-02 | 5.316e-07 | 8.896e-06 | 9.682e-01 | 8.102e-06 | 3.016 | 1.756e-02 | 5.822e-03 | 9.247e-04 | 2.565e-06 | ||
LPAL13_210015300 | LPAL13_210015300 | core histone h2a/h2b/h3/h4, putative | protein coding | LpaL13_21 | 324301 | 324699 | + | forward | 21 | 399.0 | 398 | 2.588 | 0 | 2.592 | 0e+00 | 2.476 | 0.0000 | 8.6590 | 6.4030 | 1.842 | 0.5195 | 2.256 | 6.831 | 0e+00 | 0.0000 | 11650.00 | 0.3960 | 6.536 | 0 | 5.802 | 2.537 | 2899.069 | 16821.37 | 12644.68 | 1.212e+08 | 5.942 | 0.0056 | 0.9944 | 0.0056 | 8.7310 | 37.01 | 0 | 7.9430 | 6.000 | 7.5030 | 0.0000 | 1.406e-05 | 7.878e-09 | 1.176e-07 | 1.000e+00 | 3.408e-06 | 2.546 | 4.177e-02 | 1.641e-02 | 3.333e-08 | 3.210e-15 | ||
LPAL13_080010600 | LPAL13_080010600 | hypothetical protein, conserved | protein coding | LpaL13_08 | 195555 | 195749 | - | reverse | 8 | 195.0 | 194 | 6.002 | 0 | 7.324 | 0e+00 | 2.837 | 0.0176 | -1.9210 | -4.1700 | 4.935 | 0.1233 | 2.249 | 5.207 | 0e+00 | 0.0003 | 11.70 | 1.0980 | 5.466 | 0 | 2013.648 | 10.976 | 0.000 | 20.13 | 14.09 | 5.608e+02 | 6.299 | 1.0000 | 0.0000 | 1.0000 | -0.9047 | 27.91 | 0 | -3.0720 | 3.108 | -2.3430 | 0.0028 | 2.206e-02 | 2.256e-06 | 5.812e-06 | 0.000e+00 | 2.196e-04 | 5.091 | 1.949e+00 | 3.828e-01 | 9.341e-04 | 2.617e-06 | ||
LPAL13_000018000 | LPAL13_000018000 | hypothetical protein | protein coding | LPAL13_SCAF000166 | 2 | 412 | + | forward | Not Assigned | 411.0 | 410 | 2.730 | 0 | 2.731 | 0e+00 | 2.705 | 0.0004 | 6.6890 | 4.4530 | 2.050 | 0.1292 | 2.237 | 7.718 | 0e+00 | 0.0000 | 2496.00 | 0.4776 | 5.716 | 0 | 6.003 | 2.586 | 687.316 | 4125.65 | 3094.15 | 6.072e+06 | 6.106 | 0.0000 | 1.0000 | 0.0000 | 6.5080 | 28.55 | 0 | 5.5810 | 4.693 | 2.8090 | 0.0000 | 3.392e-04 | 6.009e-07 | 4.114e-06 | 1.000e+00 | 6.248e-07 | 2.696 | 7.249e-02 | 2.688e-02 | 4.857e-06 | 6.930e-11 |
knitr::kable(head(sus_table[["significant"]][["deseq"]][["downs"]][["sensitive_vs_resistant"]], n = 20))
gid | annotgeneproduct | annotgenetype | chromosome | start | end | strand | annotgeneentrezid | annotgenename | annotstrand | annotchromosome | annotcdslength | length | deseq_logfc | deseq_adjp | edger_logfc | edger_adjp | limma_logfc | limma_adjp | basic_nummed | basic_denmed | basic_numvar | basic_denvar | basic_logfc | basic_t | basic_p | basic_adjp | deseq_basemean | deseq_lfcse | deseq_stat | deseq_p | ebseq_fc | ebseq_logfc | ebseq_c1mean | ebseq_c2mean | ebseq_mean | ebseq_var | ebseq_postfc | ebseq_ppee | ebseq_ppde | ebseq_adjp | edger_logcpm | edger_lr | edger_p | limma_ave | limma_t | limma_b | limma_p | limma_adjp_ihw | deseq_adjp_ihw | edger_adjp_ihw | ebseq_adjp_ihw | basic_adjp_ihw | lfc_meta | lfc_var | lfc_varbymed | p_meta | p_var | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
LPAL13_000033300 | LPAL13_000033300 | hypothetical protein, conserved | protein coding | LPAL13_SCAF000463 | 551 | 811 | + | forward | Not Assigned | 261.0 | 260 | -4.464 | 0.0026 | -4.408 | 0.0030 | -7.262 | 0.0000 | -3.3430 | 3.4550 | 12.7819 | 0.0608 | -6.797 | -10.000 | 0e+00 | 0e+00 | 134.700 | 1.2270 | -3.637 | 0.0003 | 0.1605 | -2.6393 | 339.45 | 54.476 | 139.969 | 2.623e+04 | 0.1688 | 0.0000 | 0.0000 | 0.0000 | 2.2950 | 12.970 | 0.0003 | -0.9429 | -8.222 | 13.700 | 0e+00 | 4.930e-08 | 3.156e-03 | 3.526e-03 | 0.000e+00 | 3.138e-08 | -5.378 | 0.000e+00 | 0.000e+00 | 1.971e-04 | 2.955e-08 | ||
LPAL13_000012000 | LPAL13_000012000 | hypothetical protein | protein coding | LPAL13_SCAF000080 | 710 | 1159 | - | reverse | Not Assigned | 450.0 | 449 | -2.495 | 0.0010 | -2.491 | 0.0003 | -4.411 | 0.0000 | 0.1042 | 3.9370 | 8.7982 | 0.1681 | -3.832 | -6.689 | 0e+00 | 0e+00 | 212.100 | 0.6323 | -3.946 | 0.0001 | 0.2657 | -1.9123 | 490.41 | 130.281 | 238.320 | 6.077e+04 | 0.2767 | 0.0243 | 0.9757 | 0.0243 | 2.9530 | 18.170 | 0.0000 | 1.3280 | -5.852 | 6.927 | 0e+00 | 2.056e-05 | 1.155e-03 | 4.073e-04 | 9.680e-01 | 8.497e-06 | -3.046 | 6.800e-02 | -2.232e-02 | 3.334e-05 | 1.707e-09 | ||
LPAL13_310035500 | LPAL13_310035500 | hypothetical protein | protein coding | LpaL13_31 | 1198439 | 1198957 | - | reverse | 31 | 519.0 | 518 | -2.525 | 0.0033 | -2.426 | 0.0013 | -3.477 | 0.0000 | -4.1400 | -0.4281 | 4.1920 | 0.4519 | -3.712 | -8.576 | 0e+00 | 0e+00 | 7.035 | 0.7117 | -3.548 | 0.0004 | 0.2972 | -1.7503 | 19.35 | 5.744 | 9.826 | 3.556e+02 | 0.3460 | 0.0000 | 0.0000 | 0.0000 | -1.9660 | 14.900 | 0.0001 | -3.1990 | -7.723 | 8.857 | 0e+00 | 1.220e-07 | 3.383e-03 | 1.342e-03 | 0.000e+00 | 5.704e-08 | -2.836 | 1.131e-02 | -3.987e-03 | 1.671e-04 | 3.986e-08 | ||
LPAL13_000038400 | LPAL13_000038400 | expression-site associated gene (esag3), putative | protein coding | LPAL13_SCAF000573 | 101 | 1360 | + | forward | Not Assigned | 1260.0 | 1259 | -2.815 | 0.0000 | -2.813 | 0.0000 | -3.784 | 0.0000 | 4.6300 | 8.2100 | 3.5430 | 0.0361 | -3.581 | -9.948 | 0e+00 | 0e+00 | 3720.000 | 0.5328 | -5.283 | 0.0000 | 0.1947 | -2.3605 | 9563.26 | 1862.157 | 4172.487 | 1.879e+07 | 0.1996 | 0.0000 | 0.0000 | 0.0000 | 7.0800 | 31.120 | 0.0000 | 5.7700 | -6.294 | 8.671 | 0e+00 | 4.873e-06 | 6.859e-06 | 1.468e-06 | 0.000e+00 | 4.396e-08 | -3.241 | 3.350e-02 | -1.034e-02 | 6.075e-08 | 3.322e-15 | ||
LPAL13_000012100 | LPAL13_000012100 | hypothetical protein | protein coding | LPAL13_SCAF000080 | 1637 | 1894 | - | reverse | Not Assigned | 258.0 | 257 | -2.077 | 0.0107 | -2.069 | 0.0076 | -3.736 | 0.0000 | -2.2070 | 1.1620 | 6.5920 | 0.6332 | -3.370 | -6.277 | 0e+00 | 0e+00 | 31.370 | 0.6642 | -3.128 | 0.0018 | 0.3004 | -1.7351 | 69.77 | 20.952 | 35.598 | 2.066e+03 | 0.3283 | 0.0073 | 0.9927 | 0.0073 | 0.2324 | 10.720 | 0.0011 | -1.3790 | -5.597 | 4.952 | 0e+00 | 4.490e-05 | 1.086e-02 | 9.088e-03 | 9.780e-01 | 1.036e-05 | -2.487 | 1.387e-01 | -5.575e-02 | 9.408e-04 | 7.864e-07 | ||
LPAL13_000038500 | LPAL13_000038500 | hypothetical protein | protein coding | LPAL13_SCAF000575 | 39 | 251 | + | forward | Not Assigned | 213.0 | 212 | -2.142 | 0.0030 | -2.130 | 0.0032 | -3.662 | 0.0000 | -1.9850 | 1.3650 | 4.9749 | 0.6608 | -3.350 | -6.945 | 0e+00 | 0e+00 | 32.660 | 0.5980 | -3.582 | 0.0003 | 0.2950 | -1.7611 | 81.03 | 23.898 | 41.037 | 2.717e+03 | 0.3194 | 0.0219 | 0.9781 | 0.0219 | 0.2414 | 12.840 | 0.0003 | -1.2750 | -6.274 | 7.073 | 0e+00 | 6.329e-06 | 3.021e-03 | 3.685e-03 | 9.680e-01 | 2.258e-06 | -2.540 | 4.506e-02 | -1.774e-02 | 2.268e-04 | 3.858e-08 | ||
LPAL13_350063000 | LPAL13_350063000 | hypothetical protein | protein coding | LpaL13_35 | 1964328 | 1964543 | - | reverse | 35 | 216.0 | 215 | -2.757 | 0.0000 | -2.737 | 0.0000 | -3.704 | 0.0000 | -2.1430 | 1.1750 | 2.1229 | 0.2166 | -3.318 | -10.830 | 0e+00 | 0e+00 | 21.340 | 0.4698 | -5.868 | 0.0000 | 0.1753 | -2.5125 | 58.91 | 10.316 | 24.895 | 7.197e+02 | 0.1934 | 0.0000 | 1.0000 | 0.0000 | -0.3625 | 34.370 | 0.0000 | -1.4530 | -8.082 | 12.190 | 0e+00 | 4.930e-08 | 3.292e-07 | 3.841e-07 | 9.791e-01 | 1.500e-09 | -3.037 | 1.963e-02 | -6.464e-03 | 2.994e-09 | 6.631e-18 | ||
LPAL13_310031300 | LPAL13_310031300 | hypothetical protein, conserved | protein coding | LpaL13_31 | 1084772 | 1085059 | - | reverse | 31 | 288.0 | 287 | -2.255 | 0.0014 | -2.248 | 0.0008 | -3.859 | 0.0000 | -0.9701 | 2.1590 | 4.1051 | 0.7645 | -3.129 | -6.824 | 0e+00 | 0e+00 | 65.480 | 0.5902 | -3.821 | 0.0001 | 0.2715 | -1.8808 | 150.60 | 40.886 | 73.800 | 7.795e+03 | 0.2884 | 0.1534 | 0.8466 | 0.1534 | 1.2550 | 16.240 | 0.0001 | -0.2054 | -6.412 | 8.198 | 0e+00 | 4.634e-06 | 1.439e-03 | 9.162e-04 | 8.673e-01 | 2.258e-06 | -2.799 | 7.637e-02 | -2.729e-02 | 6.295e-05 | 4.451e-09 | ||
LPAL13_340039600 | LPAL13_340039600 | hypothetical protein | protein coding | LpaL13_34 | 1247554 | 1247757 | - | reverse | 34 | 204.0 | 203 | -2.336 | 0.0001 | -2.334 | 0.0000 | -3.414 | 0.0000 | 1.1870 | 4.2690 | 4.1252 | 0.1033 | -3.081 | -7.803 | 0e+00 | 0e+00 | 232.400 | 0.4936 | -4.733 | 0.0000 | 0.2185 | -2.1941 | 605.18 | 132.244 | 274.125 | 6.529e+04 | 0.2234 | 0.0000 | 1.0000 | 0.0000 | 3.0740 | 26.010 | 0.0000 | 1.9780 | -5.646 | 6.302 | 0e+00 | 3.888e-05 | 5.505e-05 | 1.474e-05 | 8.934e-01 | 7.620e-07 | -2.690 | 1.959e-03 | -7.283e-04 | 9.841e-07 | 1.141e-12 | ||
LPAL13_310031000 | LPAL13_310031000 | hypothetical protein, conserved | protein coding | LpaL13_31 | 1075172 | 1075459 | - | reverse | 31 | 288.0 | 287 | -2.217 | 0.0000 | -2.201 | 0.0000 | -3.375 | 0.0000 | -1.7270 | 0.9896 | 3.2962 | 0.5154 | -2.717 | -6.778 | 0e+00 | 0e+00 | 27.090 | 0.4444 | -4.989 | 0.0000 | 0.3096 | -1.6916 | 59.78 | 18.499 | 30.883 | 1.138e+03 | 0.3299 | 0.3062 | 0.6938 | 0.3062 | 0.0538 | 26.160 | 0.0000 | -1.1310 | -7.852 | 11.790 | 0e+00 | 8.114e-08 | 2.381e-05 | 1.185e-05 | 7.316e-01 | 2.436e-06 | -2.598 | 0.000e+00 | 0.000e+00 | 3.075e-07 | 9.255e-14 | ||
LPAL13_050005000 | LPAL13_050005000 | hypothetical protein | protein coding | LpaL13_05 | 3394 | 3612 | - | reverse | 5 | 219.0 | 218 | -2.022 | 0.0038 | -2.019 | 0.0018 | -2.980 | 0.0000 | -0.0244 | 2.6770 | 3.6342 | 0.1568 | -2.701 | -7.147 | 0e+00 | 0e+00 | 92.660 | 0.5776 | -3.500 | 0.0005 | 0.2979 | -1.7473 | 195.27 | 58.154 | 99.289 | 7.853e+03 | 0.3064 | 0.0000 | 1.0000 | 0.0000 | 1.7490 | 14.270 | 0.0002 | 0.4891 | -5.487 | 5.509 | 0e+00 | 6.053e-05 | 3.865e-03 | 2.037e-03 | 8.934e-01 | 2.298e-06 | -2.259 | 1.872e-02 | -8.287e-03 | 2.080e-04 | 5.564e-08 | ||
LPAL13_140019300 | LPAL13_140019300 | bt1 family, putative | protein coding | LpaL13_14 | 530784 | 531350 | + | forward | 14 | 567.0 | 566 | -2.653 | 0.0000 | -2.651 | 0.0000 | -2.671 | 0.0000 | 4.7250 | 7.1000 | 0.6657 | 1.0271 | -2.375 | -7.180 | 0e+00 | 0e+00 | 1940.000 | 0.3605 | -7.359 | 0.0000 | 0.1910 | -2.3882 | 5126.37 | 979.260 | 2223.392 | 6.182e+06 | 0.1961 | 0.0000 | 1.0000 | 0.0000 | 6.1420 | 64.150 | 0.0000 | 5.4100 | -7.759 | 14.430 | 0e+00 | 8.114e-08 | 4.900e-11 | 2.142e-12 | 9.560e-01 | 4.518e-05 | -2.777 | 1.910e-01 | -6.879e-02 | 2.735e-11 | 2.228e-21 | ||
LPAL13_310039200 | LPAL13_310039200 | hypothetical protein | protein coding | LpaL13_31 | 1301745 | 1301972 | - | reverse | 31 | 228.0 | 227 | -2.348 | 0.0000 | -2.345 | 0.0000 | -2.711 | 0.0000 | 1.4340 | 3.7450 | 1.6362 | 0.2106 | -2.311 | -8.385 | 0e+00 | 0e+00 | 198.300 | 0.3819 | -6.150 | 0.0000 | 0.3053 | -1.7118 | 426.88 | 130.311 | 219.282 | 4.004e+04 | 0.3156 | 0.1979 | 0.8021 | 0.1979 | 2.8580 | 43.820 | 0.0000 | 2.0540 | -6.751 | 10.360 | 0e+00 | 1.849e-06 | 8.256e-08 | 7.702e-09 | 7.442e-01 | 8.324e-08 | -2.621 | 8.058e-02 | -3.074e-02 | 1.905e-09 | 6.879e-18 | ||
LPAL13_050016500 | LPAL13_050016500 | unspecified product | snoRNA encoding | LpaL13_05 | undefined | undefined | + | forward | 5 | 0.0 | undefined | -1.102 | 0.2008 | -1.091 | 0.2390 | -2.500 | 0.0021 | -1.4520 | 0.6502 | 3.8690 | 0.9718 | -2.103 | -4.491 | 1e-04 | 9e-04 | 30.300 | 0.6526 | -1.689 | 0.0913 | 0.7102 | -0.4937 | 47.76 | 33.919 | 38.073 | 4.052e+03 | 0.7470 | 0.7265 | 0.2735 | 0.7265 | 0.1482 | 2.528 | 0.1118 | -1.3560 | -4.029 | 0.496 | 2e-04 | 2.601e-03 | 2.133e-01 | 2.737e-01 | 2.860e-01 | 7.600e-04 | -1.524 | 3.228e-01 | -2.118e-01 | 6.775e-02 | 3.532e-03 | ||
LPAL13_340039700 | LPAL13_340039700 | snare domain containing protein, putative | protein coding | LpaL13_34 | 1248192 | 1248947 | - | reverse | 34 | 756.0 | 755 | -1.890 | 0.0000 | -1.889 | 0.0000 | -2.149 | 0.0000 | 4.6280 | 6.7120 | 0.7919 | 0.0938 | -2.084 | -10.970 | 0e+00 | 0e+00 | 1426.000 | 0.3010 | -6.278 | 0.0000 | 0.2803 | -1.8347 | 3280.43 | 919.659 | 1627.891 | 1.661e+06 | 0.2841 | 0.0000 | 1.0000 | 0.0000 | 5.6970 | 47.030 | 0.0000 | 5.2740 | -7.285 | 12.540 | 0e+00 | 2.533e-07 | 5.661e-08 | 2.434e-09 | 9.698e-01 | 1.500e-09 | -1.969 | 6.690e-03 | -3.397e-03 | 3.045e-10 | 7.844e-20 | ||
LPAL13_140019100 | LPAL13_140019100 | bt1 family, putative | protein coding | LpaL13_14 | 525164 | 525514 | + | forward | 14 | 351.0 | 350 | -2.085 | 0.0000 | -2.084 | 0.0000 | -2.270 | 0.0000 | 3.9610 | 6.0230 | 0.5051 | 0.5074 | -2.062 | -8.394 | 0e+00 | 0e+00 | 900.900 | 0.2929 | -7.119 | 0.0000 | 0.2498 | -2.0009 | 2157.85 | 539.110 | 1024.734 | 8.522e+05 | 0.2536 | 0.0000 | 1.0000 | 0.0000 | 5.0350 | 60.970 | 0.0000 | 4.5830 | -8.750 | 18.360 | 0e+00 | 1.675e-08 | 2.969e-10 | 8.398e-12 | 8.934e-01 | 2.258e-06 | -2.201 | 2.558e-02 | -1.162e-02 | 8.466e-13 | 5.626e-25 | ||
LPAL13_350073400 | LPAL13_350073400 | hypothetical protein | protein coding | LpaL13_35 | 2342701 | 2342883 | + | forward | 35 | 183.0 | 182 | -1.403 | 0.0040 | -1.396 | 0.0061 | -2.137 | 0.0002 | -0.2233 | 1.7810 | 1.2535 | 0.8080 | -2.004 | -5.986 | 0e+00 | 1e-04 | 50.190 | 0.4025 | -3.486 | 0.0005 | 0.3119 | -1.6806 | 120.30 | 37.519 | 62.352 | 7.255e+03 | 0.3353 | 0.0002 | 0.9998 | 0.0002 | 0.8256 | 11.230 | 0.0008 | 0.0804 | -5.000 | 3.719 | 0e+00 | 2.185e-04 | 3.963e-03 | 6.171e-03 | 9.791e-01 | 7.644e-05 | -1.602 | 1.952e-02 | -1.219e-02 | 4.335e-04 | 1.629e-07 | ||
LPAL13_170015400 | LPAL13_170015400 | hypothetical protein, conserved | protein coding | LpaL13_17 | 395975 | 396307 | + | forward | 17 | 333.0 | 332 | -1.739 | 0.0000 | -1.736 | 0.0000 | -2.077 | 0.0000 | 1.3560 | 3.2620 | 1.0759 | 0.1243 | -1.906 | -8.631 | 0e+00 | 0e+00 | 150.600 | 0.2986 | -5.825 | 0.0000 | 0.3481 | -1.5226 | 296.39 | 103.157 | 161.127 | 1.504e+04 | 0.3526 | 0.0000 | 1.0000 | 0.0000 | 2.4540 | 35.860 | 0.0000 | 1.9690 | -6.586 | 9.689 | 0e+00 | 2.759e-06 | 4.103e-07 | 2.448e-07 | 9.791e-01 | 4.953e-08 | -1.803 | 1.522e-02 | -8.438e-03 | 5.796e-09 | 1.373e-17 | ||
LPAL13_180013900 | LPAL13_180013900 | hypothetical protein | protein coding | LpaL13_18 | 351792 | 352085 | + | forward | 18 | 294.0 | 293 | -1.347 | 0.0038 | -1.342 | 0.0035 | -2.097 | 0.0001 | -0.2658 | 1.5920 | 1.0315 | 0.0777 | -1.858 | -8.927 | 0e+00 | 0e+00 | 46.230 | 0.3853 | -3.497 | 0.0005 | 0.4249 | -1.2348 | 78.10 | 33.181 | 46.657 | 8.576e+02 | 0.4295 | 0.0028 | 0.9972 | 0.0028 | 0.7518 | 12.620 | 0.0004 | 0.1474 | -5.270 | 4.540 | 0e+00 | 1.103e-04 | 3.900e-03 | 4.025e-03 | 9.791e-01 | 4.837e-08 | -1.537 | 1.419e-02 | -9.230e-03 | 2.844e-04 | 6.196e-08 | ||
LPAL13_350013200 | LPAL13_350013200 | hypothetical protein, conserved | protein coding | LpaL13_35 | 223837 | 224070 | + | forward | 35 | 234.0 | 233 | -1.320 | 0.0092 | -1.308 | 0.0146 | -2.058 | 0.0002 | -2.0020 | -0.1622 | 1.2185 | 0.6795 | -1.840 | -5.814 | 0e+00 | 1e-04 | 11.430 | 0.4142 | -3.186 | 0.0014 | 0.3789 | -1.4002 | 27.01 | 10.228 | 15.263 | 3.236e+02 | 0.4038 | 0.7229 | 0.2771 | 0.7229 | -1.1140 | 9.184 | 0.0024 | -1.9950 | -4.983 | 2.676 | 0e+00 | 2.315e-04 | 9.806e-03 | 1.698e-02 | 3.226e-01 | 8.757e-05 | -1.488 | 1.433e-02 | -9.631e-03 | 1.296e-03 | 1.501e-06 |
## Pull the stored DESeq2 MA plot for the sensitive vs. resistant contrast out of the
## combined table and hand it to pp() along with the target image file.
sus_ma <- sus_table[["plots"]][["sensitive_vs_resistant"]][["deseq_ma_plots"]][["plot"]]
pp(file = "images/sus_ma.png", image = sus_ma)
## Warning in pp(file = "images/sus_ma.png", image = sus_ma): There is no device to
## shut down.
## test <- ggplt(sus_ma)
Now let us look for ontology categories which are increased in the 2.3 samples followed by the 2.2 samples.
## Gene categories more represented in the 2.3 group.
## The input is the DESeq2 'ups' table of the first contrast in zy_table; it is selected
## by position ([[1]]), so this presumably means z23_vs_z22 -- TODO confirm.
zy_go_up <- sm(simple_goseq(sig_genes = zy_table[["significant"]][["deseq"]][["ups"]][[1]],
go_db = lp_go, length_db = lp_lengths))
## Gene categories more represented in the 2.2 group.
## Same call as above, using the DESeq2 'downs' table of the first contrast.
zy_go_down <- sm(simple_goseq(sig_genes = zy_table[["significant"]][["deseq"]][["downs"]][[1]],
go_db = lp_go, length_db = lp_lengths))
In the function ‘combined_de_tables()’ above, one of the tasks performed is to look at the agreement among DESeq2, limma, and edgeR. The following show a couple of these for the set of genes observed with |log2 fold-change| >= 1 (i.e. at least a two-fold change) and adjusted p-value <= 0.05.
## Venn diagrams of the (unweighted) agreement among methods for the first contrast,
## first for the increased genes and then for the decreased genes.
zy_table[["venns"]][[1]][["p_lfc1"]][["up_noweight"]]
zy_table[["venns"]][[1]][["p_lfc1"]][["down_noweight"]]
## Over-represented category p-value plots ('bpp_plot_over') from the two goseq results.
zy_go_up[["pvalue_plots"]][["bpp_plot_over"]]
zy_go_down[["pvalue_plots"]][["bpp_plot_over"]]
Remind myself, the data structures are (zy|sus)_(de|table|sig).
## Take the full DE table of each contrast and join them on the gene IDs (row names),
## so that only genes present in both tables remain.
zy_df <- zy_table[["data"]][["z23_vs_z22"]]
sus_df <- sus_table[["data"]][["sensitive_vs_resistant"]]
both_df <- merge(x = zy_df, y = sus_df, by = "row.names")
## Build the two-column frame for the scatter plot in a single step: merge() suffixes
## the zymodeme column with .x and the susceptibility column with .y.
plot_df <- data.frame(
  "z23_vs_z22" = both_df[["deseq_logfc.x"]],
  "sensitive_vs_resistant" = both_df[["deseq_logfc.y"]],
  row.names = as.character(both_df[["Row.names"]]),
  stringsAsFactors = FALSE)
compare <- plot_linear_scatter(plot_df)
## Warning in plot_multihistogram(df): NAs introduced by coercion
pp(file = "images/compare_sus_zy.png", image = compare$scatter)
## Warning in pp(file = "images/compare_sus_zy.png", image = compare$scatter):
## There is no device to shut down.
compare$cor
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = -244, df = 8549, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.9378 -0.9325
## sample estimates:
## cor
## -0.9352
Najib read me an email listing off the gene names associated with the zymodeme classification. I took those names and cross referenced them against the Leishmania panamensis gene annotations and found the following:
They are:
Given these 6 gene IDs (NH has two gene IDs associated with it), I can do some looking for specific differences among the various samples.
The following creates a colorspace (red to green) heatmap showing the observed expression of these genes in every sample.
## The six gene IDs behind the zymodeme classification (NH has two gene IDs).
## The earlier "other" placeholder matched no gene ID and left my_names with seven labels
## for the six genes which are kept, so it is no longer included.
my_genes <- c("LPAL13_120010900", "LPAL13_340013000", "LPAL13_000054100",
              "LPAL13_140006100", "LPAL13_180018500", "LPAL13_320022300")
## Human-readable labels, keyed by gene ID so that they can be looked up by row name.
my_names <- c("ALAT", "ASAT", "G6PD", "NHv1", "NHv2", "MPI")
names(my_names) <- my_genes
zymo_expt <- exclude_genes_expt(zy_norm, ids = my_genes, method = "keep")
## The rows of the subset are not guaranteed to come back in the order of my_genes, and
## the heatmap row labels are assigned by position.  Reorder the labels to follow the rows
## actually present so that every row carries the name of its own gene.
my_names <- unname(my_names[rownames(exprs(zymo_expt))])
## Before removal, there were 8551 genes, now there are 6.
## There are 59 samples which kept less than 90 percent counts.
## TMRC20001 TMRC20065 TMRC20005 TMRC20066 TMRC20039 TMRC20037 TMRC20038 TMRC20067
## 0.1313 0.1250 0.1321 0.1061 0.1301 0.1102 0.1131 0.1165
## TMRC20068 TMRC20041 TMRC20015 TMRC20009 TMRC20010 TMRC20016 TMRC20011 TMRC20012
## 0.1157 0.1182 0.1150 0.1137 0.1101 0.1062 0.1104 0.1209
## TMRC20013 TMRC20017 TMRC20014 TMRC20018 TMRC20019 TMRC20070 TMRC20020 TMRC20021
## 0.1208 0.1066 0.1092 0.1147 0.1225 0.1128 0.1103 0.1063
## TMRC20022 TMRC20024 TMRC20036 TMRC20069 TMRC20033 TMRC20026 TMRC20031 TMRC20076
## 0.1307 0.1126 0.1203 0.1164 0.1128 0.1386 0.1005 0.1203
## TMRC20073 TMRC20055 TMRC20079 TMRC20071 TMRC20078 TMRC20042 TMRC20058 TMRC20072
## 0.1228 0.1347 0.1269 0.1234 0.1341 0.1315 0.1182 0.1431
## TMRC20059 TMRC20048 TMRC20060 TMRC20077 TMRC20074 TMRC20063 TMRC20053 TMRC20052
## 0.1104 0.1033 0.1087 0.1221 0.1209 0.1168 0.1183 0.1106
## TMRC20064 TMRC20075 TMRC20051 TMRC20050 TMRC20049 TMRC20062 TMRC20080 TMRC20043
## 0.1140 0.1113 0.1283 0.1154 0.1394 0.1286 0.1155 0.1138
## TMRC20054 TMRC20046 TMRC20044
## 0.1278 0.1368 0.1338
zymo_heatmap <- plot_sample_heatmap(zymo_expt, row_label = my_names)
zymo_heatmap
In contrast, the following plots take the set of genes which are shared among all differential expression methods (|lfc| >= 1.0 and adjp <= 0.05) and use them to make categories of genes which are increased in 2.3 or 2.2.
shared_zymo <- intersect_significant(zy_table)
## Deleting the file excel/intersect_significant.xlsx before writing the tables.
up_shared <- shared_zymo[["ups"]][[1]][["data"]][["all"]]
rownames(up_shared)
## [1] "LPAL13_000033300" "LPAL13_000012000" "LPAL13_000012100" "LPAL13_000038400"
## [5] "LPAL13_000038500" "LPAL13_310031300" "LPAL13_340039600" "LPAL13_050005000"
## [9] "LPAL13_310035500" "LPAL13_310031000" "LPAL13_310039200" "LPAL13_350063000"
## [13] "LPAL13_140019300" "LPAL13_210015500" "LPAL13_340039700" "LPAL13_180013900"
## [17] "LPAL13_170015400" "LPAL13_250006300" "LPAL13_350073400" "LPAL13_290026200"
## [21] "LPAL13_350013200" "LPAL13_140019100" "LPAL13_000025600" "LPAL13_330024000"
## [25] "LPAL13_050016500" "LPAL13_230016400" "LPAL13_190021800" "LPAL13_320038700"
## [29] "LPAL13_240009700" "LPAL13_350012400" "LPAL13_330021800" "LPAL13_140019200"
## [33] "LPAL13_270034100" "LPAL13_210005000" "LPAL13_160014500" "LPAL13_000029000"
## [37] "LPAL13_350073200" "LPAL13_250025700" "LPAL13_230011200" "LPAL13_330021900"
## [41] "LPAL13_000052700" "LPAL13_230021300" "LPAL13_050009600" "LPAL13_160014100"
## [45] "LPAL13_230011500" "LPAL13_310028500" "LPAL13_310032500" "LPAL13_040007800"
## [49] "LPAL13_160014200" "LPAL13_230011400" "LPAL13_020006700" "LPAL13_000045100"
## [53] "LPAL13_230011300"
upshared_expt <- exclude_genes_expt(zy_norm, ids = rownames(up_shared), method = "keep")
## Before removal, there were 8551 genes, now there are 53.
## There are 59 samples which kept less than 90 percent counts.
## TMRC20001 TMRC20065 TMRC20005 TMRC20066 TMRC20039 TMRC20037 TMRC20038 TMRC20067
## 0.3932 0.4641 0.1453 0.4758 0.2091 0.5464 0.6423 0.3691
## TMRC20068 TMRC20041 TMRC20015 TMRC20009 TMRC20010 TMRC20016 TMRC20011 TMRC20012
## 0.4202 0.1913 0.4875 0.1747 0.4566 0.3736 0.1888 0.1534
## TMRC20013 TMRC20017 TMRC20014 TMRC20018 TMRC20019 TMRC20070 TMRC20020 TMRC20021
## 0.4250 0.2302 0.1952 0.4045 0.1674 0.4577 0.1705 0.4480
## TMRC20022 TMRC20024 TMRC20036 TMRC20069 TMRC20033 TMRC20026 TMRC20031 TMRC20076
## 0.1558 0.1969 0.2370 0.2122 0.2186 0.1409 0.1309 0.2037
## TMRC20073 TMRC20055 TMRC20079 TMRC20071 TMRC20078 TMRC20042 TMRC20058 TMRC20072
## 0.5922 0.2213 0.5851 0.5850 0.2494 0.1975 0.7807 0.2338
## TMRC20059 TMRC20048 TMRC20060 TMRC20077 TMRC20074 TMRC20063 TMRC20053 TMRC20052
## 0.3495 0.4065 0.1451 0.1461 0.2658 0.1908 0.2287 0.5417
## TMRC20064 TMRC20075 TMRC20051 TMRC20050 TMRC20049 TMRC20062 TMRC20080 TMRC20043
## 0.5423 0.3757 0.7146 0.2298 0.2052 0.7305 0.5693 0.5051
## TMRC20054 TMRC20046 TMRC20044
## 0.6412 0.1828 0.2029
We can plot a quick heatmap to get a sense of the differences observed between the genes which are different between the two zymodemes.
## Row labels are applied by position, so take them from the subset being plotted rather
## than from rownames(up_shared): the shared table is not guaranteed to list the genes in
## the same order as the rows of upshared_expt, which would attach labels to the wrong rows.
high_23_heatmap <- plot_sample_heatmap(upshared_expt,
                                       row_label = rownames(exprs(upshared_expt)))
high_23_heatmap
down_shared <- shared_zymo[["downs"]][[1]][["data"]][["all"]]
downshared_expt <- exclude_genes_expt(zy_norm, ids = rownames(down_shared), method = "keep")
## Before removal, there were 8551 genes, now there are 68.
## There are 59 samples which kept less than 90 percent counts.
## TMRC20001 TMRC20065 TMRC20005 TMRC20066 TMRC20039 TMRC20037 TMRC20038 TMRC20067
## 0.2208 0.1912 0.6726 0.2278 0.6597 0.2115 0.2020 0.2399
## TMRC20068 TMRC20041 TMRC20015 TMRC20009 TMRC20010 TMRC20016 TMRC20011 TMRC20012
## 0.2035 0.6927 0.1899 0.6407 0.1712 0.2111 0.5737 0.5573
## TMRC20013 TMRC20017 TMRC20014 TMRC20018 TMRC20019 TMRC20070 TMRC20020 TMRC20021
## 0.1683 0.6603 0.6543 0.1620 0.6572 0.1870 0.6903 0.1571
## TMRC20022 TMRC20024 TMRC20036 TMRC20069 TMRC20033 TMRC20026 TMRC20031 TMRC20076
## 0.6812 0.7222 0.6814 0.6993 0.7260 0.7025 0.6176 0.6103
## TMRC20073 TMRC20055 TMRC20079 TMRC20071 TMRC20078 TMRC20042 TMRC20058 TMRC20072
## 0.1959 0.7163 0.1906 0.1730 0.5351 0.5511 0.2224 0.5397
## TMRC20059 TMRC20048 TMRC20060 TMRC20077 TMRC20074 TMRC20063 TMRC20053 TMRC20052
## 0.1432 0.1563 0.7657 0.5766 0.6658 0.6365 0.5752 0.1801
## TMRC20064 TMRC20075 TMRC20051 TMRC20050 TMRC20049 TMRC20062 TMRC20080 TMRC20043
## 0.1963 0.1821 0.1913 0.6169 0.6990 0.1894 0.1555 0.1740
## TMRC20054 TMRC20046 TMRC20044
## 0.2037 0.6355 0.6241
## Row labels are applied by position, so take them from the subset being plotted rather
## than from rownames(down_shared): the shared table is not guaranteed to list the genes in
## the same order as the rows of downshared_expt, which would attach labels to the wrong rows.
high_22_heatmap <- plot_sample_heatmap(downshared_expt,
                                       row_label = rownames(exprs(downshared_expt)))
high_22_heatmap
Now I will combine our previous samples and our new samples in the hopes of finding variant positions which help elucidate currently unknown aspects of either group via their clustering to known samples from the other group. In other words, we do not know the zymodeme annotations for the old samples nor the strain identities (or the shortcut ‘chronic vs. self-healing’) for the new samples. I hope to make educated guesses given the variant profiles. There are some differences in how the previous and current data sets were analyzed (though I have since redone the old samples so it should be trivial to remove those differences now).
I added our 2016 data to a specific TMRC2 sample sheet, dated 20191203. Thus I will load the data here. That previous data was mapped using tophat, so I will also need to make some changes to the gene names to accommodate the two mappings.
old_expt <- create_expt("sample_sheets/tmrc2_samples_20191203.xlsx",
file_column = "tophat2file")
## Reading the sample metadata.
## Dropped 13 rows from the sample metadata because they were blank.
## The sample definitions comprises: 50 rows(samples) and 38 columns(metadata fields).
## Warning in create_expt("sample_sheets/tmrc2_samples_20191203.xlsx", file_column
## = "tophat2file"): Some samples were removed when cross referencing the samples
## against the count data.
## Matched 8841 annotations and counts.
## Bringing together the count matrix and gene information.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 8841 rows and 33 columns.
## Strip the mapper-specific decorations from the gene IDs of both data sets so that the
## same gene carries the same name in each.
## New samples: remove a leading 'exon_' and then a trailing '.E1'.
tt <- lp_expt[["expressionset"]]
rownames(tt) <- gsub(pattern = "\\.E1$", replacement = "",
                     x = gsub(pattern = "^exon_", replacement = "", x = rownames(tt)))
lp_expt[["expressionset"]] <- tt
## Old samples: remove a leading 'exon_' and then a trailing '.1'.
tt <- old_expt[["expressionset"]]
rownames(tt) <- gsub(pattern = "\\.1$", replacement = "",
                     x = gsub(pattern = "^exon_", replacement = "", x = rownames(tt)))
old_expt[["expressionset"]] <- tt
## The temporary copy is no longer needed.
rm(tt)
One other important caveat, we have a group of new samples which have not yet run through the variant search pipeline, so I need to remove them from consideration. Though it looks like they finished overnight…
## The next line drops the samples which are missing the SNP pipeline.
lp_snp <- subset_expt(lp_expt, subset="!is.na(pData(lp_expt)[['bcftable']])")
## subset_expt(): There were 71, now there are 67 samples.
new_snps <- sm(count_expt_snps(lp_snp, annot_column = "bcftable"))
old_snps <- sm(count_expt_snps(old_expt, annot_column = "bcftable", snp_column = 2))
nonzero_snps <- exprs(new_snps) != 0
colSums(nonzero_snps)
## tmrc20001 tmrc20065 tmrc20005 tmrc20007 tmrc20008 tmrc20027 tmrc20028 tmrc20032
## 31443 73441 1965 2133 2786 291022 290524 116072
## tmrc20040 tmrc20066 tmrc20039 tmrc20037 tmrc20038 tmrc20067 tmrc20068 tmrc20041
## 34648 69275 4048 79861 80805 75518 74157 34972
## tmrc20015 tmrc20009 tmrc20010 tmrc20016 tmrc20011 tmrc20012 tmrc20013 tmrc20017
## 95041 7410 87192 95605 5297 10 89292 6689
## tmrc20014 tmrc20018 tmrc20019 tmrc20070 tmrc20020 tmrc20021 tmrc20022 tmrc20025
## 6440 82235 3021 78538 3209 88148 2608 279253
## tmrc20024 tmrc20036 tmrc20069 tmrc20033 tmrc20026 tmrc20031 tmrc20073 tmrc20055
## 4981 32060 3304 4443 2851 3150 78772 2819
## tmrc20079 tmrc20071 tmrc20078 tmrc20042 tmrc20058 tmrc20072 tmrc20059 tmrc20048
## 78013 74977 3287 2490 78849 31425 77883 76892
## tmrc20057 tmrc20056 tmrc20060 tmrc20077 tmrc20063 tmrc20053 tmrc20052 tmrc20064
## 32003 2977 2770 3091 1592 2836 77467 75335
## tmrc20051 tmrc20050 tmrc20062 tmrc20080 tmrc20043 tmrc20054 tmrc20046 tmrc20047
## 75845 3399 75667 81666 77526 77155 31433 75963
## tmrc20044 tmrc20045 tmrc20061
## 3132 30107 96058
both_snps <- combine_expts(new_snps, old_snps)
both_norm <- sm(normalize_expt(both_snps, transform = "log2", convert = "cpm", filter = TRUE))
## strains <- both_norm[["design"]][["strain"]]
both_strain <- set_expt_conditions(both_norm, fact = "strain")
The data structure ‘both_norm’ now contains our 2016 data along with the newer data collected since 2019.
The following plot shows the SNP profiles of all samples (old and new) where the colors at the top show either the 2.2 strains (orange), 2.3 strains (green), the previous samples (purple), or the various lab strains (pink etc).
old_new_variant_heatmap <- plot_disheat(both_norm)
pp(file = "images/raw_snp_disheat.png", image = old_new_variant_heatmap,
height = 12, width = 12)
## Warning in pp(file = "images/raw_snp_disheat.png", image =
## old_new_variant_heatmap, : There is no device to shut down.
The function get_snp_sets() takes the provided metadata factor (in this case ‘condition’) and looks for variants which are exclusive to each element in it. In this case, this is looking for differences between 2.2 and 2.3, as well as the set shared among them.
snp_sets <- get_snp_sets(both_snps, factor = "condition")
## The factor z2.3 has 27 rows.
## The factor z2.2 has 28 rows.
## The factor unknown has 7 rows.
## The factor z2.1 has 3 rows.
## The factor z2.4 has only 1 row.
## The factor null has only 1 row.
## The factor sh has 13 rows.
## The factor chr has 14 rows.
## The factor inf has 6 rows.
## Iterating over 727 elements.
## combine() refuses to join expressionsets whose annotation slots differ, and the old
## data carries org.Hs.eg.db instead of the L.panamensis annotation package.  Harmonize
## the slot on a copy (old_expt itself is left untouched) before combining.
## NOTE(review): this assumes the old samples really were quantified against the same
## L.panamensis annotation as lp_expt and that org.Hs.eg.db is only a default -- confirm.
old_matched <- old_expt
Biobase::annotation(old_matched[["expressionset"]]) <-
  Biobase::annotation(lp_expt[["expressionset"]])
both_expt <- combine_expts(lp_expt, old_matched)
## Error in combine(exp1, exp2): objects have different annotations: org.Lpanamensis.MHOMCOL81L13.v46.eg.db, org.Hs.eg.db
snp_genes <- sm(snps_vs_genes(both_expt, snp_sets, expt_name_col = "chromosome"))
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'fData': object 'both_expt' not found
## I think we have some metrics here we can plot...
snp_subset <- sm(snp_subset_genes(
both_expt, both_snps,
genes = c("LPAL13_120010900", "LPAL13_340013000", "LPAL13_000054100",
"LPAL13_140006100", "LPAL13_180018500", "LPAL13_320022300")))
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'fData': object 'both_expt' not found
zymo_heat <- plot_sample_heatmap(snp_subset, row_label = rownames(exprs(snp_subset)))
## Error in plot_sample_heatmap(snp_subset, row_label = rownames(exprs(snp_subset))): object 'snp_subset' not found
zymo_heat
## Error in eval(expr, envir, enclos): object 'zymo_heat' not found
Didn’t I create a set of densities by chromosome? Oh I think they come in from get_snp_sets()
clinical_sets <- get_snp_sets(new_snps, factor = "clinicalresponse")
## The factor cure has 26 rows.
## The factor failure has 21 rows.
## The factor laboratory line has only 1 row.
## The factor laboratory line miltefosine resistant has only 1 row.
## The factor nd has 14 rows.
## The factor reference strain has 4 rows.
## Iterating over 695 elements.
## Per-element variant densities as returned by get_snp_sets().
density_vec <- clinical_sets[["density"]]
## Keep only the elements whose names contain 'LpaL'; the match is case-sensitive, so
## names such as 'LPAL13_SCAF...' are presumably left out on purpose -- TODO confirm.
chromosome_idx <- grep(pattern = "LpaL", x = names(density_vec))
density_df <- as.data.frame(density_vec[chromosome_idx])
density_df[["chr"]] <- rownames(density_df)
colnames(density_df) <- c("density_vec", "chr")
## aes_string() is deprecated in ggplot2; use aes() with the column names directly and
## qualify every ggplot2 call the same way.
ggplot2::ggplot(density_df, ggplot2::aes(x = chr, y = density_vec)) +
  ggplot2::geom_col() +
  ggplot2::theme(axis.text = ggplot2::element_text(size = 10, colour = "black"),
                 axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5))
## clinical_written <- write_variants(new_snps)
## Count variants per gene and attach the gene annotations by gene ID.
clinical_genes <- sm(snps_vs_genes(lp_expt, clinical_sets, expt_name_col = "chromosome"))
snp_density <- merge(x = as.data.frame(clinical_genes[["summary_by_gene"]]),
                     y = as.data.frame(fData(lp_expt)),
                     by = "row.names")
## Columns are picked by position and renamed immediately afterwards.
## NOTE(review): positions 4 and 15 are assumed to be the product and length columns of
## the merged table -- confirm if the annotation columns ever change.
snp_density <- snp_density[, c(1, 2, 4, 15)]
colnames(snp_density) <- c("name", "snps", "product", "length")
snp_density[["product"]] <- tolower(snp_density[["product"]])
snp_density[["length"]] <- as.numeric(snp_density[["length"]])
## Variants per unit of gene length, sorted from most to least dense.
snp_density[["density"]] <- snp_density[["snps"]] / snp_density[["length"]]
snp_idx <- order(snp_density[["density"]], decreasing = TRUE)
snp_density <- snp_density[snp_idx, ]
## Drop every gene whose (lower-cased) product mentions one of these families; a single
## alternation pattern removes the same rows as testing the patterns one at a time.
removers <- c("amastin", "gp63", "leishmanolysin")
drop_idx <- grepl(pattern = paste(removers, collapse = "|"), x = snp_density[["product"]])
snp_density <- snp_density[!drop_idx, ]
## Filter these for [A|a]mastin gp63 Leishmanolysin
clinical_snps <- snps_intersections(lp_expt, clinical_sets, chr_column = "chromosome")
fail_ref_snps <- as.data.frame(clinical_snps[["inters"]][["failure, reference strain"]])
cure_snps <- as.data.frame(clinical_snps[["inters"]][["cure"]])
head(fail_ref_snps)
## seqnames start end width strand
## chr_LpaL13-10_pos_327353_ref_T_alt_C LpaL13-10 327353 327354 2 +
## chr_LpaL13-13_pos_167047_ref_G_alt_C LpaL13-13 167047 167048 2 +
## chr_LpaL13-15_pos_42885_ref_A_alt_G LpaL13-15 42885 42886 2 +
## chr_LpaL13-20.1_pos_111781_ref_T_alt_C LpaL13-20.1 111781 111782 2 +
## chr_LpaL13-20.1_pos_85158_ref_C_alt_G LpaL13-20.1 85158 85159 2 +
## chr_LpaL13-20.2_pos_48545_ref_T_alt_C LpaL13-20.2 48545 48546 2 +
head(cure_snps)
## seqnames start end width strand
## chr_LpaL13-08_pos_184791_ref_T_alt_A LpaL13-08 184791 184792 2 +
## chr_LpaL13-20.1_pos_369935_ref_C_alt_T LpaL13-20.1 369935 369936 2 +
## chr_LpaL13-20.1_pos_370282_ref_C_alt_T LpaL13-20.1 370282 370283 2 +
## chr_LpaL13-20.1_pos_371356_ref_T_alt_C LpaL13-20.1 371356 371357 2 +
## chr_LpaL13-20.1_pos_380785_ref_A_alt_G LpaL13-20.1 380785 380786 2 +
## chr_LpaL13-20.1_pos_382801_ref_A_alt_C LpaL13-20.1 382801 382802 2 +
annot <- fData(lp_expt)
## Remember the feature order of the expressionset: merge() sorts its result by the key,
## and the annotations must go back in exactly the order they came out.
feature_ids <- rownames(annot)
## Per-gene variant summaries for the 'cure' set and the 'failure, reference strain' set,
## joined on gene ID.
clinical_interest <- as.data.frame(clinical_snps[["gene_summaries"]][["cure"]])
clinical_interest <- merge(clinical_interest,
                           as.data.frame(clinical_snps[["gene_summaries"]][["failure, reference strain"]]),
                           by = "row.names")
rownames(clinical_interest) <- clinical_interest[["Row.names"]]
clinical_interest[["Row.names"]] <- NULL
colnames(clinical_interest) <- c("cure_snps","fail_snps")
## all.x = TRUE keeps every annotated gene; genes without a summary get NA in the two new
## columns instead of silently disappearing from the annotations.
annot <- merge(annot, clinical_interest, by = "row.names", all.x = TRUE)
rownames(annot) <- annot[["Row.names"]]
annot[["Row.names"]] <- NULL
## Restore the original feature order so each annotation row stays attached to its own
## gene when it is written back.
annot <- annot[feature_ids, , drop = FALSE]
fData(lp_expt$expressionset) <- annot
The heatmap produced here should show the variants only for the zymodeme genes.
I am thinking that if we find clusters of locations which are variant, that might provide some PCR testing possibilities.
## Keep only the samples whose condition is z2.2 or z2.3; this drops the
## 2.1, 2.4, unknown, and null samples.
pruned_snps <- subset_expt(new_snps, subset="condition=='z2.2'|condition=='z2.3'")
## subset_expt(): There were 67, now there are 55 samples.
## Build the variant sets using the zymodemecategorical metadata column
## as the grouping factor.
new_sets <- get_snp_sets(pruned_snps, factor = "zymodemecategorical")
## The factor z22 has 28 rows.
## The factor z23 has 27 rows.
## Iterating over 695 elements.
summary(new_sets)
## Length Class Mode
## medians 3 data.frame list
## possibilities 2 -none- character
## intersections 3 -none- list
## chr_data 695 -none- list
## set_names 4 -none- list
## invert_names 4 -none- list
## density 695 -none- numeric
## Intersection keys used below (presumably one digit per factor level, in
## the order z22, z23 -- confirm against new_sets[["set_names"]]):
## 10: 2.2
## 01: 2.3
summary(new_sets[["intersections"]][["10"]])
## Length Class Mode
## 890 character character
summary(new_sets[["intersections"]][["01"]])
## Length Class Mode
## 76189 character character
Thus we see that there are 890 variants associated with 2.2 and 76,189 associated with 2.3.
The following function uses the positional data to look for sequential mismatches associated with zymodeme in the hopes that there will be some regions which would provide good potential targets for a PCR-based assay.
#' Find runs of closely spaced variants belonging to one intersection set.
#'
#' The variant names of the chosen set are parsed into chromosome and
#' position, sorted, and the distance from each variant to the previous one
#' on the same chromosome is computed.  Consecutive distances which are all
#' <= `maximum_separation` form a run; the last variant of every run
#' containing at least `minimum` such distances is returned.
#'
#' Side effects: the variants lying exactly 1, 2, and 3 nt after their
#' predecessor are written to doubles.csv, one_away.csv, and two_away.csv in
#' the working directory, the head of the distance table is printed, and two
#' progress messages are emitted.
#'
#' @param snp_sets List with an `intersections` element (a list of character
#'   vectors of names shaped like chr_<chromosome>_pos_<position>_ref_...)
#'   and a `set_names` element mapping the intersection keys to labels.
#' @param conditions Either a numeric index into `intersections` or one of
#'   the labels in `set_names`.  NULL selects the first intersection.
#' @param minimum Smallest number of consecutive short distances that makes
#'   a run worth reporting (a run of k distances spans k + 1 variants).
#' @param maximum_separation Largest distance, in nucleotides, between
#'   neighboring variants of one run.
#' @return Data frame with columns `chr` and `pos`, one row per reported run
#'   (its last variant), with the variant names as row names.  The attribute
#'   "amplicon_candidates" holds the tightly spaced variants (distance 1-3)
#'   which sit 200-1000 nt after the previous tightly spaced variant and
#'   whose reference base is G or C.
sequential_variants <- function(snp_sets, conditions = NULL, minimum = 3, maximum_separation = 3) {
  ## Distance from each variant to the previous variant on the same
  ## chromosome.  The input must be sorted by chromosome, then position.
  ## The first variant of a chromosome has no predecessor, so it gets Inf
  ## rather than its own coordinate; otherwise a variant within a few
  ## nucleotides of the chromosome start would look like part of a cluster.
  neighbor_distance <- function(chr, pos) {
    n <- length(pos)
    dist <- rep(Inf, n)
    if (n > 1) {
      same_chr <- c(FALSE, chr[-1] == chr[-n])
      gaps <- c(Inf, diff(pos))
      dist[same_chr] <- gaps[same_chr]
    }
    dist
  }

  if (is.null(conditions)) {
    conditions <- 1
  }
  intersection_sets <- snp_sets[["intersections"]]
  intersection_names <- snp_sets[["set_names"]]
  if (is.numeric(conditions)) {
    chosen_intersection <- conditions
  } else {
    intersection_idx <- which(intersection_names == conditions)
    if (length(intersection_idx) != 1) {
      stop("Expected exactly one set named '", conditions, "', found ",
           length(intersection_idx), ".", call. = FALSE)
    }
    chosen_intersection <- names(intersection_names)[intersection_idx]
  }
  possible_positions <- intersection_sets[[chosen_intersection]]

  ## Parse chromosome and position out of the variant names.
  position_table <- data.frame(row.names = possible_positions)
  pat <- "^chr_(.+)_pos_(.+)_ref_.*$"
  position_table[["chr"]] <- gsub(pattern = pat, replacement = "\\1", x = rownames(position_table))
  position_table[["pos"]] <- as.numeric(gsub(pattern = pat, replacement = "\\2", x = rownames(position_table)))

  ## Nothing to search: return the same shape without touching the disk.
  if (nrow(position_table) == 0) {
    message("There are 0 candidate regions.")
    wanted <- position_table
    position_table[["dist"]] <- numeric(0)
    position_table[["dist_pair"]] <- numeric(0)
    attr(wanted, "amplicon_candidates") <- position_table
    return(wanted)
  }

  position_idx <- order(position_table[, "chr"], position_table[, "pos"])
  position_table <- position_table[position_idx, ]
  position_table[["dist"]] <- neighbor_distance(position_table[["chr"]],
                                                position_table[["pos"]])

  ## Variants directly adjacent to, one base away from, and two bases away
  ## from their predecessor; kept on disk for manual inspection.
  doubles <- position_table[position_table[["dist"]] == 1, ]
  write.csv(doubles, "doubles.csv")
  one_away <- position_table[position_table[["dist"]] == 2, ]
  write.csv(one_away, "one_away.csv")
  two_away <- position_table[position_table[["dist"]] == 3, ]
  write.csv(two_away, "two_away.csv")

  ## Among those tightly spaced variants, look for pairs far enough apart to
  ## flank an amplicon, keeping only the ones with a G or C reference base.
  combined <- rbind(doubles, one_away, two_away)
  position_idx <- order(combined[, "chr"], combined[, "pos"])
  combined <- combined[position_idx, ]
  combined[["dist_pair"]] <- neighbor_distance(combined[["chr"]], combined[["pos"]])
  dist_pair_maximum <- 1000
  dist_pair_minimum <- 200
  dist_pair_idx <- combined[["dist_pair"]] <= dist_pair_maximum &
    combined[["dist_pair"]] >= dist_pair_minimum
  remaining <- combined[dist_pair_idx, ]
  no_weak_idx <- grepl(pattern="ref_(G|C)", x=rownames(remaining))
  remaining <- remaining[no_weak_idx, ]

  print(head(table(position_table[["dist"]])))
  sequentials <- position_table[["dist"]] <= maximum_separation
  message("There are ", sum(sequentials), " candidate regions.")

  ## Run-length encode the short-distance flags: every run of TRUE is one
  ## cluster.  Record its length on the last row of the run, 0 elsewhere.
  rle_result <- rle(sequentials)
  rle_values <- rle_result[["values"]]
  rle_lengths <- rle_result[["lengths"]]
  run_ends <- cumsum(rle_lengths)
  last_sequential <- numeric(nrow(position_table))
  last_sequential[run_ends[rle_values]] <- rle_lengths[rle_values]
  position_table[["last_sequential"]] <- last_sequential
  message("The maximum sequential set is: ", max(last_sequential), ".")

  wanted <- position_table[last_sequential >= minimum, c("chr", "pos")]
  attr(wanted, "amplicon_candidates") <- remaining
  return(wanted)
}
## Zymodeme 2.2: report every run of variants at most 2 nt apart, even a
## single pair (minimum = 1).
zymo22_sequentials <- sequential_variants(
  new_sets,
  conditions = "z22",
  minimum = 1,
  maximum_separation = 2)
dim(zymo22_sequentials)
## 7 candidate regions for zymodeme 2.2 -- thus I am betting that the reference strain is a 2.2
## Zymodeme 2.3: same spacing, but require at least two consecutive short
## distances per run.
zymo23_sequentials <- sequential_variants(
  new_sets,
  conditions = "z23",
  minimum = 2,
  maximum_separation = 2)
dim(zymo23_sequentials)
## In contrast, there are lots (587) of interesting regions for 2.3!
The first 4 candidate regions from my set of `remaining`:

* Chr, Pos., Distance
* LpaL13-15 238433 448
* LpaL13-18 142844 613
* LpaL13-29 830342 252
* LpaL13-33 1331507 843
Let's define a couple of terms:

* Third: Each of the 4 above positions.
* Second: Third - Distance
* End: Third + PrimerLen
* Start: Second - PrimerLen
In each instance, these are the last positions, so we want to grab three things:
## For each candidate: "third" is the position of the last variant, "second"
## lies one amplicon length upstream of it, and the window runs from
## primer_length before "second" to primer_length after "third".
## NOTE(review): subseq() includes both end points, so the 5' and 3' spans
## below are primer_length + 1 nt wide -- confirm that is intended.

## Candidate 1: LpaL13-15 238433 448
primer_length <- 22
amplicon_length <- 448
first_candidate_chr <- genome[["LpaL13_15"]]
first_candidate_third <- 238433
first_candidate_second <- first_candidate_third - amplicon_length
first_candidate_start <- first_candidate_second - primer_length
first_candidate_end <- first_candidate_third + primer_length
first_candidate_region <- subseq(first_candidate_chr,
                                 start = first_candidate_start,
                                 end = first_candidate_end)
first_candidate_region
first_candidate_5p <- subseq(first_candidate_chr,
                             start = first_candidate_start,
                             end = first_candidate_second)
as.character(first_candidate_5p)
first_candidate_3p <- spgs::reverseComplement(
  subseq(first_candidate_chr,
         start = first_candidate_third,
         end = first_candidate_end))
first_candidate_3p

## Candidate 2: LpaL13-18 142844 613
primer_length <- 22
amplicon_length <- 613
second_candidate_chr <- genome[["LpaL13_18"]]
second_candidate_third <- 142844
second_candidate_second <- second_candidate_third - amplicon_length
second_candidate_start <- second_candidate_second - primer_length
second_candidate_end <- second_candidate_third + primer_length
second_candidate_region <- subseq(second_candidate_chr,
                                  start = second_candidate_start,
                                  end = second_candidate_end)
second_candidate_region
second_candidate_5p <- subseq(second_candidate_chr,
                              start = second_candidate_start,
                              end = second_candidate_second)
as.character(second_candidate_5p)
second_candidate_3p <- spgs::reverseComplement(
  subseq(second_candidate_chr,
         start = second_candidate_third,
         end = second_candidate_end))
second_candidate_3p

## Candidate 3: LpaL13-29 830342 252
primer_length <- 22
amplicon_length <- 252
third_candidate_chr <- genome[["LpaL13_29"]]
third_candidate_third <- 830342
third_candidate_second <- third_candidate_third - amplicon_length
third_candidate_start <- third_candidate_second - primer_length
third_candidate_end <- third_candidate_third + primer_length
third_candidate_region <- subseq(third_candidate_chr,
                                 start = third_candidate_start,
                                 end = third_candidate_end)
third_candidate_region
third_candidate_5p <- subseq(third_candidate_chr,
                             start = third_candidate_start,
                             end = third_candidate_second)
as.character(third_candidate_5p)
third_candidate_3p <- spgs::reverseComplement(
  subseq(third_candidate_chr,
         start = third_candidate_third,
         end = third_candidate_end))
third_candidate_3p
## This one is a polypyrimidine tract, so it is a poor primer target.
## It is still interesting if the mutations disrupt the tract.

## Candidate 4: LpaL13-33 1331507 843
primer_length <- 22
amplicon_length <- 843
fourth_candidate_chr <- genome[["LpaL13_33"]]
fourth_candidate_third <- 1331507
fourth_candidate_second <- fourth_candidate_third - amplicon_length
fourth_candidate_start <- fourth_candidate_second - primer_length
fourth_candidate_end <- fourth_candidate_third + primer_length
fourth_candidate_region <- subseq(fourth_candidate_chr,
                                  start = fourth_candidate_start,
                                  end = fourth_candidate_end)
fourth_candidate_region
fourth_candidate_5p <- subseq(fourth_candidate_chr,
                              start = fourth_candidate_start,
                              end = fourth_candidate_second)
as.character(fourth_candidate_5p)
fourth_candidate_3p <- spgs::reverseComplement(
  subseq(fourth_candidate_chr,
         start = fourth_candidate_third,
         end = fourth_candidate_end))
fourth_candidate_3p
I made a fun little function which should find regions which have lots of variants associated with a given experimental factor.
## Restrict the experiment to the z2.2 and z2.3 samples.
pheno <- subset_expt(lp_expt, subset = "condition=='z2.2'|condition=='z2.3'")
## subset_expt(): There were 71, now there are 59 samples.
## Drop the samples with no entry in the bcftable metadata column.  The
## subset string refers to the object by its name, so it only works while
## the variable is called pheno.
pheno <- subset_expt(pheno, subset = "!is.na(pData(pheno)[['bcftable']])")
## subset_expt(): There were 59, now there are 55 samples.
## Count the variants using the files named in the bcftable column; sm()
## presumably silences the progress output -- confirm.
pheno_snps <- sm(count_expt_snps(pheno, annot_column = "bcftable"))
## Search for variant-dense regions and extract primer candidates.
## NOTE(review): the genome package and GFF here are TriTrypDB release 53,
## while the introduction of this document cites releases 36 and 46 --
## confirm the coordinates and gene IDs are compatible.
fun_stuff <- snp_density_primers(pheno_snps,
bsgenome = "BSGenome.Leishmania.panamensis.MHOMCOL81L13.v53",
gff = "reference/TriTrypDB-53_LpanamensisMHOMCOL81L13.gff")
## Started with 154019, after cutoff there are 76569 rows left.
## Starting chromosome: LpaL13_01.
## Starting chromosome: LpaL13_02.
## Starting chromosome: LpaL13_03.
## Starting chromosome: LpaL13_04.
## Starting chromosome: LpaL13_05.
## Starting chromosome: LpaL13_06.
## Starting chromosome: LpaL13_07.
## Starting chromosome: LpaL13_08.
## Starting chromosome: LpaL13_09.
## Starting chromosome: LpaL13_10.
## Starting chromosome: LpaL13_11.
## Starting chromosome: LpaL13_12.
## Starting chromosome: LpaL13_13.
## Starting chromosome: LpaL13_14.
## Starting chromosome: LpaL13_15.
## Starting chromosome: LpaL13_16.
## Starting chromosome: LpaL13_17.
## Starting chromosome: LpaL13_18.
## Starting chromosome: LpaL13_19.
## Starting chromosome: LpaL13_20.1.
## Starting chromosome: LpaL13_20.2.
## Starting chromosome: LpaL13_21.
## Starting chromosome: LpaL13_22.
## Starting chromosome: LpaL13_23.
## Starting chromosome: LpaL13_24.
## Starting chromosome: LpaL13_25.
## Starting chromosome: LpaL13_26.
## Starting chromosome: LpaL13_27.
## Starting chromosome: LpaL13_28.
## Starting chromosome: LpaL13_29.
## Starting chromosome: LpaL13_30.
## Starting chromosome: LpaL13_31.
## Starting chromosome: LpaL13_32.
## Starting chromosome: LpaL13_33.
## Starting chromosome: LpaL13_34.
## Starting chromosome: LpaL13_35.
## Starting chromosome: LPAL13_SCAF000001.
## Starting chromosome: LPAL13_SCAF000002.
## Starting chromosome: LPAL13_SCAF000003.
## Starting chromosome: LPAL13_SCAF000005.
## Starting chromosome: LPAL13_SCAF000009.
## Starting chromosome: LPAL13_SCAF000010.
## Starting chromosome: LPAL13_SCAF000011.
## Starting chromosome: LPAL13_SCAF000014.
## Starting chromosome: LPAL13_SCAF000015.
## Starting chromosome: LPAL13_SCAF000017.
## Starting chromosome: LPAL13_SCAF000018.
## Starting chromosome: LPAL13_SCAF000019.
## Starting chromosome: LPAL13_SCAF000021.
## Starting chromosome: LPAL13_SCAF000022.
## Starting chromosome: LPAL13_SCAF000026.
## Starting chromosome: LPAL13_SCAF000027.
## Starting chromosome: LPAL13_SCAF000029.
## Starting chromosome: LPAL13_SCAF000032.
## Starting chromosome: LPAL13_SCAF000033.
## Starting chromosome: LPAL13_SCAF000035.
## Starting chromosome: LPAL13_SCAF000036.
## Starting chromosome: LPAL13_SCAF000037.
## Starting chromosome: LPAL13_SCAF000039.
## Starting chromosome: LPAL13_SCAF000041.
## Starting chromosome: LPAL13_SCAF000042.
## Starting chromosome: LPAL13_SCAF000043.
## Starting chromosome: LPAL13_SCAF000044.
## Starting chromosome: LPAL13_SCAF000047.
## Starting chromosome: LPAL13_SCAF000048.
## Starting chromosome: LPAL13_SCAF000049.
## Starting chromosome: LPAL13_SCAF000050.
## Starting chromosome: LPAL13_SCAF000051.
## Starting chromosome: LPAL13_SCAF000052.
## Starting chromosome: LPAL13_SCAF000053.
## Starting chromosome: LPAL13_SCAF000054.
## Starting chromosome: LPAL13_SCAF000056.
## Starting chromosome: LPAL13_SCAF000058.
## Starting chromosome: LPAL13_SCAF000062.
## Starting chromosome: LPAL13_SCAF000063.
## Starting chromosome: LPAL13_SCAF000064.
## Starting chromosome: LPAL13_SCAF000065.
## Starting chromosome: LPAL13_SCAF000066.
## Starting chromosome: LPAL13_SCAF000067.
## Starting chromosome: LPAL13_SCAF000070.
## Starting chromosome: LPAL13_SCAF000071.
## Starting chromosome: LPAL13_SCAF000073.
## Starting chromosome: LPAL13_SCAF000075.
## Starting chromosome: LPAL13_SCAF000076.
## Starting chromosome: LPAL13_SCAF000078.
## Starting chromosome: LPAL13_SCAF000080.
## Starting chromosome: LPAL13_SCAF000084.
## Starting chromosome: LPAL13_SCAF000085.
## Starting chromosome: LPAL13_SCAF000086.
## Starting chromosome: LPAL13_SCAF000088.
## Starting chromosome: LPAL13_SCAF000089.
## Starting chromosome: LPAL13_SCAF000090.
## Starting chromosome: LPAL13_SCAF000092.
## Starting chromosome: LPAL13_SCAF000095.
## Starting chromosome: LPAL13_SCAF000097.
## Starting chromosome: LPAL13_SCAF000101.
## Starting chromosome: LPAL13_SCAF000103.
## Starting chromosome: LPAL13_SCAF000109.
## Starting chromosome: LPAL13_SCAF000110.
## Starting chromosome: LPAL13_SCAF000111.
## Starting chromosome: LPAL13_SCAF000112.
## Starting chromosome: LPAL13_SCAF000113.
## Starting chromosome: LPAL13_SCAF000114.
## Starting chromosome: LPAL13_SCAF000115.
## Starting chromosome: LPAL13_SCAF000117.
## Starting chromosome: LPAL13_SCAF000118.
## Starting chromosome: LPAL13_SCAF000119.
## Starting chromosome: LPAL13_SCAF000122.
## Starting chromosome: LPAL13_SCAF000124.
## Starting chromosome: LPAL13_SCAF000125.
## Starting chromosome: LPAL13_SCAF000137.
## Starting chromosome: LPAL13_SCAF000138.
## Starting chromosome: LPAL13_SCAF000139.
## Starting chromosome: LPAL13_SCAF000140.
## Starting chromosome: LPAL13_SCAF000142.
## Starting chromosome: LPAL13_SCAF000144.
## Starting chromosome: LPAL13_SCAF000145.
## Starting chromosome: LPAL13_SCAF000146.
## Starting chromosome: LPAL13_SCAF000147.
## Starting chromosome: LPAL13_SCAF000148.
## Starting chromosome: LPAL13_SCAF000150.
## Starting chromosome: LPAL13_SCAF000151.
## Starting chromosome: LPAL13_SCAF000152.
## Starting chromosome: LPAL13_SCAF000154.
## Starting chromosome: LPAL13_SCAF000155.
## Starting chromosome: LPAL13_SCAF000156.
## Starting chromosome: LPAL13_SCAF000158.
## Starting chromosome: LPAL13_SCAF000159.
## Starting chromosome: LPAL13_SCAF000160.
## Starting chromosome: LPAL13_SCAF000163.
## Starting chromosome: LPAL13_SCAF000164.
## Starting chromosome: LPAL13_SCAF000165.
## Starting chromosome: LPAL13_SCAF000166.
## Starting chromosome: LPAL13_SCAF000168.
## Starting chromosome: LPAL13_SCAF000169.
## Starting chromosome: LPAL13_SCAF000171.
## Starting chromosome: LPAL13_SCAF000172.
## Starting chromosome: LPAL13_SCAF000174.
## Starting chromosome: LPAL13_SCAF000175.
## Starting chromosome: LPAL13_SCAF000176.
## Starting chromosome: LPAL13_SCAF000177.
## Starting chromosome: LPAL13_SCAF000180.
## Starting chromosome: LPAL13_SCAF000184.
## Starting chromosome: LPAL13_SCAF000185.
## Starting chromosome: LPAL13_SCAF000188.
## Starting chromosome: LPAL13_SCAF000190.
## Starting chromosome: LPAL13_SCAF000191.
## Starting chromosome: LPAL13_SCAF000192.
## Starting chromosome: LPAL13_SCAF000194.
## Starting chromosome: LPAL13_SCAF000195.
## Starting chromosome: LPAL13_SCAF000196.
## Starting chromosome: LPAL13_SCAF000197.
## Starting chromosome: LPAL13_SCAF000198.
## Starting chromosome: LPAL13_SCAF000199.
## Starting chromosome: LPAL13_SCAF000200.
## Starting chromosome: LPAL13_SCAF000201.
## Starting chromosome: LPAL13_SCAF000202.
## Starting chromosome: LPAL13_SCAF000203.
## Starting chromosome: LPAL13_SCAF000204.
## Starting chromosome: LPAL13_SCAF000206.
## Starting chromosome: LPAL13_SCAF000209.
## Starting chromosome: LPAL13_SCAF000211.
## Starting chromosome: LPAL13_SCAF000212.
## Starting chromosome: LPAL13_SCAF000219.
## Starting chromosome: LPAL13_SCAF000221.
## Starting chromosome: LPAL13_SCAF000223.
## Starting chromosome: LPAL13_SCAF000226.
## Starting chromosome: LPAL13_SCAF000227.
## Starting chromosome: LPAL13_SCAF000229.
## Starting chromosome: LPAL13_SCAF000233.
## Starting chromosome: LPAL13_SCAF000235.
## Starting chromosome: LPAL13_SCAF000237.
## Starting chromosome: LPAL13_SCAF000240.
## Starting chromosome: LPAL13_SCAF000241.
## Starting chromosome: LPAL13_SCAF000242.
## Starting chromosome: LPAL13_SCAF000244.
## Starting chromosome: LPAL13_SCAF000245.
## Starting chromosome: LPAL13_SCAF000246.
## Starting chromosome: LPAL13_SCAF000247.
## Starting chromosome: LPAL13_SCAF000248.
## Starting chromosome: LPAL13_SCAF000249.
## Starting chromosome: LPAL13_SCAF000250.
## Starting chromosome: LPAL13_SCAF000251.
## Starting chromosome: LPAL13_SCAF000252.
## Starting chromosome: LPAL13_SCAF000253.
## Starting chromosome: LPAL13_SCAF000254.
## Starting chromosome: LPAL13_SCAF000255.
## Starting chromosome: LPAL13_SCAF000257.
## Starting chromosome: LPAL13_SCAF000258.
## Starting chromosome: LPAL13_SCAF000260.
## Starting chromosome: LPAL13_SCAF000261.
## Starting chromosome: LPAL13_SCAF000262.
## Starting chromosome: LPAL13_SCAF000263.
## Starting chromosome: LPAL13_SCAF000264.
## Starting chromosome: LPAL13_SCAF000265.
## Starting chromosome: LPAL13_SCAF000266.
## Starting chromosome: LPAL13_SCAF000267.
## Starting chromosome: LPAL13_SCAF000269.
## Starting chromosome: LPAL13_SCAF000270.
## Starting chromosome: LPAL13_SCAF000271.
## Starting chromosome: LPAL13_SCAF000274.
## Starting chromosome: LPAL13_SCAF000275.
## Starting chromosome: LPAL13_SCAF000276.
## Starting chromosome: LPAL13_SCAF000277.
## Starting chromosome: LPAL13_SCAF000279.
## Starting chromosome: LPAL13_SCAF000280.
## Starting chromosome: LPAL13_SCAF000282.
## Starting chromosome: LPAL13_SCAF000283.
## Starting chromosome: LPAL13_SCAF000284.
## Starting chromosome: LPAL13_SCAF000287.
## Starting chromosome: LPAL13_SCAF000288.
## Starting chromosome: LPAL13_SCAF000289.
## Starting chromosome: LPAL13_SCAF000290.
## Starting chromosome: LPAL13_SCAF000292.
## Starting chromosome: LPAL13_SCAF000293.
## Starting chromosome: LPAL13_SCAF000294.
## Starting chromosome: LPAL13_SCAF000295.
## Starting chromosome: LPAL13_SCAF000298.
## Starting chromosome: LPAL13_SCAF000299.
## Starting chromosome: LPAL13_SCAF000302.
## Starting chromosome: LPAL13_SCAF000303.
## Starting chromosome: LPAL13_SCAF000306.
## Starting chromosome: LPAL13_SCAF000307.
## Starting chromosome: LPAL13_SCAF000308.
## Starting chromosome: LPAL13_SCAF000310.
## Starting chromosome: LPAL13_SCAF000312.
## Starting chromosome: LPAL13_SCAF000315.
## Starting chromosome: LPAL13_SCAF000316.
## Starting chromosome: LPAL13_SCAF000319.
## Starting chromosome: LPAL13_SCAF000325.
## Starting chromosome: LPAL13_SCAF000327.
## Starting chromosome: LPAL13_SCAF000331.
## Starting chromosome: LPAL13_SCAF000332.
## Starting chromosome: LPAL13_SCAF000334.
## Starting chromosome: LPAL13_SCAF000343.
## Starting chromosome: LPAL13_SCAF000344.
## Starting chromosome: LPAL13_SCAF000347.
## Starting chromosome: LPAL13_SCAF000348.
## Starting chromosome: LPAL13_SCAF000349.
## Starting chromosome: LPAL13_SCAF000350.
## Starting chromosome: LPAL13_SCAF000351.
## Starting chromosome: LPAL13_SCAF000352.
## Starting chromosome: LPAL13_SCAF000355.
## Starting chromosome: LPAL13_SCAF000357.
## Starting chromosome: LPAL13_SCAF000358.
## Starting chromosome: LPAL13_SCAF000360.
## Starting chromosome: LPAL13_SCAF000361.
## Starting chromosome: LPAL13_SCAF000362.
## Starting chromosome: LPAL13_SCAF000364.
## Starting chromosome: LPAL13_SCAF000368.
## Starting chromosome: LPAL13_SCAF000373.
## Starting chromosome: LPAL13_SCAF000375.
## Starting chromosome: LPAL13_SCAF000377.
## Starting chromosome: LPAL13_SCAF000379.
## Starting chromosome: LPAL13_SCAF000382.
## Starting chromosome: LPAL13_SCAF000383.
## Starting chromosome: LPAL13_SCAF000385.
## Starting chromosome: LPAL13_SCAF000387.
## Starting chromosome: LPAL13_SCAF000388.
## Starting chromosome: LPAL13_SCAF000389.
## Starting chromosome: LPAL13_SCAF000392.
## Starting chromosome: LPAL13_SCAF000393.
## Starting chromosome: LPAL13_SCAF000394.
## Starting chromosome: LPAL13_SCAF000395.
## Starting chromosome: LPAL13_SCAF000396.
## Starting chromosome: LPAL13_SCAF000397.
## Starting chromosome: LPAL13_SCAF000398.
## Starting chromosome: LPAL13_SCAF000399.
## Starting chromosome: LPAL13_SCAF000400.
## Starting chromosome: LPAL13_SCAF000403.
## Starting chromosome: LPAL13_SCAF000407.
## Starting chromosome: LPAL13_SCAF000408.
## Starting chromosome: LPAL13_SCAF000409.
## Starting chromosome: LPAL13_SCAF000412.
## Starting chromosome: LPAL13_SCAF000413.
## Starting chromosome: LPAL13_SCAF000417.
## Starting chromosome: LPAL13_SCAF000418.
## Starting chromosome: LPAL13_SCAF000420.
## Starting chromosome: LPAL13_SCAF000421.
## Starting chromosome: LPAL13_SCAF000422.
## Starting chromosome: LPAL13_SCAF000423.
## Starting chromosome: LPAL13_SCAF000424.
## Starting chromosome: LPAL13_SCAF000426.
## Starting chromosome: LPAL13_SCAF000431.
## Starting chromosome: LPAL13_SCAF000434.
## Starting chromosome: LPAL13_SCAF000441.
## Starting chromosome: LPAL13_SCAF000442.
## Starting chromosome: LPAL13_SCAF000444.
## Starting chromosome: LPAL13_SCAF000445.
## Starting chromosome: LPAL13_SCAF000447.
## Starting chromosome: LPAL13_SCAF000449.
## Starting chromosome: LPAL13_SCAF000450.
## Starting chromosome: LPAL13_SCAF000454.
## Starting chromosome: LPAL13_SCAF000455.
## Starting chromosome: LPAL13_SCAF000457.
## Starting chromosome: LPAL13_SCAF000458.
## Starting chromosome: LPAL13_SCAF000459.
## Starting chromosome: LPAL13_SCAF000461.
## Starting chromosome: LPAL13_SCAF000462.
## Starting chromosome: LPAL13_SCAF000463.
## Starting chromosome: LPAL13_SCAF000464.
## Starting chromosome: LPAL13_SCAF000465.
## Starting chromosome: LPAL13_SCAF000466.
## Starting chromosome: LPAL13_SCAF000467.
## Starting chromosome: LPAL13_SCAF000470.
## Starting chromosome: LPAL13_SCAF000472.
## Starting chromosome: LPAL13_SCAF000473.
## Starting chromosome: LPAL13_SCAF000474.
## Starting chromosome: LPAL13_SCAF000475.
## Starting chromosome: LPAL13_SCAF000476.
## Starting chromosome: LPAL13_SCAF000477.
## Starting chromosome: LPAL13_SCAF000482.
## Starting chromosome: LPAL13_SCAF000483.
## Starting chromosome: LPAL13_SCAF000485.
## Starting chromosome: LPAL13_SCAF000489.
## Starting chromosome: LPAL13_SCAF000492.
## Starting chromosome: LPAL13_SCAF000495.
## Starting chromosome: LPAL13_SCAF000496.
## Starting chromosome: LPAL13_SCAF000497.
## Starting chromosome: LPAL13_SCAF000498.
## Starting chromosome: LPAL13_SCAF000499.
## Starting chromosome: LPAL13_SCAF000503.
## Starting chromosome: LPAL13_SCAF000504.
## Starting chromosome: LPAL13_SCAF000506.
## Starting chromosome: LPAL13_SCAF000510.
## Starting chromosome: LPAL13_SCAF000511.
## Starting chromosome: LPAL13_SCAF000512.
## Starting chromosome: LPAL13_SCAF000513.
## Starting chromosome: LPAL13_SCAF000514.
## Starting chromosome: LPAL13_SCAF000515.
## Starting chromosome: LPAL13_SCAF000516.
## Starting chromosome: LPAL13_SCAF000520.
## Starting chromosome: LPAL13_SCAF000521.
## Starting chromosome: LPAL13_SCAF000522.
## Starting chromosome: LPAL13_SCAF000525.
## Starting chromosome: LPAL13_SCAF000532.
## Starting chromosome: LPAL13_SCAF000541.
## Starting chromosome: LPAL13_SCAF000544.
## Starting chromosome: LPAL13_SCAF000550.
## Starting chromosome: LPAL13_SCAF000554.
## Starting chromosome: LPAL13_SCAF000555.
## Starting chromosome: LPAL13_SCAF000557.
## Starting chromosome: LPAL13_SCAF000565.
## Starting chromosome: LPAL13_SCAF000569.
## Starting chromosome: LPAL13_SCAF000573.
## Starting chromosome: LPAL13_SCAF000575.
## Starting chromosome: LPAL13_SCAF000576.
## Starting chromosome: LPAL13_SCAF000577.
## Starting chromosome: LPAL13_SCAF000581.
## Starting chromosome: LPAL13_SCAF000582.
## Starting chromosome: LPAL13_SCAF000584.
## Starting chromosome: LPAL13_SCAF000585.
## Starting chromosome: LPAL13_SCAF000588.
## Starting chromosome: LPAL13_SCAF000589.
## Starting chromosome: LPAL13_SCAF000590.
## Starting chromosome: LPAL13_SCAF000591.
## Starting chromosome: LPAL13_SCAF000593.
## Starting chromosome: LPAL13_SCAF000595.
## Starting chromosome: LPAL13_SCAF000596.
## Starting chromosome: LPAL13_SCAF000599.
## Starting chromosome: LPAL13_SCAF000600.
## Starting chromosome: LPAL13_SCAF000601.
## Starting chromosome: LPAL13_SCAF000603.
## Starting chromosome: LPAL13_SCAF000604.
## Starting chromosome: LPAL13_SCAF000605.
## Starting chromosome: LPAL13_SCAF000606.
## Starting chromosome: LPAL13_SCAF000608.
## Starting chromosome: LPAL13_SCAF000609.
## Starting chromosome: LPAL13_SCAF000610.
## Starting chromosome: LPAL13_SCAF000611.
## Starting chromosome: LPAL13_SCAF000612.
## Starting chromosome: LPAL13_SCAF000613.
## Starting chromosome: LPAL13_SCAF000615.
## Starting chromosome: LPAL13_SCAF000618.
## Starting chromosome: LPAL13_SCAF000619.
## Starting chromosome: LPAL13_SCAF000620.
## Starting chromosome: LPAL13_SCAF000621.
## Starting chromosome: LPAL13_SCAF000627.
## Starting chromosome: LPAL13_SCAF000631.
## Starting chromosome: LPAL13_SCAF000633.
## Starting chromosome: LPAL13_SCAF000635.
## Starting chromosome: LPAL13_SCAF000637.
## Starting chromosome: LPAL13_SCAF000642.
## Starting chromosome: LPAL13_SCAF000643.
## Starting chromosome: LPAL13_SCAF000644.
## Starting chromosome: LPAL13_SCAF000647.
## Starting chromosome: LPAL13_SCAF000648.
## Starting chromosome: LPAL13_SCAF000649.
## Starting chromosome: LPAL13_SCAF000650.
## Starting chromosome: LPAL13_SCAF000652.
## Starting chromosome: LPAL13_SCAF000654.
## Starting chromosome: LPAL13_SCAF000657.
## Starting chromosome: LPAL13_SCAF000660.
## Starting chromosome: LPAL13_SCAF000663.
## Starting chromosome: LPAL13_SCAF000664.
## Starting chromosome: LPAL13_SCAF000665.
## Starting chromosome: LPAL13_SCAF000666.
## Starting chromosome: LPAL13_SCAF000667.
## Starting chromosome: LPAL13_SCAF000669.
## Starting chromosome: LPAL13_SCAF000670.
## Starting chromosome: LPAL13_SCAF000673.
## Starting chromosome: LPAL13_SCAF000675.
## Starting chromosome: LPAL13_SCAF000676.
## Starting chromosome: LPAL13_SCAF000677.
## Starting chromosome: LPAL13_SCAF000678.
## Starting chromosome: LPAL13_SCAF000680.
## Starting chromosome: LPAL13_SCAF000682.
## Starting chromosome: LPAL13_SCAF000683.
## Starting chromosome: LPAL13_SCAF000684.
## Starting chromosome: LPAL13_SCAF000686.
## Starting chromosome: LPAL13_SCAF000687.
## Starting chromosome: LPAL13_SCAF000689.
## Starting chromosome: LPAL13_SCAF000691.
## Starting chromosome: LPAL13_SCAF000693.
## Starting chromosome: LPAL13_SCAF000694.
## Starting chromosome: LPAL13_SCAF000697.
## Starting chromosome: LPAL13_SCAF000699.
## Starting chromosome: LPAL13_SCAF000700.
## Starting chromosome: LPAL13_SCAF000701.
## Starting chromosome: LPAL13_SCAF000702.
## Starting chromosome: LPAL13_SCAF000703.
## Starting chromosome: LPAL13_SCAF000705.
## Starting chromosome: LPAL13_SCAF000706.
## Starting chromosome: LPAL13_SCAF000707.
## Starting chromosome: LPAL13_SCAF000708.
## Starting chromosome: LPAL13_SCAF000710.
## Starting chromosome: LPAL13_SCAF000713.
## Starting chromosome: LPAL13_SCAF000715.
## Starting chromosome: LPAL13_SCAF000717.
## Starting chromosome: LPAL13_SCAF000718.
## Starting chromosome: LPAL13_SCAF000720.
## Starting chromosome: LPAL13_SCAF000721.
## Starting chromosome: LPAL13_SCAF000722.
## Starting chromosome: LPAL13_SCAF000723.
## Starting chromosome: LPAL13_SCAF000724.
## Starting chromosome: LPAL13_SCAF000727.
## Starting chromosome: LPAL13_SCAF000729.
## Starting chromosome: LPAL13_SCAF000733.
## Starting chromosome: LPAL13_SCAF000734.
## Starting chromosome: LPAL13_SCAF000735.
## Starting chromosome: LPAL13_SCAF000736.
## Starting chromosome: LPAL13_SCAF000737.
## Starting chromosome: LPAL13_SCAF000738.
## Starting chromosome: LPAL13_SCAF000741.
## Starting chromosome: LPAL13_SCAF000742.
## Starting chromosome: LPAL13_SCAF000743.
## Starting chromosome: LPAL13_SCAF000745.
## Starting chromosome: LPAL13_SCAF000746.
## Starting chromosome: LPAL13_SCAF000747.
## Starting chromosome: LPAL13_SCAF000751.
## Starting chromosome: LPAL13_SCAF000754.
## Starting chromosome: LPAL13_SCAF000755.
## Starting chromosome: LPAL13_SCAF000757.
## Starting chromosome: LPAL13_SCAF000758.
## Starting chromosome: LPAL13_SCAF000759.
## Starting chromosome: LPAL13_SCAF000760.
## Starting chromosome: LPAL13_SCAF000761.
## Starting chromosome: LPAL13_SCAF000762.
## Starting chromosome: LPAL13_SCAF000764.
## Starting chromosome: LPAL13_SCAF000765.
## Starting chromosome: LPAL13_SCAF000766.
## Starting chromosome: LPAL13_SCAF000767.
## Starting chromosome: LPAL13_SCAF000768.
## Starting chromosome: LPAL13_SCAF000769.
## Starting chromosome: LPAL13_SCAF000770.
## Starting chromosome: LPAL13_SCAF000771.
## Starting chromosome: LPAL13_SCAF000773.
## Starting chromosome: LPAL13_SCAF000774.
## Starting chromosome: LPAL13_SCAF000776.
## Starting chromosome: LPAL13_SCAF000777.
## Starting chromosome: LPAL13_SCAF000778.
## Starting chromosome: LPAL13_SCAF000779.
## Starting chromosome: LPAL13_SCAF000780.
## Starting chromosome: LPAL13_SCAF000781.
## Starting chromosome: LPAL13_SCAF000782.
## Starting chromosome: LPAL13_SCAF000783.
## Starting chromosome: LPAL13_SCAF000784.
## Starting chromosome: LPAL13_SCAF000785.
## Starting chromosome: LPAL13_SCAF000786.
## Starting chromosome: LPAL13_SCAF000787.
## Starting chromosome: LPAL13_SCAF000789.
## Starting chromosome: LPAL13_SCAF000791.
## Starting chromosome: LPAL13_SCAF000794.
## Starting chromosome: LPAL13_SCAF000795.
## Starting chromosome: LPAL13_SCAF000796.
## Starting chromosome: LPAL13_SCAF000797.
## Starting chromosome: LPAL13_SCAF000798.
## Starting chromosome: LPAL13_SCAF000799.
## Starting chromosome: LPAL13_SCAF000803.
## Starting chromosome: LPAL13_SCAF000804.
## Starting chromosome: LPAL13_SCAF000805.
## Starting chromosome: LPAL13_SCAF000806.
## Starting chromosome: LPAL13_SCAF000807.
## Starting chromosome: LPAL13_SCAF000809.
## Starting chromosome: LPAL13_SCAF000810.
## Starting chromosome: LPAL13_SCAF000811.
## Starting chromosome: LPAL13_SCAF000812.
## Starting chromosome: LPAL13_SCAF000813.
## Starting chromosome: LPAL13_SCAF000815.
## Starting chromosome: LPAL13_SCAF000816.
## Starting chromosome: LPAL13_SCAF000818.
## Starting chromosome: LPAL13_SCAF000819.
## Extracting primer regions.
## Searching for overlapping/closest genes.
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo = TRUE)
## Had a successful gff import with rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo = TRUE)
## Returning a df with 16 columns and 35190 rows.
## Now the annotation has 8665 rows.
## Dropped 11 regions with Ns in the 5' region.
## Dropped 9 regions with Ns in the 3' region.
## Keep the favorite regions on assembled chromosomes only: any row whose
## name contains "SCAF" sits on an unplaced scaffold and is discarded.
favorite_primer_regions <- fun_stuff[["favorites"]]
drop_scaffolds <- grepl(pattern = "SCAF",
                        x = rownames(favorite_primer_regions))
favorite_primer_regions <- favorite_primer_regions[!drop_scaffolds, ]
## Keep the region name as a regular column so that it survives later merges.
favorite_primer_regions[["bin"]] <- rownames(favorite_primer_regions)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Biostrings':
##
## collapse, intersect, setdiff, setequal, union
## The following object is masked from 'package:XVector':
##
## slice
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following object is masked from 'package:hpgltools':
##
## combine
## The following object is masked from 'package:testthat':
##
## matches
## The following objects are masked from 'package:GenomicRanges':
##
## intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following object is masked from 'package:matrixStats':
##
## count
## The following object is masked from 'package:Biobase':
##
## combine
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Move the 'bin' column to the front of the table.
favorite_primer_regions <- relocate(favorite_primer_regions, bin)
Here is my note from our meeting:
Cross reference primers to DE genes of 2.2/2.3 and/or resistant/susceptible, add a column to the primer spreadsheet with the DE genes (in retrospect I am guessing this actually means to put the logFC as a column).
One nice thing, I did a semantic removal on the lp_expt, so the set of logFC/pvalues should not have any of the offending types; thus I should be able to automagically get rid of them in the merge.
## Cross reference the primer regions against two differential expression
## tables.  In both cases only the deseq_logfc/deseq_adjp columns are taken,
## renamed with a contrast-specific prefix, and joined to the primer table via
## the ID of the closest gene before each region.  merge() keeps only rows
## present on both sides, so regions whose gene is absent from a DE table
## are dropped.

## Zymodeme contrast: z2.3 vs. z2.2.
logfc <- zy_table[["data"]][["z23_vs_z22"]]
logfc_columns <- setNames(logfc[, c("deseq_logfc", "deseq_adjp")],
                          c("z23_logfc", "z23_adjp"))
new_table <- merge(x = favorite_primer_regions, y = logfc_columns,
                   by.x = "closest_gene_before_id", by.y = "row.names")

## Drug response contrast: sensitive vs. resistant.
sus <- sus_table[["data"]][["sensitive_vs_resistant"]]
sus_columns <- setNames(sus[, c("deseq_logfc", "deseq_adjp")],
                        c("sus_logfc", "sus_adjp"))
new_table <- merge(x = new_table, y = sus_columns,
                   by.x = "closest_gene_before_id", by.y = "row.names")

## merge() puts the key column first; move 'bin' back to the front.
new_table <- relocate(new_table, bin)

written <- write_xlsx(data = new_table,
                      excel = "excel/favorite_primers_xref_zy_sus.xlsx")
We can cross reference the variants against the zymodeme status and plot a heatmap of the results and hopefully see how they separate.
## NOTE(review): pruned_snps is used below but its definition is commented out
## here; it is presumably created earlier in the document -- confirm.
## pruned_snps <- subset_expt(new_snps, subset="condition=='z2.2'|condition=='z2.3'")
snp_genes <- sm(snps_vs_genes(lp_expt, new_sets, expt_name_col = "chromosome"))

## Colors for the two zymodeme categories.
clinical_colors_v2 <- list("z22" = "#0000cc", "z23" = "#cc0000")

## Normalize the variant data, then set the conditions from the
## zymodemecategorical column and apply the colors above.
new_zymo_norm <- normalize_expt(pruned_snps, filter = TRUE, convert = "cpm",
                                norm = "quant", transform = TRUE)
new_zymo_norm <- set_expt_conditions(new_zymo_norm, fact = "zymodemecategorical")
new_zymo_norm <- set_expt_colors(new_zymo_norm, clinical_colors_v2)
## Removing 0 low-count genes (568627 remaining).
## transform_counts: Found 28953155 values equal to 0, adding 1 to the matrix.
## Draw the heatmap for the normalized variant data, save a copy as a pdf,
## and also print it into the report.
zymo_heat <- plot_disheat(new_zymo_norm)
pp(file = "images/onlyz22_z23_snp_heatmap.pdf",
   image = zymo_heat[["plot"]])
zymo_heat[["plot"]]
Now let us try to make a heatmap which includes some of the annotation data.
## Pull the experimental design and give missing annotations explicit labels.
des <- both_norm[["design"]]
undef_idx <- is.na(des[["strain"]])
des[undef_idx, "strain"] <- "unknown"
zymo_missing_idx <- is.na(des[["zymodemecategorical"]])
## Coerce both annotation columns to character before writing new labels into
## them (presumably they are factors, which would reject an unseen level).
des[["zymodemecategorical"]] <- as.character(des[["zymodemecategorical"]])
des[["clinicalcategorical"]] <- as.character(des[["clinicalcategorical"]])
des[zymo_missing_idx, "zymodemecategorical"] <- "unknown"

## Column annotations: zymodeme and clinical outcome.
col_data <- as.data.frame(des[, c("zymodemecategorical", "clinicalcategorical")])
colnames(col_data) <- c("zymodeme", "outcome")
unknown_clinical <- is.na(col_data[["outcome"]])
col_data[unknown_clinical, "outcome"] <- "undefined"

## Row annotations: strain.
row_data <- as.data.frame(des[, c("strain")])
colnames(row_data) <- c("strain")

## Pieces handed to annHeatmap2().
mydendro <- list("clustfun" = hclust, "lwd" = 2.0)
myannot <- list(
  "Col" = list("data" = col_data),
  "Row" = list("data" = row_data))
myclust <- list("cuth" = 1.0, "col" = BrewerClusterCol)
mylabs <- list(
  "Row" = list("nrow" = 4),
  "Col" = list("nrow" = 4))

## Alternate palette:
## hmcols <- colorRampPalette(c("yellow","black","darkblue"))(256)
hmcols <- colorRampPalette(c("darkblue", "beige"))(240)

## The heatmap itself is drawn from the sample-by-sample correlations.
correlations <- hpgl_cor(exprs(both_norm))
map1 <- annHeatmap2(
  correlations,
  dendrogram = mydendro,
  annotation = myannot,
  cluster = myclust,
  labels = mylabs,
  ## The following controls if the picture is symmetric
  scale = "none",
  col = hmcols)
## Warning in breakColors(breaks, col): more colors than classes: ignoring 29 last
## colors
## Write the annotated heatmap to a 20x20 png.
pp(file = "images/dendro_heatmap.png", image = map1,
   width = 20, height = 20)
## Warning in pp(file = "images/dendro_heatmap.png", image = map1, height = 20, :
## There is no device to shut down.
## annotated Heatmap
##
## Rows: 'dendrogram' with 2 branches and 100 members total, at height 6.173
## 11 annotation variable(s)
## Cols: 'dendrogram' with 2 branches and 100 members total, at height 6.173
## 10 annotation variable(s)
Print the larger heatmap so that all the labels appear. Keep in mind that as we get more samples, this image needs to continue getting bigger.
big heatmap
## Count the samples in each condition, then print the per-sample conditions.
xref_prop <- table(pheno_snps[["conditions"]])
pheno_snps[["conditions"]]
## [1] "z2.3" "z2.3" "z2.2" "z2.3" "z2.2" "z2.3" "z2.3" "z2.3" "z2.3" "z2.2"
## [11] "z2.3" "z2.2" "z2.3" "z2.3" "z2.2" "z2.2" "z2.3" "z2.2" "z2.2" "z2.3"
## [21] "z2.2" "z2.3" "z2.2" "z2.3" "z2.2" "z2.2" "z2.2" "z2.2" "z2.2" "z2.2"
## [31] "z2.2" "z2.3" "z2.2" "z2.3" "z2.3" "z2.2" "z2.2" "z2.3" "z2.2" "z2.3"
## [41] "z2.3" "z2.2" "z2.2" "z2.2" "z2.2" "z2.3" "z2.3" "z2.3" "z2.2" "z2.3"
## [51] "z2.3" "z2.3" "z2.3" "z2.2" "z2.2"
## Build a per-position table of how often a variant is observed in each
## condition, shaped for CMplot (SNP, Chromosome, Position, then traits).

## A position counts as observed in a sample when its value is above 5.
idx_tbl <- exprs(pheno_snps) > 5
new_tbl <- data.frame(row.names = rownames(exprs(pheno_snps)))
for (n in names(xref_prop)) {
  idx_cols <- which(pheno_snps[["conditions"]] == n)
  ## drop = FALSE keeps a matrix even when a condition has a single sample;
  ## without it rowSums() fails on the resulting plain vector.
  observed <- rowSums(idx_tbl[, idx_cols, drop = FALSE])
  ## Proportion of the samples in this condition which have the variant.
  new_tbl[[n]] <- unname(observed) / xref_prop[[n]]
}
rm(observed)

## Keep only the positions whose name contains "LpaL13" (case sensitive).
keepers <- grepl(x = rownames(new_tbl), pattern = "LpaL13")
new_tbl <- new_tbl[keepers, ]

## Turn the proportions into scores where a variant seen in every sample of
## a condition gets a small value (0.001) and one never seen is capped at 1.
new_tbl[["strong22"]] <- pmin(1.001 - new_tbl[["z2.2"]], 1)
new_tbl[["strong23"]] <- pmin(1.001 - new_tbl[["z2.3"]], 1)

## Split the chromosome and position back out of the row names.
new_tbl[["SNP"]] <- rownames(new_tbl)
new_tbl[["Chromosome"]] <- gsub(x = new_tbl[["SNP"]], pattern = "chr_(.*)_pos_.*", replacement = "\\1")
new_tbl[["Position"]] <- gsub(x = new_tbl[["SNP"]], pattern = ".*_pos_(\\d+)_.*", replacement = "\\1")
new_tbl <- new_tbl[, c("SNP", "Chromosome", "Position", "strong22", "strong23")]
library(CMplot)
## Much appreciate for using CMplot.
## Full description, Bug report, Suggestion and the latest codes:
## https://github.com/YinLiLin/CMplot
## Drop the strong22 column so that only strong23 is plotted,
## then make the default set of CMplot figures with 100kb bins.
simplify <- new_tbl[, colnames(new_tbl) != "strong22"]
CMplot(simplify, bin.size = 100000)
## SNP-Density Plotting.
## Circular-Manhattan Plotting strong23.
## Rectangular-Manhattan Plotting strong23.
## QQ Plotting strong23.
## Plots are stored in: /mnt/cbcb/fs01_abelew/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_tmrc_git
## Multi-track Manhattan plot of both traits (strong22 and strong23),
## written to disk as a 300 dpi jpg with threshold lines at 0.01 and 0.05.
CMplot(
  new_tbl,
  plot.type = "m",
  multracks = TRUE,
  threshold = c(0.01, 0.05),
  threshold.lwd = c(1, 1),
  threshold.col = c("black", "grey"),
  amplify = TRUE,
  bin.size = 10000,
  chr.den.col = c("darkgreen", "yellow", "red"),
  signal.col = c("red", "green", "blue"),
  signal.cex = 1,
  file = "jpg",
  memo = "",
  dpi = 300,
  file.output = TRUE,
  verbose = TRUE)
## Multracks-Manhattan Plotting strong22.
## Multracks-Manhattan Plotting strong23.
## Multraits-Rectangular Plotting...(finished 73%)
Multraits-Rectangular Plotting...(finished 74%)
Multraits-Rectangular Plotting...(finished 75%)
Multraits-Rectangular Plotting...(finished 76%)
Multraits-Rectangular Plotting...(finished 77%)
Multraits-Rectangular Plotting...(finished 78%)
Multraits-Rectangular Plotting...(finished 79%)
Multraits-Rectangular Plotting...(finished 80%)
Multraits-Rectangular Plotting...(finished 81%)
Multraits-Rectangular Plotting...(finished 82%)
Multraits-Rectangular Plotting...(finished 83%)
Multraits-Rectangular Plotting...(finished 84%)
Multraits-Rectangular Plotting...(finished 85%)
Multraits-Rectangular Plotting...(finished 86%)
Multraits-Rectangular Plotting...(finished 87%)
Multraits-Rectangular Plotting...(finished 88%)
Multraits-Rectangular Plotting...(finished 89%)
Multraits-Rectangular Plotting...(finished 90%)
Multraits-Rectangular Plotting...(finished 91%)
Multraits-Rectangular Plotting...(finished 92%)
Multraits-Rectangular Plotting...(finished 93%)
Multraits-Rectangular Plotting...(finished 94%)
Multraits-Rectangular Plotting...(finished 95%)
Multraits-Rectangular Plotting...(finished 96%)
Multraits-Rectangular Plotting...(finished 97%)
Multraits-Rectangular Plotting...(finished 98%)
Multraits-Rectangular Plotting...(finished 99%)
Multraits-Rectangular Plotting...(finished 100%)
## Plots are stored in: /mnt/cbcb/fs01_abelew/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_tmrc_git
This tool looks a little opaque, but provides sample data with things that make sense to me and should be pretty easy to recapitulate in our data.
## Assemble the five inputs MatrixEQTL wants: covariates, gene locations,
## gene expression, SNP locations, and the SNP matrix.
## For this, let us use the 'new_snps' data structure.

## Caveat here: the covariates need to be coerced to numbers, so each column
## is converted to a factor and replaced by its integer codes.
my_covariates <- pData(new_snps)[, c("zymodemecategorical", "clinicalcategorical")]
for (col in colnames(my_covariates)) {
  my_covariates[[col]] <- as.numeric(as.factor(my_covariates[[col]]))
}
## Transpose: one row per covariate, one column per sample.
my_covariates <- t(my_covariates)

## Gene locations, with the column names renamed to geneid/chr/left/right.
my_geneloc <- fData(lp_expt)[, c("gid", "chromosome", "start", "end")]
colnames(my_geneloc) <- c("geneid", "chr", "left", "right")

## log2(cpm) expression, limited to the samples which also have SNP data.
## NOTE(review): this filters the columns but does not reorder them; confirm
## that my_ge, my_snps, and my_covariates end up in the same sample order.
my_ge <- exprs(normalize_expt(lp_expt, transform = "log2", filter = TRUE, convert = "cpm"))
used_samples <- tolower(colnames(my_ge)) %in% colnames(exprs(new_snps))
my_ge <- my_ge[, used_samples]

## SNP locations: the first column holds the SNP identifiers, from which the
## chromosome and position are parsed.  The identifiers are read from that
## column rather than from rownames(my_snpsloc): data.frame(rownames = ...)
## creates a column, so the row names are only "1", "2", ...
## The pattern includes the underscore following 'pos' so the captured
## position does not start with "_".
my_snpsloc <- data.frame(rownames = rownames(exprs(new_snps)))
my_snpsloc[["chr"]] <- gsub(pattern = "^chr_(.+)_pos_(.+)_ref_.*$", replacement = "\\1",
                            x = my_snpsloc[["rownames"]])
my_snpsloc[["pos"]] <- gsub(pattern = "^chr_(.+)_pos_(.+)_ref_.*$", replacement = "\\2",
                            x = my_snpsloc[["rownames"]])

## Oh, caveat here: Because of the way I stored the data,
## I could have duplicate rows which presumably will make matrixEQTL sad
test <- duplicated(my_snpsloc)
## Each duplicated row would be another variant at that position;
## so in theory we would do a rle to number them I am guessing
## However, I do not have different variants so I think I can ignore this for the moment
## but will need to make my matrix either 0 or 1.
if (sum(test) > 0) {
  ## Report the number of duplicated rows (sum of the logical vector, not of
  ## the duplicated() function itself).
  message("There are: ", sum(test), " duplicated entries.")
  keep_idx <- ! test
  my_snpsloc <- my_snpsloc[keep_idx, ]
}

## Reduce the SNP matrix to presence (1) / absence (0).
my_snps <- exprs(new_snps)
one_idx <- my_snps > 0
my_snps[one_idx] <- 1
## Ok, at this point I think I have all the pieces which this method wants...
## Oh, no I guess not; it actually wants the data as a set of filenames...
library(MatrixEQTL)
## Dump each input as a quoted, tab-separated table which includes both the
## row and column names.  The eqtl/ directory must already exist.
write.table(my_snps, file = "eqtl/snps.tsv",
            sep = "\t", quote = TRUE, na = "NA",
            row.names = TRUE, col.names = TRUE)
write.table(my_snpsloc, file = "eqtl/snpsloc.tsv",
            sep = "\t", quote = TRUE, na = "NA",
            row.names = TRUE, col.names = TRUE)
write.table(as.data.frame(my_ge), file = "eqtl/ge.tsv",
            sep = "\t", quote = TRUE, na = "NA",
            row.names = TRUE, col.names = TRUE)
write.table(as.data.frame(my_geneloc), file = "eqtl/geneloc.tsv",
            sep = "\t", quote = TRUE, na = "NA",
            row.names = TRUE, col.names = TRUE)
write.table(as.data.frame(my_covariates), file = "eqtl/covariates.tsv",
            sep = "\t", quote = TRUE, na = "NA",
            row.names = TRUE, col.names = TRUE)
## Settings for the MatrixEQTL run.

## Model to fit: modelANOVA, modelLINEAR, or modelLINEAR_CROSS
useModel <- modelLINEAR

## Input files: genotypes, SNP locations, expression, gene locations, covariates.
SNP_file_name <- "eqtl/snps.tsv"
snps_location_file_name <- "eqtl/snpsloc.tsv"
expression_file_name <- "eqtl/ge.tsv"
gene_location_file_name <- "eqtl/geneloc.tsv"
covariates_file_name <- "eqtl/covariates.tsv"

## Output files; these are temporary files, so they do not persist.
output_file_name_cis <- tempfile()
output_file_name_tra <- tempfile()

## Only associations significant at this level will be saved
pvOutputThreshold_cis <- 0.1
pvOutputThreshold_tra <- 0.1

## Error covariance matrix; set to numeric() for identity.
errorCovariance <- numeric()
## errorCovariance = read.table("Sample_Data/errorCovariance.txt");

## Distance for local gene-SNP pairs
cisDist <- 1e6
## Read the three data matrices into MatrixEQTL SlicedData objects.  Each
## file is tab delimited, uses "NA" for missing values, and has one row of
## column labels and one column of row labels to skip.

## Genotypes, read in slices of 2,000 rows.
snps <- SlicedData$new()
snps$fileDelimiter <- "\t"
snps$fileOmitCharacters <- "NA"
snps$fileSkipRows <- 1
snps$fileSkipColumns <- 1
snps$fileSliceSize <- 2000
snps$LoadFile(SNP_file_name)

## Gene expression, read in slices of 2,000 rows.
gene <- SlicedData$new()
gene$fileDelimiter <- "\t"
gene$fileOmitCharacters <- "NA"
gene$fileSkipRows <- 1
gene$fileSkipColumns <- 1
gene$fileSliceSize <- 2000
gene$LoadFile(expression_file_name)

## Covariates; only loaded when a file name was provided.
cvrt <- SlicedData$new()
cvrt$fileDelimiter <- "\t"
cvrt$fileOmitCharacters <- "NA"
cvrt$fileSkipRows <- 1
cvrt$fileSkipColumns <- 1
if (length(covariates_file_name) > 0) {
  cvrt$LoadFile(covariates_file_name)
}
## Run the analysis: read the SNP and gene locations back from disk and hand
## everything to MatrixEQTL, asking for both the cis and trans results.
snpspos <- read.table(snps_location_file_name, header = TRUE,
                      stringsAsFactors = FALSE)
genepos <- read.table(gene_location_file_name, header = TRUE,
                      stringsAsFactors = FALSE)
me <- Matrix_eQTL_main(
  snps = snps,
  gene = gene,
  cvrt = cvrt,
  output_file_name = output_file_name_tra,
  pvOutputThreshold = pvOutputThreshold_tra,
  useModel = useModel,
  errorCovariance = errorCovariance,
  verbose = TRUE,
  output_file_name.cis = output_file_name_cis,
  pvOutputThreshold.cis = pvOutputThreshold_cis,
  snpspos = snpspos,
  genepos = genepos,
  cisDist = cisDist,
  pvalue.hist = "qqplot",
  min.pv.by.genesnp = FALSE,
  noFDRsaveMemory = FALSE)
## Unless a variable named skip_load exists and is TRUE, print the session
## information, report the hpgltools commit in use, and save to 'savefile'.
## get0() returns NULL when skip_load is undefined, so the default is to save.
if (!isTRUE(get0("skip_load"))) {
pander::pander(sessionInfo())
message(paste0("This is hpgltools commit: ", get_git_commit()))
message(paste0("Saving to ", savefile))
## sm() presumably silences the output from saveme() -- confirm.
tmp <- sm(saveme(filename = savefile))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset cc2fec1cb27347df846cacd2578dfa1a9312f14e
## This is hpgltools commit: Wed Feb 23 13:53:56 2022 -0500: cc2fec1cb27347df846cacd2578dfa1a9312f14e
## Saving to tmrc2_reorganized_2022.rda.xz
## Reload from 'savefile', the same file written by saveme() above.
## NOTE(review): presumably only meant to be run by hand when resuming
## work -- confirm this chunk is not evaluated during a normal render.
tmp <- loadme(filename = savefile)