## Path of the spreadsheet holding the sample metadata (202206 revision); it is
## the first argument handed to create_expt() when lp_expt is built.
sample_sheet <- glue::glue("sample_sheets/tmrc2_samples_202206.xlsx")
This is mostly just a run of this worksheet to reacquaint myself with it.
This document is intended to provide a general overview of the TMRC2 samples which have thus far been sequenced. In some cases, this includes only those samples starting in 2019; in other instances I am including our previous (2015-2016) samples.
In all cases the processing performed was:
The analyses in this document use the matrices of counts/gene from #3 and variants/position from #4 in order to provide some images and metrics describing the samples we have sequenced so far.
Everything which follows depends on the existing TriTrypDB annotations, revision 46, circa 2019. The following block loads a database of these annotations and turns it into a matrix where the rows are genes and columns are all the annotation types provided by TriTrypDB.
The same database was used to create a matrix of orthologous genes between L. panamensis and all of the other species in the TriTrypDB.
## Attach EuPathDB and the L. panamensis OrgDb package, then extract the gene
## annotations, GO table, orthologs, and TriTrypDB metadata entry from them.
## sm() wraps most calls here; presumably it silences their messages -- TODO confirm.
tt <- sm(library(EuPathDB))
## The package name is kept as a string because it is reused later as the
## annotation_name argument of create_expt().
orgdb <- "org.Lpanamensis.MHOMCOL81L13.v46.eg.db"
tt <- sm(library(orgdb, character.only=TRUE))
pan_db <- org.Lpanamensis.MHOMCOL81L13.v46.eg.db
all_fields <- columns(pan_db)
## Request a fixed set of annotation fields keyed by 'gid' and keep only the
## $genes element of the result.
all_lp_annot <- sm(load_orgdb_annotations(
pan_db,
keytype = "gid",
fields = c("annot_gene_entrez_id", "annot_gene_name",
"annot_strand", "annot_chromosome", "annot_cds_length",
"annot_gene_product")))$genes
lp_go <- sm(load_orgdb_go(pan_db))
## Two-column table of gene ID and CDS length, renamed to ID/length.
lp_lengths <- all_lp_annot[, c("gid", "annot_cds_length")]
colnames(lp_lengths) <- c("ID", "length")
## Lower-case the product descriptions; semantic_expt_filter() is later pointed
## at this column with lower-case search terms.
all_lp_annot[["annot_gene_product"]] <- tolower(all_lp_annot[["annot_gene_product"]])
orthos <- sm(EuPathDB::extract_eupath_orthologs(db = pan_db))
## Download the TriTrypDB metadata and pick the entry for Leishmania panamensis.
meta <- sm(EuPathDB::download_eupath_metadata(webservice="tritrypdb"))
lp_entry <- EuPathDB::get_eupath_entry(species="Leishmania panamensis", metadata=meta)
## Found the following hits: Leishmania panamensis MHOM/COL/81/L13, Leishmania panamensis strain MHOM/PA/94/PSC-1, choosing the first.
## Using: Leishmania panamensis MHOM/COL/81/L13.
colnames(lp_entry)
## [1] "AnnotationVersion" "AnnotationSource" "BiocVersion"
## [4] "DataProvider" "Genome" "GenomeSource"
## [7] "GenomeVersion" "NumArrayGene" "NumChipChipGene"
## [10] "NumChromosome" "NumCodingGene" "NumCommunity"
## [13] "NumContig" "NumEC" "NumEST"
## [16] "NumGene" "NumGO" "NumOrtholog"
## [19] "NumOtherGene" "NumPopSet" "NumProteomics"
## [22] "NumPseudogene" "NumRNASeq" "NumRTPCR"
## [25] "NumSNP" "NumTFBS" "Organellar"
## [28] "ReferenceStrain" "MegaBP" "PrimaryKey"
## [31] "ProjectID" "RecordClassName" "SourceID"
## [34] "SourceVersion" "TaxonomyID" "TaxonomyName"
## [37] "URLGenome" "URLGFF" "URLProtein"
## [40] "Coordinate_1_based" "Maintainer" "SourceUrl"
## [43] "Tags" "BsgenomePkg" "GrangesPkg"
## [46] "OrganismdbiPkg" "OrgdbPkg" "TxdbPkg"
## [49] "Taxon" "Genus" "Species"
## [52] "Strain" "BsgenomeFile" "GrangesFile"
## [55] "OrganismdbiFile" "OrgdbFile" "TxdbFile"
## [58] "GenusSpecies" "TaxonUnmodified" "TaxonCanonical"
## [61] "TaxonXref"
## Attach a pre-built BSgenome package by name and fetch the genome object it
## provides. The commented-out line shows how the package could instead be
## generated from lp_entry.
## NOTE(review): this package is tagged v53 while the OrgDb annotations loaded
## earlier are v46 -- confirm that mixing revisions is intended.
testing_panamensis <- "BSGenome.Leishmania.panamensis.MHOMCOL81L13.v53"
## testing_panamensis <- EuPathDB::make_eupath_bsgenome(entry=lp_entry, eu_version="v46")
library(as.character(testing_panamensis), character.only=TRUE)
## Loading required package: BSgenome
## Loading required package: Biostrings
## Loading required package: XVector
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
## Loading required package: rtracklayer
## get0() looks up the object with the same name as the package and yields
## NULL rather than an error when no such object exists.
genome <- get0(as.character(testing_panamensis))
Resequence samples: TMRC20002, TMRC20006, TMRC20004 (maybe TMRC20008 and TMRC20029)
The process of sample estimation takes two primary inputs:
An expressionset is a data structure used in R to examine RNASeq data. It is comprised of annotations, metadata, and expression data. In the case of our processing pipeline, the location of the expression data is provided by the filenames in the metadata.
The first lines of the following block create the Expressionset. All of the following lines perform various normalizations and generate plots from it.
The following samples are much lower coverage:
20210610: I made some manual changes to the sample sheet which I downloaded, filling in some zymodeme with ‘unknown’
## Colors keyed by zymodeme category; handed to set_expt_colors() below and
## again when clinical_samples is built.
clinical_colors <- list(
## "z1.0" = "#333333", ## Changed this to 'braz' to make it easier to find them.
"z2.0" = "#555555",
"z3.0" = "#777777",
"z2.1" = "#874400",
"z2.2" = "#0000cc",
"z2.3" = "#cc0000",
"z2.4" = "#df7000",
"braz" = "#cc00cc",
"unknown" = "#cbcbcb",
"null" = "#000000")
## Metadata columns which are sanitized and then converted to factors at the
## end of the pipeline below.
sanitize_columns <- c("passagenumber", "clinicalresponse", "clinicalcategorical",
"zymodemecategorical")
## Build the expressionset from the sample sheet and the count files named in
## the 'lpanamensisv36hisatfile' column, then in order:
##   - use the zymodeme column as the experimental condition,
##   - drop samples failing the nonzero = 8550 and coverage = 5000000 filters
##     (the recorded output below lists which samples each filter removed),
##   - apply the zymodeme colors,
##   - remove genes whose product matches amastin/gp63/leishmanolysin,
##   - sanitize the listed metadata columns and make them factors.
## The condition is set before the metadata is sanitized, which is why later
## tables of 'condition' show labels such as 'z2.2' while the sanitized
## zymodemecategorical column shows 'z22'.
## NOTE(review): the file column name says v36 while the annotations are v46 --
## confirm these refer to compatible gene IDs.
lp_expt <- create_expt(sample_sheet,
gene_info = all_lp_annot,
annotation_name = orgdb,
id_column = "hpglidentifier",
file_column = "lpanamensisv36hisatfile") %>%
set_expt_conditions(fact = "zymodemecategorical") %>%
subset_expt(nonzero = 8550) %>%
subset_expt(coverage = 5000000) %>%
set_expt_colors(clinical_colors) %>%
semantic_expt_filter(semantic = c("amastin", "gp63", "leishmanolysin"),
semantic_column = "annot_gene_product") %>%
sanitize_expt_metadata(columns = sanitize_columns) %>%
set_expt_factors(columns = sanitize_columns, class = "factor")
## Reading the sample metadata.
## Dropped 11 rows from the sample metadata because the sample ID is blank.
## Did not find the condition column in the sample sheet.
## Filling it in as undefined.
## Did not find the batch column in the sample sheet.
## Filling it in as undefined.
## The sample definitions comprises: 110 rows(samples) and 66 columns(metadata fields).
## Warning in create_expt(sample_sheet, gene_info = all_lp_annot, annotation_name
## = orgdb, : Some samples were removed when cross referencing the samples against
## the count data.
## Matched 8778 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 8778 features and 105 samples.
## The samples (and read coverage) removed when filtering 8550 non-zero genes are:
## TMRC20002 TMRC20006
## 11681227 6670348
## subset_expt(): There were 105, now there are 103 samples.
## The samples removed (and read coverage) when filtering samples with less than 5e+06 reads are:
## TMRC20004 TMRC20029
## 564812 1658096
## subset_expt(): There were 103, now there are 101 samples.
## semantic_expt_filter(): Removed 68 genes.
## Quality-control plots for lp_expt. Each plot is printed twice: once between
## pp(...) and dev.off() and once more afterwards. pp() presumably opens a
## graphics device writing to the named file, so the first print goes to disk
## and the second into this document -- TODO confirm.
libsizes <- plot_libsize(lp_expt)
dev <- pp("images/lp_expt_libsizes.png", width = 14, height = 9)
libsizes$plot
closed <- dev.off()
libsizes$plot
## I think samples 7,10 should be removed at minimum, probably also 9,11
nonzero <- plot_nonzero(lp_expt)
dev <- pp(file = "images/lp_nonzero.png", width=9, height=9)
nonzero$plot
## Warning: ggrepel: 81 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
closed <- dev.off()
lp_box <- plot_boxplot(lp_expt)
## 8122 entries are 0. We are on a log scale, adding 1 to the data.
dev <- pp(file = "images/lp_expt_boxplot.png", width = 12, height = 9)
lp_box
closed <- dev.off()
lp_box
filter_plot <- plot_libsize_prepost(lp_expt)
filter_plot$lowgene_plot
## Warning: Using alpha for a discrete variable is not advised.
filter_plot$count_plot
## Tabulate how many samples fall into each zymodeme and clinical-response
## category after sanitizing.
table(pData(lp_expt)[["zymodemecategorical"]])
##
## braz notapplicable unknown z20 z21
## 2 2 3 1 7
## z22 z23 z24 z30
## 43 40 2 1
table(pData(lp_expt)[["clinicalresponse"]])
##
## cure failure
## 38 38
## laboratory line laboratory line miltefosine resistant
## 1 1
## nd reference strain
## 19 4
Najib’s favorite plots are of course the PCA/TSNE. These are nice to look at in order to get a sense of the relationships between samples. They also provide a good opportunity to see what happens when one applies different normalizations, surrogate analyses, filters, etc. In addition, one may set different experimental factors as the primary ‘condition’ (usually the color of plots) and surrogate ‘batches’.
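Column ‘Q’ in the sample sheet: make a categorical version of it with these parameters: an infection reduction of 35% or less is ‘resistant’, 36% to 48% is ‘ambiguous’, 49% or more is ‘sensitive’, and missing values are ‘unknown’.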
## Convert spreadsheet cells holding fractions or percentages into numeric
## fractions.
##
## Cells containing a percent sign (e.g. "35%") have the sign removed and are
## divided by 100; every other cell is passed through as.numeric() as-is.
## Cells which cannot be parsed (e.g. "nd") become NA. Because the conversion
## is vectorized, as.numeric() emits its 'NAs introduced by coercion' warning
## at most once per call rather than once per unparseable element, and an
## empty input yields numeric(0) instead of an error.
##
## numbers: character (or numeric) vector of cell values; may be empty.
## Returns a numeric vector of the same length as numbers.
fix_excel_percent <- function(numbers) {
  cells <- as.character(numbers)
  ## Remember which cells were written as percentages before stripping the sign.
  is_pct <- grepl(x = cells, pattern = "%", fixed = TRUE)
  values <- as.numeric(gsub(x = cells, pattern = "%", replacement = "", fixed = TRUE))
  values[is_pct] <- values[is_pct] / 100.0
  return(values)
}
## Historical susceptibility column converted to numeric fractions; entries
## which cannot be parsed become NA (see the coercion warnings recorded below).
starting <- fix_excel_percent(pData(lp_expt)[["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]])
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]]): NAs introduced
## by coercion
## Bin the historical susceptibility fractions into categories:
##   NA (missing or unparseable) -> "unknown"
##   <= 0.35                     -> "resistant"
##   > 0.35 and < 0.49           -> "ambiguous"
##   >= 0.49                     -> "sensitive"
## The ambiguous bin is everything between the two cutoffs, so a value such as
## 0.355 or 0.485 cannot fall through uncategorized and end up in the factor as
## its own numeric level. The index vectors are NA-free (NA entries are FALSE).
na_idx <- is.na(starting)
sum(na_idx)
## [1] 51
resist_idx <- !na_idx & starting <= 0.35
susceptible_idx <- !na_idx & starting >= 0.49
indeterminant_idx <- !na_idx & !resist_idx & !susceptible_idx
## Start from an all-"unknown" character vector instead of relying on implicit
## numeric-to-character coercion of the measurements.
sus_categorical <- rep("unknown", length(starting))
sus_categorical[resist_idx] <- "resistant"
sus_categorical[indeterminant_idx] <- "ambiguous"
sus_categorical[susceptible_idx] <- "sensitive"
sus_categorical <- as.factor(sus_categorical)
pData(lp_expt)[["sus_category_historical"]] <- sus_categorical
table(sus_categorical)
## sus_categorical
## ambiguous resistant sensitive unknown
## 5 12 33 51
## Current susceptibility column converted to numeric fractions; entries which
## cannot be parsed become NA (see the coercion warnings recorded below).
starting_current <- fix_excel_percent(pData(lp_expt)[["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]])
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Warning in fix_excel_percent(pData(lp_expt)
## [["susceptibilityinfectionreduction32ugmlsbvcurrentdata"]]): NAs introduced by
## coercion
## Bin the current susceptibility fractions into categories:
##   NA (missing or unparseable) -> "unknown"
##   <= 0.35                     -> "resistant"
##   > 0.35 and < 0.49           -> "ambiguous"
##   >= 0.49                     -> "sensitive"
## The ambiguous bin is everything between the two cutoffs, so a value such as
## 0.355 or 0.485 cannot fall through uncategorized and end up in the factor as
## its own numeric level. The index vectors are NA-free (NA entries are FALSE).
na_idx <- is.na(starting_current)
sum(na_idx)
## [1] 54
resist_idx <- !na_idx & starting_current <= 0.35
susceptible_idx <- !na_idx & starting_current >= 0.49
indeterminant_idx <- !na_idx & !resist_idx & !susceptible_idx
## Start from an all-"unknown" character vector instead of relying on implicit
## numeric-to-character coercion of the measurements.
sus_categorical_current <- rep("unknown", length(starting_current))
sus_categorical_current[resist_idx] <- "resistant"
sus_categorical_current[indeterminant_idx] <- "ambiguous"
sus_categorical_current[susceptible_idx] <- "sensitive"
sus_categorical_current <- as.factor(sus_categorical_current)
pData(lp_expt)[["sus_category_current"]] <- sus_categorical_current
table(sus_categorical_current)
## sus_categorical_current
## ambiguous resistant sensitive unknown
## 9 6 32 54
## Zymodeme-centric views: keep the zymodeme as the condition (set when
## lp_expt was built) and use the current susceptibility category as the batch.
## Note that the factor vector itself, not a column name, is passed as 'fact'.
clinical_samples <- lp_expt %>%
set_expt_batches(fact = sus_categorical_current) %>%
set_expt_colors(clinical_colors)
table(pData(clinical_samples)[["condition"]])
##
## braz null unknown z2.0 z2.1 z2.2 z2.3 z2.4 z3.0
## 2 2 3 1 7 43 40 2 1
## Quantile-normalized, cpm-converted, log2-transformed and low-count-filtered
## data for the PCA and the heatmaps at the end of this section.
clinical_norm <- normalize_expt(clinical_samples, norm = "quant", transform = "log2",
convert = "cpm", filter = TRUE)
## Removing 134 low-count genes (8576 remaining).
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
zymo_pca <- plot_pca(clinical_norm, plot_title = "PCA of parasite expression values",
plot_labels = FALSE)
ggplt(zymo_pca$plot)
## [1] "ggplot.html"
dev <- pp(file = "images/zymo_pca_sus_shape.png")
zymo_pca$plot
closed <- dev.off()
zymo_pca$plot
## Repeat the PCA using only the z2.2 and z2.3 samples.
only_two_types <- subset_expt(clinical_samples, subset = "condition=='z2.3'|condition=='z2.2'")
## subset_expt(): There were 101, now there are 83 samples.
only_two_norm <- sm(normalize_expt(only_two_types, norm = "quant", transform = "log2",
convert = "cpm", batch = FALSE, filter = TRUE))
onlytwo_pca <- plot_pca(only_two_norm, plot_title = "PCA of z2.2 and z2.3 parasite expression values",
plot_labels = FALSE)
## This is the only image in this section written as a pdf rather than a png.
dev <- pp(file = "images/zymo_z2.2_z2.3_pca_sus_shape.pdf")
onlytwo_pca$plot
closed <- dev.off()
onlytwo_pca$plot
zymo_3dpca <- plot_3d_pca(zymo_pca)
zymo_3dpca$plot
## TSNE input: same as clinical_norm but without the norm = "quant" argument.
clinical_n <- sm(normalize_expt(clinical_samples, transform = "log2",
convert = "cpm", batch = FALSE, filter = TRUE))
zymo_tsne <- plot_tsne(clinical_n, plot_title = "TSNE of parasite expression values")
## plot labels was not set and there are more than 100 samples, disabling it.
zymo_tsne$plot
## Same views again with batch = "svaseq" passed to normalize_expt().
clinical_nb <- normalize_expt(clinical_samples, convert = "cpm", transform = "log2",
filter = TRUE, batch = "svaseq")
## Removing 134 low-count genes (8576 remaining).
## Setting 748 low elements to zero.
## transform_counts: Found 748 values equal to 0, adding 1 to the matrix.
clinical_nb_pca <- plot_pca(clinical_nb, plot_title = "PCA of parasite expression values",
plot_labels = FALSE)
dev <- pp(file = "images/clinical_nb_pca_sus_shape.png")
clinical_nb_pca$plot
closed <- dev.off()
clinical_nb_pca$plot
clinical_nb_tsne <- plot_tsne(clinical_nb, plot_title = "TSNE of parasite expression values")
## plot labels was not set and there are more than 100 samples, disabling it.
clinical_nb_tsne$plot
## The title string below spans three source lines, so it contains embedded
## newlines.
corheat <- plot_corheat(clinical_norm, plot_title = "Correlation heatmap of parasite
expression values
")
corheat$plot
plot_sm(clinical_norm)$plot
## Performing correlation.
## Cure/fail views: the clinicalcategorical column becomes the condition and
## the current susceptibility category remains the batch.
cf_colors <- list(
"cure" = "#006f00",
"fail" = "#9dffa0",
"unknown" = "#cbcbcb",
"notapplicable" = "#000000")
cf_expt <- set_expt_conditions(lp_expt, fact = "clinicalcategorical") %>%
set_expt_batches(fact = sus_categorical_current) %>%
set_expt_colors(cf_colors)
## Warning in set_expt_colors(., cf_colors): Colors for the following categories
## are not being used: notapplicable.
table(pData(cf_expt)[["condition"]])
##
## cure fail unknown
## 38 38 25
cf_norm <- normalize_expt(cf_expt, convert = "cpm", transform = "log2",
norm = "quant", filter = TRUE)
## Removing 134 low-count genes (8576 remaining).
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
start_cf <- plot_pca(cf_norm, plot_title = "PCA of parasite expression values",
plot_labels = FALSE)
dev <- pp(file = "images/cf_sus_shape.png")
start_cf$plot
closed <- dev.off()
start_cf$plot
## Drop the samples with unknown outcome; cf_nb_input is reused later as the
## input of the cure/fail differential expression analysis.
cf_nb_input <- subset_expt(cf_expt, subset="condition!='unknown'")
## subset_expt(): There were 101, now there are 76 samples.
cf_nb <- normalize_expt(cf_nb_input, convert = "cpm", transform = "log2",
filter = TRUE, batch = "svaseq")
## Removing 162 low-count genes (8548 remaining).
## Setting 117 low elements to zero.
## transform_counts: Found 117 values equal to 0, adding 1 to the matrix.
cf_nb_pca <- plot_pca(cf_nb, plot_title = "PCA of parasite expression values",
plot_labels = FALSE)
dev <- pp(file = "images/cf_sus_share_nb.png")
cf_nb_pca$plot
closed <- dev.off()
cf_nb_pca$plot
## cf_norm is recomputed here with the same arguments as above (only their
## order differs).
cf_norm <- normalize_expt(cf_expt, transform = "log2", convert = "cpm",
filter = TRUE, norm = "quant")
## Removing 134 low-count genes (8576 remaining).
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
## Relate the first six principal components to four metadata factors; the
## anova_p table recorded below has one row per factor and one column per PC.
test <- pca_information(cf_norm,
expt_factors = c("clinicalcategorical", "zymodemecategorical",
"pathogenstrain", "passagenumber"),
num_components = 6, plot_pcas = TRUE)
## plot labels was not set and there are more than 100 samples, disabling it.
test$anova_p
## PC1 PC2 PC3 PC4 PC5 PC6
## clinicalcategorical 3.139e-01 0.457872 0.9691 7.839e-03 0.2264183 3.371e-01
## zymodemecategorical 9.358e-06 0.004581 0.6655 3.058e-02 0.0001343 1.206e-01
## pathogenstrain 4.747e-01 0.870333 0.6433 5.629e-05 0.0188863 2.316e-01
## passagenumber 9.502e-01 0.174448 0.4657 3.136e-02 0.8601857 5.429e-06
test$cor_heatmap
## Susceptibility views: the current susceptibility category becomes the
## condition and the cure/fail column becomes the batch.
sus_colors <- list(
"resistant" = "#8563a7",
"sensitive" = "#8d0000",
"ambiguous" = "#cbcbcb",
"unknown" = "#555555")
sus_expt <- set_expt_conditions(lp_expt, fact = "sus_category_current") %>%
set_expt_batches(fact = "clinicalcategorical") %>%
set_expt_colors(colors = sus_colors)
## subset_expt(subset = "batch!='z24'") %>%
## subset_expt(subset = "batch!='z21'")
sus_norm <- normalize_expt(sus_expt, transform = "log2", convert = "cpm",
norm = "quant", filter = TRUE)
## Removing 134 low-count genes (8576 remaining).
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
sus_pca <- plot_pca(sus_norm, plot_title = "PCA of parasite expression values",
plot_labels = FALSE)
dev <- pp(file = "images/sus_norm_pca.png")
sus_pca[["plot"]]
closed <- dev.off()
sus_pca[["plot"]]
## Same PCA again with batch = "svaseq" passed to normalize_expt().
sus_nb <- normalize_expt(sus_expt, transform = "log2", convert = "cpm",
batch = "svaseq", filter = TRUE)
## Removing 134 low-count genes (8576 remaining).
## Setting 447 low elements to zero.
## transform_counts: Found 447 values equal to 0, adding 1 to the matrix.
sus_nb_pca <- plot_pca(sus_nb, plot_title = "PCA of parasite expression values",
plot_labels = FALSE)
dev <- pp(file = "images/sus_nb_pca.png")
sus_nb_pca[["plot"]]
closed <- dev.off()
sus_nb_pca[["plot"]]
The following sections perform a series of analyses which seek to elucidate differences between the zymodemes 2.2 and 2.3 either through differential expression or variant profiles.
TODO: Do this with and without sva and compare the results.
zy_expt <- subset_expt(lp_expt, subset = "condition=='z2.2'|condition=='z2.3'")
## subset_expt(): There were 101, now there are 83 samples.
zy_norm <- normalize_expt(zy_expt, filter = TRUE, convert = "cpm", norm = "quant")
## Removing 152 low-count genes (8558 remaining).
zy_de_nobatch <- all_pairwise(zy_expt, filter = TRUE, model_batch = FALSE)
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
zy_table_nobatch <- combine_de_tables(
zy_de_nobatch,
excel = glue::glue("excel/zy_tables_nobatch-v{ver}.xlsx"))
## Deleting the file excel/zy_tables_nobatch-v202206.xlsx before writing the tables.
zy_sig_nobatch <- extract_significant_genes(
zy_table_nobatch,
according_to = "deseq", current_id = "GID", required_id = "GID",
gmt = glue::glue("gmt/zymodeme_nobatch-v{ver}.gmt"),
excel = glue::glue("excel/zy_sig_nobatch_deseq-v{ver}.xlsx"))
## Deleting the file excel/zy_sig_nobatch_deseq-v202206.xlsx before writing the tables.
## Going to attempt to create gmt files from these results.
## There is an error lurking in extract_significant_genes()
## in which it incorrectly returns genes when not explicitly setting the 'according_to' parameter.
zy_sig_test <- extract_significant_genes(
zy_table_nobatch,
current_id = "GID", required_id = "GID",
gmt = "gmt/zymodeme_test.gmt",
excel = "excel/zy_sig_nobatch_test.xlsx")
## Deleting the file excel/zy_sig_nobatch_test.xlsx before writing the tables.
## Going to attempt to create gmt files from these results.
## For now, limiting this to deseq.
first_test <- zy_sig_nobatch[["deseq"]][["ups"]][[1]]
second_test <- zy_sig_test[["deseq"]][["ups"]][[1]]
## I think I fixed it!
expect_equal(first_test, second_test)
zy_sig_nobatch_all <- extract_significant_genes(
zy_table_nobatch,
current_id = "GID", required_id = "GID",
gmt = glue::glue("gmt/zymodeme_nobatch-v{ver}.gmt"),
excel = glue::glue("excel/zy_sig_nobatch_all-v{ver}.xlsx"))
## Deleting the file excel/zy_sig_nobatch_all-v202206.xlsx before writing the tables.
## Going to attempt to create gmt files from these results.
## For now, limiting this to deseq.
zy_de_sva <- all_pairwise(zy_expt, filter = TRUE, model_batch = "svaseq")
## Removing 0 low-count genes (8558 remaining).
## Setting 427 low elements to zero.
## transform_counts: Found 427 values equal to 0, adding 1 to the matrix.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
## Combined tables for the svaseq zymodeme results, also written to xlsx.
zy_table_sva <- combine_de_tables(
  zy_de_sva,
  excel = glue::glue("excel/zy_tables_sva-v{ver}.xlsx")
)
## Deleting the file excel/zy_tables_sva-v202206.xlsx before writing the tables.
## Significant genes from the svaseq zymodeme tables according to deseq,
## requesting both a gmt file and an xlsx workbook.
zy_sig_sva <- extract_significant_genes(
  zy_table_sva,
  according_to = "deseq",
  current_id = "GID",
  required_id = "GID",
  gmt = glue::glue("gmt/zymodeme_sva-v{ver}.gmt"),
  excel = glue::glue("excel/zy_sig_sva-v{ver}.xlsx")
)
## Deleting the file excel/zy_sig_sva-v202206.xlsx before writing the tables.
## Going to attempt to create gmt files from these results.
## Save the deseq MA plot of the z23_vs_z22 contrast to images/zymo_ma.png.
## pp() presumably opens a graphics device on the given file; it is closed
## again with dev.off() once the plot has been drawn.
dev <- pp(file = "images/zymo_ma.png")
zy_table_sva[["plots"]][["z23_vs_z22"]][["deseq_ma_plots"]][["plot"]]
closed <- dev.off()
## Evaluate the same plot a second time, after the file device is closed,
## presumably so that it also appears in the rendered document.
zy_table_sva[["plots"]][["z23_vs_z22"]][["deseq_ma_plots"]][["plot"]]
In contrast, we can search for genes which are differentially expressed with respect to cure/failure status.
## Pairwise contrasts for cure/fail, filtered, with the "svaseq" option for
## model_batch.
## NOTE(review): the subset which would define cf_nb_input is commented out
## below, so cf_nb_input must already exist from earlier in the document --
## confirm.
##cf_nb_input <- subset_expt(cf_expt, subset="condition!='unknown'")
cf_de <- all_pairwise(
  cf_nb_input,
  filter = TRUE,
  model_batch = "svaseq"
)
## Removing 0 low-count genes (8548 remaining).
## Setting 117 low elements to zero.
## transform_counts: Found 117 values equal to 0, adding 1 to the matrix.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
## Combined tables for the cure/fail results, also written to xlsx.
cf_table <- combine_de_tables(
  cf_de,
  excel = glue::glue("excel/cf_tables-v{ver}.xlsx")
)
## Deleting the file excel/cf_tables-v202206.xlsx before writing the tables.
## Significant cure/fail genes; 'according_to' is left at the function's
## default here, unlike the zymodeme and susceptibility extractions.
cf_sig <- extract_significant_genes(
  cf_table,
  excel = glue::glue("excel/cf_sig-v{ver}.xlsx")
)
## Deleting the file excel/cf_sig-v202206.xlsx before writing the tables.
## Save the deseq MA plot of the fail_vs_cure contrast to images/cf_ma.png.
## pp() presumably opens a graphics device on the given file; it is closed
## again with dev.off() once the plot has been drawn.
dev <- pp(file = "images/cf_ma.png")
cf_table[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]][["plot"]]
closed <- dev.off()
## Evaluate the same plot a second time, after the file device is closed,
## presumably so that it also appears in the rendered document.
cf_table[["plots"]][["fail_vs_cure"]][["deseq_ma_plots"]][["plot"]]
Finally, we can use our category of susceptibility and look for genes which change from sensitive to resistant. Keep in mind, though, that for the moment we have a lot of ambiguous and unknown strains.
## Pairwise contrasts of the susceptibility experiment, filtered, with the
## "svaseq" option for model_batch.
sus_de_sva <- all_pairwise(
  sus_expt,
  filter = TRUE,
  model_batch = "svaseq"
)
## Removing 0 low-count genes (8576 remaining).
## Setting 428 low elements to zero.
## transform_counts: Found 428 values equal to 0, adding 1 to the matrix.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
## Combined tables for the svaseq susceptibility results, also written to
## xlsx.
sus_table_sva <- combine_de_tables(
  sus_de_sva,
  excel = glue::glue("excel/sus_tables_sva-v{ver}.xlsx")
)
## Deleting the file excel/sus_tables_sva-v202206.xlsx before writing the tables.
## Significant genes from the svaseq susceptibility tables according to deseq.
sus_sig_sva <- extract_significant_genes(
  sus_table_sva,
  according_to = "deseq",
  excel = glue::glue("excel/sus_sig_sva-v{ver}.xlsx")
)
## Deleting the file excel/sus_sig_sva-v202206.xlsx before writing the tables.
## The same susceptibility contrasts, this time with model_batch switched off.
sus_de_nobatch <- all_pairwise(
  sus_expt,
  filter = TRUE,
  model_batch = FALSE
)
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
## Combined tables for the no-batch susceptibility results, also written to
## xlsx.
sus_table_nobatch <- combine_de_tables(
  sus_de_nobatch,
  excel = glue::glue("excel/sus_tables_nobatch-v{ver}.xlsx")
)
## Deleting the file excel/sus_tables_nobatch-v202206.xlsx before writing the tables.
## Significant genes from the no-batch susceptibility tables according to
## deseq.
sus_sig_nobatch <- extract_significant_genes(
  sus_table_nobatch,
  according_to = "deseq",
  excel = glue::glue("excel/sus_sig_nobatch-v{ver}.xlsx")
)
## Deleting the file excel/sus_sig_nobatch-v202206.xlsx before writing the tables.
Before going further, I am checking the function which performs the comparison; hence the following comparison of the nobatch vs. sva results for the susceptibility data.
Yes, the compare_de_results() function assumes that the results it compares contain identical sets of contrasts, which is explicitly not the case for these data. Thus I am making a simpler function, compare_de_tables(), which handles this scenario.
## Compare the no-batch susceptibility tables against the svaseq ones.
sus_nobatch_sva <- compare_de_results(
  sus_table_nobatch,
  sus_table_sva
)
## Testing method: limma.
## Adding method: limma to the set.
## Testing method: deseq.
## Adding method: deseq to the set.
## Testing method: edger.
## Adding method: edger to the set.
## Starting method limma, table resistant_vs_ambiguous.
## Starting method limma, table sensitive_vs_ambiguous.
## Starting method limma, table unknown_vs_ambiguous.
## Starting method limma, table sensitive_vs_resistant.
## Starting method limma, table unknown_vs_resistant.
## Starting method limma, table unknown_vs_sensitive.
## Starting method deseq, table resistant_vs_ambiguous.
## Starting method deseq, table sensitive_vs_ambiguous.
## Starting method deseq, table unknown_vs_ambiguous.
## Starting method deseq, table sensitive_vs_resistant.
## Starting method deseq, table unknown_vs_resistant.
## Starting method deseq, table unknown_vs_sensitive.
## Starting method edger, table resistant_vs_ambiguous.
## Starting method edger, table sensitive_vs_ambiguous.
## Starting method edger, table unknown_vs_ambiguous.
## Starting method edger, table sensitive_vs_resistant.
## Starting method edger, table unknown_vs_resistant.
## Starting method edger, table unknown_vs_sensitive.
A reminder to myself: the data structures are named (zy|sus)_(de|table|sig), followed by a _sva or _nobatch suffix.
## Put the deseq logFC of the zymodeme contrast (z23_vs_z22) next to that of
## the susceptibility contrast (sensitive_vs_resistant), gene by gene.
sus_df <- sus_table_sva[["data"]][["sensitive_vs_resistant"]]
zy_df <- zy_table_sva[["data"]][["z23_vs_z22"]]
## Merging on the row names suffixes the shared columns with .x (zymodeme)
## and .y (susceptibility) and keeps the keys in the 'Row.names' column.
both_df <- merge(x = zy_df, y = sus_df, by = "row.names")
plot_df <- both_df[c("deseq_logfc.x", "deseq_logfc.y")]
names(plot_df) <- c("z23_vs_z22", "sensitive_vs_resistant")
row.names(plot_df) <- both_df[["Row.names"]]
compare <- plot_linear_scatter(plot_df)
## Warning in plot_multihistogram(df): NAs introduced by coercion
## Save the scatter plot of the two logFC sets to images/compare_sus_zy.png.
## pp() presumably opens a graphics device on the given file; it is closed
## again with dev.off() once the plot has been drawn.
dev <- pp(file = "images/compare_sus_zy.png")
compare$scatter
closed <- dev.off()
## Evaluate the plot a second time, presumably for the rendered document, and
## then print the correlation.
## NOTE(review): `$` partially matches list names, so `cor` may be resolving
## to a longer element name -- confirm before switching these to `[[`.
compare$scatter
compare$cor
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = -156, df = 8556, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.8649 -0.8538
## sample estimates:
## cor
## -0.8595
## First 20 rows of the deseq 'ups' for sensitive_vs_resistant (svaseq
## results), rendered as a table.
knitr::kable(
  head(sus_sig_sva[["deseq"]][["ups"]][["sensitive_vs_resistant"]], n = 20)
)
gid | annotgeneproduct | annotgenetype | chromosome | start | end | strand | annotgeneentrezid | annotgenename | annotstrand | annotchromosome | annotcdslength | length | deseq_logfc | deseq_adjp | edger_logfc | edger_adjp | limma_logfc | limma_adjp | basic_nummed | basic_denmed | basic_numvar | basic_denvar | basic_logfc | basic_t | basic_p | basic_adjp | deseq_basemean | deseq_lfcse | deseq_stat | deseq_p | ebseq_fc | ebseq_logfc | ebseq_c1mean | ebseq_c2mean | ebseq_mean | ebseq_var | ebseq_postfc | ebseq_ppee | ebseq_ppde | ebseq_adjp | edger_logcpm | edger_lr | edger_p | limma_ave | limma_t | limma_b | limma_p | limma_adjp_ihw | deseq_adjp_ihw | edger_adjp_ihw | ebseq_adjp_ihw | basic_adjp_ihw | lfc_meta | lfc_var | lfc_varbymed | p_meta | p_var | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
LPAL13_080010800 | LPAL13_080010800 | hypothetical protein | protein coding | LpaL13_08 | 199409 | 199792 | - | reverse | 8 | 384.0 | 383 | 20.320 | 0.0000 | 6.193 | 0.0655 | 1.5820 | 0.3819 | -2.4080 | -4.1720 | 4.869 | 1.3462 | 1.7640 | 2.875 | 0.0129 | 0.2759 | 7.815 | 1.3390 | 15.180 | 0.0000 | 1516.477 | 10.566 | 0.0000 | 15.15 | 12.76 | 4.649e+02 | 2.667 | 1.0000 | 0.0000 | 1.0000 | -1.0340 | 11.680 | 0.0006 | -3.2250 | 1.7450 | -4.161 | 0.0841 | 4.650e-01 | 4.439e-48 | 5.587e-02 | 0.000e+00 | 2.787e-01 | 5.880 | 1.519e+01 | 2.583e+00 | 2.824e-02 | 2.339e-03 | ||
LPAL13_000035800 | LPAL13_000035800 | hypothetical protein | protein coding | LPAL13_SCAF000500 | 737 | 1006 | - | reverse | Not Assigned | 270.0 | 269 | 6.439 | 0.0146 | 6.339 | 0.0424 | 2.7710 | 0.6601 | 5.3420 | -0.6307 | 16.890 | 18.9800 | 5.9730 | 3.109 | 0.0178 | 0.3120 | 2530.000 | 1.5380 | 4.188 | 0.0000 | 5.006 | 2.324 | 1040.3152 | 5208.30 | 4550.20 | 2.011e+07 | 5.489 | 0.9927 | 0.0073 | 0.9927 | 6.7330 | 15.680 | 0.0001 | 2.0600 | 0.9767 | -5.320 | 0.3311 | 7.056e-01 | 1.638e-02 | 1.000e+00 | 1.059e-02 | 3.127e-01 | 6.697 | 8.388e+00 | 1.252e+00 | 1.104e-01 | 3.653e-02 | ||
LPAL13_000051300 | LPAL13_000051300 | hypothetical protein, conserved | protein coding | LPAL13_SCAF000772 | 11 | 2344 | + | forward | Not Assigned | 2334.0 | 2333 | 6.338 | 0.0169 | 5.875 | 0.1116 | 1.5040 | 0.7507 | 0.4368 | -3.0740 | 11.077 | 10.6988 | 3.5100 | 2.406 | 0.0467 | 0.4505 | 100.500 | 1.5580 | 4.069 | 0.0000 | 5.701 | 2.511 | 40.6590 | 231.83 | 201.65 | 1.099e+05 | 5.803 | 0.7902 | 0.2098 | 0.7902 | 2.1570 | 8.694 | 0.0032 | -1.3160 | 0.7534 | -5.038 | 0.4530 | 9.051e-01 | 1.638e-02 | 1.016e-01 | 1.588e-01 | 4.557e-01 | 4.129 | 3.301e+00 | 7.996e-01 | 1.521e-01 | 6.792e-02 | ||
LPAL13_000040700 | LPAL13_000040700 | hypothetical protein, conserved | protein coding | LPAL13_SCAF000598 | 54 | 1067 | + | forward | Not Assigned | 1014.0 | 1013 | 5.828 | 0.0158 | 5.239 | 0.0996 | 1.9130 | 0.3864 | -1.4860 | -3.7180 | 7.371 | 3.4602 | 2.2310 | 2.484 | 0.0334 | 0.3941 | 16.030 | 1.4240 | 4.094 | 0.0000 | 22.963 | 4.521 | 1.4101 | 32.60 | 27.68 | 1.160e+03 | 4.141 | 0.9758 | 0.0242 | 0.9758 | -0.2624 | 9.176 | 0.0025 | -2.4010 | 1.7250 | -4.168 | 0.0877 | 4.729e-01 | 1.329e-02 | 7.503e-02 | 1.695e-02 | 3.990e-01 | 3.950 | 1.390e+00 | 3.519e-01 | 3.007e-02 | 2.494e-03 | ||
LPAL13_320026300 | LPAL13_320026300 | hypothetical protein, conserved | protein coding | LpaL13_32 | 754268 | 755485 | - | reverse | 32 | 1218.0 | 1217 | 5.815 | 0.0278 | 5.694 | 0.0655 | 3.4130 | 0.6600 | 4.9200 | -2.2900 | 15.806 | 20.3610 | 7.2110 | 3.657 | 0.0091 | 0.2468 | 1278.000 | 1.5290 | 3.804 | 0.0001 | 6.746 | 2.754 | 402.5215 | 2715.39 | 2350.20 | 2.570e+06 | 6.912 | 0.0000 | 0.0000 | 0.0000 | 5.7510 | 12.290 | 0.0005 | 1.8270 | 0.9769 | -5.144 | 0.3310 | 6.460e-01 | 2.039e-02 | 5.306e-02 | 0.000e+00 | 2.382e-01 | 6.561 | 8.993e+00 | 1.371e+00 | 1.105e-01 | 3.645e-02 | ||
LPAL13_000053200 | LPAL13_000053200 | hypothetical protein | protein coding | LPAL13_SCAF000804 | 5037 | 5249 | - | reverse | Not Assigned | 213.0 | 212 | 5.778 | 0.0201 | 5.471 | 0.1116 | 2.9810 | 0.4146 | 1.0020 | -3.2220 | 8.306 | 8.5830 | 4.2250 | 3.250 | 0.0142 | 0.2874 | 65.110 | 1.4550 | 3.970 | 0.0001 | 7.862 | 2.975 | 17.8618 | 140.49 | 121.13 | 8.740e+03 | 5.786 | 0.2220 | 0.7780 | 0.2220 | 1.5190 | 8.712 | 0.0032 | -0.9762 | 1.6440 | -4.252 | 0.1034 | 4.022e-01 | 1.638e-02 | 1.058e-01 | 6.005e-01 | 2.825e-01 | 4.714 | 1.697e-02 | 3.601e-03 | 3.554e-02 | 3.456e-03 | ||
LPAL13_040019400 | LPAL13_040019400 | hypothetical protein | protein coding | LpaL13_04 | 440768 | 441127 | - | reverse | 4 | 360.0 | 359 | 5.617 | 0.0011 | 5.667 | 0.0377 | 3.3550 | 0.1598 | -0.2906 | -3.3870 | 1.964 | 1.0915 | 3.0970 | 6.278 | 0.0002 | 0.0379 | 24.280 | 1.1290 | 4.974 | 0.0000 | 75.038 | 6.229 | 0.5691 | 43.45 | 36.68 | 2.570e+03 | 5.770 | 0.9286 | 0.0714 | 0.9286 | 0.1557 | 16.350 | 0.0001 | -1.7790 | 2.6670 | -3.051 | 0.0090 | 1.690e-01 | 8.222e-04 | 2.489e-02 | 4.933e-02 | 3.791e-02 | 4.880 | 1.579e-01 | 3.236e-02 | 3.005e-03 | 2.661e-05 | ||
LPAL13_000044900 | LPAL13_000044900 | actin-related protein 2, putative | protein coding | LPAL13_SCAF000645 | 507 | 1685 | - | reverse | Not Assigned | 1179.0 | 1178 | 5.110 | 0.0725 | 5.001 | 0.1465 | 3.3200 | 0.6565 | 3.9790 | -2.6360 | 15.764 | 18.4621 | 6.6150 | 3.501 | 0.0107 | 0.2613 | 702.700 | 1.5800 | 3.235 | 0.0012 | 4.266 | 2.093 | 326.2122 | 1391.52 | 1223.32 | 5.911e+05 | 4.376 | 0.7596 | 0.2404 | 0.7596 | 4.8930 | 7.468 | 0.0063 | 1.1900 | 0.9848 | -5.036 | 0.3271 | 6.443e-01 | 6.832e-02 | 1.149e-01 | 1.861e-01 | 2.613e-01 | 4.475 | 4.114e-01 | 9.195e-02 | 1.115e-01 | 3.486e-02 | ||
LPAL13_080010600 | LPAL13_080010600 | hypothetical protein, conserved | protein coding | LpaL13_08 | 195555 | 195749 | - | reverse | 8 | 195.0 | 194 | 4.396 | 0.0794 | 5.726 | 0.0967 | 1.5700 | 0.3970 | -2.1930 | -3.8060 | 5.256 | 2.8515 | 1.6130 | 2.017 | 0.0749 | 0.5131 | 9.189 | 1.3980 | 3.145 | 0.0017 | 39.569 | 5.306 | 0.4700 | 18.98 | 16.06 | 6.054e+02 | 2.954 | 0.9980 | 0.0020 | 0.9980 | -1.0000 | 9.357 | 0.0022 | -3.1470 | 1.6900 | -4.208 | 0.0942 | 4.868e-01 | 6.871e-02 | 8.252e-02 | 1.774e-03 | 5.141e-01 | 3.684 | 1.813e+00 | 4.920e-01 | 3.270e-02 | 2.839e-03 | ||
LPAL13_000017600 | LPAL13_000017600 | hypothetical protein, conserved | protein coding | LPAL13_SCAF000146 | 359 | 586 | + | forward | Not Assigned | 228.0 | 227 | 4.238 | 0.0088 | 4.215 | 0.0655 | 3.1330 | 0.4819 | 4.1660 | -0.2452 | 4.997 | 7.6872 | 4.4110 | 3.679 | 0.0095 | 0.2495 | 522.000 | 0.9440 | 4.489 | 0.0000 | 5.282 | 2.401 | 195.8671 | 1034.62 | 902.19 | 5.173e+05 | 5.379 | 0.7636 | 0.2364 | 0.7636 | 4.4620 | 11.480 | 0.0007 | 2.3240 | 1.4670 | -4.646 | 0.1456 | 4.648e-01 | 4.841e-03 | 5.587e-02 | 2.372e-01 | 2.457e-01 | 4.013 | 5.846e-01 | 1.457e-01 | 4.877e-02 | 7.032e-03 | ||
LPAL13_000011700 | LPAL13_000011700 | hypothetical protein | protein coding | LPAL13_SCAF000076 | 101 | 364 | - | reverse | Not Assigned | 264.0 | 263 | 3.781 | 0.2096 | 3.190 | 0.3457 | 0.9487 | 0.6462 | -1.7460 | -3.3690 | 6.611 | 6.7544 | 1.6230 | 1.406 | 0.2027 | 0.6676 | 10.640 | 1.5930 | 2.374 | 0.0176 | 2.193 | 1.133 | 11.5161 | 25.27 | 23.10 | 6.457e+02 | 1.612 | 0.9988 | 0.0012 | 0.9988 | -0.7439 | 3.726 | 0.0536 | -3.0780 | 1.0150 | -4.759 | 0.3125 | 6.916e-01 | 1.926e-01 | 3.290e-01 | 1.080e-03 | 6.741e-01 | 2.359 | 1.277e+00 | 5.411e-01 | 1.279e-01 | 2.588e-02 | ||
LPAL13_300029400 | LPAL13_300029400 | hypothetical protein, conserved | protein coding | LpaL13_30 | 853953 | 854150 | - | reverse | 30 | 198.0 | 197 | 3.622 | 0.0226 | 3.591 | 0.0836 | 2.3670 | 0.2956 | 1.6850 | -0.7559 | 2.287 | 3.8595 | 2.4410 | 2.888 | 0.0270 | 0.3711 | 83.200 | 0.9295 | 3.897 | 0.0001 | 5.875 | 2.555 | 27.3700 | 160.86 | 139.78 | 1.550e+04 | 4.845 | 0.3256 | 0.6744 | 0.3256 | 1.8240 | 10.030 | 0.0015 | -0.0441 | 2.0380 | -3.760 | 0.0442 | 3.418e-01 | 2.039e-02 | 7.557e-02 | 4.613e-01 | 3.784e-01 | 3.388 | 2.178e-01 | 6.428e-02 | 1.529e-02 | 6.290e-04 | ||
LPAL13_000026500 | LPAL13_000026500 | hypothetical protein | protein coding | LPAL13_SCAF000301 | 144 | 494 | - | reverse | Not Assigned | 351.0 | 350 | 3.429 | 0.0458 | 3.350 | 0.1036 | 1.2590 | 0.5605 | -0.1199 | -1.8470 | 6.713 | 3.6573 | 1.7280 | 1.909 | 0.0892 | 0.5434 | 34.270 | 0.9714 | 3.530 | 0.0004 | 7.151 | 2.838 | 10.3735 | 74.24 | 64.16 | 6.426e+03 | 4.797 | 0.7472 | 0.2528 | 0.7472 | 0.7298 | 8.949 | 0.0028 | -1.0640 | 1.2470 | -4.684 | 0.2154 | 6.477e-01 | 4.823e-02 | 9.656e-02 | 1.640e-01 | 5.473e-01 | 2.538 | 6.052e-01 | 2.385e-01 | 7.286e-02 | 1.524e-02 | ||
LPAL13_350011800 | LPAL13_350011800 | hypothetical protein, conserved | protein coding | LpaL13_35 | 171009 | 171242 | + | forward | 35 | 234.0 | 233 | 3.252 | 0.0237 | 3.240 | 0.0811 | 2.6250 | 0.3185 | 2.6390 | -0.3719 | 3.096 | 4.1725 | 3.0110 | 3.383 | 0.0132 | 0.2798 | 147.500 | 0.8413 | 3.866 | 0.0001 | 5.933 | 2.569 | 55.1376 | 327.20 | 284.24 | 7.387e+04 | 5.539 | 0.7813 | 0.2187 | 0.7813 | 2.6450 | 10.220 | 0.0014 | 0.9678 | 1.9510 | -3.912 | 0.0539 | 3.865e-01 | 1.748e-02 | 6.787e-02 | 1.500e-01 | 2.808e-01 | 3.353 | 7.114e-01 | 2.122e-01 | 1.848e-02 | 9.431e-04 | ||
LPAL13_000035500 | LPAL13_000035500 | hypothetical protein, conserved | protein coding | LPAL13_SCAF000492 | 7045 | 7410 | + | forward | Not Assigned | 366.0 | 365 | 3.046 | 0.0146 | 3.036 | 0.0655 | 2.2610 | 0.3307 | 4.1990 | 1.5110 | 3.729 | 2.4782 | 2.6880 | 3.694 | 0.0059 | 0.2117 | 405.500 | 0.7307 | 4.169 | 0.0000 | 5.884 | 2.557 | 159.4778 | 938.47 | 815.47 | 4.054e+05 | 5.969 | 0.2751 | 0.7249 | 0.2751 | 4.1070 | 12.180 | 0.0005 | 2.7790 | 1.9030 | -4.229 | 0.0599 | 4.023e-01 | 1.329e-02 | 5.306e-02 | 7.251e-01 | 2.038e-01 | 2.983 | 2.599e-01 | 8.714e-02 | 2.014e-02 | 1.187e-03 | ||
LPAL13_000014000 | LPAL13_000014000 | hypothetical protein | protein coding | LPAL13_SCAF000119 | 655 | 942 | + | forward | Not Assigned | 288.0 | 287 | 2.993 | 0.0146 | 2.981 | 0.0655 | 2.6310 | 0.2234 | 2.3600 | -0.1140 | 1.735 | 1.3946 | 2.4740 | 4.620 | 0.0020 | 0.1382 | 103.100 | 0.7231 | 4.139 | 0.0000 | 8.159 | 3.028 | 26.6764 | 217.73 | 187.56 | 1.404e+04 | 6.525 | 0.0002 | 0.9998 | 0.0002 | 2.1600 | 12.060 | 0.0005 | 0.8840 | 2.3480 | -3.242 | 0.0209 | 2.562e-01 | 1.329e-02 | 5.539e-02 | 6.463e-01 | 1.382e-01 | 3.277 | 1.260e+00 | 3.844e-01 | 7.137e-03 | 1.413e-04 | ||
LPAL13_220019500 | LPAL13_220019500 | hypothetical protein | protein coding | LpaL13_22 | 578260 | 578538 | + | forward | 22 | 279.0 | 278 | 2.980 | 0.0093 | 2.966 | 0.0485 | 1.9830 | 0.4115 | 3.3980 | 0.8174 | 2.962 | 3.1977 | 2.5810 | 3.263 | 0.0142 | 0.2874 | 250.100 | 0.6700 | 4.447 | 0.0000 | 4.103 | 2.037 | 124.2553 | 509.87 | 448.98 | 1.119e+05 | 4.080 | 0.7662 | 0.2338 | 0.7662 | 3.4140 | 14.020 | 0.0002 | 2.3500 | 1.6580 | -4.492 | 0.1005 | 3.992e-01 | 5.746e-03 | 4.981e-02 | 2.346e-01 | 2.888e-01 | 2.626 | 2.924e-03 | 1.114e-03 | 3.356e-02 | 3.360e-03 | ||
LPAL13_170014500 | LPAL13_170014500 | hypothetical protein, conserved | protein coding | LpaL13_17 | 361708 | 362040 | + | forward | 17 | 333.0 | 332 | 2.840 | 0.1999 | 2.302 | 0.4607 | 1.0220 | 0.6482 | -1.4260 | -2.4980 | 6.661 | 4.1852 | 1.0720 | 1.126 | 0.2916 | 0.7303 | 17.700 | 1.1800 | 2.406 | 0.0161 | 4.559 | 2.189 | 8.0539 | 36.75 | 32.22 | 1.905e+03 | 2.687 | 0.9969 | 0.0031 | 0.9969 | -0.3072 | 2.484 | 0.1150 | -2.5500 | 1.0080 | -4.798 | 0.3161 | 7.529e-01 | 1.694e-01 | 4.442e-01 | 2.265e-03 | 7.341e-01 | 1.891 | 4.382e-01 | 2.317e-01 | 1.491e-01 | 2.337e-02 | ||
LPAL13_000011800 | LPAL13_000011800 | hypothetical protein, conserved | protein coding | LPAL13_SCAF000076 | 446 | 640 | - | reverse | Not Assigned | 195.0 | 194 | 2.721 | 0.2376 | 2.680 | 0.3032 | 0.4672 | 0.8177 | -2.3460 | -3.3070 | 4.451 | 3.4997 | 0.9602 | 1.130 | 0.2930 | 0.7312 | 8.105 | 1.1990 | 2.269 | 0.0232 | 7.713 | 2.947 | 2.0868 | 16.16 | 13.94 | 7.314e+02 | 2.428 | 0.9990 | 0.0010 | 0.9990 | -1.0270 | 4.270 | 0.0388 | -3.2160 | 0.5817 | -4.948 | 0.5621 | 9.542e-01 | 2.120e-01 | 2.911e-01 | 1.274e-03 | 7.350e-01 | 1.757 | 1.182e+00 | 6.730e-01 | 2.080e-01 | 9.408e-02 | ||
LPAL13_170006400 | LPAL13_170006400 | receptor-type adenylate cyclase a | protein coding | LpaL13_17 | 43122 | 44198 | - | reverse | 17 | 1077.0 | 1076 | 2.689 | 0.0001 | 2.676 | 0.0030 | 2.1970 | 0.1183 | 3.3390 | 0.9561 | 1.476 | 0.6494 | 2.3830 | 6.065 | 0.0001 | 0.0331 | 219.700 | 0.4767 | 5.641 | 0.0000 | 7.931 | 2.988 | 56.7322 | 450.03 | 387.93 | 1.589e+05 | 7.401 | 0.1579 | 0.8421 | 0.1579 | 3.2250 | 23.860 | 0.0000 | 2.5530 | 2.9490 | -1.974 | 0.0040 | 1.139e-01 | 5.269e-05 | 2.187e-03 | 5.757e-01 | 3.310e-02 | 2.605 | 2.571e-01 | 9.872e-02 | 1.328e-03 | 5.289e-06 |
## First 20 rows of the deseq 'downs' for sensitive_vs_resistant (svaseq
## results), rendered as a table.
knitr::kable(
  head(sus_sig_sva[["deseq"]][["downs"]][["sensitive_vs_resistant"]], n = 20)
)
gid | annotgeneproduct | annotgenetype | chromosome | start | end | strand | annotgeneentrezid | annotgenename | annotstrand | annotchromosome | annotcdslength | length | deseq_logfc | deseq_adjp | edger_logfc | edger_adjp | limma_logfc | limma_adjp | basic_nummed | basic_denmed | basic_numvar | basic_denvar | basic_logfc | basic_t | basic_p | basic_adjp | deseq_basemean | deseq_lfcse | deseq_stat | deseq_p | ebseq_fc | ebseq_logfc | ebseq_c1mean | ebseq_c2mean | ebseq_mean | ebseq_var | ebseq_postfc | ebseq_ppee | ebseq_ppde | ebseq_adjp | edger_logcpm | edger_lr | edger_p | limma_ave | limma_t | limma_b | limma_p | limma_adjp_ihw | deseq_adjp_ihw | edger_adjp_ihw | ebseq_adjp_ihw | basic_adjp_ihw | lfc_meta | lfc_var | lfc_varbymed | p_meta | p_var | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
LPAL13_000038500 | LPAL13_000038500 | hypothetical protein | protein coding | LPAL13_SCAF000575 | 39 | 251 | + | forward | Not Assigned | 213.0 | 212 | -2.091 | 0.1269 | -2.091 | 0.0996 | -3.9290 | 0.0003 | -2.2000 | 1.3670 | 4.1969 | 1.5731 | -3.5680 | -5.688 | 0.0001 | 0.0376 | 24.890 | 0.7462 | -2.802 | 0.0051 | 0.1845 | -2.4384 | 93.33 | 17.210 | 29.228 | 1832.8 | 0.2378 | 0.0574 | 0.9426 | 0.0574 | 0.1024 | 9.214 | 0.0024 | -1.3780 | -5.977 | 7.2490 | 0.0000 | 2.462e-04 | 1.284e-01 | 9.438e-02 | 6.095e-01 | 3.564e-02 | -2.675 | 1.178e-01 | -4.404e-02 | 2.492e-03 | 6.442e-06 | ||
LPAL13_230016400 | LPAL13_230016400 | na/h antiporter-like protein | protein coding | LpaL13_23 | 350725 | 355113 | - | reverse | 23 | 4389.0 | 4388 | -2.089 | 0.2707 | -2.078 | 0.2760 | -1.9580 | 0.1561 | -3.1410 | -0.6244 | 4.7278 | 17.5993 | -2.5170 | -1.434 | 0.2058 | 0.6696 | 20.530 | 0.9754 | -2.141 | 0.0322 | 0.1066 | -3.2300 | 214.06 | 22.806 | 53.004 | 28677.0 | 0.2063 | 0.0000 | 1.0000 | 0.0000 | -0.7043 | 4.701 | 0.0302 | -3.4410 | -2.682 | -2.7110 | 0.0086 | 1.520e-01 | 2.458e-01 | 2.629e-01 | 6.832e-01 | 6.750e-01 | -2.117 | 2.036e-01 | -9.618e-02 | 2.366e-02 | 1.715e-04 | ||
LPAL13_190021800 | LPAL13_190021800 | atp-dependent zinc metallopeptidase, putative | protein coding | LpaL13_19 | 593155 | 594951 | - | reverse | 19 | 1797.0 | 1796 | -1.963 | 0.2377 | -1.978 | 0.2516 | -2.4300 | 0.0426 | -3.3430 | -0.4277 | 3.3613 | 9.2643 | -2.9150 | -2.270 | 0.0660 | 0.4971 | 8.895 | 0.8652 | -2.269 | 0.0233 | 0.0928 | -3.4291 | 106.88 | 9.914 | 25.225 | 6086.5 | 0.2013 | 0.0000 | 0.0000 | 0.0000 | -1.6050 | 5.099 | 0.0239 | -3.5880 | -3.928 | -0.1367 | 0.0002 | 4.731e-02 | 2.448e-01 | 2.020e-01 | 0.000e+00 | 4.986e-01 | -2.148 | 3.683e-02 | -1.715e-02 | 1.580e-02 | 1.835e-04 | ||
LPAL13_000012100 | LPAL13_000012100 | hypothetical protein | protein coding | LPAL13_SCAF000080 | 1637 | 1894 | - | reverse | Not Assigned | 258.0 | 257 | -1.877 | 0.2449 | -1.872 | 0.2049 | -3.1150 | 0.0264 | -2.2920 | 1.0840 | 5.5781 | 0.1631 | -3.3760 | -7.520 | 0.0000 | 0.0000 | 26.840 | 0.8356 | -2.246 | 0.0247 | 0.3138 | -1.6723 | 58.98 | 18.498 | 24.890 | 783.4 | 0.3338 | 0.9778 | 0.0222 | 0.9778 | 0.2123 | 5.973 | 0.0145 | -1.3610 | -4.171 | 1.1960 | 0.0001 | 2.118e-02 | 2.215e-01 | 1.777e-01 | 1.551e-02 | 1.976e-05 | -2.240 | 3.047e-02 | -1.360e-02 | 1.310e-02 | 1.532e-04 | ||
LPAL13_310039200 | LPAL13_310039200 | hypothetical protein | protein coding | LpaL13_31 | 1301745 | 1301972 | - | reverse | 31 | 228.0 | 227 | -1.770 | 0.0844 | -1.776 | 0.0732 | -1.7070 | 0.0981 | 1.2990 | 3.2320 | 2.0151 | 0.5994 | -1.9330 | -4.789 | 0.0004 | 0.0527 | 166.700 | 0.5691 | -3.110 | 0.0019 | 0.4179 | -1.2586 | 334.46 | 139.780 | 170.519 | 32674.7 | 0.4749 | 0.9967 | 0.0033 | 0.9967 | 2.8240 | 10.740 | 0.0010 | 1.9850 | -3.118 | -1.6700 | 0.0024 | 1.150e-01 | 8.714e-02 | 5.587e-02 | 2.814e-03 | 5.352e-02 | -1.799 | 1.653e-01 | -9.190e-02 | 1.770e-03 | 4.572e-07 | ||
LPAL13_000012000 | LPAL13_000012000 | hypothetical protein | protein coding | LPAL13_SCAF000080 | 710 | 1159 | - | reverse | Not Assigned | 450.0 | 449 | -1.681 | 0.2796 | -1.688 | 0.2364 | -3.2580 | 0.0296 | 0.2199 | 3.4740 | 7.2074 | 1.5132 | -3.2540 | -4.709 | 0.0002 | 0.0422 | 181.400 | 0.7966 | -2.111 | 0.0348 | 0.2798 | -1.8373 | 442.30 | 123.769 | 174.064 | 46363.5 | 0.3268 | 0.8804 | 0.1196 | 0.8804 | 2.9370 | 5.346 | 0.0208 | 1.2740 | -4.120 | 1.3910 | 0.0001 | 3.473e-02 | 2.281e-01 | 2.235e-01 | 7.793e-02 | 4.307e-02 | -2.118 | 1.032e-01 | -4.870e-02 | 1.855e-02 | 3.052e-04 | ||
LPAL13_050005000 | LPAL13_050005000 | hypothetical protein | protein coding | LpaL13_05 | 3394 | 3612 | - | reverse | 5 | 219.0 | 218 | -1.554 | 0.2720 | -1.564 | 0.2271 | -3.2260 | 0.0035 | -0.2027 | 2.7720 | 3.4982 | 0.1021 | -2.9750 | -8.370 | 0.0000 | 0.0000 | 81.790 | 0.7272 | -2.137 | 0.0326 | 0.2880 | -1.7959 | 208.35 | 59.995 | 83.419 | 9594.8 | 0.3145 | 0.9004 | 0.0996 | 0.9004 | 1.7790 | 5.530 | 0.0187 | 0.5335 | -5.104 | 4.8070 | 0.0000 | 4.100e-03 | 2.462e-01 | 2.130e-01 | 1.002e-01 | 4.935e-06 | -2.051 | 1.283e-01 | -6.253e-02 | 1.709e-02 | 2.671e-04 | ||
LPAL13_170012500 | LPAL13_170012500 | unspecified product | tRNA encoding | LpaL13_17 | undefined | undefined | + | forward | 17 | 0.0 | undefined | -1.485 | 0.0201 | -1.491 | 0.0424 | -1.6540 | 0.0385 | -1.0590 | 0.6721 | 0.8350 | 2.0797 | -1.7310 | -2.835 | 0.0310 | 0.3895 | 15.070 | 0.3729 | -3.982 | 0.0001 | 0.3008 | -1.7331 | 61.24 | 18.414 | 25.177 | 1105.5 | 0.3901 | 0.9327 | 0.0673 | 0.9327 | -0.5232 | 15.360 | 0.0001 | -1.1000 | -3.994 | 0.6436 | 0.0001 | 3.996e-02 | 1.638e-02 | 3.413e-02 | 5.251e-02 | 3.885e-01 | -1.574 | 8.330e-02 | -5.291e-02 | 9.438e-05 | 8.474e-10 | ||
LPAL13_140019300 | LPAL13_140019300 | bt1 family, putative | protein coding | LpaL13_14 | 530784 | 531350 | + | forward | 14 | 567.0 | 566 | -1.476 | 0.0930 | -1.485 | 0.0763 | -1.8490 | 0.0601 | 4.7670 | 6.6090 | 0.7180 | 2.1059 | -1.8420 | -3.014 | 0.0254 | 0.3632 | 1707.000 | 0.4855 | -3.040 | 0.0024 | 0.2530 | -1.9830 | 4355.82 | 1101.838 | 1615.624 | 3386453.6 | 0.2931 | 0.9143 | 0.0857 | 0.9143 | 6.1650 | 10.460 | 0.0012 | 5.4370 | -3.611 | -0.5668 | 0.0005 | 7.373e-02 | 8.987e-02 | 6.911e-02 | 8.629e-02 | 5.091e-01 | -1.613 | 2.209e-02 | -1.369e-02 | 1.356e-03 | 9.027e-07 | ||
LPAL13_220018100 | LPAL13_220018100 | 60s ribosomal protein l14, putative | protein coding | LpaL13_22 | 517892 | 518419 | + | forward | 22 | 528.0 | 527 | -1.456 | 0.2696 | -1.468 | 0.2244 | -1.5060 | 0.1294 | 0.4183 | 2.5250 | 3.4619 | 4.9553 | -2.1070 | -2.180 | 0.0694 | 0.5057 | 74.590 | 0.6767 | -2.152 | 0.0314 | 0.2603 | -1.9417 | 449.24 | 116.932 | 169.401 | 89466.6 | 0.3722 | 0.0410 | 0.9590 | 0.0410 | 1.5570 | 5.583 | 0.0181 | -0.0588 | -2.855 | -2.2410 | 0.0053 | 1.278e-01 | 2.753e-01 | 2.130e-01 | 7.234e-01 | 5.057e-01 | -1.506 | 9.206e-02 | -6.114e-02 | 1.826e-02 | 1.707e-04 | ||
LPAL13_180013900 | LPAL13_180013900 | hypothetical protein | protein coding | LpaL13_18 | 351792 | 352085 | + | forward | 18 | 294.0 | 293 | -1.440 | 0.0946 | -1.448 | 0.0836 | -1.9510 | 0.0264 | -0.3401 | 1.7010 | 1.0858 | 0.2305 | -2.0410 | -7.589 | 0.0000 | 0.0012 | 37.000 | 0.4760 | -3.025 | 0.0025 | 0.3809 | -1.3925 | 88.46 | 33.687 | 42.335 | 963.5 | 0.3777 | 0.7646 | 0.2354 | 0.7646 | 0.6607 | 10.050 | 0.0015 | 0.0938 | -4.174 | 1.4470 | 0.0001 | 2.118e-02 | 8.682e-02 | 7.101e-02 | 1.782e-01 | 1.368e-03 | -1.615 | 4.067e-03 | -2.518e-03 | 1.357e-03 | 1.484e-06 | ||
LPAL13_340039600 | LPAL13_340039600 | hypothetical protein | protein coding | LpaL13_34 | 1247554 | 1247757 | - | reverse | 34 | 204.0 | 203 | -1.419 | 0.2199 | -1.431 | 0.1814 | -2.4160 | 0.0601 | 1.1310 | 3.8380 | 3.8763 | 0.6060 | -2.7070 | -5.743 | 0.0000 | 0.0074 | 209.100 | 0.6098 | -2.327 | 0.0199 | 0.2517 | -1.9900 | 513.91 | 129.367 | 190.084 | 40331.1 | 0.2831 | 0.9820 | 0.0180 | 0.9820 | 3.1250 | 6.505 | 0.0108 | 2.0560 | -3.546 | -0.4705 | 0.0006 | 7.196e-02 | 1.904e-01 | 1.464e-01 | 1.837e-02 | 7.362e-03 | -1.683 | 1.952e-02 | -1.160e-02 | 1.044e-02 | 9.366e-05 | ||
LPAL13_350073400 | LPAL13_350073400 | hypothetical protein | protein coding | LpaL13_35 | 2342701 | 2342883 | + | forward | 35 | 183.0 | 182 | -1.341 | 0.1589 | -1.348 | 0.1482 | -1.5020 | 0.1297 | -0.5515 | 1.4110 | 2.5225 | 1.5709 | -1.9630 | -3.363 | 0.0093 | 0.2480 | 40.680 | 0.5138 | -2.611 | 0.0090 | 0.3126 | -1.6775 | 115.82 | 36.202 | 48.773 | 5397.3 | 0.4095 | 0.8961 | 0.1039 | 0.8961 | 0.7744 | 7.245 | 0.0071 | 0.0322 | -2.847 | -2.2110 | 0.0054 | 1.575e-01 | 1.284e-01 | 1.214e-01 | 8.077e-02 | 2.445e-01 | -1.411 | 5.483e-02 | -3.884e-02 | 7.176e-03 | 3.349e-06 | ||
LPAL13_000052700 | LPAL13_000052700 | hypothetical protein, conserved | protein coding | LPAL13_SCAF000789 | 102 | 398 | - | reverse | Not Assigned | 297.0 | 296 | -1.251 | 0.1863 | -1.253 | 0.1435 | -1.9880 | 0.0430 | 0.3101 | 1.3430 | 1.0364 | 1.9051 | -1.0330 | -1.746 | 0.1309 | 0.6042 | 48.600 | 0.5045 | -2.481 | 0.0131 | 0.4705 | -1.0876 | 113.76 | 53.524 | 63.035 | 6424.5 | 0.6426 | 0.9089 | 0.0911 | 0.9089 | 1.1240 | 7.523 | 0.0061 | 0.4429 | -3.901 | 0.6130 | 0.0002 | 4.731e-02 | 1.947e-01 | 1.204e-01 | 5.946e-02 | 6.092e-01 | -1.520 | 8.251e-03 | -5.428e-03 | 6.462e-03 | 4.199e-05 | ||
LPAL13_000029000 | LPAL13_000029000 | hypothetical protein | protein coding | LPAL13_SCAF000368 | 992 | 1243 | + | forward | Not Assigned | 252.0 | 251 | -1.238 | 0.0925 | -1.239 | 0.0967 | -1.8300 | 0.0147 | -1.5320 | 0.4581 | 1.9348 | 1.2514 | -1.9900 | -3.836 | 0.0047 | 0.1996 | 12.530 | 0.4062 | -3.047 | 0.0023 | 0.3505 | -1.5124 | 44.08 | 15.445 | 19.967 | 375.7 | 0.3476 | 0.9773 | 0.0227 | 0.9773 | -0.7822 | 9.360 | 0.0022 | -1.3720 | -4.501 | 2.0030 | 0.0000 | 1.073e-02 | 8.612e-02 | 7.537e-02 | 1.586e-02 | 2.006e-01 | -1.424 | 9.720e-04 | -6.826e-04 | 1.515e-03 | 1.681e-06 | ||
LPAL13_000036900 | LPAL13_000036900 | hypothetical protein, conserved | protein coding | LPAL13_SCAF000515 | 1206 | 1448 | - | reverse | Not Assigned | 243.0 | 242 | -1.153 | 0.2053 | -1.156 | 0.1945 | -1.1620 | 0.1880 | -1.2290 | -0.2827 | 1.0582 | 2.2565 | -0.9463 | -1.479 | 0.1902 | 0.6584 | 13.110 | 0.4832 | -2.386 | 0.0170 | 0.5931 | -0.7537 | 30.05 | 17.817 | 19.749 | 299.5 | 0.5698 | 0.9967 | 0.0033 | 0.9967 | -0.5727 | 6.208 | 0.0127 | -1.3210 | -2.529 | -2.9270 | 0.0130 | 1.783e-01 | 1.906e-01 | 1.853e-01 | 2.492e-03 | 6.574e-01 | -1.227 | 6.906e-02 | -5.629e-02 | 1.426e-02 | 5.766e-06 | ||
LPAL13_310010700 | LPAL13_310010700 | unspecified product | tRNA encoding | LpaL13_31 | undefined | undefined | + | forward | 31 | 0.0 | undefined | -1.145 | 0.2938 | -1.148 | 0.2755 | -2.1570 | 0.0178 | -2.4410 | -0.6141 | 1.3044 | 2.4921 | -1.8270 | -2.705 | 0.0352 | 0.4040 | 7.418 | 0.5550 | -2.064 | 0.0390 | 0.3393 | -1.5594 | 22.39 | 7.589 | 9.925 | 240.6 | 0.4424 | 0.9643 | 0.0357 | 0.9643 | -1.4780 | 4.713 | 0.0299 | -2.2720 | -4.378 | 1.0750 | 0.0000 | 1.971e-02 | 2.998e-01 | 2.311e-01 | 2.482e-02 | 4.101e-01 | -1.459 | 4.704e-02 | -3.225e-02 | 2.300e-02 | 4.165e-04 | ||
LPAL13_320038700 | LPAL13_320038700 | hypothetical protein, conserved | protein coding | LpaL13_32 | 1175024 | 1175257 | + | forward | 32 | 234.0 | 233 | -1.036 | 0.0651 | -1.047 | 0.0655 | -1.2640 | 0.0264 | 2.5620 | 3.7120 | 0.4995 | 0.1452 | -1.1500 | -5.764 | 0.0001 | 0.0241 | 236.000 | 0.3119 | -3.324 | 0.0009 | 0.5046 | -0.9869 | 434.30 | 219.124 | 253.099 | 19812.8 | 0.5143 | 0.9845 | 0.0155 | 0.9845 | 3.3090 | 12.090 | 0.0005 | 3.0670 | -4.199 | 1.6080 | 0.0001 | 2.077e-02 | 5.975e-02 | 5.306e-02 | 1.231e-02 | 2.321e-02 | -1.175 | 2.077e-02 | -1.768e-02 | 4.852e-04 | 1.724e-07 | ||
LPAL13_340039700 | LPAL13_340039700 | snare domain containing protein, putative | protein coding | LpaL13_34 | 1248192 | 1248947 | - | reverse | 34 | 756.0 | 755 | -1.028 | 0.1426 | -1.041 | 0.0996 | -1.3740 | 0.0601 | 4.5810 | 6.1530 | 0.6679 | 0.7584 | -1.5730 | -4.098 | 0.0049 | 0.1996 | 1247.000 | 0.3790 | -2.713 | 0.0067 | 0.3481 | -1.5226 | 2654.24 | 923.846 | 1197.066 | 926548.0 | 0.3772 | 0.9957 | 0.0043 | 0.9957 | 5.7080 | 9.187 | 0.0024 | 5.2520 | -3.608 | -0.5758 | 0.0005 | 1.000e+00 | 1.067e-01 | 8.876e-02 | 3.210e-03 | 2.006e-01 | -1.184 | 9.372e-03 | -7.915e-03 | 3.200e-03 | 1.001e-05 | ||
LPAL13_310008200 | LPAL13_310008200 | hypothetical protein | protein coding | LpaL13_31 | 92723 | 93040 | - | reverse | 31 | 318.0 | 317 | -1.019 | 0.0117 | -1.031 | 0.0064 | -0.8623 | 0.0529 | 4.3430 | 5.0130 | 0.5127 | 0.7282 | -0.6702 | -1.808 | 0.1176 | 0.5843 | 623.600 | 0.2344 | -4.349 | 0.0000 | 0.6088 | -0.7160 | 1217.36 | 741.101 | 816.299 | 205797.2 | 0.6286 | 0.9970 | 0.0030 | 0.9970 | 4.7200 | 21.410 | 0.0000 | 4.5300 | -3.750 | -0.0577 | 0.0003 | 5.132e-02 | 1.067e-02 | 4.389e-03 | 2.286e-03 | 5.909e-01 | -1.061 | 7.325e-02 | -6.901e-02 | 1.055e-04 | 2.815e-08 |
## Keep a handle on the deseq MA plot of the sensitive_vs_resistant contrast
## from the svaseq susceptibility tables.
sus_ma <- sus_table_sva[["plots"]][["sensitive_vs_resistant"]][["deseq_ma_plots"]][["plot"]]
## Save it to images/sus_ma_sva.png; pp() presumably opens a graphics device
## on the given file, which dev.off() closes again.
dev <- pp(file = "images/sus_ma_sva.png")
sus_ma
closed <- dev.off()
## Evaluate it a second time, after the file device is closed, presumably so
## that it also appears in the rendered document.
sus_ma
## test <- ggplt(sus_ma)
Now let us look for ontology categories which are increased in the 2.3 samples, followed by those which are increased in the 2.2 samples.
## Categories more represented in the 2.3 group: run simple_goseq() on the
## first table of deseq 'ups' from the svaseq zymodeme results.
zy_go_up <- simple_goseq(
  sig_genes = zy_sig_sva[["deseq"]][["ups"]][[1]],
  go_db = lp_go,
  length_db = lp_lengths
)
## Found 12 go_db genes and 45 length_db genes out of 45.
## Testing that go categories are defined.
## Removing undefined categories.
## Gathering synonyms.
## Gathering category definitions.
## The score column is null, defaulting to score.
## Possible columns are:
## [1] "category" "over_represented_pvalue"
## [3] "under_represented_pvalue" "numDEInCat"
## [5] "numInCat" "term"
## [7] "ontology" "qvalue"
## The score column is null, defaulting to score.
## Possible columns are:
## [1] "category" "over_represented_pvalue"
## [3] "under_represented_pvalue" "numDEInCat"
## [5] "numInCat" "term"
## [7] "ontology" "qvalue"
## The score column is null, defaulting to score.
## Possible columns are:
## [1] "category" "over_represented_pvalue"
## [3] "under_represented_pvalue" "numDEInCat"
## [5] "numInCat" "term"
## [7] "ontology" "qvalue"
## Gene categories more represented in the 2.2 group.
zy_go_down <- simple_goseq(sig_genes = zy_sig_sva[["deseq"]][["downs"]][[1]],
go_db = lp_go, length_db = lp_lengths)
## Found 17 go_db genes and 83 length_db genes out of 83.
## Testing that go categories are defined.
## Removing undefined categories.
## Gathering synonyms.
## Gathering category definitions.
## The score column is null, defaulting to score.
## Possible columns are:
## [1] "category" "over_represented_pvalue"
## [3] "under_represented_pvalue" "numDEInCat"
## [5] "numInCat" "term"
## [7] "ontology" "qvalue"
## The score column is null, defaulting to score.
## Possible columns are:
## [1] "category" "over_represented_pvalue"
## [3] "under_represented_pvalue" "numDEInCat"
## [5] "numInCat" "term"
## [7] "ontology" "qvalue"
## The score column is null, defaulting to score.
## Possible columns are:
## [1] "category" "over_represented_pvalue"
## [3] "under_represented_pvalue" "numDEInCat"
## [5] "numInCat" "term"
## [7] "ontology" "qvalue"
In the function ‘combine_de_tables()’ above, one of the tasks performed is to look at the agreement among DESeq2, limma, and edgeR. The following show a couple of these for the set of genes observed with |log2 fold-change| >= 1 (i.e. at least a 2-fold change in either direction) and adjusted p-value <= 0.05.
zy_table_sva[["venns"]][[1]][["p_lfc1"]][["up_noweight"]]
zy_table_sva[["venns"]][[1]][["p_lfc1"]][["down_noweight"]]
zy_go_up[["pvalue_plots"]][["bpp_plot_over"]]
zy_go_down[["pvalue_plots"]][["bpp_plot_over"]]
Najib read me an email listing off the gene names associated with the zymodeme classification. I took those names and cross referenced them against the Leishmania panamensis gene annotations and found the following:
They are:
Given these 6 gene IDs (NH has two gene IDs associated with it), I can do some looking for specific differences among the various samples.
The following creates a colorspace (red to green) heatmap showing the observed expression of these genes in every sample.
my_genes <- c("LPAL13_120010900", "LPAL13_340013000", "LPAL13_000054100",
"LPAL13_140006100", "LPAL13_180018500", "LPAL13_320022300",
"other")
my_names <- c("ALAT", "ASAT", "G6PD", "NHv1", "NHv2", "MPI", "other")
zymo_expt <- exclude_genes_expt(zy_norm, ids = my_genes, method = "keep")
## remove_genes_expt(), before removal, there were 8558 genes, now there are 6.
## There are 83 samples which kept less than 90 percent counts.
## TMRC20001 TMRC20065 TMRC20005 TMRC20066 TMRC20039 TMRC20037 TMRC20038 TMRC20067
## 0.13101 0.12475 0.13212 0.10576 0.12993 0.10996 0.11280 0.11629
## TMRC20068 TMRC20041 TMRC20015 TMRC20009 TMRC20010 TMRC20016 TMRC20011 TMRC20012
## 0.11537 0.11795 0.11463 0.11346 0.10972 0.10586 0.11013 0.12054
## TMRC20013 TMRC20017 TMRC20014 TMRC20018 TMRC20019 TMRC20070 TMRC20020 TMRC20021
## 0.12046 0.10627 0.10885 0.11452 0.12234 0.11244 0.11003 0.10613
## TMRC20022 TMRC20024 TMRC20036 TMRC20069 TMRC20033 TMRC20026 TMRC20031 TMRC20076
## 0.13059 0.11239 0.12013 0.11614 0.11254 0.13841 0.10009 0.12004
## TMRC20073 TMRC20055 TMRC20079 TMRC20071 TMRC20078 TMRC20094 TMRC20042 TMRC20058
## 0.12250 0.13474 0.12661 0.12320 0.13405 0.11729 0.13142 0.11794
## TMRC20072 TMRC20059 TMRC20048 TMRC20088 TMRC20060 TMRC20077 TMRC20074 TMRC20063
## 0.14322 0.11008 0.10298 0.12927 0.10836 0.12188 0.12063 0.11661
## TMRC20053 TMRC20052 TMRC20064 TMRC20075 TMRC20051 TMRC20050 TMRC20049 TMRC20062
## 0.11807 0.11032 0.11372 0.11096 0.12820 0.11525 0.13945 0.12844
## TMRC20110 TMRC20080 TMRC20043 TMRC20083 TMRC20054 TMRC20085 TMRC20046 TMRC20089
## 0.13858 0.11529 0.11351 0.12376 0.12761 0.12192 0.13680 0.11539
## TMRC20090 TMRC20044 TMRC20105 TMRC20109 TMRC20098 TMRC20096 TMRC20097 TMRC20101
## 0.11167 0.13379 0.12203 0.12670 0.11626 0.11655 0.11884 0.11886
## TMRC20092 TMRC20082 TMRC20099 TMRC20100 TMRC20087 TMRC20104 TMRC20086 TMRC20107
## 0.11555 0.10870 0.12198 0.11055 0.12326 0.11716 0.10977 0.09639
## TMRC20081 TMRC20106 TMRC20095
## 0.10449 0.09802 0.07963
## Label each row by looking up its gene ID instead of relying on position:
## the rows of zymo_expt are not guaranteed to be in the same order as my_genes,
## and my_genes/my_names carry a 7th placeholder entry ("other") while only
## 6 genes were kept in the subset.
zymo_labels <- my_names[match(rownames(exprs(zymo_expt)), my_genes)]
zymo_heatmap <- plot_sample_heatmap(zymo_expt, row_label = zymo_labels)
zymo_heatmap
In contrast, the following plots take the set of genes which are shared among all differential expression methods (|lfc| >= 1.0 and adjp <= 0.05) and use them to make categories of genes which are increased in 2.3 or 2.2.
shared_zymo <- intersect_significant(zy_table_sva)
## Deleting the file excel/intersect_significant.xlsx before writing the tables.
up_shared <- shared_zymo[["ups"]][[1]][["data"]][["all"]]
rownames(up_shared)
## [1] "LPAL13_000033300" "LPAL13_000012000" "LPAL13_000038500" "LPAL13_000012100"
## [5] "LPAL13_310031300" "LPAL13_000038400" "LPAL13_050005000" "LPAL13_340039600"
## [9] "LPAL13_310031000" "LPAL13_350063000" "LPAL13_310035500" "LPAL13_310039200"
## [13] "LPAL13_140019300" "LPAL13_180013900" "LPAL13_210015500" "LPAL13_340039700"
## [17] "LPAL13_170015400" "LPAL13_350013200" "LPAL13_250006300" "LPAL13_330024000"
## [21] "LPAL13_350073400" "LPAL13_140019100" "LPAL13_350012400" "LPAL13_210005000"
## [25] "LPAL13_320038700" "LPAL13_140019200" "LPAL13_240009700" "LPAL13_000052700"
## [29] "LPAL13_160014500" "LPAL13_230011200" "LPAL13_110007300" "LPAL13_330021800"
## [33] "LPAL13_250025700" "LPAL13_350073200" "LPAL13_040007800" "LPAL13_050009600"
## [37] "LPAL13_160014100" "LPAL13_230011500" "LPAL13_230011400" "LPAL13_310032500"
## [41] "LPAL13_020006700" "LPAL13_230011300" "LPAL13_310028500" "LPAL13_110015700"
## [45] "LPAL13_140015200"
upshared_expt <- exclude_genes_expt(zy_norm, ids = rownames(up_shared), method = "keep")
## remove_genes_expt(), before removal, there were 8558 genes, now there are 45.
## There are 83 samples which kept less than 90 percent counts.
## TMRC20001 TMRC20065 TMRC20005 TMRC20066 TMRC20039 TMRC20037 TMRC20038 TMRC20067
## 0.3820 0.4541 0.1296 0.4152 0.1755 0.4444 0.5717 0.3505
## TMRC20068 TMRC20041 TMRC20015 TMRC20009 TMRC20010 TMRC20016 TMRC20011 TMRC20012
## 0.4157 0.1778 0.4587 0.1596 0.4407 0.3439 0.1740 0.1533
## TMRC20013 TMRC20017 TMRC20014 TMRC20018 TMRC20019 TMRC20070 TMRC20020 TMRC20021
## 0.3926 0.1854 0.1847 0.3676 0.1472 0.4299 0.1400 0.3947
## TMRC20022 TMRC20024 TMRC20036 TMRC20069 TMRC20033 TMRC20026 TMRC20031 TMRC20076
## 0.1466 0.1622 0.2068 0.1817 0.1675 0.1413 0.1313 0.1621
## TMRC20073 TMRC20055 TMRC20079 TMRC20071 TMRC20078 TMRC20094 TMRC20042 TMRC20058
## 0.5195 0.1947 0.5633 0.5266 0.2058 0.4444 0.1630 0.6361
## TMRC20072 TMRC20059 TMRC20048 TMRC20088 TMRC20060 TMRC20077 TMRC20074 TMRC20063
## 0.1894 0.3108 0.3467 0.1652 0.1457 0.1431 0.1875 0.1627
## TMRC20053 TMRC20052 TMRC20064 TMRC20075 TMRC20051 TMRC20050 TMRC20049 TMRC20062
## 0.1889 0.4666 0.4353 0.3627 0.6329 0.1664 0.1810 0.6454
## TMRC20110 TMRC20080 TMRC20043 TMRC20083 TMRC20054 TMRC20085 TMRC20046 TMRC20089
## 0.1651 0.4781 0.4394 0.1535 0.5741 0.4565 0.1786 0.3500
## TMRC20090 TMRC20044 TMRC20105 TMRC20109 TMRC20098 TMRC20096 TMRC20097 TMRC20101
## 0.5010 0.1823 0.5202 0.1419 0.4296 0.1681 0.1501 0.1520
## TMRC20092 TMRC20082 TMRC20099 TMRC20100 TMRC20087 TMRC20104 TMRC20086 TMRC20107
## 0.1528 0.4339 0.5033 0.4915 0.1464 0.5274 0.1244 0.3250
## TMRC20081 TMRC20106 TMRC20095
## 0.1310 0.1474 0.3672
We can plot a quick heatmap to get a sense of the expression differences among the genes which differ between the two zymodemes.
## Take the row labels from the subset being plotted rather than from up_shared:
## the labels are applied by position, and the row order of up_shared need not
## match the row order of the expressionset inside upshared_expt.
high_23_heatmap <- plot_sample_heatmap(upshared_expt,
                                       row_label = rownames(exprs(upshared_expt)))
high_23_heatmap
down_shared <- shared_zymo[["downs"]][[1]][["data"]][["all"]]
downshared_expt <- exclude_genes_expt(zy_norm, ids = rownames(down_shared), method = "keep")
## remove_genes_expt(), before removal, there were 8558 genes, now there are 72.
## There are 83 samples which kept less than 90 percent counts.
## TMRC20001 TMRC20065 TMRC20005 TMRC20066 TMRC20039 TMRC20037 TMRC20038 TMRC20067
## 0.2722 0.2195 0.7385 0.2646 0.7297 0.2471 0.2381 0.2732
## TMRC20068 TMRC20041 TMRC20015 TMRC20009 TMRC20010 TMRC20016 TMRC20011 TMRC20012
## 0.2400 0.7427 0.2245 0.7219 0.2075 0.2425 0.6361 0.6178
## TMRC20013 TMRC20017 TMRC20014 TMRC20018 TMRC20019 TMRC20070 TMRC20020 TMRC20021
## 0.2076 0.7378 0.7128 0.2025 0.7281 0.2203 0.7615 0.1944
## TMRC20022 TMRC20024 TMRC20036 TMRC20069 TMRC20033 TMRC20026 TMRC20031 TMRC20076
## 0.7718 0.8085 0.7476 0.7725 0.8176 0.7821 0.6668 0.6793
## TMRC20073 TMRC20055 TMRC20079 TMRC20071 TMRC20078 TMRC20094 TMRC20042 TMRC20058
## 0.2279 0.7993 0.2225 0.2098 0.6046 0.2204 0.6085 0.2555
## TMRC20072 TMRC20059 TMRC20048 TMRC20088 TMRC20060 TMRC20077 TMRC20074 TMRC20063
## 0.5997 0.1700 0.1866 0.6912 0.8300 0.6289 0.7540 0.7041
## TMRC20053 TMRC20052 TMRC20064 TMRC20075 TMRC20051 TMRC20050 TMRC20049 TMRC20062
## 0.6669 0.2101 0.2318 0.2134 0.2260 0.6841 0.7789 0.2244
## TMRC20110 TMRC20080 TMRC20043 TMRC20083 TMRC20054 TMRC20085 TMRC20046 TMRC20089
## 0.6208 0.1874 0.2059 0.6972 0.2434 0.1994 0.6962 0.1795
## TMRC20090 TMRC20044 TMRC20105 TMRC20109 TMRC20098 TMRC20096 TMRC20097 TMRC20101
## 0.2052 0.6935 0.2150 0.7554 0.2251 0.8318 0.6096 0.7053
## TMRC20092 TMRC20082 TMRC20099 TMRC20100 TMRC20087 TMRC20104 TMRC20086 TMRC20107
## 0.6574 0.2108 0.2167 0.1884 0.6159 0.2361 0.8149 0.1680
## TMRC20081 TMRC20106 TMRC20095
## 0.7189 1.0601 0.2706
## Take the row labels from the subset being plotted rather than from down_shared:
## the labels are applied by position, and the row order of down_shared need not
## match the row order of the expressionset inside downshared_expt.
high_22_heatmap <- plot_sample_heatmap(downshared_expt,
                                       row_label = rownames(exprs(downshared_expt)))
high_22_heatmap
A recent suggestion included a query about the relationship of our amastigote TMRC2 samples which were the result of infecting a set of macrophages vs. these promastigote samples.
So far, we have kept these two experiments separate; now let us merge them.
macrophage_sheet <- "sample_sheets/tmrc2_macrophage_samples_202203.xlsx"
tmrc2_macrophage <- create_expt(macrophage_sheet,
file_column="lpanamensisv36hisatfile",
gene_info=all_lp_annot,
annotation="org.Lpanamensis.MHOMCOL81L13.v46.eg.db") %>%
set_expt_conditions(fact="macrophagezymodeme") %>%
set_expt_batches(fact="macrophagetreatment")
## Reading the sample metadata.
## The sample definitions comprises: 28 rows(samples) and 68 columns(metadata fields).
## Warning in create_expt(macrophage_sheet, file_column =
## "lpanamensisv36hisatfile", : Some samples were removed when cross referencing
## the samples against the count data.
## Matched 8778 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 8778 features and 22 samples.
tmrc2_macrophage_norm <- normalize_expt(tmrc2_macrophage, transform="log2", convert="cpm", norm="quant", filter=TRUE)
## Removing 0 low-count genes (8778 remaining).
## transform_counts: Found 1678 values equal to 0, adding 1 to the matrix.
all_tmrc2 <- combine_expts(lp_expt, tmrc2_macrophage)
all_nosb <- all_tmrc2
pData(all_nosb)[["stage"]] <- "promastigote"
na_idx <- is.na(pData(all_nosb)[["macrophagetreatment"]])
pData(all_nosb)[na_idx, "macrophagetreatment"] <- "undefined"
all_nosb <- subset_expt(all_nosb, subset="macrophagetreatment!='inf_sb'")
## subset_expt(): There were 123, now there are 111 samples.
ama_idx <- pData(all_nosb)[["macrophagetreatment"]] == "inf"
pData(all_nosb)[ama_idx, "stage" ] <- "amastigote"
pData(all_nosb)[["batch"]] <- pData(all_nosb)[["stage"]]
all_norm <- normalize_expt(all_nosb, convert="cpm", norm="quant", transform="log2", filter=TRUE)
## Removing 130 low-count genes (8648 remaining).
## transform_counts: Found 22 values equal to 0, adding 1 to the matrix.
plot_pca(all_norm)$plot
## plot labels was not set and there are more than 100 samples, disabling it.
I think the above picture is sort of the opposite of what we want to compare in a DE analysis for this set of data, e.g. we want to compare promastigotes to amastigotes?
all_nosb <- set_expt_batches(all_nosb, fact="condition") %>%
set_expt_conditions(fact="stage")
pro_ama <- all_pairwise(all_nosb, filter=TRUE, model_batch="svaseq")
## Removing 0 low-count genes (8648 remaining).
## Setting 3898 low elements to zero.
## transform_counts: Found 3898 values equal to 0, adding 1 to the matrix.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
pro_ama_table <- combine_de_tables(pro_ama, excel="excel/tmrc2_pro_vs_ama.xlsx")
## Deleting the file excel/tmrc2_pro_vs_ama.xlsx before writing the tables.
Over the last couple of weeks, I redid all the variant searches with a newer, (I think) more sensitive and more specific variant tool. In addition I changed my script which interprets the results so that it is able to extract any tags from it, instead of just the one or two that my previous script handled. In addition, at least in theory it is now able to provide the set of amino acid substitutions for every gene in species without or with introns (not really relevant for Leishmania panamensis).
However, as of this writing, I have not re-performed the same tasks with the 2016 data, primarily because it will require remapping all of the samples. As a result, for the moment I cannot combine the older and newer samples. Thus, any of the following blocks which use the 2016 data are currently disabled.
old_expt <- create_expt("sample_sheets/tmrc2_samples_20191203.xlsx",
file_column = "tophat2file")
## Reading the sample metadata.
## Dropped 13 rows from the sample metadata because the sample ID is blank.
## The sample definitions comprises: 50 rows(samples) and 38 columns(metadata fields).
## Warning in create_expt("sample_sheets/tmrc2_samples_20191203.xlsx", file_column
## = "tophat2file"): Some samples were removed when cross referencing the samples
## against the count data.
## Matched 8841 annotations and counts.
## Bringing together the count matrix and gene information.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 8841 features and 33 samples.
##tt <- lp_expt[["expressionset"]]
##rownames(tt) <- gsub(pattern = "^exon_", replacement = "", x = rownames(tt))
##rownames(tt) <- gsub(pattern = "\\.E1$", replacement = "", x = rownames(tt))
##lp_expt$expressionset <- tt
## Tidy the feature names of the 2016 expressionset: strip a leading "exon_" and a
## trailing ".1" from every row name, then put the modified expressionset back.
## NOTE(review): presumably this makes the older IDs match the current gene IDs -- confirm.
tt <- old_expt$expressionset
rownames(tt) <- gsub(pattern = "^exon_", replacement = "", x = rownames(tt))
rownames(tt) <- gsub(pattern = "\\.1$", replacement = "", x = rownames(tt))
old_expt$expressionset <- tt
## tt was only a scratch copy; remove it from the workspace.
rm(tt)
One other important caveat: we have a group of new samples which have not yet been run through the variant search pipeline, so I need to remove them from consideration. Though it looks like they finished overnight…
## The next line drops the samples which are missing the SNP pipeline.
lp_snp <- subset_expt(lp_expt, subset="!is.na(pData(lp_expt)[['freebayessummary']])")
## subset_expt(): There were 101, now there are 101 samples.
new_snps <- sm(count_expt_snps(lp_snp, annot_column = "freebayessummary", snp_column="PAIRED"))
old_snps <- sm(count_expt_snps(old_expt, annot_column = "bcftable", snp_column = 2))
nonzero_snps <- exprs(new_snps) != 0
colSums(nonzero_snps)
## tmrc20001 tmrc20065 tmrc20005 tmrc20007 tmrc20008 tmrc20027 tmrc20028 tmrc20032
## 0 93649 0 0 0 351343 338580 146302
## tmrc20040 tmrc20066 tmrc20039 tmrc20037 tmrc20038 tmrc20067 tmrc20068 tmrc20041
## 58753 93615 25115 98958 97676 93954 96583 53184
## tmrc20015 tmrc20009 tmrc20010 tmrc20016 tmrc20011 tmrc20012 tmrc20013 tmrc20017
## 96398 15890 93816 146124 13914 456 94766 48288
## tmrc20014 tmrc20018 tmrc20019 tmrc20070 tmrc20020 tmrc20021 tmrc20022 tmrc20025
## 17245 140438 14829 97336 15484 101127 18143 364240
## tmrc20024 tmrc20036 tmrc20069 tmrc20033 tmrc20026 tmrc20031 tmrc20076 tmrc20073
## 18471 60087 18792 33663 15074 19139 18385 96169
## tmrc20055 tmrc20079 tmrc20071 tmrc20078 tmrc20094 tmrc20042 tmrc20058 tmrc20072
## 22246 96224 94353 18836 87878 19734 94524 50292
## tmrc20059 tmrc20048 tmrc20057 tmrc20088 tmrc20056 tmrc20060 tmrc20077 tmrc20074
## 94091 97164 48944 15594 22683 21506 18773 22132
## tmrc20063 tmrc20053 tmrc20052 tmrc20064 tmrc20075 tmrc20051 tmrc20050 tmrc20049
## 28254 20181 100709 93173 97982 94125 17200 16168
## tmrc20062 tmrc20110 tmrc20080 tmrc20043 tmrc20083 tmrc20054 tmrc20085 tmrc20046
## 93677 16997 96528 95623 21167 93603 89765 48608
## tmrc20093 tmrc20089 tmrc20047 tmrc20090 tmrc20044 tmrc20045 tmrc20061 tmrc20105
## 48254 90421 92637 91564 14861 50403 116906 86758
## tmrc20108 tmrc20109 tmrc20098 tmrc20096 tmrc20097 tmrc20101 tmrc20092 tmrc20082
## 97005 17932 92927 17534 46863 17753 16578 109577
## tmrc20102 tmrc20099 tmrc20100 tmrc20091 tmrc20084 tmrc20087 tmrc20103 tmrc20104
## 92380 91383 94381 15059 46548 14947 49368 94237
## tmrc20086 tmrc20107 tmrc20081 tmrc20106 tmrc20095
## 15813 95370 19533 18830 81200
## My old_snps is using an older annotation incorrectly, so fix it here:
Biobase::annotation(old_snps$expressionset) <- Biobase::annotation(new_snps$expressionset)
both_snps <- combine_expts(new_snps, old_snps)
both_norm <- normalize_expt(both_snps, transform = "log2", norm = "quant")
## Error in colSums(count_table): 'x' must be numeric
## strains <- both_norm[["design"]][["strain"]]
both_strain <- set_expt_conditions(both_norm, fact = "strain")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'pData': object 'both_norm' not found
The data structure ‘both_norm’ now contains our 2016 data along with the newer data collected since 2019.
The following plot shows the SNP profiles of all samples (old and new) where the colors at the top show either the 2.2 strains (orange), 2.3 strains (green), the previous samples (purple), or the various lab strains (pink etc).
new_variant_heatmap <- plot_disheat(new_snps)
## Warning in dist(t(expt_data)): NAs introduced by coercion
dev <- pp(file = "images/raw_snp_disheat.png", height=12, width=12)
new_variant_heatmap$plot
closed <- dev.off()
new_variant_heatmap$plot
The function get_snp_sets() takes the provided metadata factor (in this case ‘condition’) and looks for variants which are exclusive to each element in it. In this case, this is looking for differences between 2.2 and 2.3, as well as the set shared among them.
snp_sets <- get_snp_sets(both_snps, factor = "condition")
## Copy the annotation slot of the current expressionset onto the 2016 one before
## combining them (the same adjustment is made for old_snps earlier in this document).
Biobase::annotation(old_expt$expressionset) <- Biobase::annotation(lp_expt$expressionset)
both_expt <- combine_expts(lp_expt, old_expt)
snp_genes <- sm(snps_vs_genes(both_expt, snp_sets, expt_name_col = "chromosome"))
## I think we have some metrics here we can plot...
snp_subset <- snp_subset_genes(
both_expt, both_snps,
genes = c("LPAL13_120010900", "LPAL13_340013000", "LPAL13_000054100",
"LPAL13_140006100", "LPAL13_180018500", "LPAL13_320022300"))
zymo_heat <- plot_sample_heatmap(snp_subset, row_label = rownames(exprs(snp_subset)))
zymo_heat
Najib has asked a few times about the relationship between variants and DE genes. In subsequent conversations I figured out that what he really wants to learn about is variants in the UTR (most likely 5’) which might affect the expression of genes. The following explicitly does not address that question, but asks a parallel one: is there a relationship between variants in the CDS and differential expression?
vars_df <- data.frame(ID = names(snp_genes$summary_by_gene), variants = as.numeric(snp_genes$summary_by_gene))
## Error in data.frame(ID = names(snp_genes$summary_by_gene), variants = as.numeric(snp_genes$summary_by_gene)): object 'snp_genes' not found
vars_df[["variants"]] <- log2(vars_df[["variants"]] + 1)
## Error in eval(expr, envir, enclos): object 'vars_df' not found
vars_by_de_gene <- merge(zy_df, vars_df, by.x="row.names", by.y="ID")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'y' in selecting a method for function 'merge': object 'vars_df' not found
cor.test(vars_by_de_gene$deseq_logfc, vars_by_de_gene$variants)
## Error in cor.test(vars_by_de_gene$deseq_logfc, vars_by_de_gene$variants): object 'vars_by_de_gene' not found
variants_wrt_logfc <- plot_linear_scatter(vars_by_de_gene[, c("deseq_logfc", "variants")])
## Error in data.frame(df[, c(1, 2)]): object 'vars_by_de_gene' not found
variants_wrt_logfc$scatter
## Error in eval(expr, envir, enclos): object 'variants_wrt_logfc' not found
## It looks like there might be some genes of interest, even though this is not actually
## the question of interest.
Didn’t I create a set of densities by chromosome? Oh I think they come in from get_snp_sets()
clinical_sets <- get_snp_sets(new_snps, factor = "clinicalresponse")
## The factor cure has 38 rows.
## Error in matrixStats::rowMedians(data[, columns], na.rm = TRUE): Argument 'x' must be of type logical, integer or numeric, not 'character'.
density_vec <- clinical_sets[["density"]]
## Error in eval(expr, envir, enclos): object 'clinical_sets' not found
chromosome_idx <- grep(pattern = "LpaL", x = names(density_vec))
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'grep': object 'density_vec' not found
density_df <- as.data.frame(density_vec[chromosome_idx])
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': object 'density_vec' not found
density_df[["chr"]] <- rownames(density_df)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'rownames': object 'density_df' not found
colnames(density_df) <- c("density_vec", "chr")
## Error in colnames(density_df) <- c("density_vec", "chr"): object 'density_df' not found
ggplot(density_df, aes_string(x = "chr", y = "density_vec")) +
ggplot2::geom_col() +
ggplot2::theme(axis.text = ggplot2::element_text(size = 10, colour = "black"),
axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5))
## Error in ggplot(density_df, aes_string(x = "chr", y = "density_vec")): object 'density_df' not found
## clinical_written <- write_variants(new_snps)
clinical_genes <- snps_vs_genes(lp_expt, clinical_sets, expt_name_col = "chromosome")
## Error in snps_vs_genes(lp_expt, clinical_sets, expt_name_col = "chromosome"): object 'clinical_sets' not found
snp_density <- merge(as.data.frame(clinical_genes[["summary_by_gene"]]),
as.data.frame(fData(lp_expt)),
by = "row.names")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'merge': error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': object 'clinical_genes' not found
snp_density <- snp_density[, c(1, 2, 4, 15)]
## Error in eval(expr, envir, enclos): object 'snp_density' not found
colnames(snp_density) <- c("name", "snps", "product", "length")
## Error in colnames(snp_density) <- c("name", "snps", "product", "length"): object 'snp_density' not found
snp_density[["product"]] <- tolower(snp_density[["product"]])
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'tolower': object 'snp_density' not found
snp_density[["length"]] <- as.numeric(snp_density[["length"]])
## Error in eval(expr, envir, enclos): object 'snp_density' not found
snp_density[["density"]] <- snp_density[["snps"]] / snp_density[["length"]]
## Error in eval(expr, envir, enclos): object 'snp_density' not found
snp_idx <- order(snp_density[["density"]], decreasing = TRUE)
## Error in eval(quote(list(...)), env): object 'snp_density' not found
snp_density <- snp_density[snp_idx, ]
## Error in eval(expr, envir, enclos): object 'snp_density' not found
## Discard every gene whose (lower-cased) product description mentions one of these
## families; all of the terms are matched in a single pass via regex alternation.
removers <- c("amastin", "gp63", "leishmanolysin")
drop_idx <- grepl(pattern = paste(removers, collapse = "|"),
                  x = snp_density[["product"]])
snp_density <- snp_density[!drop_idx, ]
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'grepl': object 'snp_density' not found
## Filter these for [A|a]mastin gp63 Leishmanolysin
clinical_snps <- snps_intersections(lp_expt, clinical_sets, chr_column = "chromosome")
## Error in snps_intersections(lp_expt, clinical_sets, chr_column = "chromosome"): object 'clinical_sets' not found
fail_ref_snps <- as.data.frame(clinical_snps[["inters"]][["failure, reference strain"]])
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': object 'clinical_snps' not found
fail_ref_snps <- rbind(fail_ref_snps,
as.data.frame(clinical_snps[["inters"]][["failure"]]))
## Error in eval(quote(list(...)), env): object 'fail_ref_snps' not found
cure_snps <- as.data.frame(clinical_snps[["inters"]][["cure"]])
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': object 'clinical_snps' not found
head(fail_ref_snps)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'head': object 'fail_ref_snps' not found
head(cure_snps)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'head': object 'cure_snps' not found
write.csv(file="csv/cure_variants.txt", x=rownames(cure_snps))
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'rownames': object 'cure_snps' not found
write.csv(file="csv/fail_variants.txt", x=rownames(fail_ref_snps))
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'rownames': object 'fail_ref_snps' not found
annot <- fData(lp_expt)
clinical_interest <- as.data.frame(clinical_snps[["gene_summaries"]][["cure"]])
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': object 'clinical_snps' not found
clinical_interest <- merge(clinical_interest,
as.data.frame(clinical_snps[["gene_summaries"]][["failure, reference strain"]]),
by = "row.names")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'merge': object 'clinical_interest' not found
rownames(clinical_interest) <- clinical_interest[["Row.names"]]
## Error in eval(expr, envir, enclos): object 'clinical_interest' not found
clinical_interest[["Row.names"]] <- NULL
## Error in clinical_interest[["Row.names"]] <- NULL: object 'clinical_interest' not found
colnames(clinical_interest) <- c("cure_snps","fail_snps")
## Error in colnames(clinical_interest) <- c("cure_snps", "fail_snps"): object 'clinical_interest' not found
annot <- merge(annot, clinical_interest, by = "row.names")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'y' in selecting a method for function 'merge': object 'clinical_interest' not found
rownames(annot) <- annot[["Row.names"]]
annot[["Row.names"]] <- NULL
fData(lp_expt$expressionset) <- annot
The heatmap produced here should show the variants only for the zymodeme genes.
I am thinking that if we find clusters of locations which are variant, that might provide some PCR testing possibilities.
## Drop the 2.1, 2.4, unknown, and null
pruned_snps <- subset_expt(new_snps, subset="condition=='z2.2'|condition=='z2.3'")
## subset_expt(): There were 101, now there are 83 samples.
new_sets <- get_snp_sets(pruned_snps, factor = "zymodemecategorical")
## The factor z22 has 43 rows.
## Error in matrixStats::rowMedians(data[, columns], na.rm = TRUE): Argument 'x' must be of type logical, integer or numeric, not 'character'.
summary(new_sets)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'summary': object 'new_sets' not found
## 1000000: 2.2
## 0100000: 2.3
summary(new_sets[["intersections"]][["10"]])
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'summary': object 'new_sets' not found
write.csv(file="csv/variants_22.csv", x=new_sets[["intersections"]][["10"]])
## Error in is.data.frame(x): object 'new_sets' not found
summary(new_sets[["intersections"]][["01"]])
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'summary': object 'new_sets' not found
write.csv(file="csv/variants_23.csv", x=new_sets[["intersections"]][["01"]])
## Error in is.data.frame(x): object 'new_sets' not found
Thus we see that there are 3,553 variants associated with 2.2 and 81,589 associated with 2.3.
The following function uses the positional data to look for sequential mismatches associated with zymodeme in the hopes that there will be some regions which would provide good potential targets for a PCR-based assay.
## Distance from each variant to the previous variant on the same chromosome.
## chr and pos must already be sorted by chromosome and then by position; the
## first variant seen on each chromosome gets its own position as its distance.
.distance_to_previous <- function(chr, pos) {
  n <- length(pos)
  if (n == 0) {
    return(numeric(0))
  }
  same_chr <- c(FALSE, chr[-1] == chr[-n])
  previous <- c(0, pos[-n])
  previous[!same_chr] <- 0
  pos - previous
}

## Find runs of closely spaced variants within one intersection of a SNP set.
##
## Arguments:
##   snp_sets: list with an element 'intersections' (one vector of variant names per
##     intersection, named like "chr_<chr>_pos_<pos>_ref_...") and an element
##     'set_names' (values are condition names, names are the intersection keys).
##   conditions: either a numeric index into 'intersections' or a condition name
##     to look up in 'set_names'.  NULL selects the first intersection.
##   minimum: minimum run length for a run to be reported.
##   maximum_separation: largest distance (in bases) to the previous variant for a
##     variant to count as part of a run.
##
## Returns a data frame (columns 'chr' and 'pos', row names are the variant names)
## holding the last variant of every run containing at least 'minimum' variants.
## An empty intersection yields a data frame with zero rows.
##
## Side effects: writes doubles.csv, one_away.csv and two_away.csv into the working
## directory, prints the head of the distance table, and emits two messages.
sequential_variants <- function(snp_sets, conditions = NULL, minimum = 3, maximum_separation = 3) {
  if (is.null(conditions)) {
    conditions <- 1
  }
  intersection_sets <- snp_sets[["intersections"]]
  if (is.numeric(conditions)) {
    chosen_intersection <- conditions
  } else {
    intersection_names <- snp_sets[["set_names"]]
    chosen_intersection <- names(intersection_names)[which(intersection_names == conditions)]
    ## Fail with a readable message instead of an obscure subscript error.
    if (length(chosen_intersection) != 1) {
      stop("Expected exactly one intersection matching: ",
           paste(conditions, collapse = ", "), ", found ",
           length(chosen_intersection), ".", call. = FALSE)
    }
  }

  possible_positions <- as.character(intersection_sets[[chosen_intersection]])
  if (length(possible_positions) == 0) {
    ## Nothing to scan: return an empty result instead of failing in the steps below.
    message("There are 0 candidate regions.")
    return(data.frame(chr = character(0), pos = numeric(0), stringsAsFactors = FALSE))
  }

  ## Split the variant names into chromosome and position, then sort by both.
  pat <- "^chr_(.+)_pos_(.+)_ref_.*$"
  position_table <- data.frame(
    chr = gsub(pattern = pat, replacement = "\\1", x = possible_positions),
    pos = as.numeric(gsub(pattern = pat, replacement = "\\2", x = possible_positions)),
    row.names = possible_positions,
    stringsAsFactors = FALSE)
  position_idx <- order(position_table[["chr"]], position_table[["pos"]])
  position_table <- position_table[position_idx, ]
  position_table[["dist"]] <- .distance_to_previous(position_table[["chr"]],
                                                    position_table[["pos"]])

  ## Working interactively here: dump the variants lying 1, 2, or 3 bases
  ## downstream of another variant.
  doubles <- position_table[which(position_table[["dist"]] == 1), ]
  write.csv(doubles, "doubles.csv")
  one_away <- position_table[which(position_table[["dist"]] == 2), ]
  write.csv(one_away, "one_away.csv")
  two_away <- position_table[which(position_table[["dist"]] == 3), ]
  write.csv(two_away, "two_away.csv")

  ## Among those tightly spaced variants, look for neighbours 200-1000 bases apart
  ## whose reference base is G or C.  'remaining' is not returned; it is kept only
  ## for interactive inspection, and no longer fails when 'combined' is empty.
  combined <- rbind(doubles, one_away, two_away)
  combined <- combined[order(combined[["chr"]], combined[["pos"]]), ]
  combined[["dist_pair"]] <- .distance_to_previous(combined[["chr"]], combined[["pos"]])
  dist_pair_maximum <- 1000
  dist_pair_minimum <- 200
  dist_pair_idx <- which(combined[["dist_pair"]] <= dist_pair_maximum &
                           combined[["dist_pair"]] >= dist_pair_minimum)
  remaining <- combined[dist_pair_idx, ]
  no_weak_idx <- grepl(pattern="ref_(G|C)", x=rownames(remaining))
  remaining <- remaining[no_weak_idx, ]

  print(head(table(position_table[["dist"]])))
  sequentials <- position_table[["dist"]] <= maximum_separation
  message("There are ", sum(sequentials, na.rm = TRUE), " candidate regions.")

  ## Use run length encoding to find each run of consecutive close variants, and
  ## record the length of the run on the row of its final member.
  runs <- rle(sequentials)
  true_runs <- which(runs[["values"]])
  run_ends <- cumsum(runs[["lengths"]])[true_runs]
  last_sequential <- numeric(nrow(position_table))
  last_sequential[run_ends] <- runs[["lengths"]][true_runs]
  position_table[["last_sequential"]] <- last_sequential
  message("The maximum sequential set is: ", max(position_table[["last_sequential"]]), ".")

  wanted_idx <- position_table[["last_sequential"]] >= minimum
  wanted <- position_table[wanted_idx, c("chr", "pos")]
  return(wanted)
}
## Zymodeme 2.2: variants at most 2 positions apart, keeping runs of length >= 1.
zymo22_sequentials <- sequential_variants(
  new_sets,
  conditions = "z22",
  minimum = 1,
  maximum_separation = 2)
dim(zymo22_sequentials)
## 7 candidate regions for zymodeme 2.2 -- thus I am betting that the reference strain is a 2.2

## Zymodeme 2.3: same separation, but keeping only runs of length >= 2.
## NOTE(review): the two calls use different 'minimum' values, so the region
## counts quoted here are not computed with the same threshold -- confirm that
## this is intended.
zymo23_sequentials <- sequential_variants(
  new_sets,
  conditions = "z23",
  minimum = 2,
  maximum_separation = 2)
dim(zymo23_sequentials)
## In contrast, there are lots (587) of interesting regions for 2.3!
The first 4 candidate regions from my `remaining` set:

* Chr Pos. Distance
* LpaL13-15 238433 448
* LpaL13-18 142844 613
* LpaL13-29 830342 252
* LpaL13-33 1331507 843
Let's define a few terms:

* Third: each of the 4 positions above.
* Second: Third - Distance
* End: Third + PrimerLen
* Start: Second - PrimerLen
In each instance, these are the last positions, so we want to grab three things:
## For each of the four candidates the same steps are repeated:
##   third  = the position listed in the candidate table
##   second = third - amplicon_length
##   start  = second - primer_length
##   end    = third + primer_length
## The whole start..end region is extracted and printed, followed by the
## start..second stretch (5' side) and the reverse complement of the
## third..end stretch (3' side).
## NOTE(review): start..second and third..end each cover primer_length + 1
## positions if subseq() treats both bounds as inclusive -- confirm that a
## 23 nt primer (rather than 22) is intended.
## NOTE(review): the table writes chromosome names with a hyphen (LpaL13-15)
## while the genome object is indexed with an underscore (LpaL13_15).

## * LpaL13-15 238433 448
first_candidate_chr <- genome[["LpaL13_15"]]
primer_length <- 22
amplicon_length <- 448
first_candidate_third <- 238433
first_candidate_second <- first_candidate_third - amplicon_length
first_candidate_start <- first_candidate_second - primer_length
first_candidate_end <- first_candidate_third + primer_length
## Full region spanning both primer sites; printed for inspection.
first_candidate_region <- subseq(first_candidate_chr, first_candidate_start, first_candidate_end)
first_candidate_region
## 5' side, printed as a plain string.
first_candidate_5p <- subseq(first_candidate_chr, first_candidate_start, first_candidate_second)
as.character(first_candidate_5p)
## 3' side, reverse-complemented (presumably for use as the reverse primer).
first_candidate_3p <- spgs::reverseComplement(subseq(first_candidate_chr, first_candidate_third, first_candidate_end))
first_candidate_3p
## * LpaL13-18 142844 613
second_candidate_chr <- genome[["LpaL13_18"]]
primer_length <- 22
amplicon_length <- 613
second_candidate_third <- 142844
second_candidate_second <- second_candidate_third - amplicon_length
second_candidate_start <- second_candidate_second - primer_length
second_candidate_end <- second_candidate_third + primer_length
## Full region spanning both primer sites; printed for inspection.
second_candidate_region <- subseq(second_candidate_chr, second_candidate_start, second_candidate_end)
second_candidate_region
## 5' side, printed as a plain string.
second_candidate_5p <- subseq(second_candidate_chr, second_candidate_start, second_candidate_second)
as.character(second_candidate_5p)
## 3' side, reverse-complemented.
second_candidate_3p <- spgs::reverseComplement(subseq(second_candidate_chr, second_candidate_third, second_candidate_end))
second_candidate_3p
## * LpaL13-29 830342 252
third_candidate_chr <- genome[["LpaL13_29"]]
primer_length <- 22
amplicon_length <- 252
third_candidate_third <- 830342
third_candidate_second <- third_candidate_third - amplicon_length
third_candidate_start <- third_candidate_second - primer_length
third_candidate_end <- third_candidate_third + primer_length
## Full region spanning both primer sites; printed for inspection.
third_candidate_region <- subseq(third_candidate_chr, third_candidate_start, third_candidate_end)
third_candidate_region
## 5' side, printed as a plain string.
third_candidate_5p <- subseq(third_candidate_chr, third_candidate_start, third_candidate_second)
as.character(third_candidate_5p)
## 3' side, reverse-complemented.
third_candidate_3p <- spgs::reverseComplement(subseq(third_candidate_chr, third_candidate_third, third_candidate_end))
third_candidate_3p
## You are a garbage polypyrimidine tract.
## Which is actually interesting if the mutations mess it up.
## * LpaL13-33 1331507 843
fourth_candidate_chr <- genome[["LpaL13_33"]]
primer_length <- 22
amplicon_length <- 843
fourth_candidate_third <- 1331507
fourth_candidate_second <- fourth_candidate_third - amplicon_length
fourth_candidate_start <- fourth_candidate_second - primer_length
fourth_candidate_end <- fourth_candidate_third + primer_length
## Full region spanning both primer sites; printed for inspection.
fourth_candidate_region <- subseq(fourth_candidate_chr, fourth_candidate_start, fourth_candidate_end)
fourth_candidate_region
## 5' side, printed as a plain string.
fourth_candidate_5p <- subseq(fourth_candidate_chr, fourth_candidate_start, fourth_candidate_second)
as.character(fourth_candidate_5p)
## 3' side, reverse-complemented.
fourth_candidate_3p <- spgs::reverseComplement(subseq(fourth_candidate_chr, fourth_candidate_third, fourth_candidate_end))
fourth_candidate_3p
I made a fun little function which should find regions which have lots of variants associated with a given experimental factor.
## Keep only the z2.2 / z2.3 samples, then only those with a bcftable entry.
## The second subset string refers to 'pheno' by name, so the two calls must
## stay sequential rather than being chained.
pheno <- subset_expt(
  lp_expt,
  subset = "condition=='z2.2'|condition=='z2.3'")
## subset_expt(): There were 101, now there are 83 samples.
pheno <- subset_expt(
  pheno,
  subset = "!is.na(pData(pheno)[['bcftable']])")
## subset_expt(): There were 83, now there are 55 samples.

## Count the variants per sample and search for variant-dense primer regions.
pheno_snps <- sm(count_expt_snps(pheno, annot_column = "bcftable"))
fun_stuff <- snp_density_primers(
  pheno_snps,
  bsgenome = "BSGenome.Leishmania.panamensis.MHOMCOL81L13.v53",
  gff = "reference/TriTrypDB-53_LpanamensisMHOMCOL81L13.gff")
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo = TRUE)
## Had a successful gff import with rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo = TRUE)
## Returning a df with 16 columns and 35190 rows.

## Discard every favorite whose row name contains "SCAF", then expose the row
## names as an explicit 'bin' column.
favorite_primer_regions <- fun_stuff[["favorites"]]
drop_scaffolds <- grepl(pattern = "SCAF", x = rownames(favorite_primer_regions))
favorite_primer_regions <- favorite_primer_regions[!drop_scaffolds, ]
favorite_primer_regions[["bin"]] <- rownames(favorite_primer_regions)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Biostrings':
##
## collapse, intersect, setdiff, setequal, union
## The following object is masked from 'package:XVector':
##
## slice
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following object is masked from 'package:hpgltools':
##
## combine
## The following object is masked from 'package:testthat':
##
## matches
## The following objects are masked from 'package:GenomicRanges':
##
## intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following object is masked from 'package:matrixStats':
##
## count
## The following object is masked from 'package:Biobase':
##
## combine
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Move the 'bin' column to the front of the table.
favorite_primer_regions <- relocate(favorite_primer_regions, bin)
Here is my note from our meeting:
Cross-reference the primers to the DE genes of 2.2/2.3 and/or resistant/susceptible, and add a column to the primer spreadsheet with the DE genes (in retrospect, I am guessing this actually means adding the logFC as a column).
One nice thing, I did a semantic removal on the lp_expt, so the set of logFC/pvalues should not have any of the offending types; thus I should be able to automagically get rid of them in the merge.
## Cross-reference the favorite primer regions against two DE tables and
## write the combined table to an xlsx file.

## z2.3 vs. z2.2: keep the deseq_logfc / deseq_adjp columns under new names.
logfc <- zy_table_sva[["data"]][["z23_vs_z22"]]
logfc_columns <- logfc[, c("deseq_logfc", "deseq_adjp")]
colnames(logfc_columns) <- c("z23_logfc", "z23_adjp")
## Join on the closest_gene_before_id column against the DE table's row names.
## merge() keeps only matching keys by default (all = FALSE), so primer regions
## whose gene is absent from the DE table are dropped here.
new_table <- merge(favorite_primer_regions, logfc_columns,
by.x = "closest_gene_before_id", by.y = "row.names")
## Sensitive vs. resistant: same two columns, same join key.
sus <- sus_table_sva[["data"]][["sensitive_vs_resistant"]]
sus_columns <- sus[, c("deseq_logfc", "deseq_adjp")]
colnames(sus_columns) <- c("sus_logfc", "sus_adjp")
## merge() puts the key column first; relocate() moves 'bin' back to the front.
new_table <- merge(new_table, sus_columns,
by.x = "closest_gene_before_id", by.y = "row.names") %>%
relocate(bin)
written <- write_xlsx(data=new_table,
excel="excel/favorite_primers_xref_zy_sus.xlsx")
We can cross reference the variants against the zymodeme status and plot a heatmap of the results and hopefully see how they separate.
## Cross-reference variants against genes, then draw a distance heatmap of the
## quantile-normalized variant data colored by zymodeme.
## NOTE(review): every step in this chunk failed in this render (see the
## recorded errors): 'new_sets' was not defined when snps_vs_genes() ran, the
## normalize/set-conditions pipeline on 'pruned_snps' stopped with
## "'x' must be numeric", and everything after it depends on those results.
snp_genes <- sm(snps_vs_genes(lp_expt, new_sets, expt_name_col = "chromosome"))
## Error in snps_vs_genes(lp_expt, new_sets, expt_name_col = "chromosome"): object 'new_sets' not found
## Colors for the two zymodeme categories.
clinical_colors_v2 <- list(
"z22" = "#0000cc",
"z23" = "#cc0000")
new_zymo_norm <- normalize_expt(pruned_snps, normq = "quant") %>%
set_expt_conditions(fact = "zymodemecategorical") %>%
set_expt_colors(clinical_colors_v2)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'pData': error in evaluating the argument 'object' in selecting a method for function 'pData': 'x' must be numeric
zymo_heat <- plot_disheat(new_zymo_norm)
## Error in plot_heatmap(expt_data, expt_colors = expt_colors, expt_design = expt_design, : object 'new_zymo_norm' not found
## Write the plot to a pdf, close the device, then print it again inline.
dev <- pp(file = "images/onlyz22_z23_snp_heatmap.pdf", width=12, height=12)
zymo_heat[["plot"]]
## Error in eval(expr, envir, enclos): object 'zymo_heat' not found
closed <- dev.off()
zymo_heat[["plot"]]
## Error in eval(expr, envir, enclos): object 'zymo_heat' not found
Now let us try to make a heatmap which includes some of the annotation data.
## Build an annotated correlation heatmap: sample/sample correlations from
## 'both_norm', with zymodeme and clinical outcome as column annotations and
## strain as the row annotation.
## NOTE(review): 'both_norm' did not exist in this render, so every statement
## that depends on it failed (see the recorded errors).
des <- both_norm[["design"]]
## Error in eval(expr, envir, enclos): object 'both_norm' not found
## Label samples without a strain as "unknown".
undef_idx <- is.na(des[["strain"]])
## Error in eval(expr, envir, enclos): object 'des' not found
des[undef_idx, "strain"] <- "unknown"
## Error in des[undef_idx, "strain"] <- "unknown": object 'des' not found
##hmcols <- colorRampPalette(c("yellow","black","darkblue"))(256)
## Correlation matrix of the expression values; missing correlations become 0.
correlations <- hpgl_cor(exprs(both_norm))
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'exprs': object 'both_norm' not found
na_idx <- is.na(correlations)
## Error in eval(expr, envir, enclos): object 'correlations' not found
correlations[na_idx] <- 0
## Error in correlations[na_idx] <- 0: object 'correlations' not found
## Record which zymodemes are missing before converting the columns with
## as.character(); the "unknown" label is assigned only after the conversion.
zymo_missing_idx <- is.na(des[["zymodemecategorical"]])
## Error in eval(expr, envir, enclos): object 'des' not found
des[["zymodemecategorical"]] <- as.character(des[["zymodemecategorical"]])
## Error in eval(expr, envir, enclos): object 'des' not found
des[["clinicalcategorical"]] <- as.character(des[["clinicalcategorical"]])
## Error in eval(expr, envir, enclos): object 'des' not found
des[zymo_missing_idx, "zymodemecategorical"] <- "unknown"
## Error in des[zymo_missing_idx, "zymodemecategorical"] <- "unknown": object 'des' not found
## Dendrogram options passed to annHeatmap2(): hclust clustering, line width 2.
mydendro <- list(
"clustfun" = hclust,
"lwd" = 2.0)
## Column annotations (zymodeme, outcome) and row annotation (strain).
col_data <- as.data.frame(des[, c("zymodemecategorical", "clinicalcategorical")])
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': object 'des' not found
unknown_clinical <- is.na(col_data[["clinicalcategorical"]])
## Error in eval(expr, envir, enclos): object 'col_data' not found
row_data <- as.data.frame(des[, c("strain")])
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': object 'des' not found
colnames(col_data) <- c("zymodeme", "outcome")
## Error in colnames(col_data) <- c("zymodeme", "outcome"): object 'col_data' not found
## Samples without a clinical category get the outcome "undefined".
col_data[unknown_clinical, "outcome"] <- "undefined"
## Error in col_data[unknown_clinical, "outcome"] <- "undefined": object 'col_data' not found
colnames(row_data) <- c("strain")
## Error in colnames(row_data) <- c("strain"): object 'row_data' not found
myannot <- list(
"Col" = list("data" = col_data),
"Row" = list("data" = row_data))
## Error in eval(expr, envir, enclos): object 'col_data' not found
## Cluster cut height and palette, label layout, and the heatmap color ramp.
myclust <- list("cuth" = 1.0,
"col" = BrewerClusterCol)
mylabs <- list(
"Row" = list("nrow" = 4),
"Col" = list("nrow" = 4))
hmcols <- colorRampPalette(c("darkblue", "beige"))(240)
zymo_annot_heat <- annHeatmap2(
correlations,
dendrogram = mydendro,
annotation = myannot,
cluster = myclust,
labels = mylabs,
## The following controls if the picture is symmetric
scale = "none",
col = hmcols)
## Error in annHeatmap2(correlations, dendrogram = mydendro, annotation = myannot, : object 'correlations' not found
## Write the heatmap to a png, close the device, then plot it again inline.
dev <- pp(file = "images/dendro_heatmap.png", height = 20, width = 20)
plot(zymo_annot_heat)
## Error in plot(zymo_annot_heat): object 'zymo_annot_heat' not found
closed <- dev.off()
plot(zymo_annot_heat)
## Error in plot(zymo_annot_heat): object 'zymo_annot_heat' not found
Print the larger heatmap so that all the labels appear. Keep in mind that as we get more samples, this image needs to continue getting bigger.
big heatmap
xref_prop <- table(pheno_snps[["conditions"]])
pheno_snps$conditions
## [1] "z2.3" "z2.3" "z2.2" "z2.3" "z2.2" "z2.3" "z2.3" "z2.3" "z2.3" "z2.2"
## [11] "z2.3" "z2.2" "z2.3" "z2.3" "z2.2" "z2.2" "z2.3" "z2.2" "z2.2" "z2.3"
## [21] "z2.2" "z2.3" "z2.2" "z2.3" "z2.2" "z2.2" "z2.2" "z2.2" "z2.2" "z2.2"
## [31] "z2.2" "z2.3" "z2.2" "z2.3" "z2.3" "z2.2" "z2.2" "z2.3" "z2.2" "z2.3"
## [41] "z2.3" "z2.2" "z2.2" "z2.2" "z2.2" "z2.3" "z2.3" "z2.3" "z2.2" "z2.3"
## [51] "z2.3" "z2.3" "z2.3" "z2.2" "z2.2"
## Build the per-variant table handed to CMplot: for every condition, the
## proportion of its samples in which each variant is observed.

## A variant counts as observed in a sample when its value exceeds 5.
idx_tbl <- exprs(pheno_snps) > 5
new_tbl <- data.frame(row.names = rownames(idx_tbl))
for (n in names(xref_prop)) {
  idx_cols <- which(pheno_snps[["conditions"]] == n)
  ## drop = FALSE keeps a matrix even when a condition has a single sample;
  ## without it the subset collapses to a vector and rowSums() fails.
  observed <- rowSums(idx_tbl[, idx_cols, drop = FALSE])
  ## xref_prop[[n]] is the plain sample count for this condition.
  prop_col <- as.numeric(observed) / xref_prop[[n]]
  new_tbl[[n]] <- prop_col
}

## Keep only variants whose name contains "LpaL13".
keepers <- grepl(x = rownames(new_tbl), pattern = "LpaL13")
new_tbl <- new_tbl[keepers, , drop = FALSE]

## Score = 1.001 - proportion, capped at 1: a variant seen in every sample of
## a condition scores 0.001 and one seen in none scores 1.
## NOTE(review): the 0.001 offset presumably keeps the score above zero so
## that CMplot can treat it like a p-value -- confirm.
new_tbl[["strong22"]] <- pmin(1.001 - new_tbl[["z2.2"]], 1)
new_tbl[["strong23"]] <- pmin(1.001 - new_tbl[["z2.3"]], 1)

## CMplot wants the SNP name, chromosome, and position as the first three
## columns; chromosome and position are parsed out of the variant name.
new_tbl[["SNP"]] <- rownames(new_tbl)
new_tbl[["Chromosome"]] <- gsub(
  x = new_tbl[["SNP"]],
  pattern = "chr_(.*)_pos_.*",
  replacement = "\\1")
new_tbl[["Position"]] <- gsub(
  x = new_tbl[["SNP"]],
  pattern = ".*_pos_(\\d+)_.*",
  replacement = "\\1")
new_tbl <- new_tbl[, c("SNP", "Chromosome", "Position", "strong22", "strong23")]
library(CMplot)
## Much appreciate for using CMplot.
## Full description, Bug report, Suggestion and the latest codes:
## https://github.com/YinLiLin/CMplot
## First pass: default CMplot figures for the strong23 score only, so the
## strong22 column is left out of the input.
simplify <- new_tbl[, colnames(new_tbl) != "strong22", drop = FALSE]
CMplot(simplify, bin.size = 100000)
## SNP-Density Plotting.
## Circular-Manhattan Plotting strong23.
## Rectangular-Manhattan Plotting strong23.
## QQ Plotting strong23.
## Plots are stored in: /mnt/cbcb/fs01_abelew/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_tmrc_2019

## Second pass: multi-track Manhattan plot of both scores, written as jpg.
CMplot(
  new_tbl,
  plot.type = "m",
  multracks = TRUE,
  threshold = c(0.01, 0.05),
  threshold.lwd = c(1, 1),
  threshold.col = c("black", "grey"),
  amplify = TRUE,
  bin.size = 10000,
  chr.den.col = c("darkgreen", "yellow", "red"),
  signal.col = c("red", "green", "blue"),
  signal.cex = 1,
  file = "jpg",
  memo = "",
  dpi = 300,
  file.output = TRUE,
  verbose = TRUE)
## Multracks-Manhattan Plotting strong22.
## Multracks-Manhattan Plotting strong23.
## Multraits-Rectangular Plotting...(finished 73%)
Multraits-Rectangular Plotting...(finished 74%)
Multraits-Rectangular Plotting...(finished 75%)
Multraits-Rectangular Plotting...(finished 76%)
Multraits-Rectangular Plotting...(finished 77%)
Multraits-Rectangular Plotting...(finished 78%)
Multraits-Rectangular Plotting...(finished 79%)
Multraits-Rectangular Plotting...(finished 80%)
Multraits-Rectangular Plotting...(finished 81%)
Multraits-Rectangular Plotting...(finished 82%)
Multraits-Rectangular Plotting...(finished 83%)
Multraits-Rectangular Plotting...(finished 84%)
Multraits-Rectangular Plotting...(finished 85%)
Multraits-Rectangular Plotting...(finished 86%)
Multraits-Rectangular Plotting...(finished 87%)
Multraits-Rectangular Plotting...(finished 88%)
Multraits-Rectangular Plotting...(finished 89%)
Multraits-Rectangular Plotting...(finished 90%)
Multraits-Rectangular Plotting...(finished 91%)
Multraits-Rectangular Plotting...(finished 92%)
Multraits-Rectangular Plotting...(finished 93%)
Multraits-Rectangular Plotting...(finished 94%)
Multraits-Rectangular Plotting...(finished 95%)
Multraits-Rectangular Plotting...(finished 96%)
Multraits-Rectangular Plotting...(finished 97%)
Multraits-Rectangular Plotting...(finished 98%)
Multraits-Rectangular Plotting...(finished 99%)
Multraits-Rectangular Plotting...(finished 100%)
## Plots are stored in: /mnt/cbcb/fs01_abelew/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_tmrc_2019
This tool looks a little opaque, but provides sample data with things that make sense to me and should be pretty easy to recapitulate in our data.
## The variant data comes from 'new_snps'; the expression data and gene
## locations come from 'lp_expt'.

## Covariates: MatrixEQTL needs numbers, so each categorical column is
## recoded through a factor, and the table is transposed so that samples
## become the columns.
my_covariates <- pData(new_snps)[, c("zymodemecategorical", "clinicalcategorical")]
my_covariates[] <- lapply(
  my_covariates,
  function(covariate) as.numeric(as.factor(covariate)))
my_covariates <- t(my_covariates)

## Gene locations, renamed to geneid / chr / left / right.
my_geneloc <- fData(lp_expt)[, c("gid", "chromosome", "start", "end")]
colnames(my_geneloc) <- c("geneid", "chr", "left", "right")

## Expression: filtered, cpm-converted, log2-transformed, then restricted to
## the samples whose lower-cased name also appears in the variant data.
my_ge <- exprs(
  normalize_expt(lp_expt, transform = "log2", filter = TRUE, convert = "cpm"))
used_samples <- tolower(colnames(my_ge)) %in% colnames(exprs(new_snps))
my_ge <- my_ge[, used_samples]
## Build the SNP location table for MatrixEQTL: one row per variant holding
## its identifier, chromosome, and position (three columns, in that order).
##
## Variant names have the form chr_<chromosome>_pos_<position>_ref_<...>.
## Chromosome and position must be parsed from those names; the row names of
## a freshly created data frame are just "1".."n".  The identifier column
## keeps its existing name ('rownames') so the written file is unchanged.
snp_ids <- rownames(exprs(new_snps))
snp_name_pattern <- "^chr_(.+)_pos_(\\d+)_ref_.*$"
my_snpsloc <- data.frame(
  rownames = snp_ids,
  chr = gsub(pattern = snp_name_pattern, replacement = "\\1", x = snp_ids),
  pos = as.numeric(
    gsub(pattern = snp_name_pattern, replacement = "\\2", x = snp_ids)))
## Oh, caveat here: Because of the way I stored the data,
## I could have duplicate rows which presumably will make matrixEQTL sad
test <- duplicated(my_snpsloc)
## Each duplicated row would be another variant at that position;
## so in theory we would do a rle to number them I am guessing
## However, I do not have different variants so I think I can ignore this for the moment
## but will need to make my matrix either 0 or 1.
## NOTE(review): duplicated() compares whole rows, identifier included, so
## two different variants at one position are not flagged -- confirm that
## this is the intended notion of "duplicate".
if (any(test)) {
  ## Report the number of flagged rows: sum(test), not sum(duplicated).
  message("There are: ", sum(test), " duplicated entries.")
  keep_idx <- !test
  my_snpsloc <- my_snpsloc[keep_idx, ]
}
## Reduce the variant matrix to presence/absence: every value above 0 becomes 1.
my_snps <- exprs(new_snps)
one_idx <- my_snps > 0
my_snps[one_idx] <- 1
## Ok, at this point I think I have all the pieces which this method wants...
## Oh, no I guess not; it actually wants the data as a set of filenames...
library(MatrixEQTL)
## Write the five MatrixEQTL inputs as tab-separated files with a header row
## and row names.  write.table() does not create directories, so eqtl/ must
## already exist.
## NOTE(review): nothing here reorders the sample columns of snps.tsv, ge.tsv,
## and covariates.tsv to agree with one another -- confirm that they match.
write.table(my_snps, "eqtl/snps.tsv", na = "NA", col.names = TRUE, row.names = TRUE, sep = "\t", quote = TRUE)
## readr::write_tsv(my_snps, "eqtl/snps.tsv", )
write.table(my_snpsloc, "eqtl/snpsloc.tsv", na = "NA", col.names = TRUE, row.names = TRUE, sep = "\t", quote = TRUE)
## readr::write_tsv(my_snpsloc, "eqtl/snpsloc.tsv")
write.table(as.data.frame(my_ge), "eqtl/ge.tsv", na = "NA", col.names = TRUE, row.names = TRUE, sep = "\t", quote = TRUE)
## readr::write_tsv(as.data.frame(my_ge), "eqtl/ge.tsv")
write.table(as.data.frame(my_geneloc), "eqtl/geneloc.tsv", na = "NA", col.names = TRUE, row.names = TRUE, sep = "\t", quote = TRUE)
## readr::write_tsv(as.data.frame(my_geneloc), "eqtl/geneloc.tsv")
write.table(as.data.frame(my_covariates), "eqtl/covariates.tsv", na = "NA", col.names = TRUE, row.names = TRUE, sep = "\t", quote = TRUE)
## readr::write_tsv(as.data.frame(my_covariates), "eqtl/covariates.tsv")
## ---- MatrixEQTL configuration ----
## Model: one of modelANOVA, modelLINEAR, or modelLINEAR_CROSS.
useModel <- modelLINEAR

## Input files: genotypes, SNP locations, expression, gene locations, covariates.
SNP_file_name <- "eqtl/snps.tsv"
snps_location_file_name <- "eqtl/snpsloc.tsv"
expression_file_name <- "eqtl/ge.tsv"
gene_location_file_name <- "eqtl/geneloc.tsv"
covariates_file_name <- "eqtl/covariates.tsv"

## Results are written to temporary files.
output_file_name_cis <- tempfile()
output_file_name_tra <- tempfile()

## Only associations significant at these levels are saved.
pvOutputThreshold_cis <- 0.1
pvOutputThreshold_tra <- 0.1

## Error covariance matrix; numeric() means identity.
errorCovariance <- numeric()
## errorCovariance <- read.table("Sample_Data/errorCovariance.txt")

## Maximum distance for a gene-SNP pair to count as local (cis).
cisDist <- 1e6

## ---- Load the genotypes ----
## Tab-delimited, "NA" marks missing values, one header row, one column of
## row labels, read in slices of 2,000 rows.
snps <- SlicedData$new()
snps$fileDelimiter <- "\t"
snps$fileOmitCharacters <- "NA"
snps$fileSkipRows <- 1
snps$fileSkipColumns <- 1
snps$fileSliceSize <- 2000
snps$LoadFile(SNP_file_name)

## ---- Load the expression data ----
## Same file layout as the genotypes.
gene <- SlicedData$new()
gene$fileDelimiter <- "\t"
gene$fileOmitCharacters <- "NA"
gene$fileSkipRows <- 1
gene$fileSkipColumns <- 1
gene$fileSliceSize <- 2000
gene$LoadFile(expression_file_name)

## ---- Load the covariates ----
## Same layout, but no slice size is set; loaded only if a file name is given.
cvrt <- SlicedData$new()
cvrt$fileDelimiter <- "\t"
cvrt$fileOmitCharacters <- "NA"
cvrt$fileSkipRows <- 1
cvrt$fileSkipColumns <- 1
if (length(covariates_file_name) > 0) {
  cvrt$LoadFile(covariates_file_name)
}

## ---- Run the analysis ----
## The location tables are read back from the files written earlier.
snpspos <- read.table(snps_location_file_name, header = TRUE, stringsAsFactors = FALSE)
genepos <- read.table(gene_location_file_name, header = TRUE, stringsAsFactors = FALSE)
me <- Matrix_eQTL_main(
  snps = snps,
  gene = gene,
  cvrt = cvrt,
  output_file_name = output_file_name_tra,
  pvOutputThreshold = pvOutputThreshold_tra,
  useModel = useModel,
  errorCovariance = errorCovariance,
  verbose = TRUE,
  output_file_name.cis = output_file_name_cis,
  pvOutputThreshold.cis = pvOutputThreshold_cis,
  snpspos = snpspos,
  genepos = genepos,
  cisDist = cisDist,
  pvalue.hist = "qqplot",
  min.pv.by.genesnp = FALSE,
  noFDRsaveMemory = FALSE)
## Report the session and save the workspace unless 'skip_load' is TRUE.
## get0() returns NULL when 'skip_load' does not exist, so an undefined
## variable also leads to saving.
if (!isTRUE(get0("skip_load"))) {
pander::pander(sessionInfo())
message(paste0("This is hpgltools commit: ", get_git_commit()))
message(paste0("Saving to ", savefile))
## sm() is used throughout this document to silence the wrapped call's output.
tmp <- sm(saveme(filename = savefile))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 12cc39929e5afa444a4ddf2f7b7a0472713666e3
## This is hpgltools commit: Wed Jun 22 15:35:44 2022 -0400: 12cc39929e5afa444a4ddf2f7b7a0472713666e3
## Saving to tmrc2_02sample_estimation_v202206.rda.xz
tmp <- loadme(filename = savefile)