## Location of the sample sheet which is handed to create_expt() further down.
sample_sheet <- glue::glue(
  "sample_sheets/tmrc2_samples_20210620.xlsx"
)
This document is intended to provide a general overview of the TMRC2 samples which have thus far been sequenced. In some cases, this includes only those samples starting in 2019; in other instances I am including our previous (2015-2016) samples.
In all cases the processing performed was:
The analyses in this document use the matrices of counts/gene from #3 and variants/position from #4 in order to provide some images and metrics describing the samples we have sequenced so far.
Everything which follows depends on the existing TriTrypDB annotations, revision 46, circa 2019. The following block loads a database of these annotations and turns it into a matrix where the rows are genes and the columns are all the annotation types provided by TriTrypDB.
The same database was used to create a matrix of orthologous genes between L. panamensis and all of the other species in the TriTrypDB.
## Attach EuPathDB and the L. panamensis v46 OrgDb package; both calls are wrapped
## in sm() (presumably to keep the startup messages out of the report).
## tt just receives the return value of library() and is overwritten by the second call.
tt <- sm(library(EuPathDB))
tt <- sm(library(org.Lpanamensis.MHOMCOL81L13.v46.eg.db))
pan_db <- org.Lpanamensis.MHOMCOL81L13.v46.eg.db
all_fields <- columns(pan_db)

## Gene-level annotations keyed by "gid", restricted to the fields listed here.
all_lp_annot <- sm(
  load_orgdb_annotations(
    pan_db,
    keytype = "gid",
    fields = c(
      "annot_gene_entrez_id", "annot_gene_name",
      "annot_strand", "annot_chromosome", "annot_cds_length",
      "annot_gene_product")))[["genes"]]
## Lowercase the product descriptions.
all_lp_annot[["annot_gene_product"]] <- tolower(all_lp_annot[["annot_gene_product"]])

## Two-column table of gene IDs and CDS lengths.
lp_lengths <- all_lp_annot[, c("gid", "annot_cds_length")]
colnames(lp_lengths) <- c("ID", "length")

## Gene ontology and ortholog tables taken from the same database.
lp_go <- sm(load_orgdb_go(pan_db))
orthos <- sm(EuPathDB::extract_eupath_orthologs(db = pan_db))

## The annotations used as gene_info when creating the expressionset.
hisat_annot <- all_lp_annot
## rownames(hisat_annot) <- paste0("exon_", rownames(hisat_annot), ".E1")
Resequence samples: TMRC20002, TMRC20006, TMRC20004 (maybe TMRC20008 and TMRC20029)
The process of sample estimation takes two primary inputs:
An expressionset is a data structure used in R to examine RNASeq data. It is composed of annotations, metadata, and expression data. In the case of our processing pipeline, the location of the expression data is provided by the filenames in the metadata.
The first lines of the following block create the Expressionset. All of the following lines perform various normalizations and generate plots from it.
The following samples are much lower coverage:
20210610: I made some manual changes to the sample sheet which I downloaded, filling in some zymodeme with ‘unknown’
## Metadata columns which get sanitized and then converted to factors.
sanitize_columns <- c(
  "passagenumber", "clinicalresponse", "clinicalcategorical",
  "zymodemecategorical", "phenotypiccharacteristics")

## Create the expressionset from the sample sheet and the annotations; only this
## first step is wrapped in sm(), so the later steps still print their messages.
lp_expt <- sm(
  create_expt(
    sample_sheet,
    gene_info = hisat_annot,
    id_column = "hpglidentifier",
    file_column = "lpanamensisv36hisatfile"))
## Use the categorical zymodeme column as the experimental condition.
lp_expt <- set_expt_conditions(lp_expt, fact = "zymodemecategorical")
## Apply the nonzero = 8600 cutoff.
lp_expt <- subset_expt(lp_expt, nonzero = 8600)
## Filter genes whose product annotation matches these terms.
lp_expt <- semantic_expt_filter(
  lp_expt,
  semantic = c("amastin", "gp63", "leishmanolysin"),
  semantic_column = "annot_gene_product")
## Clean up the chosen metadata columns and make them factors.
lp_expt <- sanitize_expt_metadata(lp_expt, columns = sanitize_columns)
lp_expt <- set_expt_factors(lp_expt, columns = sanitize_columns, class = "factor")
## The samples (and read coverage) removed when filtering 8600 non-zero genes are:
## TMRC20002 TMRC20004 TMRC20006 TMRC20029 TMRC20008
## 11681227 564812 6670348 1658096 6249790
## subset_expt(): There were 74, now there are 69 samples.
## semantic_expt_filter(): Removed 68 genes.
## Library sizes, written to disk.
libsizes <- plot_libsize(lp_expt)
pp(
  file = "images/lp_expt_libsizes.png",
  image = libsizes[["plot"]],
  width = 12,
  height = 9)
## I think samples 7,10 should be removed at minimum, probably also 9,11

## Non-zero genes plot, printed inline.
nonzero <- plot_nonzero(lp_expt)
nonzero[["plot"]]
## Warning: ggrepel: 45 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## Boxplot of the counts, written to disk.
lp_box <- plot_boxplot(lp_expt)
## 5042 entries are 0. We are on a log scale, adding 1 to the data.
pp(
  file = "images/lp_expt_boxplot.png",
  image = lp_box,
  width = 12,
  height = 9)

## Library sizes before/after filtering: print both of the returned plots.
filter_plot <- plot_libsize_prepost(lp_expt)
filter_plot[["lowgene_plot"]]
## Warning: Using alpha for a discrete variable is not advised.
filter_plot[["count_plot"]]
Najib’s favorite plots are of course the PCA/TSNE. These are nice to look at in order to get a sense of the relationships between samples. They also provide a good opportunity to see what happens when one applies different normalizations, surrogate analyses, filters, etc. In addition, one may set different experimental factors as the primary ‘condition’ (usually the color of plots) and surrogate ‘batches’.
Make a categorical version of column ‘Q’ in the sample sheet with these parameters:
## Bin the numeric susceptibility column of the metadata into four categories and
## store the result as the metadata column "sus_category":
##   missing / not numeric  -> "unknown"
##   value <= 0.35          -> "resistant"
##   0.35 < value < 0.49    -> "ambiguous"
##   value >= 0.49          -> "sensitive"
## The ambiguous bin used to be 0.36 <= value <= 0.48, which left anything falling
## between the bins (e.g. 0.355 or 0.485) unlabelled, so it ended up in the factor
## as a stringified number.  Values with two decimal places are binned exactly as before.
starting <- as.numeric(pData(lp_expt)[["susceptibilityinfectionreduction32ugmlsbvhistoricaldata"]])
na_idx <- is.na(starting)
## Masking with !na_idx keeps the index vectors strictly TRUE/FALSE (no NA entries).
resist_idx <- !na_idx & starting <= 0.35
indeterminant_idx <- !na_idx & starting > 0.35 & starting < 0.49
susceptible_idx <- !na_idx & starting >= 0.49
## Start from "unknown" so every sample is guaranteed to receive a label.
sus_categorical <- rep("unknown", length(starting))
sus_categorical[resist_idx] <- "resistant"
sus_categorical[indeterminant_idx] <- "ambiguous"
sus_categorical[susceptible_idx] <- "sensitive"
pData(lp_expt$expressionset)[["sus_category"]] <- sus_categorical
## Use the susceptibility categories as the batch factor.
clinical_samples <- set_expt_batches(lp_expt, fact = sus_categorical)

## Quantile-normalized, cpm-converted, log2-transformed and filtered data for the PCA.
clinical_norm <- sm(
  normalize_expt(
    clinical_samples,
    norm = "quant",
    transform = "log2",
    convert = "cpm",
    batch = FALSE,
    filter = TRUE))
zymo_pca <- plot_pca(
  clinical_norm,
  plot_title = "PCA of parasite expression values",
  plot_labels = FALSE)
pp(file = "images/zymo_pca_sus_shape.png", image = zymo_pca[["plot"]])
zymo_3dpca <- plot_3d_pca(zymo_pca)
zymo_3dpca[["plot"]]

## The same normalization without the quantile step, used for the TSNE.
clinical_n <- sm(
  normalize_expt(
    clinical_samples,
    transform = "log2",
    convert = "cpm",
    batch = FALSE,
    filter = TRUE))
zymo_tsne <- plot_tsne(clinical_n, plot_title = "TSNE of parasite expression values")
zymo_tsne[["plot"]]
## Repeat the clinical PCA/TSNE with batch = "svaseq" added to the normalization.
clinical_nb <- normalize_expt(
  clinical_samples,
  convert = "cpm",
  transform = "log2",
  filter = TRUE,
  batch = "svaseq")
## Removing 144 low-count genes (8566 remaining).
## batch_counts: Before batch/surrogate estimation, 904 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 3181 entries are 0<x<1: 1%.
## Setting 349 low elements to zero.
## transform_counts: Found 349 values equal to 0, adding 1 to the matrix.
clinical_nb_pca <- plot_pca(
  clinical_nb,
  plot_title = "PCA of parasite expression values",
  plot_labels = FALSE)
pp(
  file = "images/clinical_nb_pca_sus_shape.png",
  image = clinical_nb_pca[["plot"]])
clinical_nb_tsne <- plot_tsne(
  clinical_nb,
  plot_title = "TSNE of parasite expression values")
clinical_nb_tsne[["plot"]]
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Correlation heatmap of the normalized data; the title keeps its embedded line
## breaks, written here as \n escapes instead of literal newlines.
corheat <- plot_corheat(
  clinical_norm,
  plot_title = "Correlation heatmap of parasite\nexpression values\n")
corheat[["plot"]]
## Print the plot returned by plot_sm() for the same data.
plot_sm(clinical_norm)[["plot"]]
## Performing correlation.
## Cure/fail view of the data: condition is the clinical category, batch is the
## susceptibility category.
cf_expt <- set_expt_batches(
  set_expt_conditions(lp_expt, fact = "clinicalcategorical"),
  fact = sus_categorical)

## PCA without surrogate estimation.
cf_norm <- normalize_expt(
  cf_expt,
  convert = "cpm",
  transform = "log2",
  norm = "quant",
  filter = TRUE)
## Removing 144 low-count genes (8566 remaining).
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
start_cf <- plot_pca(
  cf_norm,
  plot_title = "PCA of parasite expression values",
  plot_labels = FALSE)
pp(file = "images/cf_sus_shape.png", image = start_cf[["plot"]])

## PCA with batch = "svaseq".
cf_nb <- normalize_expt(
  cf_expt,
  convert = "cpm",
  transform = "log2",
  norm = "quant",
  filter = TRUE,
  batch = "svaseq")
## Warning in normalize_expt(cf_expt, convert = "cpm", transform = "log2", :
## Quantile normalization and sva do not always play well together.
## Removing 144 low-count genes (8566 remaining).
## batch_counts: Before batch/surrogate estimation, 2 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 3869 entries are 0<x<1: 1%.
## Setting 185 low elements to zero.
## transform_counts: Found 185 values equal to 0, adding 1 to the matrix.
cf_nb_pca <- plot_pca(
  cf_nb,
  plot_title = "PCA of parasite expression values",
  plot_labels = FALSE)
pp(file = "images/cf_sus_share_nb.png", image = cf_nb_pca[["plot"]])
## Normalize the cure/fail data and query the first six principal components
## against four metadata factors.
cf_norm <- normalize_expt(
  cf_expt,
  transform = "log2",
  convert = "cpm",
  filter = TRUE,
  norm = "quant")
## Removing 144 low-count genes (8566 remaining).
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
test <- pca_information(
  cf_norm,
  expt_factors = c(
    "clinicalcategorical", "zymodemecategorical",
    "pathogenstrain", "passagenumber"),
  num_components = 6,
  plot_pcas = TRUE)
## Table of ANOVA p-values, one row per factor and one column per component.
test[["anova_p"]]
## PC1 PC2 PC3 PC4 PC5 PC6
## clinicalcategorical 0.1111934 0.33404 8.018e-03 0.02413 0.47581 0.592902
## zymodemecategorical 0.0005176 0.74336 1.232e-01 0.42741 0.43642 0.062042
## pathogenstrain 0.7086904 0.55723 3.573e-05 0.10951 0.02648 0.340918
## passagenumber 0.2663969 0.00033 3.641e-02 0.15180 0.03254 0.000199
test[["cor_heatmap"]]
## Susceptibility view of the data: condition is the susceptibility category,
## batch is the zymodeme.
sus_expt <- set_expt_batches(
  set_expt_conditions(lp_expt, fact = "sus_category"),
  fact = "zymodemecategorical")

## PCA of the quantile-normalized data.
sus_norm <- normalize_expt(
  sus_expt,
  transform = "log2",
  convert = "cpm",
  norm = "quant",
  filter = TRUE)
## Removing 144 low-count genes (8566 remaining).
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
sus_pca <- plot_pca(
  sus_norm,
  plot_title = "PCA of parasite expression values",
  plot_labels = FALSE)
pp(file = "images/sus_norm_pca.png", image = sus_pca[["plot"]])

## PCA with batch = "svaseq" (no quantile normalization here).
sus_nb <- normalize_expt(
  sus_expt,
  transform = "log2",
  convert = "cpm",
  batch = "svaseq",
  filter = TRUE)
## Removing 144 low-count genes (8566 remaining).
## batch_counts: Before batch/surrogate estimation, 904 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 3181 entries are 0<x<1: 1%.
## Setting 217 low elements to zero.
## transform_counts: Found 217 values equal to 0, adding 1 to the matrix.
sus_nb_pca <- plot_pca(
  sus_nb,
  plot_title = "PCA of parasite expression values",
  plot_labels = FALSE)
pp(file = "images/sus_nb_pca.png", image = sus_nb_pca[["plot"]])
At this time, we do not have very many samples, so the set of metrics/plots is fairly limited. There is really only one factor in the metadata which we can use for performing differential expression analyses, the ‘zymodeme’.
The following sections perform a series of analyses which seek to elucidate differences between the zymodemes 2.2 and 2.3 either through differential expression or variant profiles.
TODO: Do this with and without sva and compare the results.
## Differential expression between the zymodeme 2.2 and 2.3 samples.
## Keep only the samples whose condition is z2.2 or z2.3.
zy_expt <- subset_expt(lp_expt, subset = "condition=='z2.2'|condition=='z2.3'")
## subset_expt(): There were 69, now there are 34 samples.
zy_norm <- normalize_expt(zy_expt, filter = TRUE, convert = "cpm", norm = "quant")
## Removing 166 low-count genes (8544 remaining).
## Run the pairwise comparisons twice: without any batch/surrogate term in the
## model, and with svaseq surrogates.  The "nobatch" run previously also requested
## svaseq, making it a duplicate of zy_de and the comparison meaningless.
zy_de_nobatch <- sm(all_pairwise(zy_expt, filter = TRUE, model_batch = FALSE))
zy_de <- sm(all_pairwise(zy_expt, filter = TRUE, model_batch = "svaseq"))
## NOTE(review): 'ver' is interpolated into the output filenames but is not defined
## in this part of the document -- confirm it is set before this block runs.
zy_table <- sm(combine_de_tables(zy_de, excel = glue::glue("excel/zy_tables-v{ver}.xlsx")))
zy_sig <- sm(extract_significant_genes(zy_table, excel = glue::glue("excel/zy_sig-v{ver}.xlsx")))
## Show the DESeq2 MA plot for the z2.3 vs z2.2 contrast.
zy_table[["plots"]][["z23_vs_z22"]][["deseq_ma_plots"]][["plot"]]
In contrast, we can search for genes which are differentially expressed with respect to cure/failure status.
## Cure/fail differential expression with svaseq in the model, followed by the
## combined and significant-gene tables written to excel.
cf_de <- sm(
  all_pairwise(cf_expt, filter = TRUE, model_batch = "svaseq"))
cf_table <- sm(
  combine_de_tables(
    cf_de,
    excel = glue::glue("excel/cf_tables-v{ver}.xlsx")))
cf_sig <- sm(
  extract_significant_genes(
    cf_table,
    excel = glue::glue("excel/cf_sig-v{ver}.xlsx")))
Finally, we can use our category of susceptibility and look for genes which change from sensitive to resistant. Keep in mind, though, that for the moment we have a lot of ambiguous and unknown strains.
## Susceptibility differential expression with svaseq in the model, followed by
## the combined and significant-gene tables written to excel.
sus_de <- sm(
  all_pairwise(sus_expt, filter = TRUE, model_batch = "svaseq"))
sus_table <- sm(
  combine_de_tables(
    sus_de,
    excel = glue::glue("excel/sus_tables-v{ver}.xlsx")))
sus_sig <- sm(
  extract_significant_genes(
    sus_table,
    excel = glue::glue("excel/sus_sig-v{ver}.xlsx")))
## First 20 rows of the DESeq2 'ups' table for sensitive vs. resistant.
knitr::kable(
  head(sus_sig[["deseq"]][["ups"]][["sensitive_vs_resistant"]], n = 20))
gid | annotgeneproduct | annotgenetype | chromosome | start | end | strand | annotgeneentrezid | annotgenename | annotstrand | annotchromosome | annotcdslength | length | deseq_logfc | deseq_adjp | edger_logfc | edger_adjp | limma_logfc | limma_adjp | basic_nummed | basic_denmed | basic_numvar | basic_denvar | basic_logfc | basic_t | basic_p | basic_adjp | deseq_basemean | deseq_lfcse | deseq_stat | deseq_p | ebseq_fc | ebseq_logfc | ebseq_c1mean | ebseq_c2mean | ebseq_mean | ebseq_var | ebseq_postfc | ebseq_ppee | ebseq_ppde | ebseq_adjp | edger_logcpm | edger_lr | edger_p | limma_ave | limma_t | limma_b | limma_p | limma_adjp_ihw | deseq_adjp_ihw | edger_adjp_ihw | ebseq_adjp_ihw | basic_adjp_ihw | lfc_meta | lfc_var | lfc_varbymed | p_meta | p_var | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
LPAL13_000044900 | LPAL13_000044900 | actin-related protein 2, putative | protein coding | LPAL13_SCAF000645 | 507 | 1685 | - | reverse | Not Assigned | 1179.0 | 1178 | 28.790 | 0e+00 | 13.450 | 0.0000 | 9.1400 | 0.2589 | 3.9680 | -4.1900 | 15.758 | 0.1218 | 8.1590 | 10.190 | 0.0000 | 0.0000 | 867.100 | 1.1860 | 24.290 | 0 | 117203.26 | 16.839 | 0.0000 | 1172.023 | 813.905 | 5.037e+05 | 327.867 | 1.0000 | 0e+00 | 1.0000 | 4.9820 | 55.270 | 0.0000 | 1.3800 | 1.5830 | -4.3100 | 0.1182 | 2.588e-01 | 3.794e-126 | 2.374e-10 | 0.000e+00 | 1.028e-07 | 14.700 | 7.202e+00 | 4.899e-01 | 3.940e-02 | 4.657e-03 | ||
LPAL13_350020000 | LPAL13_350020000 | hypothetical protein, conserved | protein coding | LpaL13_35 | 484442 | 484858 | - | reverse | 35 | 417.0 | 416 | 17.500 | 0e+00 | 2.942 | 0.0493 | -0.2957 | 0.6963 | -4.5670 | -4.1900 | 3.211 | 0.1218 | -0.3763 | -1.008 | 0.3224 | 0.4835 | 4.138 | 1.1860 | 14.750 | 0 | 294.37 | 8.201 | 0.0000 | 2.934 | 2.037 | 9.619e+01 | 1.618 | 1.0000 | 0e+00 | 1.0000 | -2.9030 | 6.485 | 0.0109 | -4.5860 | -0.6233 | -4.8550 | 0.5352 | 8.534e-01 | 2.079e-45 | 5.396e-02 | 0.000e+00 | 4.998e-01 | 5.380 | 5.332e+01 | 9.910e+00 | 1.820e-01 | 9.358e-02 | ||
LPAL13_190021900 | LPAL13_190021900 | hypothetical protein, conserved | protein coding | LpaL13_19 | 595931 | 597058 | - | reverse | 19 | 1128.0 | 1127 | 15.810 | 0e+00 | 1.768 | 0.2763 | -0.5523 | 0.4411 | -5.0370 | -4.1900 | 3.376 | 0.1218 | -0.8466 | -2.215 | 0.0352 | 0.1021 | 4.940 | 1.4840 | 10.660 | 0 | 515.35 | 9.009 | 0.0000 | 5.144 | 3.572 | 4.312e+02 | 2.597 | 1.0000 | 0e+00 | 1.0000 | -2.8270 | 2.303 | 0.1291 | -4.7900 | -1.1280 | -4.6280 | 0.2634 | 5.553e-01 | 2.793e-23 | 2.761e-01 | 0.000e+00 | 1.424e-01 | 4.609 | 5.058e+01 | 1.097e+01 | 1.308e-01 | 1.735e-02 | ||
LPAL13_000035800 | LPAL13_000035800 | hypothetical protein | protein coding | LPAL13_SCAF000500 | 737 | 1006 | - | reverse | Not Assigned | 270.0 | 269 | 14.830 | 0e+00 | 14.040 | 0.0000 | 10.4400 | 0.3129 | 5.3280 | -3.9500 | 15.105 | 0.4187 | 9.2790 | 11.580 | 0.0000 | 0.0000 | 2937.000 | 1.1750 | 12.620 | 0 | 29229.70 | 14.835 | 0.1385 | 4339.174 | 3013.358 | 1.388e+07 | 1210.588 | 0.0000 | 0e+00 | 0.0000 | 6.7360 | 79.660 | 0.0000 | 2.3090 | 1.4270 | -4.4450 | 0.1583 | 3.145e-01 | 7.141e-33 | 3.808e-15 | 0.000e+00 | 1.011e-08 | 15.280 | 1.846e+01 | 1.209e+00 | 5.277e-02 | 8.353e-03 | ||
LPAL13_320026300 | LPAL13_320026300 | hypothetical protein, conserved | protein coding | LpaL13_32 | 754268 | 755485 | - | reverse | 32 | 1218.0 | 1217 | 14.070 | 0e+00 | 13.230 | 0.0000 | 9.9100 | 0.3245 | 4.7860 | -4.0160 | 18.218 | 0.7007 | 8.8020 | 9.888 | 0.0000 | 0.0000 | 1581.000 | 1.1460 | 12.280 | 0 | 16393.43 | 14.001 | 0.1294 | 2285.921 | 1587.485 | 2.058e+06 | 618.456 | 0.0000 | 0e+00 | 0.0000 | 5.8450 | 71.220 | 0.0000 | 2.0570 | 1.3970 | -4.4710 | 0.1671 | 3.261e-01 | 2.635e-31 | 1.367e-13 | 0.000e+00 | 5.579e-08 | 12.060 | 3.904e+00 | 3.237e-01 | 5.570e-02 | 9.307e-03 | ||
LPAL13_000051300 | LPAL13_000051300 | hypothetical protein, conserved | protein coding | LPAL13_SCAF000772 | 11 | 2344 | + | forward | Not Assigned | 2334.0 | 2333 | 8.967 | 0e+00 | 9.591 | 0.0000 | 3.8260 | 0.2711 | 0.2400 | -3.9870 | 10.249 | 0.5655 | 4.2270 | 6.223 | 0.0000 | 0.0000 | 143.300 | 1.2930 | 6.937 | 0 | 1623.33 | 10.665 | 0.0951 | 170.535 | 118.456 | 6.846e+04 | 52.767 | 0.0000 | 0e+00 | 0.0000 | 2.4300 | 31.780 | 0.0000 | -1.0670 | 1.5430 | -4.3460 | 0.1275 | 3.418e-01 | 1.942e-09 | 1.628e-06 | 0.000e+00 | 3.848e-05 | 7.556 | 4.714e+00 | 6.239e-01 | 4.250e-02 | 5.419e-03 | ||
LPAL13_000053200 | LPAL13_000053200 | hypothetical protein | protein coding | LPAL13_SCAF000804 | 5037 | 5249 | - | reverse | Not Assigned | 213.0 | 212 | 8.808 | 0e+00 | 10.230 | 0.0000 | 5.8820 | 0.0404 | 1.0040 | -4.1900 | 9.085 | 0.1218 | 5.1940 | 8.488 | 0.0000 | 0.0000 | 78.670 | 1.1190 | 7.873 | 0 | 12683.94 | 13.631 | 0.0000 | 126.829 | 88.076 | 7.967e+03 | 36.176 | 1.0000 | 0e+00 | 1.0000 | 1.5630 | 48.860 | 0.0000 | -0.9051 | 2.7470 | -2.9650 | 0.0077 | 4.049e-02 | 2.700e-12 | 1.331e-09 | 0.000e+00 | 9.009e-07 | 9.230 | 3.524e+00 | 3.818e-01 | 2.565e-03 | 1.974e-05 | ||
LPAL13_000040700 | LPAL13_000040700 | hypothetical protein, conserved | protein coding | LPAL13_SCAF000598 | 54 | 1067 | + | forward | Not Assigned | 1014.0 | 1013 | 6.621 | 0e+00 | 8.066 | 0.0000 | 2.9160 | 0.1304 | -1.2440 | -4.1900 | 6.505 | 0.1218 | 2.9470 | 5.658 | 0.0000 | 0.0002 | 21.230 | 1.1850 | 5.589 | 0 | 2601.37 | 11.345 | 0.0000 | 26.004 | 18.058 | 4.051e+02 | 7.843 | 1.0000 | 0e+00 | 1.0000 | -0.1262 | 29.820 | 0.0000 | -2.3940 | 2.0710 | -3.8370 | 0.0422 | 1.303e-01 | 2.394e-06 | 3.756e-06 | 0.000e+00 | 1.737e-04 | 5.924 | 2.243e+00 | 3.786e-01 | 1.407e-02 | 5.936e-04 | ||
LPAL13_000017600 | LPAL13_000017600 | hypothetical protein, conserved | protein coding | LPAL13_SCAF000146 | 359 | 586 | + | forward | Not Assigned | 228.0 | 227 | 6.561 | 0e+00 | 6.541 | 0.0000 | 5.8580 | 0.0947 | 4.4470 | -1.1880 | 4.357 | 2.8136 | 5.6350 | 8.593 | 0.0000 | 0.0000 | 641.800 | 0.6968 | 9.416 | 0 | 80.11 | 6.324 | 12.0678 | 967.579 | 675.617 | 3.961e+05 | 63.477 | 0.0000 | 1e+00 | 0.0000 | 4.5460 | 53.840 | 0.0000 | 2.3770 | 2.2720 | -3.4020 | 0.0263 | 9.437e-02 | 9.787e-18 | 2.802e-10 | 9.782e-01 | 1.186e-06 | 7.024 | 5.957e+00 | 8.482e-01 | 8.770e-03 | 2.307e-04 | ||
LPAL13_300029400 | LPAL13_300029400 | hypothetical protein, conserved | protein coding | LpaL13_30 | 853953 | 854150 | - | reverse | 30 | 198.0 | 197 | 6.188 | 0e+00 | 6.104 | 0.0000 | 4.8200 | 0.0056 | 1.7310 | -2.3560 | 1.486 | 1.8118 | 4.0860 | 8.631 | 0.0000 | 0.0000 | 90.410 | 0.7565 | 8.179 | 0 | 59.50 | 5.895 | 2.0963 | 125.321 | 87.669 | 9.233e+03 | 22.740 | 0.0000 | 1e+00 | 0.0000 | 1.7260 | 47.780 | 0.0000 | 0.0613 | 3.6880 | -1.0330 | 0.0005 | 6.288e-03 | 4.385e-13 | 1.950e-09 | 9.220e-01 | 9.535e-06 | 5.956 | 9.318e-01 | 1.564e-01 | 1.510e-04 | 6.840e-08 | ||
LPAL13_080010600 | LPAL13_080010600 | hypothetical protein, conserved | protein coding | LpaL13_08 | 195555 | 195749 | - | reverse | 8 | 195.0 | 194 | 6.085 | 0e+00 | 7.559 | 0.0000 | 2.3050 | 0.0851 | -1.8760 | -4.1900 | 4.928 | 0.1218 | 2.3150 | 5.072 | 0.0000 | 0.0005 | 11.600 | 1.1850 | 5.135 | 0 | 1847.08 | 10.851 | 0.0000 | 18.461 | 12.820 | 4.800e+02 | 6.091 | 1.0000 | 0e+00 | 1.0000 | -0.9235 | 25.500 | 0.0000 | -3.1370 | 2.3350 | -3.5650 | 0.0226 | 1.065e-01 | 2.046e-05 | 2.278e-05 | 0.000e+00 | 7.148e-04 | 5.036 | 4.676e+00 | 9.285e-01 | 7.517e-03 | 1.695e-04 | ||
LPAL13_000011700 | LPAL13_000011700 | hypothetical protein | protein coding | LPAL13_SCAF000076 | 101 | 364 | - | reverse | Not Assigned | 264.0 | 263 | 6.023 | 0e+00 | 7.515 | 0.0000 | 2.6230 | 0.0730 | -1.3920 | -4.1900 | 6.768 | 0.1218 | 2.7980 | 5.271 | 0.0000 | 0.0004 | 14.620 | 1.2020 | 5.009 | 0 | 2444.22 | 11.255 | 0.0000 | 24.432 | 16.967 | 4.690e+02 | 7.539 | 1.0000 | 0e+00 | 1.0000 | -0.5872 | 24.350 | 0.0000 | -2.9280 | 2.4220 | -3.4300 | 0.0181 | 8.226e-02 | 3.391e-05 | 3.780e-05 | 0.000e+00 | 5.174e-04 | 4.986 | 1.302e+00 | 2.610e-01 | 6.037e-03 | 1.093e-04 | ||
LPAL13_040019400 | LPAL13_040019400 | hypothetical protein | protein coding | LpaL13_04 | 440768 | 441127 | - | reverse | 4 | 360.0 | 359 | 5.619 | 0e+00 | 5.475 | 0.0000 | 3.4630 | 0.0330 | -0.4122 | -3.4870 | 1.700 | 1.1787 | 3.0750 | 7.348 | 0.0000 | 0.0000 | 35.230 | 0.9680 | 5.804 | 0 | 48.41 | 5.597 | 0.6939 | 34.070 | 23.871 | 1.800e+03 | 8.712 | 0.0000 | 0e+00 | 0.0000 | 0.3962 | 28.430 | 0.0000 | -1.6470 | 2.8520 | -2.8270 | 0.0058 | 3.288e-02 | 8.378e-07 | 6.754e-06 | 0.000e+00 | 1.261e-05 | 4.858 | 5.010e-02 | 1.031e-02 | 1.921e-03 | 1.107e-05 | ||
LPAL13_350011800 | LPAL13_350011800 | hypothetical protein, conserved | protein coding | LpaL13_35 | 171009 | 171242 | + | forward | 35 | 234.0 | 233 | 5.114 | 0e+00 | 5.092 | 0.0000 | 4.2440 | 0.0085 | 2.8760 | -0.8274 | 2.432 | 0.1624 | 3.7030 | 11.060 | 0.0000 | 0.0000 | 180.500 | 0.5840 | 8.757 | 0 | 31.92 | 4.997 | 9.4332 | 301.450 | 212.223 | 5.943e+04 | 24.041 | 0.0000 | 1e+00 | 0.0000 | 2.7130 | 52.890 | 0.0000 | 1.2340 | 3.4990 | -0.9341 | 0.0008 | 8.501e-03 | 3.221e-15 | 3.187e-10 | 9.139e-01 | 7.140e-09 | 4.970 | 1.003e+00 | 2.017e-01 | 2.770e-04 | 2.301e-07 | ||
LPAL13_080010800 | LPAL13_080010800 | hypothetical protein | protein coding | LpaL13_08 | 199409 | 199792 | - | reverse | 8 | 384.0 | 383 | 5.103 | 1e-04 | 6.605 | 0.0000 | 1.6340 | 0.2740 | -2.3530 | -4.1900 | 4.142 | 0.1218 | 1.8380 | 4.371 | 0.0002 | 0.0020 | 10.840 | 1.0900 | 4.681 | 0 | 1048.70 | 10.034 | 0.0000 | 10.477 | 7.276 | 1.077e+02 | 3.577 | 1.0000 | 0e+00 | 1.0000 | -0.8700 | 24.340 | 0.0000 | -3.1120 | 1.5350 | -4.3530 | 0.1294 | 3.452e-01 | 8.581e-05 | 3.781e-05 | 0.000e+00 | 2.099e-03 | 4.021 | 3.492e+00 | 8.685e-01 | 4.313e-02 | 5.581e-03 | ||
LPAL13_170014500 | LPAL13_170014500 | hypothetical protein, conserved | protein coding | LpaL13_17 | 361708 | 362040 | + | forward | 17 | 333.0 | 332 | 5.076 | 0e+00 | 4.987 | 0.0003 | 2.7700 | 0.0577 | -0.6435 | -3.1940 | 6.976 | 1.6024 | 2.5510 | 3.914 | 0.0004 | 0.0040 | 22.230 | 0.9987 | 5.082 | 0 | 43.02 | 5.427 | 1.0218 | 44.379 | 31.131 | 1.643e+03 | 10.334 | 0.0000 | 0e+00 | 0.0000 | -0.2522 | 18.620 | 0.0000 | -2.3990 | 2.5520 | -3.1990 | 0.0130 | 5.776e-02 | 1.753e-05 | 3.458e-04 | 0.000e+00 | 5.619e-03 | 4.250 | 5.433e-03 | 1.278e-03 | 4.329e-03 | 5.600e-05 | ||
LPAL13_200050100 | LPAL13_200050100 | hypothetical protein | protein coding | LpaL13_20.1 | 1627529 | 1627717 | + | forward | 20.1 | 189.0 | 188 | 4.805 | 0e+00 | 4.777 | 0.0000 | 4.8970 | 0.0029 | 2.4590 | -1.9470 | 1.007 | 2.4930 | 4.4060 | 8.528 | 0.0000 | 0.0000 | 121.100 | 0.6203 | 7.746 | 0 | 26.30 | 4.717 | 7.3797 | 194.359 | 137.226 | 2.167e+04 | 18.260 | 0.0000 | 1e+00 | 0.0000 | 2.1650 | 42.950 | 0.0000 | 0.8115 | 3.9780 | -0.2381 | 0.0002 | 2.978e-03 | 6.748e-12 | 1.553e-08 | 9.782e-01 | 3.559e-05 | 5.158 | 3.490e+00 | 6.767e-01 | 5.737e-05 | 9.873e-09 | ||
LPAL13_000011800 | LPAL13_000011800 | hypothetical protein, conserved | protein coding | LPAL13_SCAF000076 | 446 | 640 | - | reverse | Not Assigned | 195.0 | 194 | 4.650 | 3e-04 | 5.124 | 0.0006 | 1.0280 | 0.4707 | -2.5010 | -3.9760 | 3.657 | 0.6133 | 1.4750 | 3.282 | 0.0024 | 0.0148 | 11.660 | 1.0740 | 4.330 | 0 | 56.34 | 5.816 | 0.1523 | 9.134 | 6.389 | 7.337e+01 | 2.950 | 0.9995 | 5e-04 | 0.9995 | -0.8430 | 17.460 | 0.0000 | -3.0630 | 1.0660 | -4.6780 | 0.2901 | 4.710e-01 | 4.381e-04 | 5.725e-04 | 1.052e-02 | 1.691e-02 | 3.222 | 3.146e+00 | 9.765e-01 | 9.671e-02 | 2.805e-02 | ||
LPAL13_000014000 | LPAL13_000014000 | hypothetical protein | protein coding | LPAL13_SCAF000119 | 655 | 942 | + | forward | Not Assigned | 288.0 | 287 | 4.385 | 0e+00 | 4.368 | 0.0000 | 3.9720 | 0.0193 | 2.4360 | -1.2160 | 1.511 | 2.0003 | 3.6520 | 7.419 | 0.0000 | 0.0000 | 131.400 | 0.5415 | 8.097 | 0 | 19.75 | 4.304 | 9.7344 | 192.485 | 136.645 | 1.297e+04 | 14.798 | 0.0000 | 1e+00 | 0.0000 | 2.2840 | 48.520 | 0.0000 | 1.1030 | 3.1230 | -1.8390 | 0.0026 | 2.154e-02 | 7.791e-13 | 1.472e-09 | 9.782e-01 | 5.140e-05 | 4.454 | 1.205e+00 | 2.705e-01 | 8.787e-04 | 2.316e-06 | ||
LPAL13_000026500 | LPAL13_000026500 | hypothetical protein | protein coding | LPAL13_SCAF000301 | 144 | 494 | - | reverse | Not Assigned | 351.0 | 350 | 4.326 | 0e+00 | 4.262 | 0.0001 | 2.3650 | 0.2373 | 0.1451 | -2.4630 | 5.401 | 2.0725 | 2.6080 | 4.101 | 0.0003 | 0.0031 | 46.180 | 0.8041 | 5.380 | 0 | 21.34 | 4.415 | 2.5893 | 55.452 | 39.299 | 1.544e+03 | 9.481 | 0.0000 | 0e+00 | 0.0000 | 0.9106 | 22.390 | 0.0000 | -0.8720 | 1.6500 | -4.2470 | 0.1035 | 2.971e-01 | 4.800e-06 | 8.635e-05 | 0.000e+00 | 3.664e-03 | 3.528 | 1.101e-01 | 3.122e-02 | 3.450e-02 | 3.571e-03 |
## First 20 rows of the DESeq2 'downs' table for sensitive vs. resistant.
knitr::kable(
  head(sus_sig[["deseq"]][["downs"]][["sensitive_vs_resistant"]], n = 20))
gid | annotgeneproduct | annotgenetype | chromosome | start | end | strand | annotgeneentrezid | annotgenename | annotstrand | annotchromosome | annotcdslength | length | deseq_logfc | deseq_adjp | edger_logfc | edger_adjp | limma_logfc | limma_adjp | basic_nummed | basic_denmed | basic_numvar | basic_denvar | basic_logfc | basic_t | basic_p | basic_adjp | deseq_basemean | deseq_lfcse | deseq_stat | deseq_p | ebseq_fc | ebseq_logfc | ebseq_c1mean | ebseq_c2mean | ebseq_mean | ebseq_var | ebseq_postfc | ebseq_ppee | ebseq_ppde | ebseq_adjp | edger_logcpm | edger_lr | edger_p | limma_ave | limma_t | limma_b | limma_p | limma_adjp_ihw | deseq_adjp_ihw | edger_adjp_ihw | ebseq_adjp_ihw | basic_adjp_ihw | lfc_meta | lfc_var | lfc_varbymed | p_meta | p_var | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
LPAL13_000033300 | LPAL13_000033300 | hypothetical protein, conserved | protein coding | LPAL13_SCAF000463 | 551 | 811 | + | forward | Not Assigned | 261.0 | 260 | -5.322 | 0.0007 | -5.223 | 0.0020 | -5.983 | 0.0000 | -3.7320 | 3.4740 | 10.6232 | 0.0632 | -7.206 | -10.980 | 0 | 0e+00 | 132.20 | 1.2910 | -4.124 | 0.0000 | 0.1238 | -3.013 | 311.93 | 38.621 | 122.131 | 2.245e+04 | 0.1327 | 0.0000 | 0.0000 | 0.0000 | 2.2630 | 14.420 | 0.0001 | -1.1130 | -5.851 | 5.7350 | 0e+00 | 2.516e-05 | 8.893e-04 | 1.987e-03 | 0.000e+00 | 2.663e-08 | -5.509 | 0.000e+00 | 0.000e+00 | 6.127e-05 | 5.779e-09 | ||
LPAL13_000038400 | LPAL13_000038400 | expression-site associated gene (esag3), putative | protein coding | LPAL13_SCAF000573 | 101 | 1360 | + | forward | Not Assigned | 1260.0 | 1259 | -2.799 | 0.0000 | -2.809 | 0.0000 | -3.316 | 0.0001 | 4.6320 | 8.2380 | 3.1190 | 0.0291 | -3.606 | -10.100 | 0 | 0e+00 | 3715.00 | 0.5536 | -5.056 | 0.0000 | 0.1737 | -2.526 | 8823.76 | 1532.426 | 3760.333 | 1.554e+07 | 0.1785 | 0.0000 | 0.0000 | 0.0000 | 7.0750 | 28.830 | 0.0000 | 5.8210 | -5.237 | 4.7890 | 0e+00 | 1.258e-04 | 2.334e-05 | 5.627e-06 | 0.000e+00 | 7.122e-08 | -2.958 | 9.900e-03 | -3.347e-03 | 7.520e-07 | 7.760e-13 | ||
LPAL13_350063000 | LPAL13_350063000 | hypothetical protein | protein coding | LpaL13_35 | 1964328 | 1964543 | - | reverse | 35 | 216.0 | 215 | -2.781 | 0.0000 | -2.773 | 0.0000 | -3.437 | 0.0000 | -2.3210 | 1.2090 | 2.1742 | 0.2275 | -3.530 | -10.760 | 0 | 0e+00 | 21.51 | 0.4794 | -5.801 | 0.0000 | 0.1384 | -2.853 | 55.15 | 7.623 | 22.146 | 6.307e+02 | 0.1596 | 0.0000 | 1.0000 | 0.0000 | -0.3596 | 32.400 | 0.0000 | -1.4690 | -6.951 | 8.4370 | 0e+00 | 2.404e-06 | 7.052e-07 | 1.210e-06 | 1.000e+00 | 7.140e-09 | -3.001 | 1.424e-04 | -4.744e-05 | 6.983e-09 | 2.915e-17 | ||
LPAL13_140019300 | LPAL13_140019300 | bt1 family, putative | protein coding | LpaL13_14 | 530784 | 531350 | + | forward | 14 | 567.0 | 566 | -2.639 | 0.0000 | -2.644 | 0.0000 | -2.458 | 0.0000 | 4.6450 | 7.0660 | 0.4796 | 1.1129 | -2.421 | -6.978 | 0 | 2e-04 | 1878.00 | 0.3824 | -6.901 | 0.0000 | 0.1702 | -2.555 | 4611.26 | 784.782 | 1953.985 | 5.164e+06 | 0.1758 | 0.0000 | 1.0000 | 0.0000 | 6.0910 | 54.610 | 0.0000 | 5.3840 | -6.676 | 10.3300 | 0e+00 | 3.699e-06 | 1.409e-09 | 2.374e-10 | 1.000e+00 | 1.516e-04 | -2.635 | 1.954e-01 | -7.417e-02 | 1.855e-09 | 1.029e-17 | ||
LPAL13_000012000 | LPAL13_000012000 | hypothetical protein | protein coding | LPAL13_SCAF000080 | 710 | 1159 | - | reverse | Not Assigned | 450.0 | 449 | -2.602 | 0.0007 | -2.610 | 0.0004 | -3.114 | 0.0031 | 0.1005 | 3.9570 | 7.6499 | 0.1802 | -3.856 | -6.792 | 0 | 0e+00 | 210.50 | 0.6351 | -4.097 | 0.0000 | 0.2237 | -2.160 | 451.41 | 100.977 | 208.054 | 4.893e+04 | 0.2352 | 0.1870 | 0.8130 | 0.1870 | 2.9370 | 18.490 | 0.0000 | 1.3640 | -3.956 | 0.6667 | 2e-04 | 3.089e-03 | 8.965e-04 | 3.655e-04 | 7.672e-01 | 1.860e-05 | -2.741 | 3.114e-02 | -1.136e-02 | 8.143e-05 | 8.259e-09 | ||
LPAL13_310039200 | LPAL13_310039200 | hypothetical protein | protein coding | LpaL13_31 | 1301745 | 1301972 | - | reverse | 31 | 228.0 | 227 | -2.372 | 0.0000 | -2.379 | 0.0000 | -2.403 | 0.0000 | 1.2220 | 3.7790 | 1.3487 | 0.2180 | -2.557 | -9.416 | 0 | 0e+00 | 195.60 | 0.4028 | -5.888 | 0.0000 | 0.2457 | -2.025 | 396.35 | 97.357 | 188.716 | 3.434e+04 | 0.2570 | 0.4434 | 0.5566 | 0.4434 | 2.8330 | 37.170 | 0.0000 | 2.0290 | -5.735 | 6.6830 | 0e+00 | 3.061e-05 | 6.831e-07 | 1.937e-07 | 6.151e-01 | 2.663e-08 | -2.489 | 1.173e-01 | -4.714e-02 | 8.503e-08 | 2.044e-14 | ||
LPAL13_000012100 | LPAL13_000012100 | hypothetical protein | protein coding | LPAL13_SCAF000080 | 1637 | 1894 | - | reverse | Not Assigned | 258.0 | 257 | -2.248 | 0.0096 | -2.249 | 0.0089 | -3.387 | 0.0004 | -2.2050 | 1.2020 | 6.3050 | 0.6808 | -3.407 | -6.079 | 0 | 0e+00 | 31.28 | 0.6975 | -3.223 | 0.0013 | 0.2746 | -1.865 | 66.02 | 18.121 | 32.755 | 1.805e+03 | 0.3056 | 0.0536 | 0.9464 | 0.0536 | 0.2198 | 10.740 | 0.0010 | -1.4250 | -4.811 | 2.5060 | 0e+00 | 3.761e-04 | 9.729e-03 | 8.888e-03 | 9.371e-01 | 4.288e-05 | -2.655 | 4.154e-02 | -1.565e-02 | 7.749e-04 | 4.519e-07 | ||
LPAL13_310031000 | LPAL13_310031000 | hypothetical protein, conserved | protein coding | LpaL13_31 | 1075172 | 1075459 | - | reverse | 31 | 288.0 | 287 | -2.241 | 0.0000 | -2.241 | 0.0000 | -2.859 | 0.0000 | -2.0160 | 1.0070 | 3.4877 | 0.5506 | -3.023 | -6.944 | 0 | 0e+00 | 26.73 | 0.4390 | -5.104 | 0.0000 | 0.2698 | -1.890 | 55.87 | 15.064 | 27.532 | 1.035e+03 | 0.2953 | 0.1254 | 0.8746 | 0.1254 | 0.0391 | 26.060 | 0.0000 | -1.2030 | -6.166 | 6.4050 | 0e+00 | 1.323e-05 | 1.587e-05 | 1.825e-05 | 9.127e-01 | 4.714e-06 | -2.487 | 2.700e-03 | -1.085e-03 | 2.354e-07 | 2.736e-14 | ||
LPAL13_340039600 | LPAL13_340039600 | hypothetical protein | protein coding | LpaL13_34 | 1247554 | 1247757 | - | reverse | 34 | 204.0 | 203 | -2.208 | 0.0004 | -2.215 | 0.0002 | -2.697 | 0.0014 | 1.2470 | 4.2040 | 3.6334 | 0.0559 | -2.958 | -7.626 | 0 | 0e+00 | 225.10 | 0.5166 | -4.275 | 0.0000 | 0.2254 | -2.149 | 518.32 | 116.820 | 239.500 | 4.536e+04 | 0.2307 | 0.0000 | 1.0000 | 0.0000 | 3.0230 | 20.280 | 0.0000 | 1.9620 | -4.284 | 1.6790 | 1e-04 | 1.406e-03 | 5.729e-04 | 1.808e-04 | 9.782e-01 | 4.567e-06 | -2.359 | 1.405e-02 | -5.958e-03 | 2.846e-05 | 7.653e-10 | ||
LPAL13_310035500 | LPAL13_310035500 | hypothetical protein | protein coding | LpaL13_31 | 1198439 | 1198957 | - | reverse | 31 | 519.0 | 518 | -2.169 | 0.0265 | -2.123 | 0.0227 | -3.429 | 0.0000 | -4.1830 | -0.3803 | 4.1409 | 0.4809 | -3.802 | -8.310 | 0 | 0e+00 | 7.27 | 0.7714 | -2.812 | 0.0049 | 0.2939 | -1.767 | 18.43 | 5.409 | 9.386 | 3.219e+02 | 0.3480 | 0.0000 | 0.0000 | 0.0000 | -1.9310 | 8.444 | 0.0037 | -3.1810 | -6.740 | 5.7090 | 0e+00 | 3.053e-06 | 3.767e-02 | 2.266e-02 | 0.000e+00 | 3.015e-07 | -2.558 | 1.427e-01 | -5.579e-02 | 2.864e-03 | 6.553e-06 | ||
LPAL13_310031300 | LPAL13_310031300 | hypothetical protein, conserved | protein coding | LpaL13_31 | 1084772 | 1085059 | - | reverse | 31 | 288.0 | 287 | -1.983 | 0.0034 | -1.986 | 0.0032 | -3.070 | 0.0003 | -1.0680 | 2.0860 | 3.9512 | 0.7614 | -3.154 | -6.616 | 0 | 0e+00 | 63.55 | 0.5516 | -3.595 | 0.0003 | 0.2465 | -2.020 | 130.45 | 32.149 | 62.186 | 5.901e+03 | 0.2667 | 0.0527 | 0.9473 | 0.0527 | 1.2070 | 13.220 | 0.0003 | -0.2180 | -4.845 | 3.0360 | 0e+00 | 3.404e-04 | 4.648e-03 | 3.217e-03 | 8.854e-01 | 9.612e-06 | -2.320 | 2.907e-02 | -1.253e-02 | 2.033e-04 | 2.923e-08 | ||
LPAL13_140019100 | LPAL13_140019100 | bt1 family, putative | protein coding | LpaL13_14 | 525164 | 525514 | + | forward | 14 | 351.0 | 350 | -1.955 | 0.0000 | -1.960 | 0.0000 | -2.011 | 0.0000 | 3.9170 | 5.9980 | 0.3503 | 0.5491 | -2.081 | -8.231 | 0 | 0e+00 | 885.30 | 0.3037 | -6.435 | 0.0000 | 0.2333 | -2.100 | 1937.62 | 451.965 | 905.916 | 6.953e+05 | 0.2374 | 0.0000 | 1.0000 | 0.0000 | 5.0060 | 49.810 | 0.0000 | 4.5800 | -7.140 | 12.2000 | 0e+00 | 1.185e-06 | 4.354e-08 | 9.680e-10 | 9.139e-01 | 2.321e-05 | -1.977 | 6.493e-02 | -3.284e-02 | 3.148e-10 | 1.946e-19 | ||
LPAL13_050005000 | LPAL13_050005000 | hypothetical protein | protein coding | LpaL13_05 | 3394 | 3612 | - | reverse | 5 | 219.0 | 218 | -1.936 | 0.0085 | -1.942 | 0.0054 | -2.728 | 0.0003 | 0.1998 | 2.6770 | 2.0583 | 0.1721 | -2.477 | -7.913 | 0 | 0e+00 | 91.42 | 0.5913 | -3.275 | 0.0011 | 0.2820 | -1.826 | 177.99 | 50.188 | 89.238 | 6.186e+03 | 0.2912 | 0.0006 | 0.9994 | 0.0006 | 1.7220 | 11.960 | 0.0005 | 0.4951 | -4.939 | 3.6870 | 0e+00 | 3.023e-04 | 1.047e-02 | 5.406e-03 | 9.782e-01 | 9.009e-07 | -2.176 | 2.023e-02 | -9.296e-03 | 5.357e-04 | 2.765e-07 | ||
LPAL13_000038500 | LPAL13_000038500 | hypothetical protein | protein coding | LPAL13_SCAF000575 | 39 | 251 | + | forward | Not Assigned | 213.0 | 212 | -1.919 | 0.0126 | -1.926 | 0.0190 | -3.310 | 0.0001 | -1.9280 | 1.4320 | 4.6678 | 0.6770 | -3.360 | -6.743 | 0 | 0e+00 | 32.43 | 0.6149 | -3.122 | 0.0018 | 0.2833 | -1.820 | 77.47 | 21.939 | 38.905 | 2.414e+03 | 0.3098 | 0.1589 | 0.8411 | 0.1589 | 0.2246 | 8.871 | 0.0029 | -1.3010 | -5.462 | 4.3980 | 0e+00 | 7.805e-05 | 1.249e-02 | 2.088e-02 | 8.530e-01 | 7.838e-06 | -2.243 | 1.477e-01 | -6.584e-02 | 1.566e-03 | 2.139e-06 | ||
LPAL13_340039700 | LPAL13_340039700 | snare domain containing protein, putative | protein coding | LpaL13_34 | 1248192 | 1248947 | - | reverse | 34 | 756.0 | 755 | -1.757 | 0.0000 | -1.764 | 0.0000 | -1.884 | 0.0000 | 4.6120 | 6.6510 | 0.6865 | 0.0521 | -2.038 | -11.360 | 0 | 0e+00 | 1384.00 | 0.3134 | -5.606 | 0.0000 | 0.2859 | -1.806 | 2810.80 | 803.689 | 1416.973 | 1.132e+06 | 0.2899 | 0.0000 | 1.0000 | 0.0000 | 5.6510 | 36.560 | 0.0000 | 5.2460 | -6.216 | 8.5080 | 0e+00 | 1.131e-05 | 1.689e-06 | 2.435e-07 | 9.788e-01 | 5.841e-09 | -1.792 | 1.985e-02 | -1.108e-02 | 1.949e-08 | 3.035e-16 | ||
LPAL13_170015400 | LPAL13_170015400 | hypothetical protein, conserved | protein coding | LpaL13_17 | 395975 | 396307 | + | forward | 17 | 333.0 | 332 | -1.608 | 0.0000 | -1.615 | 0.0000 | -1.672 | 0.0001 | 1.2490 | 3.2680 | 0.9405 | 0.1360 | -2.019 | -9.032 | 0 | 0e+00 | 153.70 | 0.2829 | -5.684 | 0.0000 | 0.3146 | -1.669 | 270.09 | 84.958 | 141.528 | 1.297e+04 | 0.3199 | 0.0000 | 1.0000 | 0.0000 | 2.4770 | 33.070 | 0.0000 | 2.0470 | -5.245 | 4.8990 | 0e+00 | 1.550e-04 | 1.689e-06 | 1.051e-06 | 9.782e-01 | 6.425e-08 | -1.673 | 2.122e-02 | -1.269e-02 | 5.724e-07 | 9.452e-13 | ||
LPAL13_350073400 | LPAL13_350073400 | hypothetical protein | protein coding | LpaL13_35 | 2342701 | 2342883 | + | forward | 35 | 183.0 | 182 | -1.517 | 0.0077 | -1.521 | 0.0079 | -2.098 | 0.0006 | -0.1050 | 1.8100 | 0.9225 | 0.8801 | -1.915 | -5.600 | 0 | 4e-04 | 48.25 | 0.4575 | -3.315 | 0.0009 | 0.3090 | -1.694 | 113.46 | 35.053 | 59.011 | 6.521e+03 | 0.3352 | 0.0008 | 0.9992 | 0.0008 | 0.7712 | 11.050 | 0.0009 | -0.0586 | -4.629 | 2.5210 | 0e+00 | 7.422e-04 | 1.092e-02 | 8.673e-03 | 9.782e-01 | 4.687e-04 | -1.746 | 1.203e-02 | -6.891e-03 | 6.072e-04 | 2.613e-07 | ||
LPAL13_350013200 | LPAL13_350013200 | hypothetical protein, conserved | protein coding | LpaL13_35 | 223837 | 224070 | + | forward | 35 | 234.0 | 233 | -1.501 | 0.0055 | -1.502 | 0.0083 | -2.047 | 0.0003 | -2.0500 | -0.1507 | 1.3400 | 0.7031 | -1.899 | -5.540 | 0 | 2e-04 | 10.97 | 0.4373 | -3.432 | 0.0006 | 0.3587 | -1.479 | 24.88 | 8.918 | 13.796 | 2.950e+02 | 0.3917 | 0.7799 | 0.2201 | 0.7799 | -1.1530 | 10.920 | 0.0010 | -2.0910 | -4.904 | 2.1990 | 0e+00 | 2.971e-04 | 6.838e-03 | 8.365e-03 | 2.576e-01 | 2.400e-04 | -1.672 | 5.433e-03 | -3.250e-03 | 5.189e-04 | 2.281e-07 | ||
LPAL13_140019200 | LPAL13_140019200 | inositol-3-phosphate synthase | protein coding | LpaL13_14 | 527711 | 529291 | + | INO1 | forward | 14 | 1581.0 | 1580 | -1.449 | 0.0000 | -1.456 | 0.0000 | -1.517 | 0.0000 | 8.8250 | 10.3900 | 0.1716 | 0.4791 | -1.564 | -6.967 | 0 | 2e-04 | 19470.00 | 0.2714 | -5.341 | 0.0000 | 0.3510 | -1.510 | 36839.50 | 12931.785 | 20236.920 | 1.798e+08 | 0.3541 | 0.0000 | 1.0000 | 0.0000 | 9.4650 | 44.890 | 0.0000 | 9.2430 | -6.495 | 9.6440 | 0e+00 | 6.144e-06 | 4.912e-06 | 7.137e-09 | 1.000e+00 | 2.369e-04 | -1.517 | 4.357e-03 | -2.873e-03 | 3.475e-08 | 2.541e-15 | |
LPAL13_320038700 | LPAL13_320038700 | hypothetical protein, conserved | protein coding | LpaL13_32 | 1175024 | 1175257 | + | forward | 32 | 234.0 | 233 | -1.417 | 0.0000 | -1.424 | 0.0000 | -1.412 | 0.0000 | 2.5510 | 3.9570 | 0.4252 | 0.1118 | -1.406 | -8.528 | 0 | 0e+00 | 262.40 | 0.2333 | -6.074 | 0.0000 | 0.4144 | -1.271 | 436.05 | 180.701 | 258.724 | 2.162e+04 | 0.4183 | 0.0011 | 0.9989 | 0.0011 | 3.2550 | 39.100 | 0.0000 | 3.0070 | -5.752 | 6.8070 | 0e+00 | 3.038e-05 | 2.830e-07 | 9.066e-08 | 9.220e-01 | 1.867e-07 | -1.443 | 1.145e-02 | -7.937e-03 | 7.848e-08 | 1.809e-14 |
sus_ma <- sus_table[["plots"]][["sensitive_vs_resistant"]][["deseq_ma_plots"]][["plot"]]
pp(file = "images/sus_ma.png", image = sus_ma)
## test <- ggplt(sus_ma)
Now let us look for ontology categories which are increased in the 2.3 samples followed by the 2.2 samples.
## GO categories over-represented among the genes that are higher in the 2.3 group.
zy_go_up <- sm(
  simple_goseq(
    sig_genes = zy_sig[["deseq"]][["ups"]][[1]],
    go_db = lp_go,
    length_db = lp_lengths))
## GO categories over-represented among the genes that are higher in the 2.2 group.
zy_go_down <- sm(
  simple_goseq(
    sig_genes = zy_sig[["deseq"]][["downs"]][[1]],
    go_db = lp_go,
    length_db = lp_lengths))
In the function ‘combined_de_tables()’ above, one of the tasks performed is to look at the agreement among DESeq2, limma, and edgeR. The following Venn diagrams show that agreement for the genes observed with |log2 fold-change| >= 1 (i.e. at least a two-fold change in either direction) and adjusted p-value <= 0.05.
zy_table[["venns"]][[1]][["p_lfc1"]][["up_noweight"]]
zy_table[["venns"]][[1]][["p_lfc1"]][["down_noweight"]]
zy_go_up$pvalue_plots$bpp_plot_over
zy_go_down$pvalue_plots$bpp_plot_over
A reminder to myself: the data structures are named (zy|sus)_(de|table|sig).
## Pull the combined tables for the two contrasts and join them gene by gene.
zy_df <- zy_table[["data"]][["z23_vs_z22"]]
sus_df <- sus_table[["data"]][["sensitive_vs_resistant"]]
both_df <- merge(zy_df, sus_df, by = "row.names")
## Keep only the DESeq2 log fold-changes: ".x" comes from the zymodeme table and
## ".y" from the susceptibility table; rows are named by gene ID.
plot_df <- data.frame(
  "z23_vs_z22" = both_df[["deseq_logfc.x"]],
  "sensitive_vs_resistant" = both_df[["deseq_logfc.y"]],
  row.names = both_df[["Row.names"]])
compare <- plot_linear_scatter(plot_df)
## Warning in plot_multihistogram(df): NAs introduced by coercion
pp(file = "images/compare_sus_zy.png", image = compare$scatter)
compare$cor
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = -136, df = 8542, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.8343 -0.8210
## sample estimates:
## cor
## -0.8278
Najib read me an email listing off the gene names associated with the zymodeme classification. I took those names and cross referenced them against the Leishmania panamensis gene annotations and found the following:
They are:
Given these 6 gene IDs (NH has two gene IDs associated with it), I can do some looking for specific differences among the various samples.
The following creates a colorspace (red to green) heatmap showing the observed expression of these genes in every sample.
my_genes <- c("LPAL13_120010900", "LPAL13_340013000", "LPAL13_000054100",
"LPAL13_140006100", "LPAL13_180018500", "LPAL13_320022300",
"other")
my_names <- c("ALAT", "ASAT", "G6PD", "NHv1", "NHv2", "MPI", "other")
zymo_expt <- exclude_genes_expt(zy_norm, ids = my_genes, method = "keep")
## Before removal, there were 8544 genes, now there are 6.
## There are 34 samples which kept less than 90 percent counts.
## TMRC20001 TMRC20065 TMRC20005 TMRC20066 TMRC20039 TMRC20037 TMRC20038 TMRC20067
## 0.1313 0.1250 0.1325 0.1059 0.1303 0.1102 0.1129 0.1165
## TMRC20068 TMRC20041 TMRC20015 TMRC20009 TMRC20010 TMRC20016 TMRC20011 TMRC20012
## 0.1155 0.1181 0.1147 0.1137 0.1098 0.1059 0.1103 0.1207
## TMRC20013 TMRC20017 TMRC20014 TMRC20018 TMRC20021 TMRC20022 TMRC20077 TMRC20074
## 0.1205 0.1064 0.1089 0.1147 0.1063 0.1310 0.1221 0.1209
## TMRC20063 TMRC20053 TMRC20052 TMRC20064 TMRC20075 TMRC20051 TMRC20050 TMRC20049
## 0.1169 0.1184 0.1105 0.1140 0.1111 0.1285 0.1155 0.1400
## TMRC20062 TMRC20054
## 0.1288 0.1279
## Look each retained gene ID up in my_genes to find its name.  The subset holds
## only 6 of the 7 entries (the "other" placeholder matches nothing) and its row
## order is not guaranteed to follow my_genes, so passing my_names positionally
## could attach names to the wrong genes.
zymo_labels <- my_names[match(rownames(exprs(zymo_expt)), my_genes)]
zymo_heatmap <- plot_sample_heatmap(zymo_expt, row_label = zymo_labels)
zymo_heatmap
In contrast, the following plots take the set of genes which are shared among all differential expression methods (|lfc| >= 1.0 and adjp <= 0.05) and use them to make categories of genes which are increased in 2.3 or 2.2.
shared_zymo <- intersect_significant(zy_table)
## Deleting the file excel/intersect_significant.xlsx before writing the tables.
up_shared <- shared_zymo[["ups"]][[1]][["data"]][["all"]]
rownames(up_shared)
## [1] "LPAL13_000033300" "LPAL13_000012000" "LPAL13_310031300" "LPAL13_000038500"
## [5] "LPAL13_000038400" "LPAL13_000012100" "LPAL13_340039600" "LPAL13_310031000"
## [9] "LPAL13_310039200" "LPAL13_050005000" "LPAL13_350063000" "LPAL13_210015500"
## [13] "LPAL13_140019300" "LPAL13_180013900" "LPAL13_340039700" "LPAL13_170015400"
## [17] "LPAL13_270034100" "LPAL13_350013200" "LPAL13_250006300" "LPAL13_140019100"
## [21] "LPAL13_350012400" "LPAL13_350073400" "LPAL13_330021800" "LPAL13_240009700"
## [25] "LPAL13_000052700" "LPAL13_140019200" "LPAL13_250025700" "LPAL13_320038700"
## [29] "LPAL13_330021900" "LPAL13_210005000" "LPAL13_350073200" "LPAL13_310032500"
## [33] "LPAL13_230011200" "LPAL13_310028500" "LPAL13_230011400" "LPAL13_230011500"
## [37] "LPAL13_160014500" "LPAL13_050009600" "LPAL13_230011300" "LPAL13_040007800"
## [41] "LPAL13_160014100"
upshared_expt <- exclude_genes_expt(zy_norm, ids = rownames(up_shared), method = "keep")
## Before removal, there were 8544 genes, now there are 41.
## There are 34 samples which kept less than 90 percent counts.
## TMRC20001 TMRC20065 TMRC20005 TMRC20066 TMRC20039 TMRC20037 TMRC20038 TMRC20067
## 0.32583 0.40481 0.09692 0.35825 0.14393 0.38093 0.50866 0.29289
## TMRC20068 TMRC20041 TMRC20015 TMRC20009 TMRC20010 TMRC20016 TMRC20011 TMRC20012
## 0.34705 0.14461 0.39129 0.12494 0.37945 0.27684 0.13830 0.11794
## TMRC20013 TMRC20017 TMRC20014 TMRC20018 TMRC20021 TMRC20022 TMRC20077 TMRC20074
## 0.32652 0.13750 0.14642 0.30204 0.33880 0.11629 0.11344 0.14166
## TMRC20063 TMRC20053 TMRC20052 TMRC20064 TMRC20075 TMRC20051 TMRC20050 TMRC20049
## 0.12976 0.15457 0.40198 0.36990 0.30504 0.56640 0.12676 0.14631
## TMRC20062 TMRC20054
## 0.59300 0.49938
We can plot a quick heatmap to get a sense of the differences observed between the genes which are different between the two zymodemes.
## Take the row labels from the subset being plotted so that every row is named
## by the gene it actually holds; up_shared is a separate table whose row order
## is not guaranteed to match the rows of upshared_expt.
high_23_heatmap <- plot_sample_heatmap(upshared_expt,
                                       row_label = rownames(exprs(upshared_expt)))
high_23_heatmap
down_shared <- shared_zymo[["downs"]][[1]][["data"]][["all"]]
downshared_expt <- exclude_genes_expt(zy_norm, ids = rownames(down_shared), method = "keep")
## Before removal, there were 8544 genes, now there are 67.
## There are 34 samples which kept less than 90 percent counts.
## TMRC20001 TMRC20065 TMRC20005 TMRC20066 TMRC20039 TMRC20037 TMRC20038 TMRC20067
## 0.2732 0.2311 0.7574 0.2684 0.7461 0.2422 0.2348 0.2786
## TMRC20068 TMRC20041 TMRC20015 TMRC20009 TMRC20010 TMRC20016 TMRC20011 TMRC20012
## 0.2430 0.7511 0.2312 0.7425 0.2165 0.2537 0.6578 0.6401
## TMRC20013 TMRC20017 TMRC20014 TMRC20018 TMRC20021 TMRC20022 TMRC20077 TMRC20074
## 0.2153 0.7571 0.7355 0.2065 0.2035 0.7990 0.6559 0.7732
## TMRC20063 TMRC20053 TMRC20052 TMRC20064 TMRC20075 TMRC20051 TMRC20050 TMRC20049
## 0.7174 0.6859 0.2162 0.2381 0.2163 0.2301 0.7039 0.8006
## TMRC20062 TMRC20054
## 0.2268 0.2470
## Take the row labels from the subset being plotted so that every row is named
## by the gene it actually holds; down_shared is a separate table whose row order
## is not guaranteed to match the rows of downshared_expt.
high_22_heatmap <- plot_sample_heatmap(downshared_expt,
                                       row_label = rownames(exprs(downshared_expt)))
high_22_heatmap
Now I will combine our previous samples and our new samples in the hopes of finding variant positions which help elucidate currently unknown aspects of either group via their clustering to known samples from the other group. In other words, we do not know the zymodeme annotations for the old samples nor the strain identities (or the shortcut ‘chronic vs. self-healing’) for the new samples. I hope to make educated guesses given the variant profiles. There are some differences in how the previous and current data sets were analyzed (though I have since redone the old samples so it should be trivial to remove those differences now).
I added our 2016 data to a specific TMRC2 sample sheet, dated 20191203. Thus I will load the data here. That previous data was mapped using tophat, so I will also need to make some changes to the gene names to accommodate the two mappings.
## Load the 2016 samples; their count tables are named in the 'tophat2file' column.
old_expt <- sm(
  create_expt(
    "sample_sheets/tmrc2_samples_20191203.xlsx",
    file_column = "tophat2file"))
## Bring both sets of gene IDs to the same form so the two experiments can be
## combined: strip the leading "exon_" and then the mapper-specific suffix.
## Current samples: suffix ".E1".
tt <- lp_expt[["expressionset"]]
rownames(tt) <- gsub(
  pattern = "\\.E1$", replacement = "",
  x = gsub(pattern = "^exon_", replacement = "", x = rownames(tt)))
lp_expt$expressionset <- tt
## Previous samples: suffix ".1".
tt <- old_expt$expressionset
rownames(tt) <- gsub(
  pattern = "\\.1$", replacement = "",
  x = gsub(pattern = "^exon_", replacement = "", x = rownames(tt)))
old_expt$expressionset <- tt
rm(tt)
One other important caveat, we have a group of new samples which have not yet run through the variant search pipeline, so I need to remove them from consideration. Though it looks like they finished overnight…
## The next line drops the samples which are missing the SNP pipeline.
lp_snp <- subset_expt(lp_expt, subset="!is.na(pData(lp_expt)[['bcftable']])")
## subset_expt(): There were 69, now there are 65 samples.
new_snps <- sm(count_expt_snps(lp_snp, annot_column = "bcftable"))
## Error : 'preprocessing/TMRC20063/outputs/vcfutils_lpanamensis_v36/r1_trimmed_lpanamensis_v36_count.txt' does not exist in current working directory ('/mnt/cbcb/fs01_abelew/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_tmrc_2019').
## Error : 'preprocessing/TMRC20063/outputs/vcfutils_lpanamensis_v36/r1_trimmed_lpanamensis_v36_count.txt' does not exist in current working directory ('/mnt/cbcb/fs01_abelew/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_tmrc_2019').
## Error in Biobase::`sampleNames<-`(`*tmp*`, value = colnames(snp_exprs)): number of new names (64) should equal number of rows in AnnotatedDataFrame (65)
old_snps <- sm(count_expt_snps(old_expt, annot_column = "bcftable", snp_column = 2))
both_snps <- combine_expts(new_snps, old_snps)
## Error in combine_expts(new_snps, old_snps): object 'new_snps' not found
both_norm <- sm(normalize_expt(both_snps, transform = "log2", convert = "cpm", filter = TRUE))
## Error in normalize_expt(both_snps, transform = "log2", convert = "cpm", : object 'both_snps' not found
## strains <- both_norm[["design"]][["strain"]]
both_strain <- set_expt_conditions(both_norm, fact = "strain")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'pData': object 'both_norm' not found
The data structure ‘both_norm’ now contains our 2016 data along with the newer data collected since 2019.
The following plot shows the SNP profiles of all samples (old and new) where the colors at the top show either the 2.2 strains (orange), 2.3 strains (green), the previous samples (purple), or the various lab strains (pink etc).
old_new_variant_heatmap <- plot_disheat(both_norm)
## Error in plot_heatmap(expt_data, expt_colors = expt_colors, expt_design = expt_design, : object 'both_norm' not found
pp(file = "images/raw_snp_disheat.png", image = old_new_variant_heatmap,
height = 12, width = 12)
## Error in pp(file = "images/raw_snp_disheat.png", image = old_new_variant_heatmap, : object 'old_new_variant_heatmap' not found
The function get_snp_sets() takes the provided metadata factor (in this case ‘condition’) and looks for variants which are exclusive to each element in it. In this case, this is looking for differences between 2.2 and 2.3, as well as the set shared among them.
snp_sets <- get_snp_sets(both_snps, factor = "condition")
## Error in get_snp_sets(both_snps, factor = "condition"): object 'both_snps' not found
both_expt <- combine_expts(lp_expt, old_expt)
snp_genes <- sm(snps_vs_genes(both_expt, snp_sets, expt_name_col = "chromosome"))
## Error in snps_vs_genes(both_expt, snp_sets, expt_name_col = "chromosome"): object 'snp_sets' not found
## I think we have some metrics here we can plot...
snp_subset <- sm(snp_subset_genes(
both_expt, both_snps,
genes = c("LPAL13_120010900", "LPAL13_340013000", "LPAL13_000054100",
"LPAL13_140006100", "LPAL13_180018500", "LPAL13_320022300")))
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'fData': object 'both_snps' not found
zymo_heat <- plot_sample_heatmap(snp_subset, row_label = rownames(exprs(snp_subset)))
## Error in plot_sample_heatmap(snp_subset, row_label = rownames(exprs(snp_subset))): object 'snp_subset' not found
zymo_heat
## Error in eval(expr, envir, enclos): object 'zymo_heat' not found
Didn’t I create a set of densities by chromosome? Oh I think they come in from get_snp_sets()
clinical_sets <- get_snp_sets(new_snps, factor = "clinicalresponse")
## Error in get_snp_sets(new_snps, factor = "clinicalresponse"): object 'new_snps' not found
density_vec <- clinical_sets[["density"]]
## Error in eval(expr, envir, enclos): object 'clinical_sets' not found
chromosome_idx <- grep(pattern = "LpaL", x = names(density_vec))
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'grep': object 'density_vec' not found
density_df <- as.data.frame(density_vec[chromosome_idx])
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': object 'density_vec' not found
density_df[["chr"]] <- rownames(density_df)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'rownames': object 'density_df' not found
colnames(density_df) <- c("density_vec", "chr")
## Error in colnames(density_df) <- c("density_vec", "chr"): object 'density_df' not found
ggplot(density_df, aes_string(x = "chr", y = "density_vec")) +
ggplot2::geom_col() +
ggplot2::theme(axis.text = ggplot2::element_text(size = 10, colour = "black"),
axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5))
## Error in ggplot(density_df, aes_string(x = "chr", y = "density_vec")): object 'density_df' not found
## clinical_written <- write_variants(new_snps)
clinical_genes <- sm(snps_vs_genes(lp_expt, clinical_sets, expt_name_col = "chromosome"))
## Error in snps_vs_genes(lp_expt, clinical_sets, expt_name_col = "chromosome"): object 'clinical_sets' not found
snp_density <- merge(as.data.frame(clinical_genes[["summary_by_gene"]]),
as.data.frame(fData(lp_expt)),
by = "row.names")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'merge': error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': object 'clinical_genes' not found
snp_density <- snp_density[, c(1, 2, 4, 15)]
## Error in eval(expr, envir, enclos): object 'snp_density' not found
colnames(snp_density) <- c("name", "snps", "product", "length")
## Error in colnames(snp_density) <- c("name", "snps", "product", "length"): object 'snp_density' not found
snp_density[["product"]] <- tolower(snp_density[["product"]])
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'tolower': object 'snp_density' not found
snp_density[["length"]] <- as.numeric(snp_density[["length"]])
## Error in eval(expr, envir, enclos): object 'snp_density' not found
snp_density[["density"]] <- snp_density[["snps"]] / snp_density[["length"]]
## Error in eval(expr, envir, enclos): object 'snp_density' not found
snp_idx <- order(snp_density[["density"]], decreasing = TRUE)
## Error in eval(quote(list(...)), env): object 'snp_density' not found
snp_density <- snp_density[snp_idx, ]
## Error in eval(expr, envir, enclos): object 'snp_density' not found
removers <- c("amastin", "gp63", "leishmanolysin")
## Build one mask which is TRUE for every row whose product matches any of the
## patterns above, then drop all of those rows in a single step.
drop_idx <- Reduce(`|`, lapply(removers, grepl, x = snp_density[["product"]]))
snp_density <- snp_density[!drop_idx, ]
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'grepl': object 'snp_density' not found
## Filter these for [A|a]mastin gp63 Leishmanolysin
clinical_snps <- snps_intersections(lp_expt, clinical_sets, chr_column = "chromosome")
## Error in snps_intersections(lp_expt, clinical_sets, chr_column = "chromosome"): object 'clinical_sets' not found
fail_ref_snps <- as.data.frame(clinical_snps[["inters"]][["failure, reference strain"]])
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': object 'clinical_snps' not found
cure_snps <- as.data.frame(clinical_snps[["inters"]][["cure"]])
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': object 'clinical_snps' not found
head(fail_ref_snps)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'head': object 'fail_ref_snps' not found
head(cure_snps)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'head': object 'cure_snps' not found
annot <- fData(lp_expt)
clinical_interest <- as.data.frame(clinical_snps[["gene_summaries"]][["cure"]])
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': object 'clinical_snps' not found
clinical_interest <- merge(clinical_interest,
as.data.frame(clinical_snps[["gene_summaries"]][["failure, reference strain"]]),
by = "row.names")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'merge': object 'clinical_interest' not found
rownames(clinical_interest) <- clinical_interest[["Row.names"]]
## Error in eval(expr, envir, enclos): object 'clinical_interest' not found
clinical_interest[["Row.names"]] <- NULL
## Error in clinical_interest[["Row.names"]] <- NULL: object 'clinical_interest' not found
colnames(clinical_interest) <- c("cure_snps","fail_snps")
## Error in colnames(clinical_interest) <- c("cure_snps", "fail_snps"): object 'clinical_interest' not found
## Left-join the per-gene SNP counts onto the annotations so that genes without a
## cure/failure summary are kept (their new columns are NA).  The default inner
## join would drop those genes and leave fewer annotation rows than features.
annot <- merge(annot, clinical_interest, by = "row.names", all.x = TRUE)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'y' in selecting a method for function 'merge': object 'clinical_interest' not found
rownames(annot) <- annot[["Row.names"]]
annot[["Row.names"]] <- NULL
## merge() sorts its result by the key, so put the rows back into the feature
## order of lp_expt before replacing its feature data.
annot <- annot[rownames(fData(lp_expt)), ]
fData(lp_expt$expressionset) <- annot
The heatmap produced here should show the variants only for the zymodeme genes.
I am thinking that if we find clusters of locations which are variant, that might provide some PCR testing possibilities.
new_sets <- get_snp_sets(new_snps, factor = "phenotypiccharacteristics")
## Error in get_snp_sets(new_snps, factor = "phenotypiccharacteristics"): object 'new_snps' not found
summary(new_sets)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'summary': object 'new_sets' not found
## 1000000: 2.2
## 0100000: 2.3
summary(new_sets[["intersections"]][["10000"]])
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'summary': object 'new_sets' not found
summary(new_sets[["intersections"]][["01000"]])
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'summary': object 'new_sets' not found
Thus we see that there are 511 variants associated with 2.2 and 49,790 associated with 2.3.
The following function uses the positional data to look for sequential mismatches associated with zymodeme in the hopes that there will be some regions which would provide good potential targets for a PCR-based assay.
## Find tight clusters of variants within one intersection of a SNP set.
##
## The variant names of the chosen intersection are parsed with the pattern
## "^chr_(.+)_pos_(.+)_ref_.*$", sorted by chromosome and position, and the gap
## to the previous variant on the same chromosome is computed.  Runs of
## consecutive gaps <= maximum_separation are collected with rle(), and the
## last variant of every run which is at least 'minimum' gaps long is reported.
##
## Arguments:
##   snp_sets: list providing "intersections" (variant names per set) and
##     "set_names" (set label per intersection name); presumably the result of
##     get_snp_sets().
##   conditions: numeric index into the intersections, or a string compared
##     against the set names.  NULL selects the first intersection.
##   minimum: smallest run length (number of consecutive small gaps) to report.
##   maximum_separation: largest gap between neighbouring variants which still
##     counts as sequential.
##
## Returns a data.frame with columns 'chr' and 'pos' whose rownames are the
## variant names; it has zero rows when nothing qualifies.
sequential_variants <- function(snp_sets, conditions = NULL, minimum = 3, maximum_separation = 3) {
  if (is.null(conditions)) {
    conditions <- 1
  }
  intersection_sets <- snp_sets[["intersections"]]
  intersection_names <- snp_sets[["set_names"]]
  if (is.numeric(conditions)) {
    chosen_intersection <- conditions
  } else {
    ## Fail with a readable message instead of an opaque subscript error when
    ## the requested name is missing or ambiguous.
    matched <- which(intersection_names == conditions)
    if (length(matched) != 1) {
      stop("Expected exactly one set named '", conditions, "', but found ",
           length(matched), ".", call. = FALSE)
    }
    chosen_intersection <- names(intersection_names)[matched]
  }
  possible_positions <- intersection_sets[[chosen_intersection]]
  if (length(possible_positions) == 0) {
    ## Nothing to scan; the code below assumes at least one variant.
    message("The chosen set contains no variants.")
    return(data.frame(chr = character(0), pos = numeric(0)))
  }

  position_table <- data.frame(row.names = possible_positions)
  pat <- "^chr_(.+)_pos_(.+)_ref_.*$"
  position_table[["chr"]] <- gsub(pattern = pat, replacement = "\\1", x = rownames(position_table))
  position_table[["pos"]] <- as.numeric(gsub(pattern = pat, replacement = "\\2", x = rownames(position_table)))
  position_idx <- order(position_table[["chr"]], position_table[["pos"]])
  position_table <- position_table[position_idx, ]

  ## Distance to the previous variant on the same chromosome.  The first variant
  ## of every chromosome has no predecessor, so its distance is infinite; using
  ## its absolute position instead would flag variants lying within
  ## maximum_separation of the chromosome start and could join runs across
  ## chromosomes.
  chr <- position_table[["chr"]]
  pos <- position_table[["pos"]]
  n <- nrow(position_table)
  same_chr <- c(FALSE, chr[-1] == chr[-n])
  dist <- rep(Inf, n)
  dist[same_chr] <- diff(pos)[same_chr[-1]]
  position_table[["dist"]] <- dist

  sequentials <- position_table[["dist"]] <= maximum_separation
  message("There are ", sum(sequentials), " candidate regions.")
  ## Use run length encoding to find the runs of consecutive small gaps and
  ## record the length of each run on the row where it ends.
  rle_result <- rle(sequentials)
  rle_values <- rle_result[["values"]]
  rle_lengths <- rle_result[["lengths"]]
  true_sequentials <- rle_lengths[rle_values]
  rle_idx <- cumsum(rle_lengths)[which(rle_values)]
  last_sequential <- numeric(n)
  last_sequential[rle_idx] <- true_sequentials
  position_table[["last_sequential"]] <- last_sequential
  message("The maximum sequential set is: ", max(position_table[["last_sequential"]]), ".")
  wanted_idx <- position_table[["last_sequential"]] >= minimum
  wanted <- position_table[wanted_idx, c("chr", "pos")]
  return(wanted)
}
zymo22_sequentials <- sequential_variants(new_sets, conditions = "22")
## Error in sequential_variants(new_sets, conditions = "22"): object 'new_sets' not found
dim(zymo22_sequentials)
## Error in eval(expr, envir, enclos): object 'zymo22_sequentials' not found
## 7 candidate regions for zymodeme 2.2 -- thus I am betting that the reference strain is a 2.2
zymo23_sequentials <- sequential_variants(new_sets, conditions = "23",
minimum = 1, maximum_separation = 3)
## Error in sequential_variants(new_sets, conditions = "23", minimum = 1, : object 'new_sets' not found
dim(zymo23_sequentials)
## Error in eval(expr, envir, enclos): object 'zymo23_sequentials' not found
## In contrast, there are lots (587) of interesting regions for 2.3!
We can cross reference the variants against the zymodeme status and plot a heatmap of the results and hopefully see how they separate.
snp_genes <- sm(snps_vs_genes(lp_expt, new_sets, expt_name_col = "chromosome"))
## Error in snps_vs_genes(lp_expt, new_sets, expt_name_col = "chromosome"): object 'new_sets' not found
new_zymo_norm <- normalize_expt(new_snps, filter = TRUE, convert = "cpm", norm = "quant", transform = TRUE)
## Error in normalize_expt(new_snps, filter = TRUE, convert = "cpm", norm = "quant", : object 'new_snps' not found
new_zymo_norm <- set_expt_conditions(new_zymo_norm, fact = "phenotypiccharacteristics")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'pData': object 'new_zymo_norm' not found
zymo_heat <- plot_disheat(new_zymo_norm)
## Error in plot_heatmap(expt_data, expt_colors = expt_colors, expt_design = expt_design, : object 'new_zymo_norm' not found
zymo_heat[["plot"]]
## Error in eval(expr, envir, enclos): object 'zymo_heat' not found
Now let us try to make a heatmap which includes some of the annotation data.
des <- both_norm[["design"]]
## Error in eval(expr, envir, enclos): object 'both_norm' not found
undef_idx <- is.na(des[["strain"]])
## Error in eval(expr, envir, enclos): object 'des' not found
des[undef_idx, "strain"] <- "unknown"
## Error in des[undef_idx, "strain"] <- "unknown": object 'des' not found
##hmcols <- colorRampPalette(c("yellow","black","darkblue"))(256)
correlations <- hpgl_cor(exprs(both_norm))
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'exprs': object 'both_norm' not found
zymo_missing_idx <- is.na(des[["phenotypiccharacteristics"]])
## Error in eval(expr, envir, enclos): object 'des' not found
des[["phenotypiccharacteristics"]] <- as.character(des[["phenotypiccharacteristics"]])
## Error in eval(expr, envir, enclos): object 'des' not found
des[["clinicalcategorical"]] <- as.character(des[["clinicalcategorical"]])
## Error in eval(expr, envir, enclos): object 'des' not found
des[zymo_missing_idx, "phenotypiccharacteristics"] <- "unknown"
## Error in des[zymo_missing_idx, "phenotypiccharacteristics"] <- "unknown": object 'des' not found
mydendro <- list(
"clustfun" = hclust,
"lwd" = 2.0)
col_data <- as.data.frame(des[, c("phenotypiccharacteristics", "clinicalcategorical")])
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': object 'des' not found
unknown_clinical <- is.na(col_data[["clinicalcategorical"]])
## Error in eval(expr, envir, enclos): object 'col_data' not found
row_data <- as.data.frame(des[, c("strain")])
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': object 'des' not found
colnames(col_data) <- c("zymodeme", "outcome")
## Error in colnames(col_data) <- c("zymodeme", "outcome"): object 'col_data' not found
col_data[unknown_clinical, "outcome"] <- "undefined"
## Error in col_data[unknown_clinical, "outcome"] <- "undefined": object 'col_data' not found
colnames(row_data) <- c("strain")
## Error in colnames(row_data) <- c("strain"): object 'row_data' not found
myannot <- list(
"Col" = list("data" = col_data),
"Row" = list("data" = row_data))
## Error in eval(expr, envir, enclos): object 'col_data' not found
myclust <- list("cuth" = 1.0,
"col" = BrewerClusterCol)
mylabs <- list(
"Row" = list("nrow" = 4),
"Col" = list("nrow" = 4))
hmcols <- colorRampPalette(c("darkblue", "beige"))(240)
map1 <- annHeatmap2(
correlations,
dendrogram = mydendro,
annotation = myannot,
cluster = myclust,
labels = mylabs,
## The following controls if the picture is symmetric
scale = "none",
col = hmcols)
## Error in annHeatmap2(correlations, dendrogram = mydendro, annotation = myannot, : object 'correlations' not found
pp(file = "images/dendro_heatmap.png", image = map1, height = 20, width = 20)
## Error in pp(file = "images/dendro_heatmap.png", image = map1, height = 20, : object 'map1' not found
Print the larger heatmap so that all the labels appear. Keep in mind that as we get more samples, this image needs to continue getting bigger.
big heatmap
## Collect the variants observed in the z2.2 and z2.3 samples.
## Keep only the samples whose condition is z2.2 or z2.3.
pheno <- subset_expt(lp_expt, subset = "condition=='z2.2'|condition=='z2.3'")
## subset_expt(): There were 69, now there are 34 samples.
## Drop the samples which have no entry in the 'bcftable' metadata column.
pheno <- subset_expt(pheno, subset="!is.na(pData(pheno)[['bcftable']])")
## subset_expt(): There were 34, now there are 31 samples.
## Gather the variant data using the files named in the 'bcftable' column.
## NOTE(review): the recorded output shows this call failing because the count
## file for TMRC20063 was missing (30 columns found for 31 samples), which is
## why every later statement reports 'pheno_snps' as not found.
pheno_snps <- sm(count_expt_snps(pheno, annot_column = "bcftable"))
## Error : 'preprocessing/TMRC20063/outputs/vcfutils_lpanamensis_v36/r1_trimmed_lpanamensis_v36_count.txt' does not exist in current working directory ('/mnt/cbcb/fs01_abelew/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_tmrc_2019').
## Error : 'preprocessing/TMRC20063/outputs/vcfutils_lpanamensis_v36/r1_trimmed_lpanamensis_v36_count.txt' does not exist in current working directory ('/mnt/cbcb/fs01_abelew/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_tmrc_2019').
## Error in Biobase::`sampleNames<-`(`*tmp*`, value = colnames(snp_exprs)): number of new names (30) should equal number of rows in AnnotatedDataFrame (31)
## Number of samples in each condition; used later as the denominator of the
## per-condition proportions.
xref_prop <- table(pheno_snps[["conditions"]])
## Error in eval(quote(list(...)), env): object 'pheno_snps' not found
pheno_snps$conditions
## Error in eval(expr, envir, enclos): object 'pheno_snps' not found
## TRUE wherever the value for a variant in a sample is greater than 5.
idx_tbl <- exprs(pheno_snps) > 5
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'exprs': object 'pheno_snps' not found
## Empty table with one row per variant; the columns are added afterwards.
new_tbl <- data.frame(row.names = rownames(exprs(pheno_snps)))
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'rownames': error in evaluating the argument 'object' in selecting a method for function 'exprs': object 'pheno_snps' not found
## For every condition, store the fraction of its samples in which each
## variant passed the threshold recorded in idx_tbl.
for (n in names(xref_prop)) {
  idx_cols <- which(pheno_snps[["conditions"]] == n)
  ## drop = FALSE keeps a one-column matrix when a condition has only a single
  ## sample; without it the subset collapses to a vector and rowSums() fails.
  ## xref_prop[[n]] extracts the plain count; dividing by the length-1 table
  ## returned by xref_prop[n] relies on deprecated array recycling.
  prop_col <- rowSums(idx_tbl[, idx_cols, drop = FALSE]) / xref_prop[[n]]
  new_tbl[[n]] <- prop_col
}
## Error in eval(expr, envir, enclos): object 'xref_prop' not found
## Turn the per-condition proportions into the table handed to CMplot.
## Keep only the rows whose name contains "LpaL13" (presumably the
## L. panamensis chromosome prefix -- confirm).
keepers <- grepl(x = rownames(new_tbl), pattern = "LpaL13")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'grepl': error in evaluating the argument 'x' in selecting a method for function 'rownames': object 'new_tbl' not found
new_tbl <- new_tbl[keepers, ]
## Error in eval(expr, envir, enclos): object 'new_tbl' not found
## strong22/strong23 are 1.001 minus the z2.2/z2.3 proportions, so variants
## seen in every sample of a condition get 0.001 rather than 0.
new_tbl[["strong22"]] <- 1.001 - new_tbl[["z2.2"]]
## Error in eval(expr, envir, enclos): object 'new_tbl' not found
new_tbl[["strong23"]] <- 1.001 - new_tbl[["z2.3"]]
## Error in eval(expr, envir, enclos): object 'new_tbl' not found
## Values above 1 (a proportion below 0.001) are clamped to 1.
s22_na <- new_tbl[["strong22"]] > 1
## Error in eval(expr, envir, enclos): object 'new_tbl' not found
new_tbl[s22_na, "strong22"] <- 1
## Error in new_tbl[s22_na, "strong22"] <- 1: object 'new_tbl' not found
s23_na <- new_tbl[["strong23"]] > 1
## Error in eval(expr, envir, enclos): object 'new_tbl' not found
new_tbl[s23_na, "strong23"] <- 1
## Error in new_tbl[s23_na, "strong23"] <- 1: object 'new_tbl' not found
## Split the row names into SNP, Chromosome and Position columns.
## Position stays a character vector because it is the result of gsub().
new_tbl[["SNP"]] <- rownames(new_tbl)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'rownames': object 'new_tbl' not found
new_tbl[["Chromosome"]] <- gsub(x = new_tbl[["SNP"]], pattern = "chr_(.*)_pos_.*", replacement = "\\1")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'gsub': object 'new_tbl' not found
new_tbl[["Position"]] <- gsub(x = new_tbl[["SNP"]], pattern = ".*_pos_(\\d+)_.*", replacement = "\\1")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'gsub': object 'new_tbl' not found
## Reorder to SNP, Chromosome, Position and then the two value columns;
## presumably this is the layout CMplot expects -- confirm.
new_tbl <- new_tbl[, c("SNP", "Chromosome", "Position", "strong22", "strong23")]
## Error in eval(expr, envir, enclos): object 'new_tbl' not found
## Plot the strong22/strong23 values along the genome with CMplot.
library(CMplot)
## Much appreciate for using CMplot.
## Full description, Bug report, Suggestion and the latest codes:
## https://github.com/YinLiLin/CMplot
## First pass: CMplot defaults apart from a bin size of 100,000.
CMplot(new_tbl, bin.size = 100000)
## Error in is.data.frame(x): object 'new_tbl' not found
## Second pass: plot.type "m" with multracks, threshold lines at 0.01 and
## 0.05, and file output requested as jpg at 300 dpi.
## NOTE(review): presumably CMplot treats the value columns as p-values, so
## small strong22/strong23 values are the highlighted ones -- confirm.
CMplot(new_tbl, plot.type="m", multracks=TRUE, threshold = c(0.01, 0.05),
threshold.lwd=c(1,1), threshold.col=c("black","grey"),
amplify=TRUE, bin.size=1e5,
chr.den.col=c("darkgreen", "yellow", "red"),
signal.col=c("red", "green", "blue"),
signal.cex=1, file="jpg", memo="", dpi=300, file.output=TRUE, verbose=TRUE)
## Error in is.data.frame(x): object 'new_tbl' not found
MatrixEQTL looks a little opaque, but it provides sample data containing things which make sense to me, and which should be pretty easy to recapitulate with our data.
## The inputs are taken from the 'new_snps' data structure.
## Caveat: the covariates must be numbers, so every column is replaced by the
## numeric codes of its factor levels before the table is transposed.
my_covariates <- pData(new_snps)[, c("phenotypiccharacteristics", "clinicalcategorical")]
my_covariates[] <- lapply(my_covariates, function(x) as.numeric(as.factor(x)))
my_covariates <- t(my_covariates)
## Gene locations, with the columns renamed to geneid/chr/left/right.
my_geneloc <- stats::setNames(
  fData(lp_expt)[, c("gid", "chromosome", "start", "end")],
  c("geneid", "chr", "left", "right"))
## Filtered, cpm-converted, log2-transformed expression values, restricted to
## the samples whose lower-cased name is also a column of the SNP table.
my_ge <- exprs(
  normalize_expt(lp_expt, transform = "log2", filter = TRUE, convert = "cpm"))
used_samples <- tolower(colnames(my_ge)) %in% colnames(exprs(new_snps))
my_ge <- my_ge[, used_samples]
## Build the SNP location table (snpid, chr, pos) from the variant names,
## which look like chr_<chromosome>_pos_<position>_ref_<...>.
## The identifiers are kept as an ordinary first column. The previous
## data.frame(rownames = ...) also created a column rather than row names, so
## the later rownames(my_snpsloc) lookups parsed the row numbers 1..n instead
## of the variant names.
snp_ids <- rownames(exprs(new_snps))
## The position group only accepts digits, so the underscore after 'pos' can
## no longer end up inside the captured position.
snp_pattern <- "^chr_(.+)_pos_?([0-9]+)_ref_.*$"
my_snpsloc <- data.frame(snpid = snp_ids, stringsAsFactors = FALSE)
my_snpsloc[["chr"]] <- gsub(pattern = snp_pattern, replacement = "\\1",
                            x = snp_ids)
## A name which does not follow the pattern becomes NA (with a warning)
## instead of silently carrying the whole name as its position.
my_snpsloc[["pos"]] <- as.numeric(gsub(pattern = snp_pattern, replacement = "\\2",
                                       x = snp_ids))
## Oh, caveat here: because of the way I stored the data, I could have
## duplicate rows, which presumably will make matrixEQTL sad.
## Duplicates are judged on chromosome and position only; the identifiers
## are unique, so including them would hide every duplicate.
test <- duplicated(my_snpsloc[, c("chr", "pos")])
## Each duplicated row would be another variant at that position;
## so in theory we would do a rle to number them, I am guessing.
## However, I do not have different variants so I think I can ignore this for
## the moment, but will need to make my matrix either 0 or 1.
if (sum(test) > 0) {
  ## sum(test), not sum(duplicated): the latter tries to sum the function.
  message("There are: ", sum(test), " duplicated entries.")
  keep_idx <- ! test
  my_snpsloc <- my_snpsloc[keep_idx, ]
}
## Reduce the SNP matrix to presence/absence: every value above 0 becomes 1.
my_snps <- exprs(new_snps)
one_idx <- my_snps > 0
my_snps[one_idx] <- 1
## Ok, at this point I think I have all the pieces which this method wants...
## Oh, no I guess not; it actually wants the data as a set of filenames...
library(MatrixEQTL)
## write.table() does not create missing directories, so make sure the
## output directory exists before writing the five input tables.
if (!dir.exists("eqtl")) {
  dir.create("eqtl", recursive = TRUE)
}
## Every table is written tab-separated with its row and column names.
write.table(my_snps, "eqtl/snps.tsv", na = "NA", col.names = TRUE, row.names = TRUE, sep = "\t", quote = TRUE)
## readr::write_tsv(my_snps, "eqtl/snps.tsv", )
write.table(my_snpsloc, "eqtl/snpsloc.tsv", na = "NA", col.names = TRUE, row.names = TRUE, sep = "\t", quote = TRUE)
## readr::write_tsv(my_snpsloc, "eqtl/snpsloc.tsv")
write.table(as.data.frame(my_ge), "eqtl/ge.tsv", na = "NA", col.names = TRUE, row.names = TRUE, sep = "\t", quote = TRUE)
## readr::write_tsv(as.data.frame(my_ge), "eqtl/ge.tsv")
write.table(as.data.frame(my_geneloc), "eqtl/geneloc.tsv", na = "NA", col.names = TRUE, row.names = TRUE, sep = "\t", quote = TRUE)
## readr::write_tsv(as.data.frame(my_geneloc), "eqtl/geneloc.tsv")
write.table(as.data.frame(my_covariates), "eqtl/covariates.tsv", na = "NA", col.names = TRUE, row.names = TRUE, sep = "\t", quote = TRUE)
## readr::write_tsv(as.data.frame(my_covariates), "eqtl/covariates.tsv")
## Settings for the MatrixEQTL run.
## Model to use: modelANOVA, modelLINEAR, or modelLINEAR_CROSS.
useModel <- modelLINEAR
## Genotype file and the matching table of SNP locations.
SNP_file_name <- "eqtl/snps.tsv"
snps_location_file_name <- "eqtl/snpsloc.tsv"
## Expression file and the matching table of gene locations.
expression_file_name <- "eqtl/ge.tsv"
gene_location_file_name <- "eqtl/geneloc.tsv"
## Covariates file.
covariates_file_name <- "eqtl/covariates.tsv"
## Output file names; both results go to temporary files.
output_file_name_cis <- tempfile()
output_file_name_tra <- tempfile()
## Only associations significant at this level will be saved.
pvOutputThreshold_cis <- 0.1
pvOutputThreshold_tra <- 0.1
## Error covariance matrix; numeric() stands for the identity.
errorCovariance <- numeric()
## errorCovariance = read.table("Sample_Data/errorCovariance.txt");
## Distance for local gene-SNP pairs.
cisDist <- 1e6
## Load the genotype data.
snps <- SlicedData$new()
snps$fileDelimiter <- "\t"        # the TAB character
snps$fileOmitCharacters <- "NA"   # denotes missing values
snps$fileSkipRows <- 1            # one row of column labels
snps$fileSkipColumns <- 1         # one column of row labels
snps$fileSliceSize <- 2000        # read the file in slices of 2,000 rows
snps$LoadFile(SNP_file_name)
## Load the gene expression data with the same file layout.
gene <- SlicedData$new()
gene$fileDelimiter <- "\t"        # the TAB character
gene$fileOmitCharacters <- "NA"   # denotes missing values
gene$fileSkipRows <- 1            # one row of column labels
gene$fileSkipColumns <- 1         # one column of row labels
gene$fileSliceSize <- 2000        # read the file in slices of 2,000 rows
gene$LoadFile(expression_file_name)
## Load the covariates; the file is only read when a file name was given.
cvrt <- SlicedData$new()
cvrt$fileDelimiter <- "\t"        # the TAB character
cvrt$fileOmitCharacters <- "NA"   # denotes missing values
cvrt$fileSkipRows <- 1            # one row of column labels
cvrt$fileSkipColumns <- 1         # one column of row labels
if (length(covariates_file_name) > 0) {
  cvrt$LoadFile(covariates_file_name)
}
## Run the analysis.
## The SNP and gene location tables are read back from the files on disk.
snpspos <- read.table(snps_location_file_name, header = TRUE,
                      stringsAsFactors = FALSE)
genepos <- read.table(gene_location_file_name, header = TRUE,
                      stringsAsFactors = FALSE)
## The "_tra" settings feed output_file_name/pvOutputThreshold, while the
## "_cis" settings feed their ".cis" counterparts.
me <- Matrix_eQTL_main(
  snps = snps,
  gene = gene,
  cvrt = cvrt,
  output_file_name = output_file_name_tra,
  pvOutputThreshold = pvOutputThreshold_tra,
  useModel = useModel,
  errorCovariance = errorCovariance,
  verbose = TRUE,
  output_file_name.cis = output_file_name_cis,
  pvOutputThreshold.cis = pvOutputThreshold_cis,
  snpspos = snpspos,
  genepos = genepos,
  cisDist = cisDist,
  pvalue.hist = "qqplot",
  min.pv.by.genesnp = FALSE,
  noFDRsaveMemory = FALSE)
## Unless 'skip_load' exists and is TRUE: print the session information,
## report the hpgltools commit in use, and save the session to 'savefile'.
if (!isTRUE(get0("skip_load"))) {
  pander::pander(sessionInfo())
  message("This is hpgltools commit: ", get_git_commit())
  message("Saving to ", savefile)
  tmp <- sm(saveme(filename = savefile))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset f80b9b77ce0f5b0c13e9172e5cf51a94eaadaa9e
## This is hpgltools commit: Mon Jun 28 14:14:28 2021 -0400: f80b9b77ce0f5b0c13e9172e5cf51a94eaadaa9e
## Saving to tmrc2_02sample_estimation_v202106.rda.xz
## loadme() is a project helper; presumably it restores the objects which
## saveme() wrote to 'savefile' -- confirm.
## NOTE(review): this runs right after the save above, so it looks intended
## for interactive use rather than for the rendered document.
tmp <- loadme(filename = savefile)