My limited understanding of this experiment is that Nour and Vince would like to look at material retrieved from the removal of a catheter from a mouse bladder and see how much (if any) of the RNA is pseudomonas.
I therefore wanted to map this against mouse and pseudomonas.
<- load_biomart_annotations(species="mmusculus", overwrite=TRUE) mm_annot
## Got a bad mart type when trying host Oct2021.archive.ensembl.org mart ENSEMBL_MART_ENSEMBL.
## Got a bad mart type when trying host Sep2021.archive.ensembl.org mart ENSEMBL_MART_ENSEMBL.
## Got a bad mart type when trying host Aug2021.archive.ensembl.org mart ENSEMBL_MART_ENSEMBL.
## Got a bad mart type when trying host Oct2020.archive.ensembl.org mart ENSEMBL_MART_ENSEMBL.
## Got a bad mart type when trying host Sep2020.archive.ensembl.org mart ENSEMBL_MART_ENSEMBL.
## Using mart: ENSEMBL_MART_ENSEMBL from host: Aug2020.archive.ensembl.org.
## Successfully connected to the mmusculus_gene_ensembl database.
## Finished downloading ensembl gene annotations.
## Finished downloading ensembl structure annotations.
## Dropping haplotype chromosome annotations, set drop_haplotypes = FALSE if this is bad.
## Saving annotations to mmusculus_biomart_annotations.rda.
## Finished save().
<- mm_annot[["annotation"]]
mm_annotations rownames(mm_annotations) <- make.names(mm_annotations[["ensembl_gene_id"]], unique=TRUE)
<- load_microbesonline_annotations(species="PA14") pa_annot
## Found 1 entry.
## Pseudomonas aeruginosa UCBPP-PA14Proteobacteria2006-11-22yes105972208963
## The species being downloaded is: Pseudomonas aeruginosa UCBPP-PA14
## Downloading: http://www.microbesonline.org/cgi-bin/genomeInfo.cgi?tId=208963;export=tab
<- as.data.frame(pa_annot)
pa_annotations rownames(pa_annotations) <- make.names(pa_annotations[["sysName"]], unique=TRUE)
Watching Nour work, I know that one of the ~ 3 operons of explicit interest contains the gene esxA.
<- pa_annotations[["name"]] == "exsA"
esx_idx pa_annotations[esx_idx, ]
## locusId accession GI scaffoldId start stop strand
## PA14_42390 2197948 YP_791532.1 1.16e+08 4582 3777526 3776690 -
## sysName name desc COG COGFun
## PA14_42390 PA14_42390 exsA transcriptional regulator ExsA (NCBI) COG2207 K
## COGDesc TIGRFam TIGRRoles
## PA14_42390 AraC-type DNA-binding domain-containing proteins <NA> <NA>
## GO
## PA14_42390 GO:0006355,GO:0045449,GO:0005622,GO:0003677,GO:0003700,GO:0043565
## EC ECDesc
## PA14_42390 <NA> <NA>
<- c("PA14_42540", "PA14_42530", "PA14_42520", "PA14_42510", "PA14_42500", "PA14_42490",
loci "PA14_42480", "PA14_42470", "PA14_42460", "PA14_42450", "PA14_42440", "PA14_42430",
"PA14_42410", "PA14_42390", "PA14_42380", "PA14_42360", "PA14_42350", "PA14_42340",
"PA14_42320", "PA14_42310", "PA14_42300", "PA14_42290", "PA14_42540", "PA14_42280",
"PA14_42270", "PA14_42260", "PA14_42250")
<- create_expt("sample_sheets/all_samples.xlsx",
pa_expt file_column="paeruginosapa14hisat",
gene_info=pa_annotations)
## Reading the sample metadata.
## Did not find the batch column in the sample sheet.
## Filling it in as undefined.
## The sample definitions comprises: 29 rows(samples) and 30 columns(metadata fields).
## Warning in create_expt("sample_sheets/all_samples.xlsx", file_column =
## "paeruginosapa14hisat", : Some samples were removed when cross referencing the
## samples against the count data.
## Matched 5972 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 5979 features and 15 samples.
fData(pa_expt)[["length"]] <- abs(as.numeric(fData(pa_expt)[["start"]]) - as.numeric(fData(pa_expt)[["stop"]]))
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
<- subset_expt(pa_expt, coverage = 8000) pa_test
## The samples removed (and read coverage) when filtering samples with less than 8000 reads are:
## catheter_16 catheter_20 catheter_21 catheter_28 catheter_29 catheter_32
## 772 3664 171 7132 1165 389
## catheter_33 catheter_34
## 413 1866
## subset_expt(): There were 15, now there are 7 samples.
<- normalize_expt(pa_test, filter = "rowmax", thresh = 3, convert = "rpkm",
pa_filt column = "length", na_to_zero = TRUE)
## Removing 2023 low-maximum genes < 3: (3956 remaining).
<- write_expt(pa_test, excel = "excel/pa_rpkm_expt.xlsx") pa_written
## Deleting the file excel/pa_rpkm_expt.xlsx before writing the tables.
## Writing the first sheet, containing a legend and some summary data.
## `geom_smooth()` using formula 'y ~ x'
## varpart sees only 1 batch, adjusting the model accordingly.
## Error in .fitExtractVarPartModel(exprObj, formula, data, REML = REML, :
## Variable in formula is not found: condition
## Retrying with only condition in the model.
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:S4Vectors':
##
## expand
##
## Total:20 s
## Error in if (min(data$value) < 0) { :
## missing value where TRUE/FALSE needed
## Error in density.default(x, adjust = adj) : 'x' contains missing values
## Error in density.default(x, adjust = adj) : 'x' contains missing values
## `geom_smooth()` using formula 'y ~ x'
## varpart sees only 1 batch, adjusting the model accordingly.
## Error in .fitExtractVarPartModel(exprObj, formula, data, REML = REML, :
## Variable in formula is not found: condition
## Retrying with only condition in the model.
##
## Total:23 s
<- plot_boxplot(pa_filt, violin = TRUE, label_chars = NULL) start
## 11117 entries are 0. We are on a log scale, adding 1 to the data.
start
<- rownames(exprs(pa_filt)) %in% loci
extra_idx <- exprs(pa_filt)[extra_idx, ]
extra
<- exprs(pa_test)[extra_idx, ]
raw_extra
<- reshape2::melt(extra)
extra_melted
<- start +
final stat_summary(fun=mean, geom="point", size=1, color = "blue") +
stat_summary(fun=median, geom="point", size=1, color = "red") +
geom_point(data = as.data.frame(extra_melted), alpha = 0.4, position = position_jitter(width=0.1, height=0.0),
aes_string("x" = "Var2", "y" = "value"))
final
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 66 rows containing missing values (geom_point).
## Make a quick and dirty expt because the rownames are wrong
<- create_expt("sample_sheets/all_samples.xlsx",
mm_temp file_column="mmuscusmm38100hisat")
## Reading the sample metadata.
## Did not find the batch column in the sample sheet.
## Filling it in as undefined.
## The sample definitions comprises: 29 rows(samples) and 30 columns(metadata fields).
## Warning in create_expt("sample_sheets/all_samples.xlsx", file_column =
## "mmuscusmm38100hisat"): Some samples were removed when cross referencing the
## samples against the count data.
## Matched 25760 annotations and counts.
## Bringing together the count matrix and gene information.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 25760 features and 14 samples.
<- gsub(x=rownames(exprs(mm_temp)), pattern="^gene:", replacement="")
mm_ids
<- create_expt("sample_sheets/all_samples.xlsx",
mm_expt file_column="mmuscusmm38100hisat",
gene_info=mm_annotations) %>%
set_expt_batches(fact="infection") %>%
set_expt_genenames(mm_ids)
## Reading the sample metadata.
## Did not find the batch column in the sample sheet.
## Filling it in as undefined.
## The sample definitions comprises: 29 rows(samples) and 30 columns(metadata fields).
## Warning in create_expt("sample_sheets/all_samples.xlsx", file_column =
## "mmuscusmm38100hisat", : Some samples were removed when cross referencing the
## samples against the count data.
## Warning in create_expt("sample_sheets/all_samples.xlsx", file_column =
## "mmuscusmm38100hisat", : Even after changing the rownames in gene info, they do
## not match the count table.
## Even after changing the rownames in gene info, they do not match the count table.
## Here are the first few rownames from the count tables:
## gene:ENSMUSG00000000001, gene:ENSMUSG00000000003, gene:ENSMUSG00000000028, gene:ENSMUSG00000000037, gene:ENSMUSG00000000049, gene:ENSMUSG00000000056
## Here are the first few rownames from the gene information table:
## ENSMUSG00000000001, ENSMUSG00000000003, ENSMUSG00000020875, ENSMUSG00000000028, ENSMUSG00000048583, ENSMUSG00000000049
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 25760 features and 14 samples.
plot_boxplot(normalize_expt(mm_expt, transform="log2"))
## transform_counts: Found 132772 values equal to 0, adding 1 to the matrix.
<- normalize_expt(mm_expt, filter = "rowmax", transform = "log2") mm_filt
## Removing 6718 low-maximum genes < 2: (19042 remaining).
## transform_counts: Found 39690 values equal to 0, adding 1 to the matrix.
<- plot_boxplot(mm_filt, transform="log2", violin = TRUE)
start start
<- normalize_expt(mm_expt, convert="cpm", transform="log2", norm="vsd", filter=TRUE) mm_norm
## Removing 13049 low-count genes (12711 remaining).
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
plot_pca(mm_norm)$plot
<- normalize_expt(mm_expt, convert="cpm", transform="log2", batch="svaseq", filter=TRUE) mm_nb
## Removing 13049 low-count genes (12711 remaining).
## Setting 387 low elements to zero.
## transform_counts: Found 387 values equal to 0, adding 1 to the matrix.
plot_pca(mm_nb)$plot
<- list(
keepers "chronic_pbs" = c("chronic", "pbs"),
"mutant_pbs" = c("mutant", "pbs"),
"chronic_mutant" = c("chronic", "mutant"))
<- all_pairwise(mm_expt, model_batch = TRUE, filter=TRUE) mm_de
## This DE analysis will perform all pairwise comparisons among:
##
## chronic mutant pbs
## 6 5 3
## This analysis will include a batch factor in the model comprised of:
##
## high inter low
## 6 3 5
## This will pre-filter the input data using normalize_expt's: TRUE argument.
## Using limma's removeBatchEffect to visualize with(out) batch inclusion.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
<- combine_de_tables(
mm_table keepers=keepers,
mm_de, excel="excel/mm_compare.xlsx")
<- extract_significant_genes(mm_table, excel="excel/mm_sig.xlsx")
mm_sig <- all_gprofiler(mm_sig, species="mmusculus") mm_gp
## Redirecting to simple_gprofiler2().
## If you wish to roll the bones with the previous function, try:
## simple_gprofiler_old().
## Performing gProfiler GO search of 9 against mmusculus.
## GO search found 5 hits.
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the plotly package.
## Please report the issue at <https://github.com/plotly/plotly.R/issues>.
## Performing gProfiler KEGG search of 9 against mmusculus.
## KEGG search found 0 hits.
## Performing gProfiler REAC search of 9 against mmusculus.
## REAC search found 0 hits.
## Performing gProfiler WP search of 9 against mmusculus.
## WP search found 0 hits.
## Performing gProfiler TF search of 9 against mmusculus.
## TF search found 0 hits.
## Performing gProfiler MIRNA search of 9 against mmusculus.
## MIRNA search found 0 hits.
## Performing gProfiler HPA search of 9 against mmusculus.
## Error in gprofiler2::gost(query = gene_ids, organism = species, evcodes = evcodes, :
## Bad request, response code 400
## The HPA method failed for this organism.
## Performing gProfiler CORUM search of 9 against mmusculus.
## No results to show
## Please make sure that the organism is correct or set significant = FALSE
## CORUM search found hits.
## Performing gProfiler HP search of 9 against mmusculus.
## HP search found 0 hits.
## Error in if (is.null(plotting) | nrow(plotting) == 0) { :
## argument is of length zero
## Redirecting to simple_gprofiler2().
## If you wish to roll the bones with the previous function, try:
## simple_gprofiler_old().
## Performing gProfiler GO search of 165 against mmusculus.
## GO search found 244 hits.
## Performing gProfiler KEGG search of 165 against mmusculus.
## KEGG search found 0 hits.
## Performing gProfiler REAC search of 165 against mmusculus.
## REAC search found 0 hits.
## Performing gProfiler WP search of 165 against mmusculus.
## WP search found 0 hits.
## Performing gProfiler TF search of 165 against mmusculus.
## TF search found 6 hits.
## Performing gProfiler MIRNA search of 165 against mmusculus.
## MIRNA search found 0 hits.
## Performing gProfiler HPA search of 165 against mmusculus.
## Error in gprofiler2::gost(query = gene_ids, organism = species, evcodes = evcodes, :
## Bad request, response code 400
## The HPA method failed for this organism.
## Performing gProfiler CORUM search of 165 against mmusculus.
## CORUM search found 0 hits.
## Performing gProfiler HP search of 165 against mmusculus.
## HP search found 0 hits.
## Redirecting to simple_gprofiler2().
## If you wish to roll the bones with the previous function, try:
## simple_gprofiler_old().
## Performing gProfiler GO search of 89 against mmusculus.
## GO search found 114 hits.
## Performing gProfiler KEGG search of 89 against mmusculus.
## KEGG search found 0 hits.
## Performing gProfiler REAC search of 89 against mmusculus.
## REAC search found 0 hits.
## Performing gProfiler WP search of 89 against mmusculus.
## WP search found 0 hits.
## Performing gProfiler TF search of 89 against mmusculus.
## TF search found 3 hits.
## Performing gProfiler MIRNA search of 89 against mmusculus.
## MIRNA search found 1 hits.
## Performing gProfiler HPA search of 89 against mmusculus.
## Error in gprofiler2::gost(query = gene_ids, organism = species, evcodes = evcodes, :
## Bad request, response code 400
## The HPA method failed for this organism.
## Performing gProfiler CORUM search of 89 against mmusculus.
## CORUM search found 0 hits.
## Performing gProfiler HP search of 89 against mmusculus.
## HP search found 0 hits.
## Redirecting to simple_gprofiler2().
## If you wish to roll the bones with the previous function, try:
## simple_gprofiler_old().
## Performing gProfiler GO search of 9 against mmusculus.
## GO search found 1 hits.
## Performing gProfiler KEGG search of 9 against mmusculus.
## KEGG search found 0 hits.
## Performing gProfiler REAC search of 9 against mmusculus.
## REAC search found 0 hits.
## Performing gProfiler WP search of 9 against mmusculus.
## No results to show
## Please make sure that the organism is correct or set significant = FALSE
## WP search found hits.
## Performing gProfiler TF search of 9 against mmusculus.
## TF search found 1 hits.
## Performing gProfiler MIRNA search of 9 against mmusculus.
## MIRNA search found 0 hits.
## Performing gProfiler HPA search of 9 against mmusculus.
## Error in gprofiler2::gost(query = gene_ids, organism = species, evcodes = evcodes, :
## Bad request, response code 400
## The HPA method failed for this organism.
## Performing gProfiler CORUM search of 9 against mmusculus.
## No results to show
## Please make sure that the organism is correct or set significant = FALSE
## CORUM search found hits.
## Performing gProfiler HP search of 9 against mmusculus.
## HP search found 0 hits.
## Error in if (is.null(plotting) | nrow(plotting) == 0) { :
## argument is of length zero
## Genes increased in the chronic vs. mutant samples
"chronic_mutant_up"]][["interactive_plots"]][["GO"]] mm_gp[[
<- all_pairwise(mm_expt, model_batch="svaseq", filter=TRUE) mm_de_sva
## This DE analysis will perform all pairwise comparisons among:
##
## chronic mutant pbs
## 6 5 3
## This analysis will include surrogate estimates from: svaseq.
## This will pre-filter the input data using normalize_expt's: TRUE argument.
## Removing 0 low-count genes (12711 remaining).
## Setting 387 low elements to zero.
## transform_counts: Found 387 values equal to 0, adding 1 to the matrix.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
<- combine_de_tables(
mm_table_sva keepers=keepers,
mm_de_sva, excel="excel/mm_compare_sva.xlsx")
## Deleting the file excel/mm_compare_sva.xlsx before writing the tables.
<- extract_significant_genes(
mm_sig_sva excel="excel/mm_sig_sva.xlsx") mm_table_sva,
## Deleting the file excel/mm_sig_sva.xlsx before writing the tables.
<- all_gprofiler(mm_sig_sva, species="mmusculus") mm_gp_sva
## Redirecting to simple_gprofiler2().
## If you wish to roll the bones with the previous function, try:
## simple_gprofiler_old().
## Performing gProfiler GO search of 162 against mmusculus.
## GO search found 452 hits.
## Performing gProfiler KEGG search of 162 against mmusculus.
## KEGG search found 5 hits.
## Performing gProfiler REAC search of 162 against mmusculus.
## REAC search found 7 hits.
## Performing gProfiler WP search of 162 against mmusculus.
## WP search found 0 hits.
## Performing gProfiler TF search of 162 against mmusculus.
## TF search found 15 hits.
## Performing gProfiler MIRNA search of 162 against mmusculus.
## MIRNA search found 0 hits.
## Performing gProfiler HPA search of 162 against mmusculus.
## Error in gprofiler2::gost(query = gene_ids, organism = species, evcodes = evcodes, :
## Bad request, response code 400
## The HPA method failed for this organism.
## Performing gProfiler CORUM search of 162 against mmusculus.
## CORUM search found 0 hits.
## Performing gProfiler HP search of 162 against mmusculus.
## HP search found 0 hits.
## Redirecting to simple_gprofiler2().
## If you wish to roll the bones with the previous function, try:
## simple_gprofiler_old().
## Performing gProfiler GO search of 13 against mmusculus.
## GO search found 1 hits.
## Performing gProfiler KEGG search of 13 against mmusculus.
## KEGG search found 0 hits.
## Performing gProfiler REAC search of 13 against mmusculus.
## REAC search found 0 hits.
## Performing gProfiler WP search of 13 against mmusculus.
## WP search found 0 hits.
## Performing gProfiler TF search of 13 against mmusculus.
## TF search found 0 hits.
## Performing gProfiler MIRNA search of 13 against mmusculus.
## MIRNA search found 0 hits.
## Performing gProfiler HPA search of 13 against mmusculus.
## Error in gprofiler2::gost(query = gene_ids, organism = species, evcodes = evcodes, :
## Bad request, response code 400
## The HPA method failed for this organism.
## Performing gProfiler CORUM search of 13 against mmusculus.
## CORUM search found 0 hits.
## Performing gProfiler HP search of 13 against mmusculus.
## HP search found 0 hits.
## Redirecting to simple_gprofiler2().
## If you wish to roll the bones with the previous function, try:
## simple_gprofiler_old().
## Performing gProfiler GO search of 11 against mmusculus.
## GO search found 1 hits.
## Performing gProfiler KEGG search of 11 against mmusculus.
## KEGG search found 0 hits.
## Performing gProfiler REAC search of 11 against mmusculus.
## REAC search found 0 hits.
## Performing gProfiler WP search of 11 against mmusculus.
## WP search found 0 hits.
## Performing gProfiler TF search of 11 against mmusculus.
## TF search found 0 hits.
## Performing gProfiler MIRNA search of 11 against mmusculus.
## MIRNA search found 0 hits.
## Performing gProfiler HPA search of 11 against mmusculus.
## Error in gprofiler2::gost(query = gene_ids, organism = species, evcodes = evcodes, :
## Bad request, response code 400
## The HPA method failed for this organism.
## Performing gProfiler CORUM search of 11 against mmusculus.
## No results to show
## Please make sure that the organism is correct or set significant = FALSE
## CORUM search found hits.
## Performing gProfiler HP search of 11 against mmusculus.
## HP search found 0 hits.
## Error in if (is.null(plotting) | nrow(plotting) == 0) { :
## argument is of length zero
## Redirecting to simple_gprofiler2().
## If you wish to roll the bones with the previous function, try:
## simple_gprofiler_old().
## Performing gProfiler GO search of 64 against mmusculus.
## GO search found 112 hits.
## Performing gProfiler KEGG search of 64 against mmusculus.
## KEGG search found 1 hits.
## Performing gProfiler REAC search of 64 against mmusculus.
## REAC search found 0 hits.
## Performing gProfiler WP search of 64 against mmusculus.
## WP search found 0 hits.
## Performing gProfiler TF search of 64 against mmusculus.
## TF search found 1 hits.
## Performing gProfiler MIRNA search of 64 against mmusculus.
## MIRNA search found 0 hits.
## Performing gProfiler HPA search of 64 against mmusculus.
## Error in gprofiler2::gost(query = gene_ids, organism = species, evcodes = evcodes, :
## Bad request, response code 400
## The HPA method failed for this organism.
## Performing gProfiler CORUM search of 64 against mmusculus.
## CORUM search found 0 hits.
## Performing gProfiler HP search of 64 against mmusculus.
## HP search found 0 hits.
## Redirecting to simple_gprofiler2().
## If you wish to roll the bones with the previous function, try:
## simple_gprofiler_old().
## Performing gProfiler GO search of 854 against mmusculus.
## GO search found 1163 hits.
## Performing gProfiler KEGG search of 854 against mmusculus.
## KEGG search found 7 hits.
## Performing gProfiler REAC search of 854 against mmusculus.
## REAC search found 20 hits.
## Performing gProfiler WP search of 854 against mmusculus.
## WP search found 0 hits.
## Performing gProfiler TF search of 854 against mmusculus.
## TF search found 45 hits.
## Performing gProfiler MIRNA search of 854 against mmusculus.
## MIRNA search found 2 hits.
## Performing gProfiler HPA search of 854 against mmusculus.
## Error in gprofiler2::gost(query = gene_ids, organism = species, evcodes = evcodes, :
## Bad request, response code 400
## The HPA method failed for this organism.
## Performing gProfiler CORUM search of 854 against mmusculus.
## CORUM search found 0 hits.
## Performing gProfiler HP search of 854 against mmusculus.
## HP search found 9 hits.
## Redirecting to simple_gprofiler2().
## If you wish to roll the bones with the previous function, try:
## simple_gprofiler_old().
## Performing gProfiler GO search of 72 against mmusculus.
## GO search found 1 hits.
## Performing gProfiler KEGG search of 72 against mmusculus.
## KEGG search found 1 hits.
## Performing gProfiler REAC search of 72 against mmusculus.
## REAC search found 2 hits.
## Performing gProfiler WP search of 72 against mmusculus.
## WP search found 0 hits.
## Performing gProfiler TF search of 72 against mmusculus.
## TF search found 13 hits.
## Performing gProfiler MIRNA search of 72 against mmusculus.
## MIRNA search found 0 hits.
## Performing gProfiler HPA search of 72 against mmusculus.
## Error in gprofiler2::gost(query = gene_ids, organism = species, evcodes = evcodes, :
## Bad request, response code 400
## The HPA method failed for this organism.
## Performing gProfiler CORUM search of 72 against mmusculus.
## CORUM search found 0 hits.
## Performing gProfiler HP search of 72 against mmusculus.
## HP search found 0 hits.
## Genes increased in the chronic vs. mutant samples
"chronic_mutant_up"]][["interactive_plots"]][["GO"]] mm_gp_sva[[
"chronic_mutant_up"]][["pvalue_plots"]][["MF"]] mm_gp_sva[[
"chronic_mutant_up"]][["pvalue_plots"]][["BP"]] mm_gp_sva[[
"chronic_mutant_up"]][["pvalue_plots"]][["TF"]] mm_gp_sva[[
"chronic_mutant_down"]][["interactive_plots"]][["GO"]] mm_gp_sva[[
"chronic_mutant_down"]][["pvalue_plots"]][["BP"]] mm_gp_sva[[
"chronic_mutant_down"]][["pvalue_plots"]][["TF"]] mm_gp_sva[[
## Genes increased in the chronic vs. mutant samples
"chronic_pbs_up"]][["interactive_plots"]][["GO"]] mm_gp_sva[[
"chronic_pbs_up"]][["pvalue_plots"]][["MF"]] mm_gp_sva[[
"chronic_pbs_up"]][["pvalue_plots"]][["BP"]] mm_gp_sva[[
"chronic_pbs_up"]][["pvalue_plots"]][["REAC"]] mm_gp_sva[[
"chronic_pbs_up"]][["pvalue_plots"]][["TF"]] mm_gp_sva[[
"chronic_pbs_down"]][["interactive_plots"]][["GO"]] mm_gp_sva[[
"chronic_pbs_down"]][["pvalue_plots"]][["MF"]] mm_gp_sva[[
"chronic_pbs_down"]][["pvalue_plots"]][["TF"]] mm_gp_sva[[
## NULL
## Genes increased in the mutant vs. mutant samples
"mutant_pbs_up"]][["interactive_plots"]][["GO"]] mm_gp_sva[[
"mutant_pbs_down"]][["interactive_plots"]][["GO"]] mm_gp_sva[[
"mutant_pbs_down"]][["pvalue_plots"]][["MF"]] mm_gp_sva[[
"mutant_pbs_down"]][["pvalue_plots"]][["TF"]] mm_gp_sva[[
There are a few gene sets provided by mSigDB:
The filenames I am using for now: m2.all.v2022.1.Mm.entrez.gmt msigdb_v2022.1.Mm.xml
<- load_gmt_signatures(signatures = "reference/m2.all.v2022.1.Mm.entrez.gmt")
msig_m2 <- simple_gsva(mm_expt, signatures = msig_m2, orgdb = "org.Mm.eg.db", msig_xml = "reference/msigdb_v2022.1.Mm.xml") mm_c2
## Converting the rownames() of the expressionset to ENTREZID.
## 4052 ENSEMBL ID's didn't have a matching ENTEREZ ID. Dropping them now.
## Before conversion, the expressionset has 25760 entries.
## After conversion, the expressionset has 21970 entries.
## Adding annotations from reference/msigdb_v2022.1.Mm.xml.
## Not subsetting the msigdb metadata, the wanted_meta argument was 'all'.
## The downloaded msigdb contained 2615 rownames shared with the gsva result out of 2615.
<- get_sig_gsva_categories(
mm_c2_sig
mm_c2,excel = "excel/mm_c2.xlsx")
## Starting limma pairwise comparison.
## libsize was not specified, this parameter has profound effects on limma's result.
## Using the libsize from expt$libsize.
## Limma step 1/6: choosing model.
## Choosing the non-intercept containing model.
## Assuming this data is similar to a micro array and not performign voom.
## Limma step 3/6: running lmFit with method: ls.
## Limma step 4/6: making and fitting contrasts with no intercept. (~ 0 + factors)
## Limma step 5/6: Running eBayes with robust = FALSE and trend = FALSE.
## Limma step 6/6: Writing limma outputs.
## Limma step 6/6: 1/3: Creating table: mutant_vs_chronic. Adjust = BH
## Limma step 6/6: 2/3: Creating table: pbs_vs_chronic. Adjust = BH
## Limma step 6/6: 3/3: Creating table: pbs_vs_mutant. Adjust = BH
## Limma step 6/6: 1/3: Creating table: chronic. Adjust = BH
## Limma step 6/6: 2/3: Creating table: mutant. Adjust = BH
## Limma step 6/6: 3/3: Creating table: pbs. Adjust = BH
## The factor chronic has 6 rows.
## The factor mutant has 5 rows.
## The factor pbs has 3 rows.
## Testing each factor against the others.
## Scoring chronic against everything else.
## Scoring mutant against everything else.
## Scoring pbs against everything else.
## Deleting the file excel/mm_c2.xlsx before writing the tables.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : mce2,68501|Cabc1 ... s1,Hmgcs1,208715 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : tn4,68585|142798 ... 1_s_at,Dbp,13170 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 0096515,ENSMUSG0 ... SMUSG00000118346 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : ,52120|ENSMUSG00 ... 6,Tmem179b,67706 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 0000095416,Ighv1 ... 6,Tmem179b,67706 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 96010,H4f16,3203 ... 118462,Lhb,16866 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 00000050808,Muc1 ... 8672,Muc4,140474 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 037251,Pomk,7465 ... 8672,Muc4,140474 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : corl,209707|1737 ... 477,Cyp4b1,13120 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : sta3,408196|ENSG ... 9,Ighv5-9,544896 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 0078652,ENSMUSG0 ... SMUSG00000118462 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : ENSMUSG000000320 ... 118462,Lhb,16866 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : mce2,68501|Cabc1 ... s1,Hmgcs1,208715 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : tn4,68585|142798 ... 1_s_at,Dbp,13170 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 96010,H4f16,3203 ... 118462,Lhb,16866 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 037251,Pomk,7465 ... 8672,Muc4,140474 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 00000050808,Muc1 ... 8672,Muc4,140474 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 0096515,ENSMUSG0 ... SMUSG00000118346 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : ,52120|ENSMUSG00 ... 6,Tmem179b,67706 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 0000095416,Ighv1 ... 6,Tmem179b,67706 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 0078652,ENSMUSG0 ... SMUSG00000118462 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : ENSMUSG000000320 ... 118462,Lhb,16866 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : corl,209707|1737 ... 477,Cyp4b1,13120 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : sta3,408196|ENSG ... 9,Ighv5-9,544896 is truncated.
## Number of characters exeed the limit of 32767.
<- load_gmt_signatures(signatures = "reference/m8.all.v2022.1.Mm.entrez.gmt")
msig_m8 <- simple_gsva(mm_expt, signatures = msig_m8, orgdb = "org.Mm.eg.db", msig_xml = "reference/msigdb_v2022.1.Mm.xml") mm_c8
## Converting the rownames() of the expressionset to ENTREZID.
## 4052 ENSEMBL ID's didn't have a matching ENTEREZ ID. Dropping them now.
## Before conversion, the expressionset has 25760 entries.
## After conversion, the expressionset has 21970 entries.
## Adding annotations from reference/msigdb_v2022.1.Mm.xml.
## Not subsetting the msigdb metadata, the wanted_meta argument was 'all'.
## The downloaded msigdb contained 212 rownames shared with the gsva result out of 212.
<- get_sig_gsva_categories(
mm_c8_sig
mm_c8,excel = "excel/mm_c8.xlsx")
## Starting limma pairwise comparison.
## libsize was not specified, this parameter has profound effects on limma's result.
## Using the libsize from expt$libsize.
## Limma step 1/6: choosing model.
## Choosing the non-intercept containing model.
## Assuming this data is similar to a micro array and not performign voom.
## Limma step 3/6: running lmFit with method: ls.
## Limma step 4/6: making and fitting contrasts with no intercept. (~ 0 + factors)
## Limma step 5/6: Running eBayes with robust = FALSE and trend = FALSE.
## Limma step 6/6: Writing limma outputs.
## Limma step 6/6: 1/3: Creating table: mutant_vs_chronic. Adjust = BH
## Limma step 6/6: 2/3: Creating table: pbs_vs_chronic. Adjust = BH
## Limma step 6/6: 3/3: Creating table: pbs_vs_mutant. Adjust = BH
## Limma step 6/6: 1/3: Creating table: chronic. Adjust = BH
## Limma step 6/6: 2/3: Creating table: mutant. Adjust = BH
## Limma step 6/6: 3/3: Creating table: pbs. Adjust = BH
## The factor chronic has 6 rows.
## The factor mutant has 5 rows.
## The factor pbs has 3 rows.
## Testing each factor against the others.
## Scoring chronic against everything else.
## Scoring mutant against everything else.
## Scoring pbs against everything else.
## Deleting the file excel/mm_c8.xlsx before writing the tables.
::pander(sessionInfo()) pander
R version 4.2.1 (2022-06-23)
Platform: x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.UTF-8, LC_NUMERIC=C, LC_TIME=en_US.UTF-8, LC_COLLATE=en_US.UTF-8, LC_MONETARY=en_US.UTF-8, LC_MESSAGES=en_US.UTF-8, LC_PAPER=en_US.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.UTF-8 and LC_IDENTIFICATION=C
attached base packages: splines, stats4, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: org.Hs.eg.db(v.3.15.0), AnnotationDbi(v.1.58.0), org.Mm.eg.db(v.3.15.0), ruv(v.0.9.7.1), lme4(v.1.1-30), Matrix(v.1.5-1), BiocParallel(v.1.30.4), variancePartition(v.1.26.0), ggplot2(v.3.3.6), hpgltools(v.1.0), testthat(v.3.1.5), reticulate(v.1.26), SummarizedExperiment(v.1.26.1), GenomicRanges(v.1.48.0), GenomeInfoDb(v.1.32.4), IRanges(v.2.30.1), S4Vectors(v.0.34.0), MatrixGenerics(v.1.8.1), matrixStats(v.0.62.0), Biobase(v.2.56.0) and BiocGenerics(v.0.42.0)
loaded via a namespace (and not attached): rappdirs(v.0.3.3), rtracklayer(v.1.56.1), R.methodsS3(v.1.8.2), tidyr(v.1.2.1), bit64(v.4.0.5), knitr(v.1.40), irlba(v.2.3.5.1), R.utils(v.2.12.0), DelayedArray(v.0.22.0), data.table(v.1.14.2), KEGGREST(v.1.36.3), RCurl(v.1.98-1.9), doParallel(v.1.0.17), generics(v.0.1.3), ScaledMatrix(v.1.4.1), preprocessCore(v.1.58.0), GenomicFeatures(v.1.48.4), callr(v.3.7.2), RhpcBLASctl(v.0.21-247.1), usethis(v.2.1.6), RSQLite(v.2.2.18), shadowtext(v.0.1.2), bit(v.4.0.4), tzdb(v.0.3.0), enrichplot(v.1.16.2), xml2(v.1.3.3), lubridate(v.1.8.0), httpuv(v.1.6.6), assertthat(v.0.2.1), viridis(v.0.6.2), xfun(v.0.33), hms(v.1.1.2), jquerylib(v.0.1.4), IHW(v.1.24.0), evaluate(v.0.17), promises(v.1.2.0.1), DEoptimR(v.1.0-11), fansi(v.1.0.3), restfulr(v.0.0.15), progress(v.1.2.2), caTools(v.1.18.2), dbplyr(v.2.2.1), geneplotter(v.1.74.0), igraph(v.1.3.5), DBI(v.1.1.3), htmlwidgets(v.1.5.4), purrr(v.0.3.5), ellipsis(v.0.3.2), crosstalk(v.1.2.0), selectr(v.0.4-2), dplyr(v.1.0.10), backports(v.1.4.1), gprofiler2(v.0.2.1), annotate(v.1.74.0), aod(v.1.3.2), sparseMatrixStats(v.1.8.0), biomaRt(v.2.52.0), SingleCellExperiment(v.1.18.1), vctrs(v.0.4.2), remotes(v.2.4.2), cachem(v.1.0.6), withr(v.2.5.0), ggforce(v.0.4.1), robustbase(v.0.95-0), vroom(v.1.6.0), GenomicAlignments(v.1.32.1), treeio(v.1.20.2), fdrtool(v.1.2.17), prettyunits(v.1.1.1), DOSE(v.3.22.1), ape(v.5.6-2), lazyeval(v.0.2.2), crayon(v.1.5.2), genefilter(v.1.78.0), slam(v.0.1-50), labeling(v.0.4.2), edgeR(v.3.38.4), pkgconfig(v.2.0.3), tweenr(v.2.0.2), nlme(v.3.1-160), pkgload(v.1.3.0), devtools(v.2.4.5), rlang(v.1.0.6), lifecycle(v.1.0.3), miniUI(v.0.1.1.1), downloader(v.0.4), filelock(v.1.0.2), BiocFileCache(v.2.4.0), rsvd(v.1.0.5), directlabels(v.2021.1.13), rprojroot(v.2.0.3), polyclip(v.1.10-0), GSVA(v.1.44.5), graph(v.1.74.0), aplot(v.0.1.8), lpsymphony(v.1.24.0), Rhdf5lib(v.1.18.2), boot(v.1.3-28), processx(v.3.7.0), png(v.0.1-7), viridisLite(v.0.4.1), rjson(v.0.2.21), bitops(v.1.0-7), R.oo(v.1.25.0), rhdf5filters(v.1.8.0), KernSmooth(v.2.23-20), pander(v.0.6.5), Biostrings(v.2.64.1), DelayedMatrixStats(v.1.18.1), blob(v.1.2.3), stringr(v.1.4.1), qvalue(v.2.28.0), readr(v.2.1.3), gridGraphics(v.0.5-1), beachmat(v.2.12.0), scales(v.1.2.1), memoise(v.2.0.1), GSEABase(v.1.58.0), magrittr(v.2.0.3), plyr(v.1.8.7), gplots(v.3.1.3), zlibbioc(v.1.42.0), compiler(v.4.2.1), scatterpie(v.0.1.8), BiocIO(v.1.6.0), RColorBrewer(v.1.1-3), DESeq2(v.1.36.0), Rsamtools(v.2.12.0), cli(v.3.4.1), XVector(v.0.36.0), urlchecker(v.1.0.1), patchwork(v.1.1.2), ps(v.1.7.1), MASS(v.7.3-58.1), mgcv(v.1.8-40), tidyselect(v.1.2.0), stringi(v.1.7.8), highr(v.0.9), yaml(v.2.3.5), GOSemSim(v.2.22.0), BiocSingular(v.1.12.0), locfit(v.1.5-9.6), ggrepel(v.0.9.1), grid(v.4.2.1), sass(v.0.4.2), fastmatch(v.1.1-3), tools(v.4.2.1), parallel(v.4.2.1), rstudioapi(v.0.14), foreach(v.1.5.2), gridExtra(v.2.3), Rtsne(v.0.16), farver(v.2.1.1), ggraph(v.2.1.0), digest(v.0.6.29), shiny(v.1.7.2), quadprog(v.1.5-8), Rcpp(v.1.0.9), broom(v.1.0.1), later(v.1.3.0), httr(v.1.4.4), Rdpack(v.2.4), colorspace(v.2.0-3), rvest(v.1.0.3), brio(v.1.1.3), XML(v.3.99-0.11), fs(v.1.5.2), RBGL(v.1.72.0), yulab.utils(v.0.0.5), PROPER(v.1.28.0), tidytree(v.0.4.1), graphlayouts(v.0.8.2), ggplotify(v.0.1.0), plotly(v.4.10.0), sessioninfo(v.1.2.2), xtable(v.1.8-4), jsonlite(v.1.8.2), nloptr(v.2.0.3), ggtree(v.3.4.4), tidygraph(v.1.2.2), corpcor(v.1.6.10), ggfun(v.0.0.7), Vennerable(v.3.1.0.9000), R6(v.2.5.1), profvis(v.0.3.7), pillar(v.1.8.1), htmltools(v.0.5.3), mime(v.0.12), glue(v.1.6.2), fastmap(v.1.1.0), minqa(v.1.2.4), clusterProfiler(v.4.4.4), codetools(v.0.2-18), fgsea(v.1.22.0), pkgbuild(v.1.3.1), utf8(v.1.2.2), lattice(v.0.20-45), bslib(v.0.4.0), tibble(v.3.1.8), sva(v.3.44.0), pbkrtest(v.0.5.1), curl(v.4.3.3), gtools(v.3.9.3), zip(v.2.2.1), GO.db(v.3.15.0), openxlsx(v.4.2.5), survival(v.3.4-0), limma(v.3.52.4), rmarkdown(v.2.17), desc(v.1.4.2), munsell(v.0.5.0), rhdf5(v.2.40.0), DO.db(v.2.9), fastcluster(v.1.2.3), GenomeInfoDbData(v.1.2.8), iterators(v.1.0.14), HDF5Array(v.1.24.2), reshape2(v.1.4.4), gtable(v.0.3.1) and rbibutils(v.2.2.9)
message(paste0("This is hpgltools commit: ", get_git_commit()))
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset b0dcbf17fefc74206e61386580411962bf6beb4f
## This is hpgltools commit: Fri Oct 7 14:32:20 2022 -0400: b0dcbf17fefc74206e61386580411962bf6beb4f
<- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
this_save message(paste0("Saving to ", this_save))
## Saving to index-v20221025.rda.xz
<- sm(saveme(filename=this_save)) tmp