There are a few methods of importing annotation data into R. The following are two attempts; the second is the one currently used in these analyses.
## Download the TriTrypDB metadata (EuPathDB release 46); 'meta' indexes every
## available organism and is queried below to select individual species.
meta <- EuPathDB::download_eupath_metadata(webservice="tritrypdb", eu_version=46)
## Appending to an existing file: EuPathDB/metadata/biocv3.12_tritrypdbv50_metadata.csv
## Appending to an existing file: EuPathDB/metadata/GRanges_biocv3.12_tritrypdbv50_metadata.csv
## Appending to an existing file: EuPathDB/metadata/OrgDb_biocv3.12_tritrypdbv50_metadata.csv
## Appending to an existing file: EuPathDB/metadata/TxDb_biocv3.12_tritrypdbv50_metadata.csv
## Appending to an existing file: EuPathDB/metadata/OrganismDbi_biocv3.12_tritrypdbv50_metadata.csv
## Appending to an existing file: EuPathDB/metadata/BSgenome_biocv3.12_tritrypdbv50_metadata.csv
## Appending to an existing file: EuPathDB/metadata/biocv3.12_tritrypdbv50_invalid_metadata.csv
## Appending to an existing file: EuPathDB/metadata/GRanges_biocv3.12_tritrypdbv50_invalid_metadata.csv
## Appending to an existing file: EuPathDB/metadata/OrgDb_biocv3.12_tritrypdbv50_invalid_metadata.csv
## Appending to an existing file: EuPathDB/metadata/TxDb_biocv3.12_tritrypdbv50_invalid_metadata.csv
## Appending to an existing file: EuPathDB/metadata/OrganismDbi_biocv3.12_tritrypdbv50_invalid_metadata.csv
## Appending to an existing file: EuPathDB/metadata/BSgenome_biocv3.12_tritrypdbv50_invalid_metadata.csv
## Choose one metadata entry per species of interest.  The output below shows
## that get_eupath_entry() matches substrings of the species column (even a
## bare strain number like "2904"), taking the first hit when several match.
lm_entry <- EuPathDB::get_eupath_entry(species="Leishmania major", metadata=meta)
## Found the following hits: Leishmania major strain Friedlin, Leishmania major strain LV39c5, Leishmania major strain SD 75.1, choosing the first.
## Using: Leishmania major strain Friedlin.
lp_entry <- EuPathDB::get_eupath_entry(species="Leishmania panamensis", metadata=meta)
## Found the following hits: Leishmania panamensis MHOM/COL/81/L13, Leishmania panamensis strain MHOM/PA/94/PSC-1, choosing the first.
## Using: Leishmania panamensis MHOM/COL/81/L13.
lmex_entry <- EuPathDB::get_eupath_entry(species="Leishmania mexicana", metadata=meta)
## Found: Leishmania mexicana MHOM/GT/2001/U1103
lama_entry <- EuPathDB::get_eupath_entry(species="Leishmania amazonensis", metadata=meta)
## Found: Leishmania amazonensis MHOM/BR/71973/M2269
lb_entry <- EuPathDB::get_eupath_entry(species="2904", metadata=meta)
## Found the following hits: Leishmania braziliensis MHOM/BR/75/M2904, Leishmania braziliensis MHOM/BR/75/M2904 2019, choosing the first.
## Using: Leishmania braziliensis MHOM/BR/75/M2904.
ld_entry <- EuPathDB::get_eupath_entry(species="donovani", metadata=meta)
## Found the following hits: Leishmania donovani BPK282A1, Leishmania donovani strain BHU 1220, Leishmania donovani CL-SL, Leishmania donovani strain LV9, choosing the first.
## Using: Leishmania donovani BPK282A1.
crit_entry <- EuPathDB::get_eupath_entry(species="Crith", metadata=meta)
## Found: Crithidia fasciculata strain Cf-Cl
## Build one OrgDb annotation package per species from the entries chosen above.
testing_panamensis <- EuPathDB::make_eupath_orgdb(entry=lp_entry)
testing_braziliensis <- EuPathDB::make_eupath_orgdb(entry=lb_entry)
testing_donovani <- EuPathDB::make_eupath_orgdb(entry=ld_entry)
testing_mexicana <- EuPathDB::make_eupath_orgdb(entry=lmex_entry)
testing_major <- EuPathDB::make_eupath_orgdb(entry=lm_entry)
testing_crith <- EuPathDB::make_eupath_orgdb(entry=crit_entry)
Assuming the above packages were created successfully, we may load them and extract the annotation data.
## Annotation columns to pull from the EuPathDB-derived orgdb packages.
wanted_fields <- c("annot_cds_length", "annot_chromosome", "annot_gene_entrez_id",
                   "annot_gene_name", "annot_strand", "gid", "go_go_id",
                   "go_go_term_name", "go_ontology",
                   "interpro_description", "interpro_e_value", "type_gene_type")
## The last set of these I created with the old EuPathDB was version 46
## I also have new ones for version 49, but figured I should stick with the old ones for now.
lm_org <- sm(EuPathDB::load_eupath_annotations(query=lm_entry, eu_version=46))
lp_org <- sm(EuPathDB::load_eupath_annotations(query=lp_entry, eu_version=46))
lb_org <- sm(EuPathDB::load_eupath_annotations(query=lb_entry, eu_version=46))
ld_org <- sm(EuPathDB::load_eupath_annotations(query=ld_entry, eu_version=46))
lmex_org <- sm(EuPathDB::load_eupath_annotations(query=lmex_entry, eu_version=46))
## NOTE(review): "cf_ort" looks like a typo for "cf_org"; kept as-is in case
## later code refers to it by this name -- confirm before renaming.
cf_ort <- sm(EuPathDB::load_eupath_annotations(query=crit_entry, eu_version=46))
In contrast, it is possible to load most annotations of interest directly from the gff files used in the alignments. More in-depth information for the human transcriptome may be extracted from biomart.
## The old way of getting genome/annotation data
## Annotation (gff/gtf) and genome (fasta) files used during the alignments.
lp_gff <- "reference/lpanamensis.gff"
lb_gff <- "reference/lbraziliensis.gff"
hs_gff <- "reference/hsapiens.gtf"
lp_fasta <- "reference/lpanamensis.fasta.xz"
lb_fasta <- "reference/lbraziliensis.fasta.xz"
hs_fasta <- "reference/hsapiens.fasta.xz"
lp_annotations <- sm(load_gff_annotations(lp_gff, type="gene"))
## Rename the rows to exon_<web_id>.1 -- presumably to match the identifiers
## used in the L. panamensis count tables; confirm against the count files.
rownames(lp_annotations) <- paste0("exon_", lp_annotations$web_id, ".1")
lb_annotations <- sm(load_gff_annotations(lb_gff, type="gene"))
hs_gff_annot <- sm(load_gff_annotations(hs_gff, id_col="gene_id"))
## Human annotations come from biomart rather than the gtf.
hs_annotations <- sm(load_biomart_annotations())$annotation
hs_annotations$ID <- hs_annotations$geneID
rownames(hs_annotations) <- make.names(hs_annotations[["ensembl_gene_id"]], unique=TRUE)
dim(hs_annotations)
## [1] 197995 12
## Gene-length distributions; the human plot is truncated at 20kb.
lp_size_dist <- plot_histogram(lp_annotations[["width"]])
lp_size_dist
hs_size_dist <- plot_histogram(hs_annotations[["cds_length"]]) +
  ggplot2::scale_x_continuous(limits=c(0, 20000))
hs_size_dist
## Warning: Removed 103681 rows containing non-finite values (stat_bin).
## Warning: Removed 103681 rows containing non-finite values (stat_density).
## Warning: Removed 2 rows containing missing values (geom_bar).
Annotation for gene ontologies may be gathered from a similarly large number of sources. The following are a couple.
## Try using biomart
hs_go <- sm(load_biomart_go())
## or the org.Hs.eg.db sqlite database
tt <- sm(library("Homo.sapiens"))
hs <- Homo.sapiens
##hs_go_ensembl <- load_orgdb_go(hs, hs_annotations$geneID)
##dim(hs_go_biomart)
##dim(hs_go_ensembl)
##hs_goids <- hs_go_biomart
## While testing, I called this desc, that will need to change.
##lp_tooltips <- make_tooltips(lp_annotations)
##lb_tooltips <- make_tooltips(lb_annotations)
## Extract ID/length tables; harmonize the human column names to match.
lp_lengths <- lp_annotations[, c("ID", "width")]
lb_lengths <- lb_annotations[, c("ID", "width")]
hs_lengths <- hs_annotations[, c("ensembl_gene_id", "cds_length")]
colnames(hs_lengths) <- c("ID", "width")
## GO ID tables exported from the (older) TriTrypDB text dumps.
lp_goids <- read.csv(file="reference/lpan_go.txt.xz", sep="\t", header=FALSE)
lb_goids <- read.csv(file="reference/lbraz_go.txt.xz", sep="\t", header=FALSE)
colnames(lp_goids) <- c("ID", "GO", "ont", "name", "source", "tag")
colnames(lb_goids) <- c("ID", "GO", "ont", "name", "source", "tag")
The PBMC experiment has samples across two contexts: the host and the parasite. The following block sets up one experiment for each. If you open the all_samples-species.xlsx files, you will note immediately that a few different attempts were made at ascertaining the most likely experimental factors that contributed to the readily apparent batch effects.
Start out by extracting the relevant data and querying it to see the general quality.
This first block sets the names of the samples and colors. It also makes separate data sets for:
## Trim the biomart annotations down to the columns used by create_expt().
hs_final_annotations <- hs_annotations
hs_final_annotations <- hs_final_annotations[, c("ensembl_transcript_id", "ensembl_gene_id", "cds_length",
                                                 "hgnc_symbol", "description", "gene_biotype")]
## Stash the rownames in a scratch column, then restore and drop it.
## NOTE(review): as written this round-trip is a no-op -- confirm intent.
hs_final_annotations$rn <- rownames(hs_final_annotations)
note <- "New experimental design factors by snp added 2016-09-20"
rownames(hs_final_annotations) <- hs_final_annotations$rn
hs_final_annotations$rn <- NULL
hs_length <- hs_final_annotations[, c("ensembl_gene_id", "cds_length")]
colnames(hs_length) <- c("ID", "length")
## Human PBMC experiment: all samples.
hs_pbmc <- create_expt(
  metadata=glue::glue("sample_sheets/pbmc_samples_{ver}.xlsx"),
  gene_info=hs_final_annotations,
  file_column="humanfile",
  notes=note)
## Reading the sample metadata.
## Dropped 1 rows from the sample metadata because they were blank.
## The sample definitions comprises: 21 rows(samples) and 38 columns(metadata fields).
## Reading count tables.
## Reading count files with read.table().
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0630/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0631/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0632/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0633/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0634/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0635/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0636/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0650/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0651/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0652/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0653/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0654/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0655/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0656/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0657/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0658/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0659/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0660/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0661/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0662/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0663/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## Finished reading count data.
## Matched 43897 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 51041 rows and 21 columns.
## Same experiment with two problematic samples removed ("removetwo" sheet).
hs_inf <- create_expt(
  metadata=glue::glue("sample_sheets/pbmc_samples_removetwo_{ver}.xlsx"),
  gene_info=hs_final_annotations,
  file_column="humanfile",
  notes=note)
## Reading the sample metadata.
## Dropped 1 rows from the sample metadata because they were blank.
## The sample definitions comprises: 15 rows(samples) and 33 columns(metadata fields).
## Reading count tables.
## Reading count files with read.table().
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0630/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0631/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0632/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0635/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0636/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0650/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0651/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0652/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0655/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0656/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0657/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0658/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0659/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0662/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## /mnt/sshfs_10186/cbcbsub01/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/lpanamensis_2016/preprocessing/hpgl0663/outputs/tophat_hsapiens/accepted_paired.count.xz contains 51046 rows and merges to 51046 rows.
## Finished reading count data.
## Matched 43897 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 51041 rows and 15 columns.
## One color per condition: uninfected, chronic, self-healing.
chosen_colors <- c("#009900", "#990000", "#000099")
names(chosen_colors) <- c("uninf", "chr", "sh")
hs_pbmc <- set_expt_colors(hs_pbmc, colors=chosen_colors)
## The new colors are a character, changing according to condition.
hs_inf <- set_expt_colors(hs_inf, colors=chosen_colors)
## The new colors are a character, changing according to condition.
## Rename samples to label_donor for readability in the plots.
inf_newnames <- paste0(pData(hs_inf)$label, "_", pData(hs_inf)$donor)
hs_inf <- set_expt_samplenames(hs_inf, newnames=inf_newnames)
## Keep only protein-coding genes.
hs_cds_pbmc <- exclude_genes_expt(hs_pbmc, method="keep",
                                  column="gene_biotype",
                                  patterns="protein_coding")
## Before removal, there were 51041 entries.
## Now there are 18847 entries.
## Percent kept: 94.947, 94.996, 95.664, 95.257, 94.961, 94.865, 94.772, 94.631, 96.357, 96.235, 95.806, 96.247, 96.226, 96.281, 93.932, 94.758, 94.615, 94.553, 94.932, 94.817, 95.072
## Percent removed: 5.053, 5.004, 4.336, 4.743, 5.039, 5.135, 5.228, 5.369, 3.643, 3.765, 4.194, 3.753, 3.774, 3.719, 6.068, 5.242, 5.385, 5.447, 5.068, 5.183, 4.928
## Keep only protein-coding genes in the reduced sample set.
hs_cds_inf <- exclude_genes_expt(hs_inf, method="keep",
                                 column="gene_biotype",
                                 patterns="protein_coding")
## Before removal, there were 51041 entries.
## Now there are 18847 entries.
## Percent kept: 94.947, 94.996, 95.664, 94.865, 94.772, 94.631, 96.357, 96.235, 96.226, 96.281, 93.932, 94.758, 94.615, 94.817, 95.072
## Percent removed: 5.053, 5.004, 4.336, 5.135, 5.228, 5.369, 3.643, 3.765, 3.774, 3.719, 6.068, 5.242, 5.385, 5.183, 4.928
## Parasite-side experiments built from the same sample sheets.
lp_pbmc_all <- sm(create_expt(
  metadata=glue::glue("sample_sheets/pbmc_samples_{ver}.xlsx"),
  gene_info=lp_annotations, file_column="parasitefile"))
lp_pbmc_inf <- sm(create_expt(
  metadata=glue::glue("sample_sheets/pbmc_samples_removetwo_{ver}.xlsx"),
  gene_info=lp_annotations, file_column="parasitefile"))
hs_inf_met <- sm(graph_metrics(hs_cds_inf))
## Write the infected-only data set to xlsx, normalized with
## log2(svaseq(cpm(quant(filtered)))).
hs_inf_write <- write_expt(
  hs_cds_inf, transform="log2", batch="svaseq",
  norm="quant", convert="cpm",
  filter=TRUE,
  excel=glue::glue("excel/pbmc_only_infected-v{ver}.xlsx"))
## Writing the first sheet, containing a legend and some summary data.
## Writing the raw reads.
## Graphing the raw reads.
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning in doTryCatch(return(expr), name, parentenv, handler): display list
## redraw incomplete
## Warning in doTryCatch(return(expr), name, parentenv, handler): display list
## redraw incomplete
## Attempting mixed linear model with: ~ condition + batch
## Fitting the expressionset to the model, this is slow.
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:S4Vectors':
##
## expand
##
## Total:103 s
## Placing factor: condition at the beginning of the model.
## Writing the normalized reads.
## Graphing the normalized reads.
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## Warning in doTryCatch(return(expr), name, parentenv, handler): display list
## redraw incomplete
## Warning in doTryCatch(return(expr), name, parentenv, handler): display list
## redraw incomplete
## Attempting mixed linear model with: ~ condition + batch
## Fitting the expressionset to the model, this is slow.
##
## Total:81 s
## Placing factor: condition at the beginning of the model.
## Writing the median reads by factor.
## Step 1, all samples (cds only) including uninfected samples.
pbmc_norm <- normalize_expt(hs_cds_pbmc, transform="log2", convert="cpm",
                            norm="quant", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 6745 low-count genes (12102 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## The method is: raw.
## Step 4: not doing batch correction.
## Step 4: transforming the data with log2.
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
plt <- plot_pca(pbmc_norm)$plot
pp(file=glue::glue("images/pbmc_step1-v{ver}.pdf"), image=plt)
## Writing the image to: images/pbmc_step1-v202102.pdf and calling dev.off().
plt
## Step 1a.  Same data, svaseq batch estimation instead of quantile norm.
pbmc_nb <- normalize_expt(hs_cds_pbmc, convert="cpm",
                          filter=TRUE, batch="svaseq")
## This function will replace the expt$expressionset slot with:
## svaseq(cpm(cbcb(data)))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 6745 low-count genes (12102 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## The method is: svaseq.
## Step 4: doing batch correction with svaseq.
## Using the current state of normalization.
## Passing the data to all_adjusters using the svaseq estimate type.
## batch_counts: Before batch/surrogate estimation, 248341 entries are x>1: 98%.
## batch_counts: Before batch/surrogate estimation, 325 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 5476 entries are 0<x<1: 2%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## There are 92 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Step 4: not transforming the data.
## log2 transform the sva-adjusted counts for plotting.
pbmc_nb <- normalize_expt(pbmc_nb, transform="log2")
## This function will replace the expt$expressionset slot with:
## log2(data)
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Filter is false, this should likely be set to something, good
## choices include cbcb, kofa, pofa (anything but FALSE). If you want this to
## stay FALSE, keep in mind that if other normalizations are performed, then the
## resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: not doing count filtering.
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## The method is: raw.
## Step 4: not doing batch correction.
## Step 4: transforming the data with log2.
## transform_counts: Found 92 values equal to 0, adding 1 to the matrix.
plt <- plot_pca(pbmc_nb)$plot
pp(file=glue::glue("images/pbmc_step1a-v{ver}-d{rundate}.pdf"), image=plt)
## Writing the image to: images/pbmc_step1a-v202102-d20210309.pdf and calling dev.off().
plt
## Step 2, drop the uninfected samples and repeat.
hs_minus <- subset_expt(hs_cds_pbmc, subset="condition!='uninf'")
## Using a subset expression.
## There were 21, now there are 18 samples.
## minus_norm <- normalize_expt(hs_minus, transform="log2", convert="cpm", norm="quant", filter=TRUE)
minus_norm <- normalize_expt(hs_minus, convert="cpm", norm="quant", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## cpm(quant(cbcb(data)))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 6882 low-count genes (11965 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## The method is: raw.
## Step 4: not doing batch correction.
## Step 4: not transforming the data.
## log2 transform as a separate step.
minus_norm <- normalize_expt(minus_norm, transform="log2")
## This function will replace the expt$expressionset slot with:
## log2(data)
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Filter is false, this should likely be set to something, good
## choices include cbcb, kofa, pofa (anything but FALSE). If you want this to
## stay FALSE, keep in mind that if other normalizations are performed, then the
## resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: not doing count filtering.
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## The method is: raw.
## Step 4: not doing batch correction.
## Step 4: transforming the data with log2.
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
plt <- plot_pca(minus_norm)$plot
pp(file=glue::glue("images/pbmc_step2-v{ver}.pdf"), image=plt)
## Writing the image to: images/pbmc_step2-v202102.pdf and calling dev.off().
plt
## Step 2a
##minus_nb <- normalize_expt(hs_minus, transform="log2", convert="cpm", filter=TRUE, batch="svaseq")
minus_nb <- normalize_expt(hs_minus, convert="cpm", filter=TRUE, batch="svaseq")
## This function will replace the expt$expressionset slot with:
## svaseq(cpm(cbcb(data)))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 6882 low-count genes (11965 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## The method is: svaseq.
## Step 4: doing batch correction with svaseq.
## Using the current state of normalization.
## Passing the data to all_adjusters using the svaseq estimate type.
## batch_counts: Before batch/surrogate estimation, 211543 entries are x>1: 98%.
## batch_counts: Before batch/surrogate estimation, 233 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 3594 entries are 0<x<1: 2%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## There are 20 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Step 4: not transforming the data.
## log2 transform the sva-adjusted counts for plotting.
minus_nb <- normalize_expt(minus_nb, transform="log2")
## This function will replace the expt$expressionset slot with:
## log2(data)
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Filter is false, this should likely be set to something, good
## choices include cbcb, kofa, pofa (anything but FALSE). If you want this to
## stay FALSE, keep in mind that if other normalizations are performed, then the
## resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: not doing count filtering.
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## The method is: raw.
## Step 4: not doing batch correction.
## Step 4: transforming the data with log2.
## transform_counts: Found 20 values equal to 0, adding 1 to the matrix.
plt <- plot_pca(minus_nb)$plot
pp(file=glue::glue("images/pbmc_step2a-v{ver}.pdf"), image=plt)
## Writing the image to: images/pbmc_step2a-v202102.pdf and calling dev.off().
plt
## Step 3, drop the problematic samples and repeat.
# Step 3: drop the uninfected samples (15 -> 12 samples per the logged output).
minus_minus <- subset_expt(hs_cds_inf, subset="condition!='uninf'")
## Using a subset expression.
## There were 15, now there are 12 samples.
# Filter/quantile-normalize/cpm/log2 the subset for visualization.
minusminus_norm <- normalize_expt(minus_minus, transform="log2", convert="cpm",
                                  norm="quant", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 6985 low-count genes (11862 remaining).
## Step 2: normalizing the data with quant.
## Step 3: converting the data with cpm.
## The method is: raw.
## Step 4: not doing batch correction.
## Step 4: transforming the data with log2.
## transform_counts: Found 16 values equal to 0, adding 1 to the matrix.
# PCA of the normalized subset, written to images/pbmc_step3-v{ver}.pdf.
plt <- plot_pca(minusminus_norm)$plot
pp(file=glue::glue("images/pbmc_step3-v{ver}.pdf"), image=plt)
## Writing the image to: images/pbmc_step3-v202102.pdf and calling dev.off().
plt
## Step 4, add sva to step 3.
# Step 4: same as step 3 but with svaseq surrogate estimation instead of quantile norm.
minusminus_batch <- normalize_expt(minus_minus, transform="log2", convert="cpm",
                                   batch="svaseq", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 6985 low-count genes (11862 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## The method is: svaseq.
## Step 4: doing batch correction with svaseq.
## Using the current state of normalization.
## Passing the data to all_adjusters using the svaseq estimate type.
## batch_counts: Before batch/surrogate estimation, 140275 entries are x>1: 99%.
## batch_counts: Before batch/surrogate estimation, 140 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 1929 entries are 0<x<1: 1%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## There are 8 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Step 4: transforming the data with log2.
## transform_counts: Found 8 values equal to 0, adding 1 to the matrix.
# PCA after svaseq correction, written to images/pbmc_step4-v{ver}.pdf.
plt <- plot_pca(minusminus_batch)$plot
pp(file=glue::glue("images/pbmc_step4-v{ver}.pdf"), image=plt)
## Writing the image to: images/pbmc_step4-v202102.pdf and calling dev.off().
plt
## An alternate version, where the transformation is explicitly last
# Alternate version: run svaseq first, then log2-transform explicitly afterwards.
minusminus_batch <- normalize_expt(minus_minus, convert="cpm", batch="svaseq", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## svaseq(cpm(cbcb(data)))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 6985 low-count genes (11862 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## The method is: svaseq.
## Step 4: doing batch correction with svaseq.
## Using the current state of normalization.
## Passing the data to all_adjusters using the svaseq estimate type.
## batch_counts: Before batch/surrogate estimation, 140275 entries are x>1: 99%.
## batch_counts: Before batch/surrogate estimation, 140 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 1929 entries are 0<x<1: 1%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## There are 8 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Step 4: not transforming the data.
# Apply the log2 transform as a separate, explicitly-last step.
minusminus_batch <- normalize_expt(minusminus_batch, transform="log2")
## This function will replace the expt$expressionset slot with:
## log2(data)
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Filter is false, this should likely be set to something, good
## choices include cbcb, kofa, pofa (anything but FALSE). If you want this to
## stay FALSE, keep in mind that if other normalizations are performed, then the
## resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: not doing count filtering.
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## The method is: raw.
## Step 4: not doing batch correction.
## Step 4: transforming the data with log2.
## transform_counts: Found 8 values equal to 0, adding 1 to the matrix.
# PCA of the separately-transformed data, written to images/pbmc_step4_separate-v{ver}.pdf.
plt <- plot_pca(minusminus_batch)$plot
pp(file=glue::glue("images/pbmc_step4_separate-v{ver}.pdf"), image=plt)
## Writing the image to: images/pbmc_step4_separate-v202102.pdf and calling dev.off().
plt
# Repeat the step-4 normalization, this time keeping the uninfected samples.
step4_with_uninf <- normalize_expt(hs_cds_inf, convert="cpm", batch="svaseq", filter=TRUE)
## This function will replace the expt$expressionset slot with:
## svaseq(cpm(cbcb(data)))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 6800 low-count genes (12047 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## The method is: svaseq.
## Step 4: doing batch correction with svaseq.
## Using the current state of normalization.
## Passing the data to all_adjusters using the svaseq estimate type.
## batch_counts: Before batch/surrogate estimation, 176883 entries are x>1: 98%.
## batch_counts: Before batch/surrogate estimation, 227 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 3595 entries are 0<x<1: 2%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## There are 75 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Step 4: not transforming the data.
# Log2-transform the svaseq-corrected data as a separate final step.
step4_with_uninf <- normalize_expt(step4_with_uninf, transform="log2")
## This function will replace the expt$expressionset slot with:
## log2(data)
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Filter is false, this should likely be set to something, good
## choices include cbcb, kofa, pofa (anything but FALSE). If you want this to
## stay FALSE, keep in mind that if other normalizations are performed, then the
## resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: not doing count filtering.
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## The method is: raw.
## Step 4: not doing batch correction.
## Step 4: transforming the data with log2.
## transform_counts: Found 75 values equal to 0, adding 1 to the matrix.
# PCA with the uninfected samples included.
plt <- plot_pca(step4_with_uninf)$plot
pp(file=glue::glue("step4_with_uninfected_separate-v{ver}.pdf"), image=plt)
## Writing the image to: step4_with_uninfected_separate-v202102.pdf and calling dev.off().
plt
# Contrasts of interest for combine_de_tables(): each entry is c(numerator, denominator).
keepers <- list("sh_nil" = c("sh", "uninf"),
                "ch_nil" = c("chr", "uninf"),
                "ch_sh" = c("chr", "sh"))
levels(pData(hs_cds_pbmc)$condition)
## [1] "uninf" "chr" "sh"
nrow(pData(hs_cds_pbmc))
## [1] 21
# Run all pairwise DE methods on the full PBMC set with svaseq surrogates in the model.
pbmc_de <- all_pairwise(hs_cds_pbmc, model_batch="svaseq", filter=TRUE)
## batch_counts: Before batch/surrogate estimation, 253599 entries are x>1: 100%.
## batch_counts: Before batch/surrogate estimation, 325 entries are x==0: 0%.
## The be method chose 3 surrogate variables.
## Attempting svaseq estimation with 3 surrogates.
## Plotting a PCA before surrogate/batch inclusion.
## Not putting labels on the PC plot.
## Using svaseq to visualize before/after batch inclusion.
## Performing a test normalization with: raw
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 0 low-count genes (12102 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## The method is: svaseq.
## Step 4: doing batch correction with svaseq.
## Using the current state of normalization.
## Passing the data to all_adjusters using the svaseq estimate type.
## batch_counts: Before batch/surrogate estimation, 248341 entries are x>1: 98%.
## batch_counts: Before batch/surrogate estimation, 325 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 5476 entries are 0<x<1: 2%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## There are 92 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Step 4: transforming the data with log2.
## transform_counts: Found 92 values equal to 0, adding 1 to the matrix.
## Not putting labels on the PC plot.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
# Combine the per-method DE results into one xlsx, keeping the 'keepers' contrasts.
pbmc_tables <- combine_de_tables(
  pbmc_de, keepers=keepers,
  excel=glue::glue("excel/pbmc_de_tables_svaseq-v{ver}-d{rundate}.xlsx"))
## Writing a legend of columns.
## Printing a pca plot before/after surrogates/batch estimation.
## Working on 1/3: sh_nil which is: sh/uninf.
## Found inverse table with uninf_vs_sh
## Working on 2/3: ch_nil which is: chr/uninf.
## Found inverse table with uninf_vs_chr
## Working on 3/3: ch_sh which is: chr/sh.
## Found inverse table with sh_vs_chr
## Adding venn plots for sh_nil.
## Limma expression coefficients for sh_nil; R^2: 0.954; equation: y = 0.977x + 0.0163
## Deseq expression coefficients for sh_nil; R^2: 0.954; equation: y = 1.01x - 0.119
## Edger expression coefficients for sh_nil; R^2: 0.954; equation: y = 1.01x - 0.116
## Adding venn plots for ch_nil.
## Limma expression coefficients for ch_nil; R^2: 0.955; equation: y = 0.975x + 0.0211
## Deseq expression coefficients for ch_nil; R^2: 0.955; equation: y = 1x - 0.0525
## Edger expression coefficients for ch_nil; R^2: 0.955; equation: y = 1x - 0.0508
## Adding venn plots for ch_sh.
## Limma expression coefficients for ch_sh; R^2: 0.997; equation: y = 0.998x + 0.00414
## Deseq expression coefficients for ch_sh; R^2: 0.997; equation: y = 0.992x + 0.0696
## Edger expression coefficients for ch_sh; R^2: 0.997; equation: y = 0.992x + 0.0516
## Writing summary information, compare_plot is: TRUE.
## Performing save of excel/pbmc_de_tables_svaseq-v202102-d20210309.xlsx.
# Extract significant genes at |lfc| >= 0.585 (~1.5 fold) into a second xlsx.
pbmc_sig <- extract_significant_genes(
  pbmc_tables, lfc=0.585,
  excel=glue::glue("excel/pbmc_sig_tables_svaseq-v{ver}-d{rundate}.xlsx"))
## Printing significant genes to the file: excel/pbmc_sig_tables_svaseq-v202102-d20210309.xlsx
## 1/3: Creating significant table up_limma_sh_nil
## 2/3: Creating significant table up_limma_ch_nil
## The up table ch_sh is empty.
## The down table ch_sh is empty.
## Printing significant genes to the file: excel/pbmc_sig_tables_svaseq-v202102-d20210309.xlsx
## 1/3: Creating significant table up_edger_sh_nil
## 2/3: Creating significant table up_edger_ch_nil
## The up table ch_sh is empty.
## The down table ch_sh is empty.
## Printing significant genes to the file: excel/pbmc_sig_tables_svaseq-v202102-d20210309.xlsx
## 1/3: Creating significant table up_deseq_sh_nil
## 2/3: Creating significant table up_deseq_ch_nil
## The up table ch_sh is empty.
## The down table ch_sh is empty.
## Printing significant genes to the file: excel/pbmc_sig_tables_svaseq-v202102-d20210309.xlsx
## 1/3: Creating significant table up_ebseq_sh_nil
## 2/3: Creating significant table up_ebseq_ch_nil
## The up table ch_sh is empty.
## The down table ch_sh is empty.
## Printing significant genes to the file: excel/pbmc_sig_tables_svaseq-v202102-d20210309.xlsx
## 1/3: Creating significant table up_basic_sh_nil
## 2/3: Creating significant table up_basic_ch_nil
## The up table ch_sh is empty.
## The down table ch_sh is empty.
## Adding significance bar plots.
# Same DE analysis, but with batch as a model factor instead of svaseq surrogates.
pbmc_de_batch <- all_pairwise(hs_cds_pbmc, model_batch=TRUE, filter=TRUE)
## Plotting a PCA before surrogate/batch inclusion.
## Not putting labels on the PC plot.
## Using limma's removeBatchEffect to visualize with(out) batch inclusion.
## Not putting labels on the PC plot.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
# Combine the batch-in-model DE results into an xlsx.
pbmc_batch_tables <- combine_de_tables(
  pbmc_de_batch, keepers=keepers,
  excel=glue::glue("excel/pbmc_de_tables_batch-v{ver}-d{rundate}.xlsx"))
## Writing a legend of columns.
## Printing a pca plot before/after surrogates/batch estimation.
## Working on 1/3: sh_nil which is: sh/uninf.
## Found inverse table with uninf_vs_sh
## Working on 2/3: ch_nil which is: chr/uninf.
## Found inverse table with uninf_vs_chr
## Working on 3/3: ch_sh which is: chr/sh.
## Found inverse table with sh_vs_chr
## Adding venn plots for sh_nil.
## Limma expression coefficients for sh_nil; R^2: 0.953; equation: y = 0.973x + 0.034
## Deseq expression coefficients for sh_nil; R^2: 0.953; equation: y = 1.01x - 0.131
## Edger expression coefficients for sh_nil; R^2: 0.953; equation: y = 1.01x - 0.129
## Adding venn plots for ch_nil.
## Limma expression coefficients for ch_nil; R^2: 0.955; equation: y = 0.973x + 0.03
## Deseq expression coefficients for ch_nil; R^2: 0.956; equation: y = 1.01x - 0.0687
## Edger expression coefficients for ch_nil; R^2: 0.955; equation: y = 1x - 0.0645
## Adding venn plots for ch_sh.
## Limma expression coefficients for ch_sh; R^2: 0.997; equation: y = 0.998x + 0.00703
## Deseq expression coefficients for ch_sh; R^2: 0.997; equation: y = 0.992x + 0.073
## Edger expression coefficients for ch_sh; R^2: 0.997; equation: y = 0.992x + 0.0547
## Writing summary information, compare_plot is: TRUE.
## Performing save of excel/pbmc_de_tables_batch-v202102-d20210309.xlsx.
# Significant genes for the batch-in-model analysis at the same |lfc| cutoff.
pbmc_batch_sig <- extract_significant_genes(
  pbmc_batch_tables, lfc=0.585,
  excel=glue::glue("excel/pbmc_sig_tables_batch-v{ver}-d{rundate}.xlsx"))
## Printing significant genes to the file: excel/pbmc_sig_tables_batch-v202102-d20210309.xlsx
## 1/3: Creating significant table up_limma_sh_nil
## 2/3: Creating significant table up_limma_ch_nil
## The up table ch_sh is empty.
## The down table ch_sh is empty.
## Printing significant genes to the file: excel/pbmc_sig_tables_batch-v202102-d20210309.xlsx
## 1/3: Creating significant table up_edger_sh_nil
## 2/3: Creating significant table up_edger_ch_nil
## The up table ch_sh is empty.
## The down table ch_sh is empty.
## Printing significant genes to the file: excel/pbmc_sig_tables_batch-v202102-d20210309.xlsx
## 1/3: Creating significant table up_deseq_sh_nil
## 2/3: Creating significant table up_deseq_ch_nil
## The up table ch_sh is empty.
## The down table ch_sh is empty.
## Printing significant genes to the file: excel/pbmc_sig_tables_batch-v202102-d20210309.xlsx
## 1/3: Creating significant table up_ebseq_sh_nil
## 2/3: Creating significant table up_ebseq_ch_nil
## The up table ch_sh is empty.
## The down table ch_sh is empty.
## Printing significant genes to the file: excel/pbmc_sig_tables_batch-v202102-d20210309.xlsx
## 1/3: Creating significant table up_basic_sh_nil
## 2/3: Creating significant table up_basic_ch_nil
## The up table ch_sh is empty.
## The down table ch_sh is empty.
## Adding significance bar plots.
levels(pData(hs_cds_inf)$condition)
## [1] "uninf" "chr" "sh"
nrow(pData(hs_cds_inf))
## [1] 15
# DE analysis on the 15-sample infected-only set, svaseq surrogates in the model.
minus_de <- all_pairwise(hs_cds_inf, model_batch="svaseq", filter=TRUE)
## batch_counts: Before batch/surrogate estimation, 180330 entries are x>1: 100%.
## batch_counts: Before batch/surrogate estimation, 227 entries are x==0: 0%.
## The be method chose 3 surrogate variables.
## Attempting svaseq estimation with 3 surrogates.
## Plotting a PCA before surrogate/batch inclusion.
## Not putting labels on the PC plot.
## Using svaseq to visualize before/after batch inclusion.
## Performing a test normalization with: raw
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 0 low-count genes (12047 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## The method is: svaseq.
## Step 4: doing batch correction with svaseq.
## Using the current state of normalization.
## Passing the data to all_adjusters using the svaseq estimate type.
## batch_counts: Before batch/surrogate estimation, 176883 entries are x>1: 98%.
## batch_counts: Before batch/surrogate estimation, 227 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 3595 entries are 0<x<1: 2%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## There are 75 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Step 4: transforming the data with log2.
## transform_counts: Found 75 values equal to 0, adding 1 to the matrix.
## Not putting labels on the PC plot.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
# Combine the no-uninfected DE results into an xlsx.
minus_tables <- combine_de_tables(
  minus_de, keepers=keepers,
  excel=glue::glue("excel/pbmc_de_tables_nouninf_svaseq-v{ver}-d{rundate}.xlsx"))
## Writing a legend of columns.
## Printing a pca plot before/after surrogates/batch estimation.
## Working on 1/3: sh_nil which is: sh/uninf.
## Found inverse table with uninf_vs_sh
## Working on 2/3: ch_nil which is: chr/uninf.
## Found inverse table with uninf_vs_chr
## Working on 3/3: ch_sh which is: chr/sh.
## Found inverse table with sh_vs_chr
## Adding venn plots for sh_nil.
## Limma expression coefficients for sh_nil; R^2: 0.953; equation: y = 0.978x + 0.00797
## Deseq expression coefficients for sh_nil; R^2: 0.953; equation: y = 1.01x - 0.154
## Edger expression coefficients for sh_nil; R^2: 0.953; equation: y = 1.01x - 0.156
## Adding venn plots for ch_nil.
## Limma expression coefficients for ch_nil; R^2: 0.946; equation: y = 0.972x + 0.0337
## Deseq expression coefficients for ch_nil; R^2: 0.947; equation: y = 1x - 0.0596
## Edger expression coefficients for ch_nil; R^2: 0.947; equation: y = 1x - 0.0605
## Adding venn plots for ch_sh.
## Limma expression coefficients for ch_sh; R^2: 0.993; equation: y = 0.997x + 0.0107
## Deseq expression coefficients for ch_sh; R^2: 0.994; equation: y = 0.992x + 0.0661
## Edger expression coefficients for ch_sh; R^2: 0.994; equation: y = 0.993x + 0.0517
## Writing summary information, compare_plot is: TRUE.
## Performing save of excel/pbmc_de_tables_nouninf_svaseq-v202102-d20210309.xlsx.
# Significant genes for the no-uninfected svaseq analysis.
minus_sig <- extract_significant_genes(
  minus_tables, lfc=0.585,
  excel=glue::glue("excel/pbmc_sig_tables_nouninf_svaseq-v{ver}-d{rundate}.xlsx"))
## Printing significant genes to the file: excel/pbmc_sig_tables_nouninf_svaseq-v202102-d20210309.xlsx
## 1/3: Creating significant table up_limma_sh_nil
## 2/3: Creating significant table up_limma_ch_nil
## 3/3: Creating significant table up_limma_ch_sh
## Printing significant genes to the file: excel/pbmc_sig_tables_nouninf_svaseq-v202102-d20210309.xlsx
## 1/3: Creating significant table up_edger_sh_nil
## 2/3: Creating significant table up_edger_ch_nil
## 3/3: Creating significant table up_edger_ch_sh
## Printing significant genes to the file: excel/pbmc_sig_tables_nouninf_svaseq-v202102-d20210309.xlsx
## 1/3: Creating significant table up_deseq_sh_nil
## 2/3: Creating significant table up_deseq_ch_nil
## 3/3: Creating significant table up_deseq_ch_sh
## Printing significant genes to the file: excel/pbmc_sig_tables_nouninf_svaseq-v202102-d20210309.xlsx
## 1/3: Creating significant table up_ebseq_sh_nil
## 2/3: Creating significant table up_ebseq_ch_nil
## 3/3: Creating significant table up_ebseq_ch_sh
## Printing significant genes to the file: excel/pbmc_sig_tables_nouninf_svaseq-v202102-d20210309.xlsx
## 1/3: Creating significant table up_basic_sh_nil
## 2/3: Creating significant table up_basic_ch_nil
## The up table ch_sh is empty.
## The down table ch_sh is empty.
## Adding significance bar plots.
# Batch-in-model variant on the infected-only set.
# NOTE(review): the logged output below shows this call failed with a worker
# initialization error (missing 'hpgltools' on the workers), so the two
# downstream calls also failed; rerun once the package is available everywhere.
minus_de_batch <- all_pairwise(hs_cds_inf, model_batch=TRUE, filter=TRUE)
## Plotting a PCA before surrogate/batch inclusion.
## Not putting labels on the PC plot.
## Using limma's removeBatchEffect to visualize with(out) batch inclusion.
## Not putting labels on the PC plot.
## Error in e$fun(obj, substitute(ex), parent.frame(), e$data): worker initialization failed: there is no package called ‘hpgltools’
# Combine the batch-in-model, no-uninfected DE results into an xlsx.
minus_tables_batch <- combine_de_tables(
  minus_de_batch, keepers=keepers,
  excel=glue::glue("excel/pbmc_de_tables_nouninf_batch-v{ver}-d{rundate}.xlsx"))
## Error in combine_de_tables(minus_de_batch, keepers = keepers, excel = glue::glue("excel/pbmc_de_tables_nouninf_batch-v{ver}-d{rundate}.xlsx")): object 'minus_de_batch' not found
# Significant genes for the batch-in-model, no-uninfected analysis.
minus_sig_batch <- extract_significant_genes(
  minus_tables_batch, lfc=0.585,
  excel=glue::glue("excel/pbmc_sig_tables_nouninf_batch-v{ver}-d{rundate}.xlsx"))
## Error in extract_significant_genes(minus_tables_batch, lfc = 0.585, excel = glue::glue("excel/pbmc_sig_tables_nouninf_batch-v{ver}-d{rundate}.xlsx")): object 'minus_tables_batch' not found
# Compare the with-uninfected vs. without-uninfected DE results per method/table.
similarities <- compare_de_results(pbmc_tables, minus_tables)
## Testing method: limma.
## Adding method: limma to the set.
## Testing method: deseq.
## Adding method: deseq to the set.
## Testing method: edger.
## Adding method: edger to the set.
## Starting method limma, table sh_nil.
## Starting method limma, table ch_nil.
## Starting method limma, table ch_sh.
## Starting method deseq, table sh_nil.
## Starting method deseq, table ch_nil.
## Starting method deseq, table ch_sh.
## Starting method edger, table sh_nil.
## Starting method edger, table ch_nil.
## Starting method edger, table ch_sh.
# Library-size barplot for the full PBMC set, written to a pdf.
pbmc_s1_libsize <- plot_libsize(hs_cds_pbmc)
pp(file="pictures/pbmc_s1_libsize.pdf", image=pbmc_s1_libsize$plot)
## Writing the image to: pictures/pbmc_s1_libsize.pdf and calling dev.off().
# Filter/svaseq/log2 normalization for the distance heatmap.
pbmc_heat_norm <- normalize_expt(hs_cds_pbmc, filter=TRUE, batch="svaseq", transform="log2")
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cbcb(data)))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 6745 low-count genes (12102 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## The method is: svaseq.
## Step 4: doing batch correction with svaseq.
## Using the current state of normalization.
## Passing the data to all_adjusters using the svaseq estimate type.
## batch_counts: Before batch/surrogate estimation, 253599 entries are x>1: 100%.
## batch_counts: Before batch/surrogate estimation, 325 entries are x==0: 0%.
## The be method chose 3 surrogate variables.
## Attempting svaseq estimation with 3 surrogates.
## There are 31 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Step 4: transforming the data with log2.
## transform_counts: Found 31 values equal to 0, adding 1 to the matrix.
# Sample-distance heatmap, written to a pdf.
pbmc_s1_heat <- plot_disheat(pbmc_heat_norm)
pp(file="pictures/pbmc_s1_disheat.pdf", image=pbmc_s1_heat)
## Writing the image to: pictures/pbmc_s1_disheat.pdf and calling dev.off().
Therefore I am reasonably certain the data to use for this operation is from ‘all_sig’. When using UpSetR, it expects a dataframe where columns are the categories (up_sh_vs_nil, down_sh_vs_nil, etc.), rows are genes, and each element is the number of times that element is true for the given [gene, category]. In this case, it will be either 0 or 1 for everything.
# Build the 0/1 membership matrix UpSetR expects: one row per gene, one column
# per significance category, using the DESeq up/down tables from pbmc_sig.
start <- data.frame(row.names=rownames(exprs(hs_cds_pbmc)))
start[["up_sh_vs_nil"]] <- 0
start[["down_sh_vs_nil"]] <- 0
start[["up_chr_vs_nil"]] <- 0
start[["down_chr_vs_nil"]] <- 0
# Flag the genes present in each significant table.
idx <- rownames(pbmc_sig[["deseq"]][["ups"]][["sh_nil"]])
start[idx, "up_sh_vs_nil"] <- 1
idx <- rownames(pbmc_sig[["deseq"]][["downs"]][["sh_nil"]])
start[idx, "down_sh_vs_nil"] <- 1
idx <- rownames(pbmc_sig[["deseq"]][["ups"]][["ch_nil"]])
start[idx, "up_chr_vs_nil"] <- 1
idx <- rownames(pbmc_sig[["deseq"]][["downs"]][["ch_nil"]])
start[idx, "down_chr_vs_nil"] <- 1
# Genes significant in exactly one category (row sum of 1) and in the sh set.
unique_sh_up_idx <- start[["up_sh_vs_nil"]] == 1 & rowSums(start) == 1
unique_sh_up <- start[unique_sh_up_idx, ]
unique_sh_down_idx <- start[["down_sh_vs_nil"]] == 1 & rowSums(start) == 1
unique_sh_down <- start[unique_sh_down_idx, ]
# gProfiler enrichment of the uniquely-up-in-sh genes.
test_up <- simple_gprofiler(sig_genes=rownames(unique_sh_up), species="hsapiens")
## Performing gProfiler GO search of 139 genes against hsapiens.
## GO search found 26 hits.
## Performing gProfiler KEGG search of 139 genes against hsapiens.
## KEGG search found 0 hits.
## Performing gProfiler REAC search of 139 genes against hsapiens.
## REAC search found 1 hits.
## Performing gProfiler MI search of 139 genes against hsapiens.
## MI search found 0 hits.
## Performing gProfiler TF search of 139 genes against hsapiens.
## TF search found 26 hits.
## Performing gProfiler CORUM search of 139 genes against hsapiens.
## CORUM search found 1 hits.
## Performing gProfiler HP search of 139 genes against hsapiens.
## HP search found 0 hits.
# gProfiler enrichment of the uniquely-down-in-sh genes.
test_down <- simple_gprofiler(sig_genes=rownames(unique_sh_down), species="hsapiens")
## Performing gProfiler GO search of 243 genes against hsapiens.
## GO search found 1 hits.
## Performing gProfiler KEGG search of 243 genes against hsapiens.
## KEGG search found 0 hits.
## Performing gProfiler REAC search of 243 genes against hsapiens.
## REAC search found 1 hits.
## Performing gProfiler MI search of 243 genes against hsapiens.
## MI search found 0 hits.
## Performing gProfiler TF search of 243 genes against hsapiens.
## TF search found 0 hits.
## Performing gProfiler CORUM search of 243 genes against hsapiens.
## CORUM search found 0 hits.
## Performing gProfiler HP search of 243 genes against hsapiens.
## HP search found 2 hits.
Thus, in the following picture we see:
# Draw the UpSet plot of the membership matrix; upset() draws to the active
# device, so record the plot before handing it to pp() for the pdf.
library(UpSetR)
upset(start)
plt <- grDevices::recordPlot()
pp(file="pictures/upset.pdf", image=plt)
## Writing the image to: pictures/upset.pdf and calling dev.off().
Let us grab the C7 immunology signatures from msigdb, there are a few ways to do this. One is to parse the xml files and another to parse the gmt files.
GSEABase::getGmt("reference/msigdb_v7.2/c7.all.v7.2.entrez.gmt",
broad_c7 <-collectionType=GSEABase::BroadCollection(category="c7"),
geneIdType=GSEABase::EntrezIdentifier())
broad_c8 <- GSEABase::getGmt("reference/msigdb_v7.2/c8.all.v7.2.entrez.gmt",
                             collectionType=GSEABase::BroadCollection(category="c8"),
                             geneIdType=GSEABase::EntrezIdentifier())
broad_c2 <- GSEABase::getGmt("reference/msigdb_v7.2/c2.all.v7.2.entrez.gmt",
                             collectionType=GSEABase::BroadCollection(category="c2"),
                             geneIdType=GSEABase::EntrezIdentifier())
better_names <- pData(hs_cds_pbmc)[["label"]]
hs_cds_pbmc_new <- set_expt_samplenames(expt=hs_cds_pbmc, newnames=better_names)
hs_cds_gsva_input <- normalize_expt(hs_cds_pbmc_new, filter=TRUE, batch="svaseq")
## This function will replace the expt$expressionset slot with:
## svaseq(cbcb(data))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unconverted. It is often advisable to cpm/rpkm
## the data to normalize for sampling differences, keep in mind though that rpkm
## has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
## will try to detect this).
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 6745 low-count genes (12102 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## The method is: svaseq.
## Step 4: doing batch correction with svaseq.
## Using the current state of normalization.
## Passing the data to all_adjusters using the svaseq estimate type.
## batch_counts: Before batch/surrogate estimation, 253599 entries are x>1: 100%.
## batch_counts: Before batch/surrogate estimation, 325 entries are x==0: 0%.
## The be method chose 3 surrogate variables.
## Attempting svaseq estimation with 3 surrogates.
## There are 31 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Step 4: not transforming the data.
hs_cds_gsva_input <- normalize_expt(hs_cds_gsva_input, convert="rpkm", column="cds_length")
## This function will replace the expt$expressionset slot with:
## rpkm(data)
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Filter is false, this should likely be set to something, good
## choices include cbcb, kofa, pofa (anything but FALSE). If you want this to
## stay FALSE, keep in mind that if other normalizations are performed, then the
## resulting libsizes are likely to be strange (potentially negative!)
## Leaving the data in its current base format, keep in mind that
## some metrics are easier to see when the data is log2 transformed, but
## EdgeR/DESeq do not accept transformed data.
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: not doing count filtering.
## Step 2: not normalizing the data.
## Step 3: converting the data with rpkm.
## The method is: raw.
## Step 4: not doing batch correction.
## Step 4: not transforming the data.
pbmc_gsva <- simple_gsva(hs_cds_gsva_input)
## Converting the rownames() of the expressionset to ENTREZID.
## 34 ENSEMBL ID's didn't have a matching ENTEREZ ID. Dropping them now.
## Before conversion, the expressionset has 12102 entries.
## After conversion, the expressionset has 12145 entries.
## Warning in .filterFeatures(expr, method): 295 genes with constant expression
## values throuhgout the samples.
## Mapping identifiers between gene sets and feature names
## Setting parallel calculations through a MulticoreParam back-end
## with workers=8 and tasks=100.
## Estimating ssGSEA scores for 2837 gene sets.
##
|
| | 0%
|
|=== | 5%
|
|======= | 10%
|
|========== | 14%
|
|============= | 19%
|
|================= | 24%
|
|==================== | 29%
|
|======================= | 33%
|
|=========================== | 38%
|
|============================== | 43%
|
|================================= | 48%
|
|===================================== | 52%
|
|======================================== | 57%
|
|=========================================== | 62%
|
|=============================================== | 67%
|
|================================================== | 71%
|
|===================================================== | 76%
|
|========================================================= | 81%
|
|============================================================ | 86%
|
|=============================================================== | 90%
|
|=================================================================== | 95%
|
|======================================================================| 100%
gsva_expt <- pbmc_gsva[["expt"]]
pbmc_scored <- gsva_likelihoods(pbmc_gsva, factor="chr")
## Error in gsva_likelihoods(pbmc_gsva, factor = "chr"): could not find function "gsva_likelihoods"
reactome_subset <- grepl(x=rownames(gsva_expt$expressionset), pattern="^REACTOME")
reactome_gsva <- gsva_expt$expressionset[reactome_subset, ]
pp("images/pbmc_reactome_gsva.svg")
## Going to write the image to: images/pbmc_reactome_gsva.svg when dev.off() is called.
color_range <- c("#00007F", "blue", "#007FFF", "cyan",
                 "#7FFF7F", "yellow", "#FF7F00", "red", "#7F0000")
jet_colors <- grDevices::colorRampPalette(color_range)
tt <- heatmap.3(exprs(reactome_gsva), cexRow=0.1, cexCol=0.5, trace="none", col=jet_colors)
dev.off()
## png
## 2
A couple of TODOs from our meeting 20210224
testing <- simple_gsva(hs_cds_gsva_input, signatures = broad_c7,
                       msig_xml = "reference/msigdb_v7.2.xml", cores = 10)
## Converting the rownames() of the expressionset to ENTREZID.
## 34 ENSEMBL ID's didn't have a matching ENTEREZ ID. Dropping them now.
## Before conversion, the expressionset has 12102 entries.
## After conversion, the expressionset has 12145 entries.
## Warning in .filterFeatures(expr, method): 295 genes with constant expression
## values throuhgout the samples.
## Mapping identifiers between gene sets and feature names
## Setting parallel calculations through a MulticoreParam back-end
## with workers=10 and tasks=100.
## Estimating ssGSEA scores for 4872 gene sets.
##
|
| | 0%
|
|=== | 5%
|
|======= | 10%
|
|========== | 14%
|
|============= | 19%
|
|================= | 24%
|
|==================== | 29%
|
|======================= | 33%
|
|=========================== | 38%
|
|============================== | 43%
|
|================================= | 48%
|
|===================================== | 52%
|
|======================================== | 57%
|
|=========================================== | 62%
|
|=============================================== | 67%
|
|================================================== | 71%
|
|===================================================== | 76%
|
|========================================================= | 81%
|
|============================================================ | 86%
|
|=============================================================== | 90%
|
|=================================================================== | 95%
|
|======================================================================| 100%
## Adding annotations from reference/msigdb_v7.2.xml.
Here is an invocation of all of my current analyses with gsva using the C2 signatures.
c2_pbmc_gsva <- simple_gsva(hs_cds_gsva_input, signatures=broad_c2)
## Converting the rownames() of the expressionset to ENTREZID.
## 34 ENSEMBL ID's didn't have a matching ENTEREZ ID. Dropping them now.
## Before conversion, the expressionset has 12102 entries.
## After conversion, the expressionset has 12145 entries.
## Warning in .filterFeatures(expr, method): 295 genes with constant expression
## values throuhgout the samples.
## Mapping identifiers between gene sets and feature names
## Setting parallel calculations through a MulticoreParam back-end
## with workers=8 and tasks=100.
## Estimating ssGSEA scores for 5813 gene sets.
##
|
| | 0%
|
|=== | 5%
|
|======= | 10%
|
|========== | 14%
|
|============= | 19%
|
|================= | 24%
|
|==================== | 29%
|
|======================= | 33%
|
|=========================== | 38%
|
|============================== | 43%
|
|================================= | 48%
|
|===================================== | 52%
|
|======================================== | 57%
|
|=========================================== | 62%
|
|=============================================== | 67%
|
|================================================== | 71%
|
|===================================================== | 76%
|
|========================================================= | 81%
|
|============================================================ | 86%
|
|=============================================================== | 90%
|
|=================================================================== | 95%
|
|======================================================================| 100%
c2_pbmc_sig <- get_sig_gsva_categories(
  c2_pbmc_gsva,
  excel=glue::glue("excel/pbmc_batch_msig_c2_categories-v{ver}.xlsx"))
## Starting limma pairwise comparison.
## libsize was not specified, this parameter has profound effects on limma's result.
## Using the libsize from expt$libsize.
## Limma step 1/6: choosing model.
## Choosing the non-intercept containing model.
## Assuming this data is similar to a micro array and not performign voom.
## Limma step 3/6: running lmFit with method: ls.
## Limma step 4/6: making and fitting contrasts with no intercept. (~ 0 + factors)
## Limma step 5/6: Running eBayes with robust = FALSE and trend = FALSE.
## Limma step 6/6: Writing limma outputs.
## Limma step 6/6: 1/3: Creating table: sh_vs_chr. Adjust = BH
## Limma step 6/6: 2/3: Creating table: uninf_vs_chr. Adjust = BH
## Limma step 6/6: 3/3: Creating table: uninf_vs_sh. Adjust = BH
## Limma step 6/6: 1/3: Creating table: chr. Adjust = BH
## Limma step 6/6: 2/3: Creating table: sh. Adjust = BH
## Limma step 6/6: 3/3: Creating table: uninf. Adjust = BH
## The factor uninf has 3 rows.
## The factor chr has 9 rows.
## The factor sh has 9 rows.
## Testing each factor against the others.
## Scoring uninf against everything else.
## Scoring chr against everything else.
## Scoring sh against everything else.
## Deleting the file excel/pbmc_batch_msig_c2_categories-v202102.xlsx before writing the tables.
pp(file="images/pbmc_c2_gsva_scores.svg")
## Going to write the image to: images/pbmc_c2_gsva_scores.svg when dev.off() is called.
c2_pbmc_sig$raw_plot
dev.off()
## png
## 2
pp(file="images/pbmc_c2_likelihoods.svg")
## Going to write the image to: images/pbmc_c2_likelihoods.svg when dev.off() is called.
c2_pbmc_sig$likelihood_plot
## NULL
dev.off()
## png
## 2
pp(file="images/pbmc_c2_subset.svg")
## Going to write the image to: images/pbmc_c2_subset.svg when dev.off() is called.
c2_pbmc_sig$subset_plot
dev.off()
## png
## 2
Ibid, but this time with C7.
c7_pbmc_gsva <- simple_gsva(hs_cds_gsva_input, signatures=broad_c7,
                            msig_xml = "reference/msigdb_v7.2.xml", cores = 10)
## Converting the rownames() of the expressionset to ENTREZID.
## 34 ENSEMBL ID's didn't have a matching ENTEREZ ID. Dropping them now.
## Before conversion, the expressionset has 12102 entries.
## After conversion, the expressionset has 12145 entries.
## Warning in .filterFeatures(expr, method): 295 genes with constant expression
## values throuhgout the samples.
## Mapping identifiers between gene sets and feature names
## Setting parallel calculations through a MulticoreParam back-end
## with workers=10 and tasks=100.
## Estimating ssGSEA scores for 4872 gene sets.
##
|
| | 0%
|
|=== | 5%
|
|======= | 10%
|
|========== | 14%
|
|============= | 19%
|
|================= | 24%
|
|==================== | 29%
|
|======================= | 33%
|
|=========================== | 38%
|
|============================== | 43%
|
|================================= | 48%
|
|===================================== | 52%
|
|======================================== | 57%
|
|=========================================== | 62%
|
|=============================================== | 67%
|
|================================================== | 71%
|
|===================================================== | 76%
|
|========================================================= | 81%
|
|============================================================ | 86%
|
|=============================================================== | 90%
|
|=================================================================== | 95%
|
|======================================================================| 100%
## Adding annotations from reference/msigdb_v7.2.xml.
c7_pbmc_sig <- get_sig_gsva_categories(
  c7_pbmc_gsva,
  excel=glue::glue("excel/pbmc_msig_c7_categories-v{ver}.xlsx"))
## Starting limma pairwise comparison.
## libsize was not specified, this parameter has profound effects on limma's result.
## Using the libsize from expt$libsize.
## Limma step 1/6: choosing model.
## Choosing the non-intercept containing model.
## Assuming this data is similar to a micro array and not performign voom.
## Limma step 3/6: running lmFit with method: ls.
## Limma step 4/6: making and fitting contrasts with no intercept. (~ 0 + factors)
## Limma step 5/6: Running eBayes with robust = FALSE and trend = FALSE.
## Limma step 6/6: Writing limma outputs.
## Limma step 6/6: 1/3: Creating table: sh_vs_chr. Adjust = BH
## Limma step 6/6: 2/3: Creating table: uninf_vs_chr. Adjust = BH
## Limma step 6/6: 3/3: Creating table: uninf_vs_sh. Adjust = BH
## Limma step 6/6: 1/3: Creating table: chr. Adjust = BH
## Limma step 6/6: 2/3: Creating table: sh. Adjust = BH
## Limma step 6/6: 3/3: Creating table: uninf. Adjust = BH
## The factor uninf has 3 rows.
## The factor chr has 9 rows.
## The factor sh has 9 rows.
## Testing each factor against the others.
## Scoring uninf against everything else.
## Scoring chr against everything else.
## Scoring sh against everything else.
## Deleting the file excel/pbmc_msig_c7_categories-v202102.xlsx before writing the tables.
pp(file="images/pbmc_c7_gsva_scores.svg")
## Going to write the image to: images/pbmc_c7_gsva_scores.svg when dev.off() is called.
c7_pbmc_sig$raw_plot
dev.off()
## png
## 2
pp(file="images/pbmc_c7_likelihoods.svg")
## Going to write the image to: images/pbmc_c7_likelihoods.svg when dev.off() is called.
c7_pbmc_sig$likelihood_plot
## NULL
dev.off()
## png
## 2
pp(file="images/pbmc_c7_subset.svg")
## Going to write the image to: images/pbmc_c7_subset.svg when dev.off() is called.
c7_pbmc_sig$subset_plot
dev.off()
## png
## 2
ups <- pbmc_sig[["deseq"]][["ups"]][["ch_nil"]]
chr_uninf_msig_goseq <- goseq_msigdb(sig_genes=ups, signatures=broad_c7,
                                     signature_category="c7",
                                     length_db=hs_length,
                                     excel=glue::glue("excel/pbmc_chr_uninf_up_msig_goseq-v{ver}.xlsx"))
## Starting to coerce the msig data to the ontology format.
## Finished coercing the msig data.
## Error in requireNamespace(orgdb): object 'orgdb' not found
pp(file="images/pbmc_chronic_vs_uninfected_msig_goseq.png", width=18)
## Going to write the image to: images/pbmc_chronic_vs_uninfected_msig_goseq.png when dev.off() is called.
chr_uninf_msig_goseq$pvalue_plots$mfp_plot_over
## Error in eval(expr, envir, enclos): object 'chr_uninf_msig_goseq' not found
dev.off()
## png
## 2
downs <- pbmc_sig[["deseq"]][["downs"]][["ch_nil"]]
chr_uninf_down_msig_goseq <- goseq_msigdb(sig_genes=downs, signatures=broad_c7,
                                          signature_category="c7",
                                          length_db=hs_length,
                                          excel=glue::glue("excel/pbmc_chr_uninf_down_msig_goseq-v{ver}.xlsx"))
## Starting to coerce the msig data to the ontology format.
## Finished coercing the msig data.
## Error in convert_ids(rownames(sig_genes), from = current_id, to = required_id, : object 'orgdb' not found
pp(file="images/pbmc_chronic_vs_uninfected_msig_goseq_down.png", width=18)
## Going to write the image to: images/pbmc_chronic_vs_uninfected_msig_goseq_down.png when dev.off() is called.
chr_uninf_down_msig_goseq$pvalue_plots$mfp_plot_over
## Error in eval(expr, envir, enclos): object 'chr_uninf_down_msig_goseq' not found
dev.off()
## png
## 2
ups <- pbmc_sig[["deseq"]][["ups"]][["sh_nil"]]
sh_uninf_msig_goseq <- goseq_msigdb(sig_genes=ups, signatures=broad_c7,
                                    signature_category="c7",
                                    length_db=hs_length,
                                    excel=glue::glue("excel/pbmc_sh_uninf_up_msig_goseq-v{ver}.xlsx"))
## Starting to coerce the msig data to the ontology format.
## Finished coercing the msig data.
## Error in convert_ids(rownames(sig_genes), from = current_id, to = required_id, : object 'orgdb' not found
pp(file="images/pbmc_selfhealing_vs_uninfected_msig_goseq.png", width=18)
## Going to write the image to: images/pbmc_selfhealing_vs_uninfected_msig_goseq.png when dev.off() is called.
sh_uninf_msig_goseq$pvalue_plots$mfp_plot_over
## Error in eval(expr, envir, enclos): object 'sh_uninf_msig_goseq' not found
dev.off()
## png
## 2
downs <- pbmc_sig[["deseq"]][["downs"]][["sh_nil"]]
sh_uninf_down_msig_goseq <- goseq_msigdb(sig_genes=downs, signatures=broad_c7,
                                         signature_category="c7",
                                         length_db=hs_length,
                                         excel=glue::glue("excel/pbmc_sh_uninf_down_msig_goseq-v{ver}.xlsx"))
## Starting to coerce the msig data to the ontology format.
## Finished coercing the msig data.
## Error in convert_ids(rownames(sig_genes), from = current_id, to = required_id, : object 'orgdb' not found
pp(file="images/pbmc_selfhealing_vs_uninfected_msig_goseq_down.png", width=18)
## Going to write the image to: images/pbmc_selfhealing_vs_uninfected_msig_goseq_down.png when dev.off() is called.
sh_uninf_down_msig_goseq$pvalue_plots$mfp_plot_over
## Error in eval(expr, envir, enclos): object 'sh_uninf_down_msig_goseq' not found
dev.off()
## png
## 2
c8_pbmc_gsva <- simple_gsva(hs_cds_gsva_input,
                            signatures="reference/msigdb_v7.2/c8.all.v7.2.entrez.gmt",
                            signature_category="c8")
## Converting the rownames() of the expressionset to ENTREZID.
## 34 ENSEMBL ID's didn't have a matching ENTEREZ ID. Dropping them now.
## Before conversion, the expressionset has 12102 entries.
## After conversion, the expressionset has 12145 entries.
## Warning in .filterFeatures(expr, method): 295 genes with constant expression
## values throuhgout the samples.
## Mapping identifiers between gene sets and feature names
## Setting parallel calculations through a MulticoreParam back-end
## with workers=8 and tasks=100.
## Estimating ssGSEA scores for 282 gene sets.
##
|
| | 0%
|
|=== | 5%
|
|======= | 10%
|
|========== | 14%
|
|============= | 19%
|
|================= | 24%
|
|==================== | 29%
|
|======================= | 33%
|
|=========================== | 38%
|
|============================== | 43%
|
|================================= | 48%
|
|===================================== | 52%
|
|======================================== | 57%
|
|=========================================== | 62%
|
|=============================================== | 67%
|
|================================================== | 71%
|
|===================================================== | 76%
|
|========================================================= | 81%
|
|============================================================ | 86%
|
|=============================================================== | 90%
|
|=================================================================== | 95%
|
|======================================================================| 100%
c8_pbmc_sig <- get_sig_gsva_categories(
  c8_pbmc_gsva,
  excel=glue::glue("excel/pbmc_batch_c8_categories-v{ver}.xlsx"))
## Starting limma pairwise comparison.
## libsize was not specified, this parameter has profound effects on limma's result.
## Using the libsize from expt$libsize.
## Limma step 1/6: choosing model.
## Choosing the non-intercept containing model.
## Assuming this data is similar to a micro array and not performign voom.
## Limma step 3/6: running lmFit with method: ls.
## Limma step 4/6: making and fitting contrasts with no intercept. (~ 0 + factors)
## Limma step 5/6: Running eBayes with robust = FALSE and trend = FALSE.
## Limma step 6/6: Writing limma outputs.
## Limma step 6/6: 1/3: Creating table: sh_vs_chr. Adjust = BH
## Limma step 6/6: 2/3: Creating table: uninf_vs_chr. Adjust = BH
## Limma step 6/6: 3/3: Creating table: uninf_vs_sh. Adjust = BH
## Limma step 6/6: 1/3: Creating table: chr. Adjust = BH
## Limma step 6/6: 2/3: Creating table: sh. Adjust = BH
## Limma step 6/6: 3/3: Creating table: uninf. Adjust = BH
## The factor uninf has 3 rows.
## The factor chr has 9 rows.
## The factor sh has 9 rows.
## Testing each factor against the others.
## Scoring uninf against everything else.
## Scoring chr against everything else.
## Scoring sh against everything else.
pp(file="images/pbmc_c8_gsva_scores.svg")
## Going to write the image to: images/pbmc_c8_gsva_scores.svg when dev.off() is called.
c8_pbmc_sig$raw_plot
dev.off()
## png
## 2
pp(file="images/pbmc_c8_likelihoods.svg")
## Going to write the image to: images/pbmc_c8_likelihoods.svg when dev.off() is called.
c8_pbmc_sig$likelihood_plot
## NULL
dev.off()
## png
## 2
pp(file="images/pbmc_c8_subset.svg")
## Going to write the image to: images/pbmc_c8_subset.svg when dev.off() is called.
c8_pbmc_sig$subset_plot
dev.off()
## png
## 2
This is a matrix of expected gene abundances as rows and cell types as columns. For the moment I am just going to use the example file: “mixture_NSCLCbulk_Fig3g.txt”.
I think this is the exprs() data with hgnc IDs as rows.
start <- exprs(hs_cds_norm)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'exprs': object 'hs_cds_norm' not found
xref <- fData(hs_cds_norm)[, c("ensembl_gene_id", "hgnc_symbol")]
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'fData': object 'hs_cds_norm' not found
new <- merge(xref, start, by.x="ensembl_gene_id", by.y="row.names")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'merge': object 'xref' not found
rownames(new) <- make.names(new[["hgnc_symbol"]], unique=TRUE)
## Error in new[["hgnc_symbol"]]: object of type 'closure' is not subsettable
new[["hgnc_symbol"]] <- NULL
## Error in new[["hgnc_symbol"]] <- NULL: object of type 'closure' is not subsettable
new[["ensembl_gene_id"]] <- NULL
## Error in new[["ensembl_gene_id"]] <- NULL: object of type 'closure' is not subsettable
readr::write_tsv(x=new, file="cibersort_mixture_file.txt")
## Error in write_delim(x, file, delim = "\t", na = na, append = append, : is.data.frame(x) is not TRUE
This makes 0 sense.
Maria Adelaida would like also to consider how the resolution is related to the different donors. Recall therefore that two donors are quite similar, and one is rather different. Thus, a differential expression analysis across donors might prove helpful.
donor_cds_all <- set_expt_conditions(hs_cds_all, fact="donor")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'pData': object 'hs_cds_all' not found
donor_cds_minus <- set_expt_conditions(hs_cds_inf, fact="donor")
donor_cds_norm <- normalize_expt(donor_cds_all, filter=TRUE, transform="log2",
                                 convert="cpm", norm="quant")
## Error in normalize_expt(donor_cds_all, filter = TRUE, transform = "log2", : object 'donor_cds_all' not found
plot_pca(donor_cds_norm)$plot
## Error in plot_pca(donor_cds_norm): object 'donor_cds_norm' not found
donor_cds_nb <- normalize_expt(donor_cds_all, filter=TRUE, transform="log2",
                               convert="cpm", batch="svaseq")
## Error in normalize_expt(donor_cds_all, filter = TRUE, transform = "log2", : object 'donor_cds_all' not found
plot_pca(donor_cds_nb)$plot
## Error in plot_pca(donor_cds_nb): object 'donor_cds_nb' not found
donor_all_de <- all_pairwise(donor_cds_all, filter=TRUE, model_batch="svaseq")
## Error in normalize_expt(input, filter = filter): object 'donor_cds_all' not found
donor_minus_de <- all_pairwise(donor_cds_minus, filter=TRUE, model_batch="svaseq")
## batch_counts: Before batch/surrogate estimation, 180330 entries are x>1: 100%.
## batch_counts: Before batch/surrogate estimation, 227 entries are x==0: 0%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## Plotting a PCA before surrogate/batch inclusion.
## Not putting labels on the PC plot.
## Using svaseq to visualize before/after batch inclusion.
## Performing a test normalization with: raw
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 0 low-count genes (12047 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## The method is: svaseq.
## Step 4: doing batch correction with svaseq.
## Using the current state of normalization.
## Passing the data to all_adjusters using the svaseq estimate type.
## batch_counts: Before batch/surrogate estimation, 176883 entries are x>1: 98%.
## batch_counts: Before batch/surrogate estimation, 227 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 3595 entries are 0<x<1: 2%.
## The be method chose 2 surrogate variables.
## Attempting svaseq estimation with 2 surrogates.
## There are 108 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Step 4: transforming the data with log2.
## transform_counts: Found 108 values equal to 0, adding 1 to the matrix.
## Not putting labels on the PC plot.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
donor_all_tables <- combine_de_tables(donor_all_de,
                                      excel="excel/all_donor_tables.xlsx")
## Deleting the file excel/all_donor_tables.xlsx before writing the tables.
## Error in combine_de_tables(donor_all_de, excel = "excel/all_donor_tables.xlsx"): object 'donor_all_de' not found
donor_minus_tables <- combine_de_tables(donor_minus_de,
                                        excel="excel/minus_donor_tables.xlsx")
## Deleting the file excel/minus_donor_tables.xlsx before writing the tables.
## Writing a legend of columns.
## Printing a pca plot before/after surrogates/batch estimation.
## Working on table 1/3: d108_vs_d107
## Working on table 2/3: d110_vs_d107
## Working on table 3/3: d110_vs_d108
## Adding venn plots for d108_vs_d107.
## Limma expression coefficients for d108_vs_d107; R^2: 0.977; equation: y = 0.985x + 0.0671
## Deseq expression coefficients for d108_vs_d107; R^2: 0.977; equation: y = 1x + 0.00232
## Edger expression coefficients for d108_vs_d107; R^2: 0.978; equation: y = 1x - 0.00867
## Adding venn plots for d110_vs_d107.
## Limma expression coefficients for d110_vs_d107; R^2: 0.958; equation: y = 0.974x + 0.145
## Deseq expression coefficients for d110_vs_d107; R^2: 0.958; equation: y = 0.961x + 0.321
## Edger expression coefficients for d110_vs_d107; R^2: 0.958; equation: y = 0.961x + 0.298
## Adding venn plots for d110_vs_d108.
## Limma expression coefficients for d110_vs_d108; R^2: 0.966; equation: y = 0.977x + 0.152
## Deseq expression coefficients for d110_vs_d108; R^2: 0.966; equation: y = 0.953x + 0.406
## Edger expression coefficients for d110_vs_d108; R^2: 0.966; equation: y = 0.954x + 0.38
## Writing summary information, compare_plot is: TRUE.
## Performing save of excel/minus_donor_tables.xlsx.
Construct figure 4, this should include the following panels:
pp(file=glue::glue("images/figure_4a-v{ver}.pdf"))
## Going to write the image to: images/figure_4a-v202102.pdf when dev.off() is called.
hs_inf_write$raw_libsize
dev.off()
## png
## 2
pp(file=glue::glue("images/figure_4b-v{ver}.pdf"))
## Going to write the image to: images/figure_4b-v202102.pdf when dev.off() is called.
hs_inf_write$raw_scaled_pca
dev.off()
## png
## 2
pp(file=glue::glue("images/figure_4c-v{ver}.pdf"))
## Going to write the image to: images/figure_4c-v202102.pdf when dev.off() is called.
hs_inf_write$norm_pca
dev.off()
## png
## 2
keepers <- list("sh_nil" = c("sh", "uninf"),
                "ch_nil" = c("chr", "uninf"),
                "ch_sh" = c("chr", "sh"))
hs_pairwise_nobatch <- sm(all_pairwise(hs_cds_inf,
                                       model_batch=FALSE))
hs_pairwise_batch <- sm(all_pairwise(hs_cds_inf, model_batch=TRUE))
hs_pairwise_sva <- sm(all_pairwise(hs_cds_inf, model_batch="svaseq", filter=TRUE))
hs_nobatch_tables <- combine_de_tables(
  hs_pairwise_nobatch,
  keepers=keepers,
  excel=glue::glue("excel/hs_infect_nobatch-v{ver}.xlsx"))
## Writing a legend of columns.
## Printing a pca plot before/after surrogates/batch estimation.
## Working on 1/3: sh_nil which is: sh/uninf.
## Found inverse table with uninf_vs_sh
## Working on 2/3: ch_nil which is: chr/uninf.
## Found inverse table with uninf_vs_chr
## Working on 3/3: ch_sh which is: chr/sh.
## Found inverse table with sh_vs_chr
## Adding venn plots for sh_nil.
## Limma expression coefficients for sh_nil; R^2: 0.988; equation: y = 0.989x - 0.0731
## Deseq expression coefficients for sh_nil; R^2: 0.975; equation: y = 0.996x + 0.0234
## Edger expression coefficients for sh_nil; R^2: 0.985; equation: y = 1x - 0.0334
## Adding venn plots for ch_nil.
## Limma expression coefficients for ch_nil; R^2: 0.988; equation: y = 0.988x - 0.0712
## Deseq expression coefficients for ch_nil; R^2: 0.973; equation: y = 0.986x + 0.0955
## Edger expression coefficients for ch_nil; R^2: 0.984; equation: y = 0.999x - 0.0188
## Adding venn plots for ch_sh.
## Limma expression coefficients for ch_sh; R^2: 0.997; equation: y = 0.998x + 0.00331
## Deseq expression coefficients for ch_sh; R^2: 0.995; equation: y = 0.991x + 0.0711
## Edger expression coefficients for ch_sh; R^2: 0.998; equation: y = 0.999x + 0.00968
## Writing summary information, compare_plot is: TRUE.
## Performing save of excel/hs_infect_nobatch-v202102.xlsx.
hs_batch_tables <- combine_de_tables(
  hs_pairwise_batch,
  keepers=keepers,
  excel=glue::glue("excel/hs_infect_batch-v{ver}.xlsx"))
## Writing a legend of columns.
## Printing a pca plot before/after surrogates/batch estimation.
## Working on 1/3: sh_nil which is: sh/uninf.
## Found inverse table with uninf_vs_sh
## Working on 2/3: ch_nil which is: chr/uninf.
## Found inverse table with uninf_vs_chr
## Working on 3/3: ch_sh which is: chr/sh.
## Found inverse table with sh_vs_chr
## Adding venn plots for sh_nil.
## Limma expression coefficients for sh_nil; R^2: 0.988; equation: y = 0.989x - 0.0688
## Deseq expression coefficients for sh_nil; R^2: 0.976; equation: y = 0.996x + 0.00961
## Edger expression coefficients for sh_nil; R^2: 0.985; equation: y = 1x - 0.0431
## Adding venn plots for ch_nil.
## Limma expression coefficients for ch_nil; R^2: 0.988; equation: y = 0.988x - 0.0667
## Deseq expression coefficients for ch_nil; R^2: 0.974; equation: y = 0.985x + 0.0924
## Edger expression coefficients for ch_nil; R^2: 0.985; equation: y = 0.998x - 0.0206
## Adding venn plots for ch_sh.
## Limma expression coefficients for ch_sh; R^2: 0.997; equation: y = 0.998x + 0.00349
## Deseq expression coefficients for ch_sh; R^2: 0.995; equation: y = 0.99x + 0.0769
## Edger expression coefficients for ch_sh; R^2: 0.998; equation: y = 0.998x + 0.013
## Writing summary information, compare_plot is: TRUE.
## Performing save of excel/hs_infect_batch-v202102.xlsx.
hs_sva_tables <- combine_de_tables(
  hs_pairwise_sva, keepers=keepers,
  excel=glue::glue("excel/hs_infect_sva-v{ver}.xlsx"))
## Writing a legend of columns.
## Printing a pca plot before/after surrogates/batch estimation.
## Working on 1/3: sh_nil which is: sh/uninf.
## Found inverse table with uninf_vs_sh
## Working on 2/3: ch_nil which is: chr/uninf.
## Found inverse table with uninf_vs_chr
## Working on 3/3: ch_sh which is: chr/sh.
## Found inverse table with sh_vs_chr
## Adding venn plots for sh_nil.
## Limma expression coefficients for sh_nil; R^2: 0.953; equation: y = 0.978x + 0.00797
## Deseq expression coefficients for sh_nil; R^2: 0.953; equation: y = 1.01x - 0.154
## Edger expression coefficients for sh_nil; R^2: 0.953; equation: y = 1.01x - 0.156
## Adding venn plots for ch_nil.
## Limma expression coefficients for ch_nil; R^2: 0.946; equation: y = 0.972x + 0.0337
## Deseq expression coefficients for ch_nil; R^2: 0.947; equation: y = 1x - 0.0596
## Edger expression coefficients for ch_nil; R^2: 0.947; equation: y = 1x - 0.0605
## Adding venn plots for ch_sh.
## Limma expression coefficients for ch_sh; R^2: 0.993; equation: y = 0.997x + 0.0107
## Deseq expression coefficients for ch_sh; R^2: 0.994; equation: y = 0.992x + 0.0661
## Edger expression coefficients for ch_sh; R^2: 0.994; equation: y = 0.993x + 0.0517
## Writing summary information, compare_plot is: TRUE.
## Performing save of excel/hs_infect_sva-v202102.xlsx.
The following subset operation extracts the samples used for the macrophage experiment. The next three lines then change the colors from the defaults.
new_colors <- c("#009900", "#990000", "#000099")
names(new_colors) <- c("uninf", "chr", "sh")
hs_macr <- subset_expt(hs_all, subset = "")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'sampleNames': object 'hs_all' not found
hs_macr <- set_expt_colors(hs_macr, colors=new_colors)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'pData': object 'hs_macr' not found
labels <- as.character(pData(hs_macr)[["label"]])
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'pData': object 'hs_macr' not found
hs_macr <- set_expt_samplenames(hs_macr, labels)
## Error in set_expt_samplenames(hs_macr, labels): object 'hs_macr' not found
hs_cds_macr <- set_expt_colors(hs_cds_macr, colors=new_colors)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'pData': object 'hs_cds_macr' not found
hs_cds_macr <- set_expt_samplenames(hs_cds_macr, labels)
## Error in set_expt_samplenames(hs_cds_macr, labels): object 'hs_cds_macr' not found
In our meeting this morning (20190708), Najib and Maria Adelaida asked for how the data looks depending on batch methodology.
start <- sm(normalize_expt(hs_cds_macr, filter=TRUE, convert="cpm",
                           norm="quant", transform="log2"))
## Error in normalize_expt(hs_cds_macr, filter = TRUE, convert = "cpm", norm = "quant", : object 'hs_cds_macr' not found
pp(file="cpm_quant_filter_pca.png", image=plot_pca(start)$plot)
## Error in `rownames<-`(`*tmp*`, value = avail_samples): attempt to set 'rownames' on an object with no dimensions
limma_batch <- sm(normalize_expt(hs_cds_macr, filter=TRUE,
                                 convert="cpm", norm="quant",
                                 transform="log2", batch="limma"))
## Error in normalize_expt(hs_cds_macr, filter = TRUE, convert = "cpm", norm = "quant", : object 'hs_cds_macr' not found
pp(file="lcqf_limma.png", image=plot_pca(limma_batch)$plot)
## Error in plot_pca(limma_batch): object 'limma_batch' not found
pca_batch <- sm(normalize_expt(hs_cds_macr, filter=TRUE,
                               convert="cpm", norm="quant",
                               transform="log2", batch="pca"))
## Error in normalize_expt(hs_cds_macr, filter = TRUE, convert = "cpm", norm = "quant", : object 'hs_cds_macr' not found
pp(file="lcqf_pca.png", image=plot_pca(pca_batch)$plot)
## Error in plot_pca(pca_batch): object 'pca_batch' not found
svaseq_batch <- sm(normalize_expt(hs_cds_macr, filter=TRUE,
                                  convert="cpm",
                                  transform="log2", batch="svaseq"))
## Error in normalize_expt(hs_cds_macr, filter = TRUE, convert = "cpm", transform = "log2", : object 'hs_cds_macr' not found
pp(file="lcqf_svaseq.png", image=plot_pca(svaseq_batch)$plot)
## Error in plot_pca(svaseq_batch): object 'svaseq_batch' not found
sva_batch <- sm(normalize_expt(hs_cds_macr, filter=TRUE,
                               convert="cpm",
                               transform="log2", batch="fsva"))
## Error in normalize_expt(hs_cds_macr, filter = TRUE, convert = "cpm", transform = "log2", : object 'hs_cds_macr' not found
pp(file="lcqf_sva.png", image=plot_pca(sva_batch)$plot)
## Error in plot_pca(sva_batch): object 'sva_batch' not found
combat_batch <- sm(normalize_expt(hs_cds_macr, filter=TRUE,
                                  convert="cpm",
                                  transform="log2", batch="combat"))
## Error in normalize_expt(hs_cds_macr, filter = TRUE, convert = "cpm", transform = "log2", : object 'hs_cds_macr' not found
pp(file="lcqf_combat.png", image=plot_pca(combat_batch)$plot)
## Error in plot_pca(combat_batch): object 'combat_batch' not found
combatnp_batch <- sm(normalize_expt(hs_cds_macr, filter=TRUE,
                                    convert="cpm",
                                    transform="log2", batch="combat_noprior"))
## Error in normalize_expt(hs_cds_macr, filter = TRUE, convert = "cpm", transform = "log2", : object 'hs_cds_macr' not found
pp(file="lcqf_combatnp.png", image=plot_pca(combatnp_batch)$plot)
## Error in plot_pca(combatnp_batch): object 'combatnp_batch' not found
Figure S1 should include nice versions of the sample metrics. The normalization chosen is batch-in-model.
First, however, we will make some plots of the raw data.
Sample names are going to be ‘infectionstate_strainnumber’ : chr_7721
fig_s1 <- sm(write_expt(
  hs_cds_macr, norm="raw", violin=FALSE, convert="cpm",
  transform="log2", batch="svaseq", filter=TRUE,
  excel=paste0("excel/figure_s1_sample_estimation-v", ver, ".xlsx")))
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'exprs': object 'hs_cds_macr' not found
pp(file="fig_s1a_libsizes.tif", image=fig_s1$raw_libsize)
## Error in pp(file = "fig_s1a_libsizes.tif", image = fig_s1$raw_libsize): object 'fig_s1' not found
pp(file="fig_s1b_heatmap.tif", image=fig_s1$norm_disheat)
## Error in pp(file = "fig_s1b_heatmap.tif", image = fig_s1$norm_disheat): object 'fig_s1' not found
pp(file="fig_s1c_raw_pca.tif", image=fig_s1$raw_scaled_pca)
## Error in pp(file = "fig_s1c_raw_pca.tif", image = fig_s1$raw_scaled_pca): object 'fig_s1' not found
pp(file="fig_1a_norm_pca.tif", image=fig_s1$norm_pca)
## Error in pp(file = "fig_1a_norm_pca.tif", image = fig_s1$norm_pca): object 'fig_s1' not found
The most likely batch method from the above is svaseq. Let us perform differential expression analyses with it along with a few other methods.
hs_contrasts <- list(
  "macro_chr-sh" = c("chr", "sh"),
  "macro_chr-nil" = c("chr", "uninf"),
  "macro_sh-nil" = c("sh", "uninf"))
## Set up the data used in the comparative contrast sets.
Print a reminder of what we can expect when doing this with no batch information.
hs_macr_lowfilt <- sm(normalize_expt(hs_cds_macr, filter=TRUE))
## Error in normalize_expt(hs_cds_macr, filter = TRUE): object 'hs_cds_macr' not found
hs_lowfilt_pca <- sm(plot_pca(hs_cds_macr, transform="log2"))
## Error in normalize_expt(data, transform = arglist[["transform"]], convert = arglist[["convert"]], : object 'hs_cds_macr' not found
hs_lowfilt_pca$plot
## Error in eval(expr, envir, enclos): object 'hs_lowfilt_pca' not found
hs_macr_nobatch <- all_pairwise(input=hs_cds_macr, model_batch=FALSE,
                                limma_method="robust")
## Error in normalize_expt(input, filter = TRUE, batch = FALSE, transform = "log2", : object 'hs_cds_macr' not found
## wow, all tools including basic agree almost completely
medians_by_condition <- hs_macr_nobatch$basic$medians
## Error in eval(expr, envir, enclos): object 'hs_macr_nobatch' not found
excel_file <- glue::glue("excel/{rundate}_hs_macr_nobatch_contr-v{ver}.xlsx")
hs_macr_nobatch_tables <- sm(combine_de_tables(hs_macr_nobatch,
                                               excel=excel_file,
                                               keepers=hs_contrasts,
                                               extra_annot=medians_by_condition))
## Error in combine_de_tables(hs_macr_nobatch, excel = excel_file, keepers = hs_contrasts, : object 'hs_macr_nobatch' not found
excel_file <- glue::glue("excel/{rundate}_hs_macr_nobatch_sig-v{ver}.xlsx")
hs_macr_nobatch_sig <- sm(extract_significant_genes(hs_macr_nobatch_tables, lfc=0.585,
                                                    excel=excel_file,
                                                    according_to="all"))
## Error in extract_significant_genes(hs_macr_nobatch_tables, lfc = 0.585, : object 'hs_macr_nobatch_tables' not found
hs_lowfilt_batch_pca <- sm(plot_pca(hs_cds_macr, transform="log2",
                                    convert="cpm", batch="limma", filter=TRUE))
## Error in normalize_expt(data, transform = arglist[["transform"]], convert = arglist[["convert"]], : object 'hs_cds_macr' not found
hs_lowfilt_batch_pca$plot
## Error in eval(expr, envir, enclos): object 'hs_lowfilt_batch_pca' not found
In this attempt, we add a batch factor in the experimental model and see how it does.
## Here just let all_pairwise run on filtered data and do its normal ~ 0 + condition + batch analyses
hs_macr_batch <- all_pairwise(input=hs_cds_macr, batch=TRUE, limma_method="robust")
## Error in normalize_expt(input, filter = TRUE, batch = FALSE, transform = "log2", : object 'hs_cds_macr' not found
medians_by_condition <- hs_macr_batch$basic$medians
## Error in eval(expr, envir, enclos): object 'hs_macr_batch' not found
excel_file <- glue::glue("excel/{rundate}_hs_macr_batchmodel_contr-v{ver}.xlsx")
hs_macr_batch_tables <- combine_de_tables(
  hs_macr_batch, keepers=hs_contrasts,
  extra_annot=medians_by_condition,
  excel=excel_file)
## Error in combine_de_tables(hs_macr_batch, keepers = hs_contrasts, extra_annot = medians_by_condition, : object 'hs_macr_batch' not found
excel_file <- glue::glue("excel/{rundate}_hs_macr_batchmodel_sig-v{ver}.xlsx")
hs_macr_batch_sig <- extract_significant_genes(
  hs_macr_batch_tables, excel=excel_file, lfc=0.585,
  according_to="deseq")
## Error in extract_significant_genes(hs_macr_batch_tables, excel = excel_file, : object 'hs_macr_batch_tables' not found
excel_file <- glue::glue("excel/{rundate}_hs_macr_batchmodel_abund-v{ver}.xlsx")
hs_macr_batch_abun <- sm(extract_abundant_genes(
  hs_macr_batch_tables, excel=excel_file,
  according_to="deseq"))
## Error in extract_abundant_genes(hs_macr_batch_tables, excel = excel_file, : object 'hs_macr_batch_tables' not found
hs_lowfilt_sva_pca <- sm(plot_pca(hs_cds_macr, transform="log2",
                                  convert="cpm", batch="svaseq", filter=TRUE))
## Error in normalize_expt(data, transform = arglist[["transform"]], convert = arglist[["convert"]], : object 'hs_cds_macr' not found
hs_lowfilt_sva_pca$plot
## Error in eval(expr, envir, enclos): object 'hs_lowfilt_sva_pca' not found
In this attempt, we tell all pairwise to invoke svaseq.
hs_macr_sva <- all_pairwise(input=hs_cds_macr, model_batch="svaseq", filter=TRUE, limma_method="robust")
## Error in normalize_expt(input, filter = filter): object 'hs_cds_macr' not found
medians_by_condition <- hs_macr_sva$basic$medians
## Error in eval(expr, envir, enclos): object 'hs_macr_sva' not found
excel_file <- glue::glue("excel/{rundate}_hs_macr_svamodel_contr-v{ver}.xlsx")
hs_macr_sva_tables <- combine_de_tables(
  hs_macr_sva, keepers=hs_contrasts,
  extra_annot=medians_by_condition,
  excel=excel_file)
## Error in combine_de_tables(hs_macr_sva, keepers = hs_contrasts, extra_annot = medians_by_condition, : object 'hs_macr_sva' not found
excel_file <- glue::glue("excel/{rundate}_hs_macr_svamodel_sig-v{ver}.xlsx")
hs_macr_sva_sig <- extract_significant_genes(
  hs_macr_sva_tables, excel=excel_file,
  according_to="deseq")
## Error in extract_significant_genes(hs_macr_sva_tables, excel = excel_file, : object 'hs_macr_sva_tables' not found
excel_file <- glue::glue("excel/{rundate}_hs_macr_svamodel_abund-v{ver}.xlsx")
hs_macr_sva_abun <- sm(extract_abundant_genes(
  hs_macr_sva_tables, excel=excel_file,
  according_to="deseq"))
## Error in extract_abundant_genes(hs_macr_sva_tables, excel = excel_file, : object 'hs_macr_sva_tables' not found
As you know, combat actually changes the data, so it will require a slightly different setup.
combatnp_input <- normalize_expt(hs_cds_macr, filter=TRUE,
                                 convert="cpm",
                                 batch="combat_noprior")
## Error in normalize_expt(hs_cds_macr, filter = TRUE, convert = "cpm", batch = "combat_noprior"): object 'hs_cds_macr' not found
In this attempt, we run all_pairwise on the combat-adjusted data, with no batch term in the model since the counts were already corrected.
hs_macr_combat <- all_pairwise(input=combatnp_input, model_batch=FALSE,
                               force=TRUE, limma_method="robust")
## Error in normalize_expt(input, filter = TRUE, batch = FALSE, transform = "log2", : object 'combatnp_input' not found
excel_file <- glue::glue("excel/{rundate}_hs_macr_combat_contr-v{ver}.xlsx")
hs_macr_combat_tables <- combine_de_tables(
  hs_macr_combat, keepers=hs_contrasts,
  extra_annot=medians_by_condition,
  excel=excel_file)
## Error in combine_de_tables(hs_macr_combat, keepers = hs_contrasts, extra_annot = medians_by_condition, : object 'hs_macr_combat' not found
excel_file <- glue::glue("excel/{rundate}_hs_macr_combat_sig-v{ver}.xlsx")
hs_macr_combat_sig <- extract_significant_genes(
  hs_macr_combat_tables, excel=excel_file,
  according_to="deseq")
## Error in extract_significant_genes(hs_macr_combat_tables, excel = excel_file, : object 'hs_macr_combat_tables' not found
excel_file <- glue::glue("excel/{rundate}_hs_macr_combat_abund-v{ver}.xlsx")
hs_macr_combat_abun <- sm(extract_abundant_genes(
  hs_macr_combat_tables, excel=excel_file,
  according_to="deseq"))
## Error in extract_abundant_genes(hs_macr_combat_tables, excel = excel_file, : object 'hs_macr_combat_tables' not found
nobatch_batch <- compare_de_results(first=hs_macr_nobatch_tables,
                                    second=hs_macr_batch_tables)
## Testing method: limma.
## Error in compare_de_results(first = hs_macr_nobatch_tables, second = hs_macr_batch_tables): object 'hs_macr_nobatch_tables' not found
nobatch_batch$logfc
## Error in eval(expr, envir, enclos): object 'nobatch_batch' not found
batch_sva <- compare_de_results(first=hs_macr_batch_tables,
                                second=hs_macr_sva_tables)
## Testing method: limma.
## Error in compare_de_results(first = hs_macr_batch_tables, second = hs_macr_sva_tables): object 'hs_macr_batch_tables' not found
batch_sva$logfc
## Error in eval(expr, envir, enclos): object 'batch_sva' not found
batch_combat <- compare_de_results(first=hs_macr_batch_tables,
                                   second=hs_macr_combat_tables)
## Testing method: limma.
## Error in compare_de_results(first = hs_macr_batch_tables, second = hs_macr_combat_tables): object 'hs_macr_batch_tables' not found
batch_combat$logfc
## Error in eval(expr, envir, enclos): object 'batch_combat' not found
sva_combat <- compare_de_results(first=hs_macr_sva_tables,
                                 second=hs_macr_combat_tables)
## Testing method: limma.
## Error in compare_de_results(first = hs_macr_sva_tables, second = hs_macr_combat_tables): object 'hs_macr_sva_tables' not found
sva_combat$logfc
## Error in eval(expr, envir, enclos): object 'sva_combat' not found
## Interesting how much sva and combat disagree.
batchmodel_volcano <- plot_volcano_de(
  table=hs_macr_batch_tables[["data"]][["macro_chr-sh"]],
  color_by="state",
  fc_col="deseq_logfc",
  p_col="deseq_adjp",
  logfc=0.58,
  shapes_by_state=FALSE,
  line_position="top")
## Error in data.frame(xaxis = as.numeric(table[[fc_col]]), yaxis = as.numeric(table[[p_col]]), : object 'hs_macr_batch_tables' not found
batchmodel_volcano$plot
## Error in eval(expr, envir, enclos): object 'batchmodel_volcano' not found
svamodel_volcano <- plot_volcano_de(
  table=hs_macr_sva_tables[["data"]][["macro_chr-sh"]],
  color_by="state",
  fc_col="deseq_logfc",
  p_col="deseq_adjp",
  logfc=0.58,
  shapes_by_state=FALSE,
  line_position="top")
## Error in data.frame(xaxis = as.numeric(table[[fc_col]]), yaxis = as.numeric(table[[p_col]]), : object 'hs_macr_sva_tables' not found
pp(file="sva_chsh_deseq_volcano.tif", image=svamodel_volcano$plot)
## Error in pp(file = "sva_chsh_deseq_volcano.tif", image = svamodel_volcano$plot): object 'svamodel_volcano' not found
hs_proper <- simple_proper(de_tables=hs_macr_batch_tables)
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'exprs': object 'hs_macr_batch_tables' not found
Recall that I made the variables ‘hs_macr_sva_sig’ and ‘hs_macr_sva_abun’ to hold the results of the most significantly changed and abundant genes.
gProfiler is my favorite tool for ontology searching, however they recently had a big update and split their code. The new version has all sorts of cool toys, but as of the last time I tried it, did not work. Thus the following is still using the older methods.
lfs_up <- hs_macr_sva_sig[["deseq"]][["ups"]][["macro_chr-sh"]]
## Error in eval(expr, envir, enclos): object 'hs_macr_sva_sig' not found
lfs_down <- hs_macr_sva_sig[["deseq"]][["downs"]][["macro_chr-sh"]]
## Error in eval(expr, envir, enclos): object 'hs_macr_sva_sig' not found
up_gp <- simple_gprofiler(sig_genes=lfs_up, species="hsapiens")
## Error in simple_gprofiler(sig_genes = lfs_up, species = "hsapiens"): object 'lfs_up' not found
up_gp[["pvalue_plots"]][["bpp_plot_over"]]
## Error in eval(expr, envir, enclos): object 'up_gp' not found
down_gp <- simple_gprofiler(sig_genes=lfs_down, species="hsapiens")
## Error in simple_gprofiler(sig_genes = lfs_down, species = "hsapiens"): object 'lfs_down' not found
down_gp[["pvalue_plots"]][["bpp_plot_over"]]
## Error in eval(expr, envir, enclos): object 'down_gp' not found
up_goseq <- simple_goseq(sig_genes=lfs_up, go_db=hs_go[["go"]], length_db=hs_lengths)
## Error in simple_goseq(sig_genes = lfs_up, go_db = hs_go[["go"]], length_db = hs_lengths): object 'lfs_up' not found
up_goseq[["pvalue_plots"]][["bpp_plot_over"]]
## Error in eval(expr, envir, enclos): object 'up_goseq' not found
down_goseq <- simple_goseq(sig_genes=lfs_down, go_db=hs_go[["go"]], length_db=hs_lengths)
## Error in simple_goseq(sig_genes = lfs_down, go_db = hs_go[["go"]], length_db = hs_lengths): object 'lfs_down' not found
down_goseq[["pvalue_plots"]][["bpp_plot_over"]]
## Error in eval(expr, envir, enclos): object 'down_goseq' not found
up_topgo <- simple_topgo(sig_genes=lfs_up, go_db=hs_go[["go"]])
## Error in simple_topgo(sig_genes = lfs_up, go_db = hs_go[["go"]]): object 'lfs_up' not found
up_topgo[["pvalue_plots"]][["bpp_plot_over"]]
## Error in eval(expr, envir, enclos): object 'up_topgo' not found
down_topgo <- simple_topgo(sig_genes=lfs_down, go_db=hs_go[["go"]])
## Error in simple_topgo(sig_genes = lfs_down, go_db = hs_go[["go"]]): object 'lfs_down' not found
down_topgo[["pvalue_plots"]][["bpp_plot_over"]]
## Error in eval(expr, envir, enclos): object 'down_topgo' not found
up_cp <- simple_clusterprofiler(sig_genes=lfs_up, do_david=FALSE, do_gsea=FALSE, orgdb="org.Hs.eg.db", fc_column="deseq_logfc")
## Error in rownames(sig_genes): object 'lfs_up' not found
up_cp[["pvalue_plots"]][["ego_all_bp"]]
## Error in eval(expr, envir, enclos): object 'up_cp' not found
up_cp[["plots"]][["dot_all_bp"]]
## Error in eval(expr, envir, enclos): object 'up_cp' not found
down_cp <- simple_clusterprofiler(sig_genes=lfs_down, do_david=FALSE, do_gsea=FALSE, orgdb="org.Hs.eg.db", fc_column="deseq_logfc")
## Error in rownames(sig_genes): object 'lfs_down' not found
down_cp[["pvalue_plots"]][["ego_all_bp"]]
## Error in eval(expr, envir, enclos): object 'down_cp' not found
down_cp[["plots"]][["dot_all_bp"]]
## Error in eval(expr, envir, enclos): object 'down_cp' not found
all_de <- all_pairwise(hs_cds_all, model_batch="svaseq", filter=TRUE)
## Error in normalize_expt(input, filter = filter): object 'hs_cds_all' not found
d108 <- subset_expt(hs_cds_all, subset="donor=='d108'")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'sampleNames': object 'hs_cds_all' not found
d107 <- subset_expt(hs_cds_all, subset="donor=='d107'")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'sampleNames': object 'hs_cds_all' not found
d110 <- subset_expt(hs_cds_all, subset="donor=='d110'")
## Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'sampleNames': object 'hs_cds_all' not found
d108_de <- all_pairwise(d108, model_batch="svaseq", filter=TRUE)
## Error in normalize_expt(input, filter = filter): object 'd108' not found
d107_de <- all_pairwise(d107, model_batch="svaseq", filter=TRUE)
## Error in normalize_expt(input, filter = filter): object 'd107' not found
d110_de <- all_pairwise(d110, model_batch="svaseq", filter=TRUE)
## Error in normalize_expt(input, filter = filter): object 'd110' not found
d108_table <- combine_de_tables(d108_de, excel="excel/d108_tables.xlsx")
## Deleting the file excel/d108_tables.xlsx before writing the tables.
## Error in combine_de_tables(d108_de, excel = "excel/d108_tables.xlsx"): object 'd108_de' not found
d107_table <- combine_de_tables(d107_de, excel="excel/d107_tables.xlsx")
## Deleting the file excel/d107_tables.xlsx before writing the tables.
## Error in combine_de_tables(d107_de, excel = "excel/d107_tables.xlsx"): object 'd107_de' not found
d110_table <- combine_de_tables(d110_de, excel="excel/d110_tables.xlsx")
## Deleting the file excel/d110_tables.xlsx before writing the tables.
## Error in combine_de_tables(d110_de, excel = "excel/d110_tables.xlsx"): object 'd110_de' not found
d108 <- subset_expt(hs_cds_inf, subset="donor=='d108'")
## Using a subset expression.
## There were 15, now there are 5 samples.
d107 <- subset_expt(hs_cds_inf, subset="donor=='d107'")
## Using a subset expression.
## There were 15, now there are 5 samples.
d110 <- subset_expt(hs_cds_inf, subset="donor=='d110'")
## Using a subset expression.
## There were 15, now there are 5 samples.
d108_de <- all_pairwise(d108, model_batch="svaseq", filter=TRUE)
## batch_counts: Before batch/surrogate estimation, 57939 entries are x>1: 100%.
## batch_counts: Before batch/surrogate estimation, 8 entries are x==0: 0%.
## The be method chose 1 surrogate variable.
## Attempting svaseq estimation with 1 surrogate.
## Plotting a PCA before surrogate/batch inclusion.
## Not putting labels on the PC plot.
## Using svaseq to visualize before/after batch inclusion.
## Performing a test normalization with: raw
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 0 low-count genes (11591 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## The method is: svaseq.
## Step 4: doing batch correction with svaseq.
## Using the current state of normalization.
## Passing the data to all_adjusters using the svaseq estimate type.
## batch_counts: Before batch/surrogate estimation, 57753 entries are x>1: 100%.
## batch_counts: Before batch/surrogate estimation, 8 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 194 entries are 0<x<1: 0%.
## The be method chose 1 surrogate variable.
## Attempting svaseq estimation with 1 surrogate.
## Step 4: transforming the data with log2.
## Not putting labels on the PC plot.
## Error in checkForRemoteErrors(val): 2 nodes produced errors; first error: c("Error in checkFullRank(modelMatrix) : \n the model matrix is not full rank, so the model cannot be fit as specified.\n One or more variables or interaction terms in the design formula are linear\n combinations of the others and must be removed.\n\n Please read the vignette section 'Model matrix not full rank':\n\n vignette('DESeq2')\n", "deseq")
d107_de <- all_pairwise(d107, model_batch="svaseq", filter=TRUE)
## batch_counts: Before batch/surrogate estimation, 57606 entries are x>1: 100%.
## batch_counts: Before batch/surrogate estimation, 11 entries are x==0: 0%.
## The be method chose 1 surrogate variable.
## Attempting svaseq estimation with 1 surrogate.
## Plotting a PCA before surrogate/batch inclusion.
## Warning in summary.lm(object, ...): essentially perfect fit: summary may be
## unreliable
## Not putting labels on the PC plot.
## Using svaseq to visualize before/after batch inclusion.
## Performing a test normalization with: raw
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 0 low-count genes (11525 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## The method is: svaseq.
## Step 4: doing batch correction with svaseq.
## Using the current state of normalization.
## Passing the data to all_adjusters using the svaseq estimate type.
## batch_counts: Before batch/surrogate estimation, 57365 entries are x>1: 100%.
## batch_counts: Before batch/surrogate estimation, 11 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 249 entries are 0<x<1: 0%.
## The be method chose 1 surrogate variable.
## Attempting svaseq estimation with 1 surrogate.
## There are 11525 (20%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Step 4: transforming the data with log2.
## transform_counts: Found 11525 values equal to 0, adding 1 to the matrix.
## Not putting labels on the PC plot.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
d110_de <- all_pairwise(d110, model_batch="svaseq", filter=TRUE)
## batch_counts: Before batch/surrogate estimation, 57153 entries are x>1: 100%.
## batch_counts: Before batch/surrogate estimation, 6 entries are x==0: 0%.
## The be method chose 1 surrogate variable.
## Attempting svaseq estimation with 1 surrogate.
## Plotting a PCA before surrogate/batch inclusion.
## Not putting labels on the PC plot.
## Using svaseq to visualize before/after batch inclusion.
## Performing a test normalization with: raw
## This function will replace the expt$expressionset slot with:
## log2(svaseq(cpm(cbcb(data))))
## It will save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep libsizes in mind
## when invoking limma. The appropriate libsize is non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Step 1: performing count filter with option: cbcb
## Removing 0 low-count genes (11433 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## The method is: svaseq.
## Step 4: doing batch correction with svaseq.
## Using the current state of normalization.
## Passing the data to all_adjusters using the svaseq estimate type.
## batch_counts: Before batch/surrogate estimation, 56951 entries are x>1: 100%.
## batch_counts: Before batch/surrogate estimation, 6 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 208 entries are 0<x<1: 0%.
## The be method chose 1 surrogate variable.
## Attempting svaseq estimation with 1 surrogate.
## There are 2 (0%) elements which are < 0 after batch correction.
## Setting low elements to zero.
## Step 4: transforming the data with log2.
## transform_counts: Found 2 values equal to 0, adding 1 to the matrix.
## Not putting labels on the PC plot.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
d108_table <- combine_de_tables(d108_de, excel="excel/d108_minustwo_tables.xlsx")
## Deleting the file excel/d108_minustwo_tables.xlsx before writing the tables.
## Error in combine_de_tables(d108_de, excel = "excel/d108_minustwo_tables.xlsx"): object 'd108_de' not found
d107_table <- combine_de_tables(d107_de, excel="excel/d107_minustwo_tables.xlsx")
## Deleting the file excel/d107_minustwo_tables.xlsx before writing the tables.
## Writing a legend of columns.
## Printing a pca plot before/after surrogates/batch estimation.
## Working on table 1/3: sh_vs_chr
## Working on table 2/3: uninf_vs_chr
## Working on table 3/3: uninf_vs_sh
## Adding venn plots for sh_vs_chr.
## Limma expression coefficients for sh_vs_chr; R^2: 0.981; equation: y = 0.989x + 0.0477
## Deseq expression coefficients for sh_vs_chr; R^2: 0.981; equation: y = 0.979x + 0.201
## Edger expression coefficients for sh_vs_chr; R^2: 0.981; equation: y = 0.98x + 0.0965
## Adding venn plots for uninf_vs_chr.
## Limma expression coefficients for uninf_vs_chr; R^2: 0.927; equation: y = 0.952x + 0.16
## Deseq expression coefficients for uninf_vs_chr; R^2: 0.927; equation: y = 0.958x + 0.379
## Edger expression coefficients for uninf_vs_chr; R^2: 0.927; equation: y = 0.959x + 0.383
## Adding venn plots for uninf_vs_sh.
## Limma expression coefficients for uninf_vs_sh; R^2: 0.952; equation: y = 0.974x + 0.0449
## Deseq expression coefficients for uninf_vs_sh; R^2: 0.951; equation: y = 0.991x + 0.0619
## Edger expression coefficients for uninf_vs_sh; R^2: 0.952; equation: y = 0.99x + 0.0819
## Writing summary information, compare_plot is: TRUE.
## Performing save of excel/d107_minustwo_tables.xlsx.
d110_table <- combine_de_tables(d110_de, excel="excel/d110_minustwo_tables.xlsx")
## Deleting the file excel/d110_minustwo_tables.xlsx before writing the tables.
## Writing a legend of columns.
## Printing a pca plot before/after surrogates/batch estimation.
## Working on table 1/3: sh_vs_chr
## Working on table 2/3: uninf_vs_chr
## Working on table 3/3: uninf_vs_sh
## Adding venn plots for sh_vs_chr.
## Limma expression coefficients for sh_vs_chr; R^2: 0.978; equation: y = 0.989x + 0.0477
## Deseq expression coefficients for sh_vs_chr; R^2: 0.978; equation: y = 0.983x + 0.178
## Edger expression coefficients for sh_vs_chr; R^2: 0.979; equation: y = 0.983x + 0.0919
## Adding venn plots for uninf_vs_chr.
## Limma expression coefficients for uninf_vs_chr; R^2: 0.896; equation: y = 0.943x + 0.158
## Deseq expression coefficients for uninf_vs_chr; R^2: 0.897; equation: y = 1x - 0.00282
## Edger expression coefficients for uninf_vs_chr; R^2: 0.897; equation: y = 1x - 0.00294
## Adding venn plots for uninf_vs_sh.
## Limma expression coefficients for uninf_vs_sh; R^2: 0.897; equation: y = 0.947x + 0.16
## Deseq expression coefficients for uninf_vs_sh; R^2: 0.896; equation: y = 1.01x - 0.0854
## Edger expression coefficients for uninf_vs_sh; R^2: 0.896; equation: y = 1.01x - 0.0802
## Writing summary information, compare_plot is: TRUE.
## Performing save of excel/d110_minustwo_tables.xlsx.
pander::pander(sessionInfo())
R version 4.0.3 (2020-10-10)
Platform: x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.UTF-8, LC_NUMERIC=C, LC_TIME=en_US.UTF-8, LC_COLLATE=en_US.UTF-8, LC_MONETARY=en_US.UTF-8, LC_MESSAGES=en_US.UTF-8, LC_PAPER=en_US.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.UTF-8 and LC_IDENTIFICATION=C
attached base packages: splines, stats4, parallel, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: GSVAdata(v.1.26.0), UpSetR(v.1.4.0), ruv(v.0.9.7.1), lme4(v.1.1-26), Matrix(v.1.3-2), BiocParallel(v.1.24.1), variancePartition(v.1.20.0), Homo.sapiens(v.1.3.1), TxDb.Hsapiens.UCSC.hg19.knownGene(v.3.2.2), org.Hs.eg.db(v.3.12.0), GO.db(v.3.12.1), OrganismDbi(v.1.32.0), GenomicFeatures(v.1.42.1), GenomicRanges(v.1.42.0), GenomeInfoDb(v.1.26.2), org.Cfasciculata.Cf.Cl.v46.eg.db(v.2020.07), org.Lmexicana.MHOMGT2001U1103.v46.eg.db(v.2020.07), org.Ldonovani.BPK282A1.v46.eg.db(v.2020.07), org.Lbraziliensis.MHOMBR75M2904.v46.eg.db(v.2020.07), org.Lpanamensis.MHOMCOL81L13.v46.eg.db(v.2020.07), org.Lmajor.Friedlin.v46.eg.db(v.2020.07), AnnotationDbi(v.1.52.0), IRanges(v.2.24.1), S4Vectors(v.0.28.1), futile.logger(v.1.4.3), hpgltools(v.1.0), testthat(v.3.0.2), R6(v.2.5.0), Biobase(v.2.50.0) and BiocGenerics(v.0.36.0)
loaded via a namespace (and not attached): rappdirs(v.0.3.3), SparseM(v.1.81), rtracklayer(v.1.50.0), AnnotationForge(v.1.32.0), R.methodsS3(v.1.8.1), tidyr(v.1.1.2), ggplot2(v.3.3.3), bit64(v.4.0.5), knitr(v.1.31), R.utils(v.2.10.1), DelayedArray(v.0.16.1), data.table(v.1.14.0), RCurl(v.1.98-1.2), doParallel(v.1.0.16), generics(v.0.1.0), preprocessCore(v.1.52.1), callr(v.3.5.1), cowplot(v.1.1.1), lambda.r(v.1.2.4), usethis(v.2.0.1), RSQLite(v.2.2.3), shadowtext(v.0.0.7), bit(v.4.0.4), enrichplot(v.1.10.2), xml2(v.1.3.2), httpuv(v.1.5.5), SummarizedExperiment(v.1.20.0), assertthat(v.0.2.1), viridis(v.0.5.1), xfun(v.0.21), hms(v.1.0.0), jquerylib(v.0.1.3), IHW(v.1.18.0), evaluate(v.0.14), promises(v.1.2.0.1), DEoptimR(v.1.0-8), fansi(v.0.4.2), progress(v.1.2.2), caTools(v.1.18.1), dbplyr(v.2.1.0), geneplotter(v.1.68.0), igraph(v.1.2.6), DBI(v.1.1.1), purrr(v.0.3.4), ellipsis(v.0.3.1), selectr(v.0.4-2), dplyr(v.1.0.4), backports(v.1.2.1), annotate(v.1.68.0), biomaRt(v.2.46.3), MatrixGenerics(v.1.2.1), vctrs(v.0.3.6), remotes(v.2.2.0), cachem(v.1.0.4), withr(v.2.4.1), ggforce(v.0.3.2), robustbase(v.0.93-7), AnnotationHubData(v.1.20.1), GenomicAlignments(v.1.26.0), fdrtool(v.1.2.16), prettyunits(v.1.1.1), DOSE(v.3.16.0), crayon(v.1.4.1), genefilter(v.1.72.1), slam(v.0.1-48), edgeR(v.3.32.1), pkgconfig(v.2.0.3), labeling(v.0.4.2), tweenr(v.1.0.1), nlme(v.3.1-152), pkgload(v.1.2.0), devtools(v.2.3.2), rlang(v.0.4.10), lifecycle(v.1.0.0), downloader(v.0.4), BiocFileCache(v.1.14.0), directlabels(v.2021.1.13), AnnotationHub(v.2.22.0), rprojroot(v.2.0.2), polyclip(v.1.10-0), GSVA(v.1.38.2), matrixStats(v.0.58.0), graph(v.1.68.0), lpsymphony(v.1.18.0), boot(v.1.3-27), processx(v.3.4.5), viridisLite(v.0.3.0), bitops(v.1.0-6), R.oo(v.1.24.0), KernSmooth(v.2.23-18), pander(v.0.6.3), Biostrings(v.2.58.0), blob(v.1.2.1), stringr(v.1.4.0), qvalue(v.2.22.0), gProfileR(v.0.7.0), readr(v.1.4.0), scales(v.1.1.1), GSEABase(v.1.52.1), memoise(v.2.0.0), magrittr(v.2.0.1), plyr(v.1.8.6), 
gplots(v.3.1.1), zlibbioc(v.1.36.0), compiler(v.4.0.3), scatterpie(v.0.1.5), RColorBrewer(v.1.1-2), DESeq2(v.1.30.1), Rsamtools(v.2.6.0), cli(v.2.3.1), XVector(v.0.30.0), ps(v.1.5.0), formatR(v.1.7), MASS(v.7.3-53.1), mgcv(v.1.8-34), tidyselect(v.1.1.0), stringi(v.1.5.3), EuPathDB(v.1.6.0), highr(v.0.8), yaml(v.2.2.1), GOSemSim(v.2.16.1), askpass(v.1.1), locfit(v.1.5-9.4), ggrepel(v.0.9.1), biocViews(v.1.58.1), grid(v.4.0.3), sass(v.0.3.1), fastmatch(v.1.1-0), tools(v.4.0.3), rstudioapi(v.0.13), foreach(v.1.5.1), gridExtra(v.2.3), Rtsne(v.0.15), farver(v.2.0.3), ggraph(v.2.0.5), digest(v.0.6.27), rvcheck(v.0.1.8), BiocManager(v.1.30.10), shiny(v.1.6.0), quadprog(v.1.5-8), Rcpp(v.1.0.6), broom(v.0.7.5), BiocVersion(v.3.12.0), later(v.1.1.0.1), httr(v.1.4.2), colorspace(v.2.0-0), topGO(v.2.42.0), rvest(v.0.3.6), XML(v.3.99-0.5), fs(v.1.5.0), RBGL(v.1.66.0), statmod(v.1.4.35), PROPER(v.1.22.0), graphlayouts(v.0.7.1), sessioninfo(v.1.1.1), xtable(v.1.8-4), jsonlite(v.1.7.2), nloptr(v.1.2.2.2), futile.options(v.1.0.1), tidygraph(v.1.2.0), corpcor(v.1.6.9), Vennerable(v.3.1.0.9000), RUnit(v.0.4.32), pillar(v.1.5.0), htmltools(v.0.5.1.1), mime(v.0.10), glue(v.1.4.2), fastmap(v.1.1.0), minqa(v.1.2.4), clusterProfiler(v.3.18.1), interactiveDisplayBase(v.1.28.0), codetools(v.0.2-18), fgsea(v.1.16.0), pkgbuild(v.1.2.0), utf8(v.1.1.4), lattice(v.0.20-41), bslib(v.0.2.4), tibble(v.3.0.6), sva(v.3.38.0), pbkrtest(v.0.5-0.1), curl(v.4.3), colorRamps(v.2.3), gtools(v.3.8.2), zip(v.2.1.1), openxlsx(v.4.2.3), openssl(v.1.4.3), survival(v.3.2-7), limma(v.3.46.0), rmarkdown(v.2.7), desc(v.1.2.0), munsell(v.0.5.0), fastcluster(v.1.1.25), DO.db(v.2.9), GenomeInfoDbData(v.1.2.4), iterators(v.1.0.13), reshape2(v.1.4.4) and gtable(v.0.3.0)
message(paste0("This is hpgltools commit: ", get_git_commit()))
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset f7d3248107373a9f6b0a79b70e90b82371490162
## This is hpgltools commit: Thu Mar 4 15:10:56 2021 -0500: f7d3248107373a9f6b0a79b70e90b82371490162
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
## Saving to pbmc_all_analyses_202102-v202102.rda.xz
tmp <- sm(saveme(filename=this_save))