Let us check out some new cruzi infections following the deletion of a specific gene.
I thought I also did the interrogation of the CLBrener transcriptome, but that appears untrue. I think I may have forgotten to copy the genome in place…
I have a pretty new genome downloaded (202509), so I will (for now) just let my annotation function grab whatever it thinks is reasonable. It chose the 202410 set. Seems good to me.
## The biomart annotations file already exists, loading from it.
tc_annot <- load_gff_annotations("~/libraries/genome/gff/tcruzi_all.gff",
type = "mRNA", id_col = "Parent")## Returning a df with 24 columns and 23305 rows.
rownames(tc_annot) <- gsub(x = make.names(tc_annot[["Name"]], unique = TRUE),
pattern = "\\.\\d+$", replacement = "")
esmer_db <- "org.Tcruzi.CL.Brener.Esmeraldo.like.v68.eg.db"
library(esmer_db, character.only = TRUE)## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:hpgltools':
##
## annotation, annotation<-, conditions, conditions<-, normalize
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:glue':
##
## trim
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
##
esmer_db <- get0(esmer_db)
all_keytypes <- keytypes(esmer_db)
wanted_idx <- grepl(x = all_keytypes, pattern = "^ANNOT_")
wanted_fields <- all_keytypes[wanted_idx]
nonesmer_db <- "org.Tcruzi.CL.Brener.Non.Esmeraldo.like.v68.eg.db"
unas_db <- "org.Tcruzi.CL.Brener.v68.eg.db"
tc_esmer <- load_orgdb_annotations(esmer_db, keytype = "gid", fields = wanted_fields)## Unable to find CDSNAME, setting it to ANNOT_EXTERNAL_DB_NAME.
## Unable to find CDSCHROM in the db, removing it.
## Unable to find CDSSTRAND in the db, removing it.
## Unable to find CDSSTART in the db, removing it.
## Unable to find CDSEND in the db, removing it.
## Extracted all gene ids.
## Attempting to select: ANNOT_EXTERNAL_DB_NAME, GENE_TYPE, ANNOT_AA_SEQUENCE_ID, ANNOT_ANNOTATED_GO_COMPONENT, ANNOT_ANNOTATED_GO_FUNCTION, ANNOT_ANNOTATED_GO_ID_COMPONENT, ANNOT_ANNOTATED_GO_ID_FUNCTION, ANNOT_ANNOTATED_GO_ID_PROCESS, ANNOT_ANNOTATED_GO_PROCESS, ANNOT_ANTICODON, ANNOT_APOLLO_LINK_OUT, ANNOT_APOLLO_TRANSCRIPT_DESCRIPTION, ANNOT_CDS, ANNOT_CDS_LENGTH, ANNOT_CHROMOSOME, ANNOT_CODING_END, ANNOT_CODING_START, ANNOT_EC_NUMBERS, ANNOT_EC_NUMBERS_DERIVED, ANNOT_END_MAX, ANNOT_EXON_COUNT, ANNOT_EXTERNAL_DB_NAME, ANNOT_EXTERNAL_DB_VERSION, ANNOT_FIVE_PRIME_UTR_LENGTH, ANNOT_GENE_CONTEXT_END, ANNOT_GENE_CONTEXT_START, ANNOT_GENE_END_MAX, ANNOT_GENE_END_MAX_TEXT, ANNOT_GENE_ENTREZ_ID, ANNOT_GENE_ENTREZ_LINK_DISPLAYTEXT, ANNOT_GENE_ENTREZ_LINK_URL, ANNOT_GENE_EXON_COUNT, ANNOT_GENE_HTS_NONCODING_SNPS, ANNOT_GENE_HTS_NONSYN_SYN_RATIO, ANNOT_GENE_HTS_NONSYNONYMOUS_SNPS, ANNOT_GENE_HTS_STOP_CODON_SNPS, ANNOT_GENE_HTS_SYNONYMOUS_SNPS, ANNOT_GENE_LOCATION_TEXT, ANNOT_GENE_NAME, ANNOT_GENE_ORTHOLOG_NUMBER, ANNOT_GENE_ORTHOMCL_NAME, ANNOT_GENE_PARALOG_NUMBER, ANNOT_GENE_PREVIOUS_IDS, ANNOT_GENE_PRODUCT, ANNOT_GENE_START_MIN, ANNOT_GENE_START_MIN_TEXT, ANNOT_GENE_TOTAL_HTS_SNPS, ANNOT_GENE_TRANSCRIPT_COUNT, ANNOT_GENE_TYPE, ANNOT_GENOMIC_SEQUENCE_LENGTH, ANNOT_GENUS_SPECIES, ANNOT_HAS_MISSING_TRANSCRIPTS, ANNOT_INTERPRO_DESCRIPTION, ANNOT_INTERPRO_ID, ANNOT_IS_DEPRECATED, ANNOT_IS_PSEUDO, ANNOT_ISOELECTRIC_POINT, ANNOT_LOCATION_TEXT, ANNOT_MAP_LOCATION, ANNOT_MCMC_LOCATION, ANNOT_MOLECULAR_WEIGHT, ANNOT_NCBI_TAX_ID, ANNOT_ORTHOMCL_LINK, ANNOT_OVERVIEW, ANNOT_PFAM_DESCRIPTION, ANNOT_PFAM_ID, ANNOT_PIRSF_DESCRIPTION, ANNOT_PIRSF_ID, ANNOT_PREDICTED_GO_COMPONENT, ANNOT_PREDICTED_GO_FUNCTION, ANNOT_PREDICTED_GO_ID_COMPONENT, ANNOT_PREDICTED_GO_ID_FUNCTION, ANNOT_PREDICTED_GO_ID_PROCESS, ANNOT_PREDICTED_GO_PROCESS, ANNOT_PRIMARY_KEY, ANNOT_PROB_MAP, ANNOT_PROB_MCMC, ANNOT_PROSITEPROFILES_DESCRIPTION, ANNOT_PROSITEPROFILES_ID, ANNOT_PROTEIN_LENGTH, ANNOT_PROTEIN_SEQUENCE, ANNOT_PROTEIN_SOURCE_ID, ANNOT_PSEUDO_STRING, ANNOT_SEQUENCE_DATABASE_NAME, ANNOT_SEQUENCE_ID, ANNOT_SIGNALP_PEPTIDE, ANNOT_SMART_DESCRIPTION, ANNOT_SMART_ID, ANNOT_SNPOVERVIEW, ANNOT_SO_ID, ANNOT_SO_TERM_DEFINITION, ANNOT_SO_TERM_NAME, ANNOT_SO_VERSION, ANNOT_START_MIN, ANNOT_STRAND, ANNOT_STRAND_PLUS_MINUS, ANNOT_SUPERFAMILY_DESCRIPTION, ANNOT_SUPERFAMILY_ID, ANNOT_THREE_PRIME_UTR_LENGTH, ANNOT_TIGRFAM_DESCRIPTION, ANNOT_TIGRFAM_ID, ANNOT_TM_COUNT, ANNOT_TRANS_FOUND_PER_GENE_INTERNAL, ANNOT_TRANSCRIPT_INDEX_PER_GENE, ANNOT_TRANSCRIPT_LENGTH, ANNOT_TRANSCRIPT_LINK, ANNOT_TRANSCRIPT_PRODUCT, ANNOT_TRANSCRIPT_SEQUENCE, ANNOT_TRANSCRIPTS_FOUND_PER_GENE, ANNOT_UNIPROT_IDS, ANNOT_UNIPROT_LINKS
## 'select()' returned 1:1 mapping between keys and columns
##
## Unable to find CDSNAME, setting it to ANNOT_EXTERNAL_DB_NAME.
## Unable to find CDSCHROM in the db, removing it.
## Unable to find CDSSTRAND in the db, removing it.
## Unable to find CDSSTART in the db, removing it.
## Unable to find CDSEND in the db, removing it.
## Extracted all gene ids.
## Attempting to select: ANNOT_EXTERNAL_DB_NAME, GENE_TYPE, ANNOT_AA_SEQUENCE_ID, ANNOT_ANNOTATED_GO_COMPONENT, ANNOT_ANNOTATED_GO_FUNCTION, ANNOT_ANNOTATED_GO_ID_COMPONENT, ANNOT_ANNOTATED_GO_ID_FUNCTION, ANNOT_ANNOTATED_GO_ID_PROCESS, ANNOT_ANNOTATED_GO_PROCESS, ANNOT_ANTICODON, ANNOT_APOLLO_LINK_OUT, ANNOT_APOLLO_TRANSCRIPT_DESCRIPTION, ANNOT_CDS, ANNOT_CDS_LENGTH, ANNOT_CHROMOSOME, ANNOT_CODING_END, ANNOT_CODING_START, ANNOT_EC_NUMBERS, ANNOT_EC_NUMBERS_DERIVED, ANNOT_END_MAX, ANNOT_EXON_COUNT, ANNOT_EXTERNAL_DB_NAME, ANNOT_EXTERNAL_DB_VERSION, ANNOT_FIVE_PRIME_UTR_LENGTH, ANNOT_GENE_CONTEXT_END, ANNOT_GENE_CONTEXT_START, ANNOT_GENE_END_MAX, ANNOT_GENE_END_MAX_TEXT, ANNOT_GENE_ENTREZ_ID, ANNOT_GENE_ENTREZ_LINK_DISPLAYTEXT, ANNOT_GENE_ENTREZ_LINK_URL, ANNOT_GENE_EXON_COUNT, ANNOT_GENE_HTS_NONCODING_SNPS, ANNOT_GENE_HTS_NONSYN_SYN_RATIO, ANNOT_GENE_HTS_NONSYNONYMOUS_SNPS, ANNOT_GENE_HTS_STOP_CODON_SNPS, ANNOT_GENE_HTS_SYNONYMOUS_SNPS, ANNOT_GENE_LOCATION_TEXT, ANNOT_GENE_NAME, ANNOT_GENE_ORTHOLOG_NUMBER, ANNOT_GENE_ORTHOMCL_NAME, ANNOT_GENE_PARALOG_NUMBER, ANNOT_GENE_PREVIOUS_IDS, ANNOT_GENE_PRODUCT, ANNOT_GENE_START_MIN, ANNOT_GENE_START_MIN_TEXT, ANNOT_GENE_TOTAL_HTS_SNPS, ANNOT_GENE_TRANSCRIPT_COUNT, ANNOT_GENE_TYPE, ANNOT_GENOMIC_SEQUENCE_LENGTH, ANNOT_GENUS_SPECIES, ANNOT_HAS_MISSING_TRANSCRIPTS, ANNOT_INTERPRO_DESCRIPTION, ANNOT_INTERPRO_ID, ANNOT_IS_DEPRECATED, ANNOT_IS_PSEUDO, ANNOT_ISOELECTRIC_POINT, ANNOT_LOCATION_TEXT, ANNOT_MAP_LOCATION, ANNOT_MCMC_LOCATION, ANNOT_MOLECULAR_WEIGHT, ANNOT_NCBI_TAX_ID, ANNOT_ORTHOMCL_LINK, ANNOT_OVERVIEW, ANNOT_PFAM_DESCRIPTION, ANNOT_PFAM_ID, ANNOT_PIRSF_DESCRIPTION, ANNOT_PIRSF_ID, ANNOT_PREDICTED_GO_COMPONENT, ANNOT_PREDICTED_GO_FUNCTION, ANNOT_PREDICTED_GO_ID_COMPONENT, ANNOT_PREDICTED_GO_ID_FUNCTION, ANNOT_PREDICTED_GO_ID_PROCESS, ANNOT_PREDICTED_GO_PROCESS, ANNOT_PRIMARY_KEY, ANNOT_PROB_MAP, ANNOT_PROB_MCMC, ANNOT_PROSITEPROFILES_DESCRIPTION, ANNOT_PROSITEPROFILES_ID, ANNOT_PROTEIN_LENGTH, ANNOT_PROTEIN_SEQUENCE, ANNOT_PROTEIN_SOURCE_ID, ANNOT_PSEUDO_STRING, ANNOT_SEQUENCE_DATABASE_NAME, ANNOT_SEQUENCE_ID, ANNOT_SIGNALP_PEPTIDE, ANNOT_SMART_DESCRIPTION, ANNOT_SMART_ID, ANNOT_SNPOVERVIEW, ANNOT_SO_ID, ANNOT_SO_TERM_DEFINITION, ANNOT_SO_TERM_NAME, ANNOT_SO_VERSION, ANNOT_START_MIN, ANNOT_STRAND, ANNOT_STRAND_PLUS_MINUS, ANNOT_SUPERFAMILY_DESCRIPTION, ANNOT_SUPERFAMILY_ID, ANNOT_THREE_PRIME_UTR_LENGTH, ANNOT_TIGRFAM_DESCRIPTION, ANNOT_TIGRFAM_ID, ANNOT_TM_COUNT, ANNOT_TRANS_FOUND_PER_GENE_INTERNAL, ANNOT_TRANSCRIPT_INDEX_PER_GENE, ANNOT_TRANSCRIPT_LENGTH, ANNOT_TRANSCRIPT_LINK, ANNOT_TRANSCRIPT_PRODUCT, ANNOT_TRANSCRIPT_SEQUENCE, ANNOT_TRANSCRIPTS_FOUND_PER_GENE, ANNOT_UNIPROT_IDS, ANNOT_UNIPROT_LINKS
## 'select()' returned 1:1 mapping between keys and columns
##
## Unable to find CDSNAME, setting it to ANNOT_EXTERNAL_DB_NAME.
## Unable to find CDSCHROM in the db, removing it.
## Unable to find CDSSTRAND in the db, removing it.
## Unable to find CDSSTART in the db, removing it.
## Unable to find CDSEND in the db, removing it.
## Extracted all gene ids.
## Attempting to select: ANNOT_EXTERNAL_DB_NAME, GENE_TYPE, ANNOT_AA_SEQUENCE_ID, ANNOT_ANNOTATED_GO_COMPONENT, ANNOT_ANNOTATED_GO_FUNCTION, ANNOT_ANNOTATED_GO_ID_COMPONENT, ANNOT_ANNOTATED_GO_ID_FUNCTION, ANNOT_ANNOTATED_GO_ID_PROCESS, ANNOT_ANNOTATED_GO_PROCESS, ANNOT_ANTICODON, ANNOT_APOLLO_LINK_OUT, ANNOT_APOLLO_TRANSCRIPT_DESCRIPTION, ANNOT_CDS, ANNOT_CDS_LENGTH, ANNOT_CHROMOSOME, ANNOT_CODING_END, ANNOT_CODING_START, ANNOT_EC_NUMBERS, ANNOT_EC_NUMBERS_DERIVED, ANNOT_END_MAX, ANNOT_EXON_COUNT, ANNOT_EXTERNAL_DB_NAME, ANNOT_EXTERNAL_DB_VERSION, ANNOT_FIVE_PRIME_UTR_LENGTH, ANNOT_GENE_CONTEXT_END, ANNOT_GENE_CONTEXT_START, ANNOT_GENE_END_MAX, ANNOT_GENE_END_MAX_TEXT, ANNOT_GENE_ENTREZ_ID, ANNOT_GENE_ENTREZ_LINK_DISPLAYTEXT, ANNOT_GENE_ENTREZ_LINK_URL, ANNOT_GENE_EXON_COUNT, ANNOT_GENE_HTS_NONCODING_SNPS, ANNOT_GENE_HTS_NONSYN_SYN_RATIO, ANNOT_GENE_HTS_NONSYNONYMOUS_SNPS, ANNOT_GENE_HTS_STOP_CODON_SNPS, ANNOT_GENE_HTS_SYNONYMOUS_SNPS, ANNOT_GENE_LOCATION_TEXT, ANNOT_GENE_NAME, ANNOT_GENE_ORTHOLOG_NUMBER, ANNOT_GENE_ORTHOMCL_NAME, ANNOT_GENE_PARALOG_NUMBER, ANNOT_GENE_PREVIOUS_IDS, ANNOT_GENE_PRODUCT, ANNOT_GENE_START_MIN, ANNOT_GENE_START_MIN_TEXT, ANNOT_GENE_TOTAL_HTS_SNPS, ANNOT_GENE_TRANSCRIPT_COUNT, ANNOT_GENE_TYPE, ANNOT_GENOMIC_SEQUENCE_LENGTH, ANNOT_GENUS_SPECIES, ANNOT_HAS_MISSING_TRANSCRIPTS, ANNOT_INTERPRO_DESCRIPTION, ANNOT_INTERPRO_ID, ANNOT_IS_DEPRECATED, ANNOT_IS_PSEUDO, ANNOT_ISOELECTRIC_POINT, ANNOT_LOCATION_TEXT, ANNOT_MAP_LOCATION, ANNOT_MCMC_LOCATION, ANNOT_MOLECULAR_WEIGHT, ANNOT_NCBI_TAX_ID, ANNOT_ORTHOMCL_LINK, ANNOT_OVERVIEW, ANNOT_PFAM_DESCRIPTION, ANNOT_PFAM_ID, ANNOT_PIRSF_DESCRIPTION, ANNOT_PIRSF_ID, ANNOT_PREDICTED_GO_COMPONENT, ANNOT_PREDICTED_GO_FUNCTION, ANNOT_PREDICTED_GO_ID_COMPONENT, ANNOT_PREDICTED_GO_ID_FUNCTION, ANNOT_PREDICTED_GO_ID_PROCESS, ANNOT_PREDICTED_GO_PROCESS, ANNOT_PRIMARY_KEY, ANNOT_PROB_MAP, ANNOT_PROB_MCMC, ANNOT_PROSITEPROFILES_DESCRIPTION, ANNOT_PROSITEPROFILES_ID, ANNOT_PROTEIN_LENGTH, ANNOT_PROTEIN_SEQUENCE, ANNOT_PROTEIN_SOURCE_ID, ANNOT_PSEUDO_STRING, ANNOT_SEQUENCE_DATABASE_NAME, ANNOT_SEQUENCE_ID, ANNOT_SIGNALP_PEPTIDE, ANNOT_SMART_DESCRIPTION, ANNOT_SMART_ID, ANNOT_SNPOVERVIEW, ANNOT_SO_ID, ANNOT_SO_TERM_DEFINITION, ANNOT_SO_TERM_NAME, ANNOT_SO_VERSION, ANNOT_START_MIN, ANNOT_STRAND, ANNOT_STRAND_PLUS_MINUS, ANNOT_SUPERFAMILY_DESCRIPTION, ANNOT_SUPERFAMILY_ID, ANNOT_THREE_PRIME_UTR_LENGTH, ANNOT_TIGRFAM_DESCRIPTION, ANNOT_TIGRFAM_ID, ANNOT_TM_COUNT, ANNOT_TRANS_FOUND_PER_GENE_INTERNAL, ANNOT_TRANSCRIPT_INDEX_PER_GENE, ANNOT_TRANSCRIPT_LENGTH, ANNOT_TRANSCRIPT_LINK, ANNOT_TRANSCRIPT_PRODUCT, ANNOT_TRANSCRIPT_SEQUENCE, ANNOT_TRANSCRIPTS_FOUND_PER_GENE, ANNOT_UNIPROT_IDS, ANNOT_UNIPROT_LINKS
## 'select()' returned 1:1 mapping between keys and columns
tc_more <- rbind(tc_esmer$genes, tc_nonesmer$genes, tc_unas$genes)
tc_annot <- merge(tc_annot, tc_more, by = "row.names")
rownames(tc_annot) <- tc_annot[["gid"]]
tc_annot[["gid"]] <- NULL
dim(tc_annot)## [1] 23304 135
## This is an orgdb, good.
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## This is an orgdb, good.
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## This is an orgdb, good.
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
I asked for one from Najib/Amalie but unless I am mistaken it has not arrived. That is not a problem, given two helpful things: April provides one, I also named the directories so that the sample IDs are built in; so I will just make a fake one for now and then merge in whatever I get from them…
sample_sheet <- "sample_sheets/all_samples.xlsx"
plot_meta_sankey(as.data.frame(extract_metadata(sample_sheet)),
factors = c("background", "exp_number"))## Did not find the condition column in the sample sheet.
## Filling it in as undefined.
## Did not find the batch column in the sample sheet.
## Filling it in as undefined.
## Checking the state of the condition column.
## Checking the state of the batch column.
## Checking the condition factor.
## Warning: attributes are not identical across measure variables; they will be
## dropped
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the ggsankey package.
## Please report the issue at
## <https://github.com/davidsjoberg/ggsankey/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## A sankey plot describing the metadata of 14 samples,
## including 19 out of 0 nodes and traversing metadata factors:
## background, exp_number.
Let us see how well my preprocess gatherer does…
## Did not find the condition column in the sample sheet.
## Filling it in as undefined.
## Did not find the batch column in the sample sheet.
## Filling it in as undefined.
## Checking the state of the condition column.
## Checking the state of the batch column.
## Checking the condition factor.
## Warning in dispatch_regex_search(meta, search, replace, input_file_spec, : NAs
## introduced by coercion
## preprocessing/02_HeLa_control_60h/outputs/*hisat*_hg38_115/hg38_115_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/04_HeLa_WT_60hpi/outputs/*hisat*_hg38_115/hg38_115_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/06_HeLa_KO7_60hpi/outputs/*hisat*_hg38_115/hg38_115_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/08_HeLa_Cas_60hpi/outputs/*hisat*_hg38_115/hg38_115_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/18_HeLa_control_60h/outputs/*hisat*_hg38_115/hg38_115_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/20_HeLa_WT_60hpi/outputs/*hisat*_hg38_115/hg38_115_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/22_HeLa_KO7_60hpi/outputs/*hisat*_hg38_115/hg38_115_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/23_HeLa_AB10_60hpi/outputs/*hisat*_hg38_115/hg38_115_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/34_HeLa_control_60h/outputs/*hisat*_hg38_115/hg38_115_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/36_HeLa_WT_60hpi/outputs/*hisat*_hg38_115/hg38_115_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/38_HeLa_KO7_60hpi/outputs/*hisat*_hg38_115/hg38_115_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/39_HeLa_AB10_60hpi/outputs/*hisat*_hg38_115/hg38_115_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/40_HeLa_AB10_60hpi/outputs/*hisat*_hg38_115/hg38_115_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/pos_ctrl/outputs/*hisat*_hg38_115/hg38_115_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/02_HeLa_control_60h/outputs/*hisat*_tcruzi_all/tcruzi_all_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/04_HeLa_WT_60hpi/outputs/*hisat*_tcruzi_all/tcruzi_all_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/06_HeLa_KO7_60hpi/outputs/*hisat*_tcruzi_all/tcruzi_all_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/08_HeLa_Cas_60hpi/outputs/*hisat*_tcruzi_all/tcruzi_all_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/18_HeLa_control_60h/outputs/*hisat*_tcruzi_all/tcruzi_all_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/20_HeLa_WT_60hpi/outputs/*hisat*_tcruzi_all/tcruzi_all_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/22_HeLa_KO7_60hpi/outputs/*hisat*_tcruzi_all/tcruzi_all_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/23_HeLa_AB10_60hpi/outputs/*hisat*_tcruzi_all/tcruzi_all_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/34_HeLa_control_60h/outputs/*hisat*_tcruzi_all/tcruzi_all_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/36_HeLa_WT_60hpi/outputs/*hisat*_tcruzi_all/tcruzi_all_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/38_HeLa_KO7_60hpi/outputs/*hisat*_tcruzi_all/tcruzi_all_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/39_HeLa_AB10_60hpi/outputs/*hisat*_tcruzi_all/tcruzi_all_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/40_HeLa_AB10_60hpi/outputs/*hisat*_tcruzi_all/tcruzi_all_*genome*_gene_ID_fcounts.csv.xz
## preprocessing/pos_ctrl/outputs/*hisat*_tcruzi_all/tcruzi_all_*genome*_gene_ID_fcounts.csv.xz
## Writing new metadata to: sample_sheets/all_samples_modified.xlsx
## Deleting the file sample_sheets/all_samples_modified.xlsx before writing the tables.
## sampleid samplenumber celltype background hpi
## 02_HeLa_control_60h 02_HeLa_control_60h 2 HeLa control t60h
## 04_HeLa_WT_60hpi 04_HeLa_WT_60hpi 4 HeLa wt t60h
## 06_HeLa_KO7_60hpi 06_HeLa_KO7_60hpi 6 HeLa ko7 t60h
## 08_HeLa_Cas_60hpi 08_HeLa_Cas_60hpi 8 HeLa cas t60h
## 18_HeLa_control_60h 18_HeLa_control_60h 18 HeLa control t60h
## 20_HeLa_WT_60hpi 20_HeLa_WT_60hpi 20 HeLa wt t60h
## exp_number amount_in_10ul amount_fact condition batch
## 02_HeLa_control_60h e1 183 low undefined undefined
## 04_HeLa_WT_60hpi e1 304 mid undefined undefined
## 06_HeLa_KO7_60hpi e1 298 mid undefined undefined
## 08_HeLa_Cas_60hpi e1 284 mid undefined undefined
## 18_HeLa_control_60h e2 62 low undefined undefined
## 20_HeLa_WT_60hpi e2 228 mid undefined undefined
## sampleid_backup trimomatic_input trimomatic_output
## 02_HeLa_control_60h 02_HeLa_control_60h 34421670 31723102
## 04_HeLa_WT_60hpi 04_HeLa_WT_60hpi 33338315 30831462
## 06_HeLa_KO7_60hpi 06_HeLa_KO7_60hpi 36904955 34168992
## 08_HeLa_Cas_60hpi 08_HeLa_Cas_60hpi 34230672 30953413
## 18_HeLa_control_60h 18_HeLa_control_60h 31154298 28104898
## 20_HeLa_WT_60hpi 20_HeLa_WT_60hpi 35726918 32916331
## trimomatic_percent fastqc_pct_gc
## 02_HeLa_control_60h 0.922 52
## 04_HeLa_WT_60hpi 0.925 50
## 06_HeLa_KO7_60hpi 0.926 50
## 08_HeLa_Cas_60hpi 0.904 50
## 18_HeLa_control_60h 0.902 51
## 20_HeLa_WT_60hpi 0.921 50
## kraken_bacterial_classified kraken_bacterial_unclassified
## 02_HeLa_control_60h 147699 418871
## 04_HeLa_WT_60hpi 285754 6263711
## 06_HeLa_KO7_60hpi 414463 8109109
## 08_HeLa_Cas_60hpi 309973 7277804
## 18_HeLa_control_60h 147359 374703
## 20_HeLa_WT_60hpi 323491 8424975
## kraken_first_bacterial_species
## 02_HeLa_control_60h Porphyrobacter sp. GA68
## 04_HeLa_WT_60hpi Mycoplasmopsis arginini
## 06_HeLa_KO7_60hpi Mycoplasmopsis arginini
## 08_HeLa_Cas_60hpi Mycoplasmopsis arginini
## 18_HeLa_control_60h Porphyrobacter sp. GA68
## 20_HeLa_WT_60hpi Klebsiella pneumoniae
## kraken_first_bacterial_species_reads
## 02_HeLa_control_60h 34515
## 04_HeLa_WT_60hpi 20649
## 06_HeLa_KO7_60hpi 95574
## 08_HeLa_Cas_60hpi 22086
## 18_HeLa_control_60h 22324
## 20_HeLa_WT_60hpi 4599
## kraken_matrix_bacterial
## 02_HeLa_control_60h preprocessing/02_HeLa_control_60h/outputs/20251031kraken_bacteria/kraken_report_matrix.tsv
## 04_HeLa_WT_60hpi preprocessing/04_HeLa_WT_60hpi/outputs/20251031kraken_bacteria/kraken_report_matrix.tsv
## 06_HeLa_KO7_60hpi preprocessing/06_HeLa_KO7_60hpi/outputs/20251031kraken_bacteria/kraken_report_matrix.tsv
## 08_HeLa_Cas_60hpi preprocessing/08_HeLa_Cas_60hpi/outputs/20251031kraken_bacteria/kraken_report_matrix.tsv
## 18_HeLa_control_60h preprocessing/18_HeLa_control_60h/outputs/20251031kraken_bacteria/kraken_report_matrix.tsv
## 20_HeLa_WT_60hpi preprocessing/20_HeLa_WT_60hpi/outputs/20251031kraken_bacteria/kraken_report_matrix.tsv
## hisat_genome_input_reads_hg38_115
## 02_HeLa_control_60h 31723102
## 04_HeLa_WT_60hpi 30831462
## 06_HeLa_KO7_60hpi 0
## 08_HeLa_Cas_60hpi 30953413
## 18_HeLa_control_60h 28104898
## 20_HeLa_WT_60hpi 32916331
## hisat_genome_input_reads_tcruzi_all
## 02_HeLa_control_60h 566570
## 04_HeLa_WT_60hpi 6549465
## 06_HeLa_KO7_60hpi 8523572
## 08_HeLa_Cas_60hpi 7587777
## 18_HeLa_control_60h 522062
## 20_HeLa_WT_60hpi 8748466
## hisat_genome_single_concordant_hg38_115
## 02_HeLa_control_60h 27374698
## 04_HeLa_WT_60hpi 21550886
## 06_HeLa_KO7_60hpi 0
## 08_HeLa_Cas_60hpi 20831115
## 18_HeLa_control_60h 24646849
## 20_HeLa_WT_60hpi 21560373
## hisat_genome_single_concordant_tcruzi_all
## 02_HeLa_control_60h 533
## 04_HeLa_WT_60hpi 3958505
## 06_HeLa_KO7_60hpi 5187623
## 08_HeLa_Cas_60hpi 4581882
## 18_HeLa_control_60h 1779
## 20_HeLa_WT_60hpi 5359267
## hisat_genome_multi_concordant_hg38_115
## 02_HeLa_control_60h 3781834
## 04_HeLa_WT_60hpi 2731111
## 06_HeLa_KO7_60hpi 0
## 08_HeLa_Cas_60hpi 2534521
## 18_HeLa_control_60h 2935987
## 20_HeLa_WT_60hpi 2607492
## hisat_genome_multi_concordant_tcruzi_all
## 02_HeLa_control_60h 148
## 04_HeLa_WT_60hpi 1769606
## 06_HeLa_KO7_60hpi 2326419
## 08_HeLa_Cas_60hpi 2096222
## 18_HeLa_control_60h 580
## 20_HeLa_WT_60hpi 2409017
## hisat_genome_single_all_hg38_115
## 02_HeLa_control_60h 393579
## 04_HeLa_WT_60hpi 386791
## 06_HeLa_KO7_60hpi 0
## 08_HeLa_Cas_60hpi 370232
## 18_HeLa_control_60h 371885
## 20_HeLa_WT_60hpi 394781
## hisat_genome_single_all_tcruzi_all
## 02_HeLa_control_60h 31940
## 04_HeLa_WT_60hpi 191224
## 06_HeLa_KO7_60hpi 219958
## 08_HeLa_Cas_60hpi 198309
## 18_HeLa_control_60h 44606
## 20_HeLa_WT_60hpi 254343
## hisat_genome_multi_all_hg38_115
## 02_HeLa_control_60h 147888
## 04_HeLa_WT_60hpi 125185
## 06_HeLa_KO7_60hpi 0
## 08_HeLa_Cas_60hpi 118754
## 18_HeLa_control_60h 118560
## 20_HeLa_WT_60hpi 124747
## hisat_genome_multi_all_tcruzi_all hisat_unmapped_hg38_115
## 02_HeLa_control_60h 26257 485321
## 04_HeLa_WT_60hpi 98192 12501300
## 06_HeLa_KO7_60hpi 120720 0
## 08_HeLa_Cas_60hpi 105444 14599664
## 18_HeLa_control_60h 22098 474391
## 20_HeLa_WT_60hpi 122425 16893802
## hisat_unmapped_tcruzi_all hisat_genome_percent_log_hg38_115
## 02_HeLa_control_60h 1073519 99.24
## 04_HeLa_WT_60hpi 1335114 79.73
## 06_HeLa_KO7_60hpi 1655578 0.00
## 08_HeLa_Cas_60hpi 1495987 76.42
## 18_HeLa_control_60h 972594 99.16
## 20_HeLa_WT_60hpi 1557970 74.34
## hisat_genome_percent_log_tcruzi_all
## 02_HeLa_control_60h 5.26
## 04_HeLa_WT_60hpi 89.81
## 06_HeLa_KO7_60hpi 90.29
## 08_HeLa_Cas_60hpi 90.14
## 18_HeLa_control_60h 6.85
## 20_HeLa_WT_60hpi 91.10
## hisat_alignment_hg38_115
## 02_HeLa_control_60h preprocessing/02_HeLa_control_60h/outputs/20251031hisat_hg38_115/hg38_115_genome.bam
## 04_HeLa_WT_60hpi preprocessing/04_HeLa_WT_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome.bam
## 06_HeLa_KO7_60hpi preprocessing/06_HeLa_KO7_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome.bam
## 08_HeLa_Cas_60hpi preprocessing/08_HeLa_Cas_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome.bam
## 18_HeLa_control_60h preprocessing/18_HeLa_control_60h/outputs/20251031hisat_hg38_115/hg38_115_genome.bam
## 20_HeLa_WT_60hpi preprocessing/20_HeLa_WT_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome.bam
## hisat_alignment_tcruzi_all
## 02_HeLa_control_60h preprocessing/02_HeLa_control_60h/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome.bam
## 04_HeLa_WT_60hpi preprocessing/04_HeLa_WT_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome.bam
## 06_HeLa_KO7_60hpi preprocessing/06_HeLa_KO7_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome.bam
## 08_HeLa_Cas_60hpi preprocessing/08_HeLa_Cas_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome.bam
## 18_HeLa_control_60h preprocessing/18_HeLa_control_60h/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome.bam
## 20_HeLa_WT_60hpi preprocessing/20_HeLa_WT_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome.bam
## salmon_percent_hg38_115 salmon_percent_tcruzi_all
## 02_HeLa_control_60h 45.40 NA
## 04_HeLa_WT_60hpi 35.05 NA
## 06_HeLa_KO7_60hpi 33.15 NA
## 08_HeLa_Cas_60hpi 33.47 NA
## 18_HeLa_control_60h 43.10 NA
## 20_HeLa_WT_60hpi 34.33 NA
## salmon_observed_genes_hg38_115
## 02_HeLa_control_60h 47839
## 04_HeLa_WT_60hpi 46509
## 06_HeLa_KO7_60hpi 48117
## 08_HeLa_Cas_60hpi 46351
## 18_HeLa_control_60h 47978
## 20_HeLa_WT_60hpi 47985
## salmon_observed_genes_tcruzi_all
## 02_HeLa_control_60h NA
## 04_HeLa_WT_60hpi NA
## 06_HeLa_KO7_60hpi NA
## 08_HeLa_Cas_60hpi NA
## 18_HeLa_control_60h NA
## 20_HeLa_WT_60hpi NA
## input_r1
## 02_HeLa_control_60h unprocessed/02_HeLa_control_60h_2_S1_R1_001.fastq.gz
## 04_HeLa_WT_60hpi unprocessed/04_HeLa_WT_60hpi_2_S2_R1_001.fastq.gz
## 06_HeLa_KO7_60hpi unprocessed/06_HeLa_KO7_60hpi_2_S3_R1_001.fastq.gz
## 08_HeLa_Cas_60hpi unprocessed/08_HeLa_Cas_60hpi_2_S4_R1_001.fastq.gz
## 18_HeLa_control_60h unprocessed/18_HeLa_control_60h_2_S5_R1_001.fastq.gz
## 20_HeLa_WT_60hpi unprocessed/20_HeLa_WT_60hpi_2_S6_R1_001.fastq.gz
## input_r2
## 02_HeLa_control_60h unprocessed/02_HeLa_control_60h_2_S1_R2_001.fastq.gz
## 04_HeLa_WT_60hpi unprocessed/04_HeLa_WT_60hpi_2_S2_R2_001.fastq.gz
## 06_HeLa_KO7_60hpi unprocessed/06_HeLa_KO7_60hpi_2_S3_R2_001.fastq.gz
## 08_HeLa_Cas_60hpi unprocessed/08_HeLa_Cas_60hpi_2_S4_R2_001.fastq.gz
## 18_HeLa_control_60h unprocessed/18_HeLa_control_60h_2_S5_R2_001.fastq.gz
## 20_HeLa_WT_60hpi unprocessed/20_HeLa_WT_60hpi_2_S6_R2_001.fastq.gz
## hisat_count_table_hg38_115
## 02_HeLa_control_60h preprocessing/02_HeLa_control_60h/outputs/20251031hisat_hg38_115/hg38_115_genome-paired_s2_gene_ID_fcounts.csv.xz
## 04_HeLa_WT_60hpi preprocessing/04_HeLa_WT_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome-paired_s2_gene_ID_fcounts.csv.xz
## 06_HeLa_KO7_60hpi preprocessing/06_HeLa_KO7_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome-paired_s2_gene_ID_fcounts.csv.xz
## 08_HeLa_Cas_60hpi preprocessing/08_HeLa_Cas_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome-paired_s2_gene_ID_fcounts.csv.xz
## 18_HeLa_control_60h preprocessing/18_HeLa_control_60h/outputs/20251031hisat_hg38_115/hg38_115_genome-paired_s2_gene_ID_fcounts.csv.xz
## 20_HeLa_WT_60hpi preprocessing/20_HeLa_WT_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome-paired_s2_gene_ID_fcounts.csv.xz
## hisat_count_table_tcruzi_all
## 02_HeLa_control_60h preprocessing/02_HeLa_control_60h/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome-paired_s2_gene_ID_fcounts.csv.xz
## 04_HeLa_WT_60hpi preprocessing/04_HeLa_WT_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome-paired_s2_gene_ID_fcounts.csv.xz
## 06_HeLa_KO7_60hpi preprocessing/06_HeLa_KO7_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome-paired_s2_gene_ID_fcounts.csv.xz
## 08_HeLa_Cas_60hpi preprocessing/08_HeLa_Cas_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome-paired_s2_gene_ID_fcounts.csv.xz
## 18_HeLa_control_60h preprocessing/18_HeLa_control_60h/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome-paired_s2_gene_ID_fcounts.csv.xz
## 20_HeLa_WT_60hpi preprocessing/20_HeLa_WT_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome-paired_s2_gene_ID_fcounts.csv.xz
## salmon_count_table_hg38_115
## 02_HeLa_control_60h preprocessing/02_HeLa_control_60h/outputs/20251031salmon_hg38_115_CDS/quant.sf
## 04_HeLa_WT_60hpi preprocessing/04_HeLa_WT_60hpi/outputs/20251031salmon_hg38_115_CDS/quant.sf
## 06_HeLa_KO7_60hpi preprocessing/06_HeLa_KO7_60hpi/outputs/20251031salmon_hg38_115_CDS/quant.sf
## 08_HeLa_Cas_60hpi preprocessing/08_HeLa_Cas_60hpi/outputs/20251031salmon_hg38_115_CDS/quant.sf
## 18_HeLa_control_60h preprocessing/18_HeLa_control_60h/outputs/20251031salmon_hg38_115_CDS/quant.sf
## 20_HeLa_WT_60hpi preprocessing/20_HeLa_WT_60hpi/outputs/20251031salmon_hg38_115_CDS/quant.sf
## salmon_count_table_tcruzi_all
## 02_HeLa_control_60h NA
## 04_HeLa_WT_60hpi NA
## 06_HeLa_KO7_60hpi NA
## 08_HeLa_Cas_60hpi NA
## 18_HeLa_control_60h NA
## 20_HeLa_WT_60hpi NA
hs_se <- create_se(new_meta[["new_meta"]], gene_info = hs_annot[["gene_annotations"]],
file_column = "hisat_count_table_hg38_115") %>%
set_conditions(fact = "background") %>%
set_batches(fact = "exp_number")## Reading the sample metadata.
## Checking the state of the condition column.
## Checking the state of the batch column.
## Checking the condition factor.
## The sample definitions comprises: 14 rows(samples) and 43 columns(metadata fields).
## Matched 21562 annotations and counts.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the summarized experiment to 'se.rda'.
## The final summarized experiment has 21571 rows and 43 columns.
## The numbers of samples by condition are:
##
## AB10 cas control ko7 positive wt
## 3 1 3 3 1 3
## The number of samples by batch are:
##
## e1 e2 e3 undef
## 4 4 5 1
tc_se <- create_se(new_meta[["new_meta"]], gene_info = tc_annot,
file_column = "hisat_count_table_tcruzi_all") %>%
set_conditions(fact = "background") %>%
set_batches(fact = "exp_number")## Reading the sample metadata.
## Checking the state of the condition column.
## Checking the state of the batch column.
## Checking the condition factor.
## The sample definitions comprises: 14 rows(samples) and 43 columns(metadata fields).
## Matched 23304 annotations and counts.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the summarized experiment to 'se.rda'.
## The final summarized experiment has 25100 rows and 43 columns.
## The numbers of samples by condition are:
##
## AB10 cas control ko7 positive wt
## 3 1 3 3 1 3
## The number of samples by batch are:
##
## e1 e2 e3 undef
## 4 4 5 1
## The samples (and read coverage) removed when filtering 5000 non-zero genes are:
## 02_HeLa_control_60h 18_HeLa_control_60h 34_HeLa_control_60h pos_ctrl
## 122 663 107 48
## 02_HeLa_control_60h 18_HeLa_control_60h 34_HeLa_control_60h pos_ctrl
## 81 523 91 36
## Samples removed: 81, 523, 91, 36
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic argument:
## • colour = colors
## ℹ Did you misspell an argument name?
## Library sizes of 14 samples,
## ranging from 14,183,768 to 29,076,224.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the hpgltools package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## A non-zero genes plot of 14 samples.
## These samples have an average 20.55 CPM coverage and 15472 genes observed, ranging from 15027 to
## 17380.
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## 85373 entries are 0. We are on a log scale, adding 1 to the data.
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic argument:
## • colour = colors
## ℹ Did you misspell an argument name?
## Library sizes of 10 samples,
## ranging from 791,179 to 3,382,177.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## A non-zero genes plot of 10 samples.
## These samples have an average 2.064 CPM coverage and 21590 genes observed, ranging from 20421 to
## 22625.
## 35096 entries are 0. We are on a log scale, adding 1 to the data.
## Removing samples with less than 3 replicates.
## Removed: 08_HeLa_Cas_60hpi, pos_ctrl.
tc_replicated <- subset_se(tc_se, min_replicates = 3, fact = "condition") %>%
subset_se(nonzero = 10000)## Removing samples with less than 3 replicates.
## Removed: 08_HeLa_Cas_60hpi.
## No samples have fewer than 10000 observed genes.
## ℹ Loading hpgltools
## in method for 'plot_topn_gsea' with signature 'gse="all_cprofiler"': no definition for class "all_cprofiler"
##
## in method for 'plot_topn_gsea' with signature 'gse="clusterprofiler_result"': no definition for class "clusterprofiler_result"
## Warning: multiple methods tables found for 'annotation'
## Warning: multiple methods tables found for 'annotation<-'
hs_norm <- normalize(hs_replicated, transform = "log2", convert = "cpm",
norm = "quant", filter = TRUE)## Removing 9809 low-count genes (11762 remaining).
## Setting 1672 entries to zero.
## A heatmap of pairwise sample distances ranging from:
## 16.2110615399822 to 91.122185751637.
## A heatmap of pairwise sample correlations ranging from:
## 0.916078357207824 to 0.997343768762044.
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by AB10, control, ko7, wt
## Shapes are defined by e1, e2, e3.
hs_rnorm <- normalize(hs_replicated, transform = "log2", convert = "cpm",
norm = "quant", filter = TRUE)## Removing 9809 low-count genes (11762 remaining).
## Setting 1672 entries to zero.
## A heatmap of pairwise sample distances ranging from:
## 16.2110615399822 to 91.122185751637.
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by AB10, control, ko7, wt
## Shapes are defined by e1, e2, e3.
hs_rbnorm <- normalize(hs_replicated, transform = "log2", convert = "cpm",
filter = TRUE, batch = "svaseq")## Removing 9809 low-count genes (11762 remaining).
## transform_counts: Found 25 values less than 0.
## Warning in transform_counts(count_table, method = transform, design = design, :
## NaNs produced
## Setting 861 entries to zero.
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by AB10, control, ko7, wt
## Shapes are defined by e1, e2, e3.
tc_norm <- normalize(tc_replicated, transform = "log2", convert = "cpm",
norm = "quant", filter = TRUE)## Removing 4164 low-count genes (20936 remaining).
## transform_counts: Found 47 values equal to 0, adding 1 to the matrix.
## A heatmap of pairwise sample distances ranging from:
## 63.0136687923128 to 117.863511218509.
## A heatmap of pairwise sample correlations ranging from:
## 0.905258573223482 to 0.972956304644025.
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by AB10, ko7, wt
## Shapes are defined by e1, e2, e3.
## Removing 4102 low-count genes (20998 remaining).
## transform_counts: Found 52 values equal to 0, adding 1 to the matrix.
## A heatmap of pairwise sample distances ranging from:
## 62.9109619364137 to 117.665159976867.
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by AB10, cas, ko7, wt
## Shapes are defined by e1, e2, e3.
## Removing 4102 low-count genes (20998 remaining).
## transform_counts: Found 1577 values less than 0.
## Warning in transform_counts(count_table, method = transform, design = design, :
## NaNs produced
## Setting 8116 entries to zero.
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by AB10, cas, ko7, wt
## Shapes are defined by e1, e2, e3.
I am not thinking we will see many genes of interest.
hs_keepers <- list(
"ab_vs_control" = c("AB10", "control"),
"ko_vs_control" = c("ko7", "control"),
"ko_vs_wt" = c("ko7", "wt"),
"ab_vs_wt" = c("AB10", "wt"),
"ab_vs_ko" = c("AB10", "ko7"))
hs_de <- all_pairwise(hs_replicated, filter = TRUE, model_fstring = "~ 0 + condition",
model_svs = "svaseq")## AB10 control ko7 wt
## 3 3 3 3
## Removing 9809 low-count genes (11762 remaining).
## Basic step 0/3: Normalizing data.
## Basic step 0/3: Converting data.
## I think this is failing? SummarizedExperiment
## Basic step 0/3: Transforming data.
## Setting 1603 entries to zero.
## This received a matrix of SVs.
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## Warning in createContrastL(objFlt$formula, objFlt$data, L): Contrasts with only
## a single non-zero term are already evaluated by default.
## conditions
## AB10 control ko7 wt
## 3 3 3 3
## conditions
## AB10 control ko7 wt
## 3 3 3 3
## conditions
## AB10 control ko7 wt
## 3 3 3 3
## A pairwise differential expression with results from: basic, deseq, ebseq, edger, limma, noiseq.
## This used a surrogate/batch estimate from: svaseq.
## The primary analysis performed 6 comparisons.
## Deleting the file excel/hs_tables.xlsx before writing the tables.
## Looking for subscript invalid names, start of extract_keepers.
## Looking for subscript invalid names, end of extract_keepers.
## A set of combined differential expression results.
## table deseq_sigup deseq_sigdown edger_sigup edger_sigdown
## 1 control_vs_AB10-inverted 16 29 25 43
## 2 ko7_vs_control 82 20 104 28
## 3 wt_vs_ko7-inverted 0 3 0 4
## 4 wt_vs_AB10-inverted 0 1 0 1
## 5 ko7_vs_AB10-inverted 0 9 0 16
## limma_sigup limma_sigdown
## 1 2 1
## 2 4 0
## 3 0 0
## 4 0 0
## 5 0 0
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the UpSetR package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the UpSetR package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Plot describing unique/shared genes in a differential expression table.
## A set of genes deemed significant according to limma, edger, deseq, ebseq, basic.
## The parameters defining significant were:
## LFC cutoff: 1 adj P cutoff: 0.05
## limma_up limma_down edger_up edger_down deseq_up deseq_down
## ab_vs_control 2 1 25 43 16 29
## ko_vs_control 4 0 104 28 82 20
## ko_vs_wt 0 0 0 4 0 3
## ab_vs_wt 0 0 0 1 0 1
## ab_vs_ko 0 0 0 16 0 9
## ebseq_up ebseq_down basic_up basic_down
## ab_vs_control 11 0 0 0
## ko_vs_control 25 1 0 0
## ko_vs_wt 0 1 0 0
## ab_vs_wt 0 0 0 0
## ab_vs_ko 0 0 0 0
## 04_HeLa_WT_60hpi 06_HeLa_KO7_60hpi 20_HeLa_WT_60hpi 22_HeLa_KO7_60hpi
## wt ko7 wt ko7
## 23_HeLa_AB10_60hpi 36_HeLa_WT_60hpi 38_HeLa_KO7_60hpi 39_HeLa_AB10_60hpi
## AB10 wt ko7 AB10
## 40_HeLa_AB10_60hpi
## AB10
## Levels: AB10 ko7 wt
tc_keepers <- list(
"ab_vs_wt" = c("AB10", "wt"),
"ko_vs_wt" = c("ko7", "wt"),
"ab_vs_ko" = c("AB10", "ko7"))
tc_de <- all_pairwise(tc_replicated, filter = TRUE, model_fstring = "~ 0 + condition",
model_svs = "svaseq")## AB10 ko7 wt
## 3 3 3
## Removing 4164 low-count genes (20936 remaining).
## Basic step 0/3: Normalizing data.
## Basic step 0/3: Converting data.
## I think this is failing? SummarizedExperiment
## Basic step 0/3: Transforming data.
## Setting 6919 entries to zero.
## This received a matrix of SVs.
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## Warning in createContrastL(objFlt$formula, objFlt$data, L): Contrasts with only
## a single non-zero term are already evaluated by default.
## conditions
## AB10 ko7 wt
## 3 3 3
## conditions
## AB10 ko7 wt
## 3 3 3
## conditions
## AB10 ko7 wt
## 3 3 3
## A pairwise differential expression with results from: basic, deseq, ebseq, edger, limma, noiseq.
## This used a surrogate/batch estimate from: svaseq.
## The primary analysis performed 3 comparisons.
## Deleting the file excel/tc_tables.xlsx before writing the tables.
## Looking for subscript invalid names, start of extract_keepers.
## Looking for subscript invalid names, end of extract_keepers.
## A set of combined differential expression results.
## table deseq_sigup deseq_sigdown edger_sigup edger_sigdown
## 1 wt_vs_AB10-inverted 39 349 101 603
## 2 wt_vs_ko7-inverted 48 44 115 147
## 3 ko7_vs_AB10-inverted 11 259 44 428
## limma_sigup limma_sigdown
## 1 92 300
## 2 83 60
## 3 18 220
## Plot describing unique/shared genes in a differential expression table.
## Deleting the file excel/tc_sig.xlsx before writing the tables.
## A set of genes deemed significant according to limma, edger, deseq, ebseq, basic.
## The parameters defining significant were:
## LFC cutoff: 1 adj P cutoff: 0.05
## limma_up limma_down edger_up edger_down deseq_up deseq_down ebseq_up
## ab_vs_wt 92 300 101 603 39 349 51
## ko_vs_wt 83 60 115 147 48 44 114
## ab_vs_ko 18 220 44 428 11 259 8
## ebseq_down basic_up basic_down
## ab_vs_wt 277 0 0
## ko_vs_wt 39 0 0
## ab_vs_ko 222 0 0
ko_wt_up <- tc_sig[["deseq"]][["ups"]][["ko_vs_wt"]]
ko_wt_all <- tc_tables[["data"]][["ko_vs_wt"]]
tc_esmer_up_cp <- simple_clusterprofiler(
ko_wt_up, de_table = ko_wt_all, orgdb = esmer_db, orgdb_to = "GID",
organism = "tcruzi")## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (31.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 24 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## Error in orgdb == "org.Hs.eg.db": comparison (==) is possible only for atomic and list types
tc_nonesmer_up_cp <- simple_clusterprofiler(
ko_wt_up, de_table = ko_wt_all, orgdb = nonesmer_db, orgdb_to = "GID",
organism = "tcruzi")## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (30.08% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 56 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## --> No gene can be mapped....
## --> Expected input gene ID: 56105,90701,26128,5539,549,5243
## --> return NULL...
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## --> Expected input gene ID: 112495,3104,1069,10735,440093,3280
## --> No gene can be mapped....
## Error in (function (cl, name, valueClass) :
## 'organism' is not a slot in class "NULL"
tc_unas_up_cp <- simple_clusterprofiler(
ko_wt_up, de_table = ko_wt_all, orgdb = unas_db, orgdb_to = "GID",
organism = "tcruzi")## --> No gene can be mapped....
## --> Expected input gene ID: TcCLB.470399.10,TcCLB.510783.11,TcCLB.416735.10,TcCLB.507223.10,TcCLB.507697.10,TcCLB.507153.10
## --> return NULL...
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (67.59% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 22 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some of the pathways the P-values were likely overestimated. For
## such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : For some pathways, in reality P-values are less than 1e-10. You can
## set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## --> No gene can be mapped....
## --> Expected input gene ID: 440157,55157,84735,319089,116028,7069
## --> return NULL...
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## --> Expected input gene ID: 83608,3398,132321,90580,957,57187
## --> No gene can be mapped....
## Error in (function (cl, name, valueClass) :
## 'organism' is not a slot in class "NULL"
length_db <- as.data.frame(rowData(tc_se))
length_db[["gid"]] <- rownames(length_db)
length_db <- length_db[, c("gid", "width")]
tc_up_gs <- simple_goseq(ko_wt_up, go_db = tc_go, length_db = length_db, min_xref = 10)## Found 29 go_db genes and 48 length_db genes out of 48.
## Testing that go categories are defined.
## Removing undefined categories.
## Gathering synonyms.
## Gathering category definitions.
## Warning in fortify(object, showCategory = showCategory, by = x, ...): Arguments in `...` must be used.
## ✖ Problematic argument:
## • by = x
## ℹ Did you misspell an argument name?
## Warning: `aes_()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`
## ℹ The deprecated feature was likely used in the enrichplot package.
## Please report the issue at
## <https://github.com/GuangchuangYu/enrichplot/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • as.Date = as.Date
## • yscale_mapping = yscale_mapping
## • show.legend = FALSE
## ℹ Did you misspell an argument name?
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## Warning in fortify(object, showCategory = showCategory, by = x, ...): Arguments in `...` must be used.
## ✖ Problematic argument:
## • by = x
## ℹ Did you misspell an argument name?