Let us check out some new cruzi infections following the deletion of a specific gene.
I thought I also did the interrogation of the CLBrener transcriptome, but that appears untrue. I think I may have forgotten to copy the genome in place…
I have a pretty new genome downloaded (202509), so I will (for now) just let my annotation function grab whatever it thinks is reasonable. It chose the 202410 set. Seems good to me.
## The biomart annotations file already exists, loading from it.
tc_annot <- load_gff_annotations("~/libraries/genome/gff/tcruzi_all.gff",
type = "mRNA", id_col = "Parent")## Returning a df with 24 columns and 23305 rows.
rownames(tc_annot) <- gsub(x = make.names(tc_annot[["Name"]], unique = TRUE),
pattern = "\\.\\d+$", replacement = "")
esmer_db <- "org.Tcruzi.CL.Brener.Esmeraldo.like.v68.eg.db"
library(esmer_db, character.only = TRUE)## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:hpgltools':
##
## annotation<-, conditions, conditions<-
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind, colnames, dirname, do.call, duplicated, eval, evalq, Filter,
## Find, get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
## pmin.int, Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff, table, tapply, union, unique, unsplit,
## which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with 'browseVignettes()'. To cite Bioconductor, see 'citation("Biobase")',
## and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:glue':
##
## trim
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
##
esmer_db <- get0(esmer_db)
all_keytypes <- keytypes(esmer_db)
wanted_idx <- grepl(x = all_keytypes, pattern = "^ANNOT_")
wanted_fields <- all_keytypes[wanted_idx]
nonesmer_db <- "org.Tcruzi.CL.Brener.Non.Esmeraldo.like.v68.eg.db"
unas_db <- "org.Tcruzi.CL.Brener.v68.eg.db"
tc_esmer <- load_orgdb_annotations(esmer_db, keytype = "gid", fields = wanted_fields)## Unable to find CDSNAME, setting it to ANNOT_EXTERNAL_DB_NAME.
## Unable to find CDSCHROM in the db, removing it.
## Unable to find CDSSTRAND in the db, removing it.
## Unable to find CDSSTART in the db, removing it.
## Unable to find CDSEND in the db, removing it.
## Extracted all gene ids.
## Attempting to select: ANNOT_EXTERNAL_DB_NAME, GENE_TYPE, ANNOT_AA_SEQUENCE_ID, ANNOT_ANNOTATED_GO_COMPONENT, ANNOT_ANNOTATED_GO_FUNCTION, ANNOT_ANNOTATED_GO_ID_COMPONENT, ANNOT_ANNOTATED_GO_ID_FUNCTION, ANNOT_ANNOTATED_GO_ID_PROCESS, ANNOT_ANNOTATED_GO_PROCESS, ANNOT_ANTICODON, ANNOT_APOLLO_LINK_OUT, ANNOT_APOLLO_TRANSCRIPT_DESCRIPTION, ANNOT_CDS, ANNOT_CDS_LENGTH, ANNOT_CHROMOSOME, ANNOT_CODING_END, ANNOT_CODING_START, ANNOT_EC_NUMBERS, ANNOT_EC_NUMBERS_DERIVED, ANNOT_END_MAX, ANNOT_EXON_COUNT, ANNOT_EXTERNAL_DB_NAME, ANNOT_EXTERNAL_DB_VERSION, ANNOT_FIVE_PRIME_UTR_LENGTH, ANNOT_GENE_CONTEXT_END, ANNOT_GENE_CONTEXT_START, ANNOT_GENE_END_MAX, ANNOT_GENE_END_MAX_TEXT, ANNOT_GENE_ENTREZ_ID, ANNOT_GENE_ENTREZ_LINK_DISPLAYTEXT, ANNOT_GENE_ENTREZ_LINK_URL, ANNOT_GENE_EXON_COUNT, ANNOT_GENE_HTS_NONCODING_SNPS, ANNOT_GENE_HTS_NONSYN_SYN_RATIO, ANNOT_GENE_HTS_NONSYNONYMOUS_SNPS, ANNOT_GENE_HTS_STOP_CODON_SNPS, ANNOT_GENE_HTS_SYNONYMOUS_SNPS, ANNOT_GENE_LOCATION_TEXT, ANNOT_GENE_NAME, ANNOT_GENE_ORTHOLOG_NUMBER, ANNOT_GENE_ORTHOMCL_NAME, ANNOT_GENE_PARALOG_NUMBER, ANNOT_GENE_PREVIOUS_IDS, ANNOT_GENE_PRODUCT, ANNOT_GENE_START_MIN, ANNOT_GENE_START_MIN_TEXT, ANNOT_GENE_TOTAL_HTS_SNPS, ANNOT_GENE_TRANSCRIPT_COUNT, ANNOT_GENE_TYPE, ANNOT_GENOMIC_SEQUENCE_LENGTH, ANNOT_GENUS_SPECIES, ANNOT_HAS_MISSING_TRANSCRIPTS, ANNOT_INTERPRO_DESCRIPTION, ANNOT_INTERPRO_ID, ANNOT_IS_DEPRECATED, ANNOT_IS_PSEUDO, ANNOT_ISOELECTRIC_POINT, ANNOT_LOCATION_TEXT, ANNOT_MAP_LOCATION, ANNOT_MCMC_LOCATION, ANNOT_MOLECULAR_WEIGHT, ANNOT_NCBI_TAX_ID, ANNOT_ORTHOMCL_LINK, ANNOT_OVERVIEW, ANNOT_PFAM_DESCRIPTION, ANNOT_PFAM_ID, ANNOT_PIRSF_DESCRIPTION, ANNOT_PIRSF_ID, ANNOT_PREDICTED_GO_COMPONENT, ANNOT_PREDICTED_GO_FUNCTION, ANNOT_PREDICTED_GO_ID_COMPONENT, ANNOT_PREDICTED_GO_ID_FUNCTION, ANNOT_PREDICTED_GO_ID_PROCESS, ANNOT_PREDICTED_GO_PROCESS, ANNOT_PRIMARY_KEY, ANNOT_PROB_MAP, ANNOT_PROB_MCMC, ANNOT_PROSITEPROFILES_DESCRIPTION, ANNOT_PROSITEPROFILES_ID, ANNOT_PROTEIN_LENGTH, ANNOT_PROTEIN_SEQUENCE, ANNOT_PROTEIN_SOURCE_ID, ANNOT_PSEUDO_STRING, ANNOT_SEQUENCE_DATABASE_NAME, ANNOT_SEQUENCE_ID, ANNOT_SIGNALP_PEPTIDE, ANNOT_SMART_DESCRIPTION, ANNOT_SMART_ID, ANNOT_SNPOVERVIEW, ANNOT_SO_ID, ANNOT_SO_TERM_DEFINITION, ANNOT_SO_TERM_NAME, ANNOT_SO_VERSION, ANNOT_START_MIN, ANNOT_STRAND, ANNOT_STRAND_PLUS_MINUS, ANNOT_SUPERFAMILY_DESCRIPTION, ANNOT_SUPERFAMILY_ID, ANNOT_THREE_PRIME_UTR_LENGTH, ANNOT_TIGRFAM_DESCRIPTION, ANNOT_TIGRFAM_ID, ANNOT_TM_COUNT, ANNOT_TRANS_FOUND_PER_GENE_INTERNAL, ANNOT_TRANSCRIPT_INDEX_PER_GENE, ANNOT_TRANSCRIPT_LENGTH, ANNOT_TRANSCRIPT_LINK, ANNOT_TRANSCRIPT_PRODUCT, ANNOT_TRANSCRIPT_SEQUENCE, ANNOT_TRANSCRIPTS_FOUND_PER_GENE, ANNOT_UNIPROT_IDS, ANNOT_UNIPROT_LINKS
## 'select()' returned 1:1 mapping between keys and columns
##
## Unable to find CDSNAME, setting it to ANNOT_EXTERNAL_DB_NAME.
## Unable to find CDSCHROM in the db, removing it.
## Unable to find CDSSTRAND in the db, removing it.
## Unable to find CDSSTART in the db, removing it.
## Unable to find CDSEND in the db, removing it.
## Extracted all gene ids.
## Attempting to select: ANNOT_EXTERNAL_DB_NAME, GENE_TYPE, ANNOT_AA_SEQUENCE_ID, ANNOT_ANNOTATED_GO_COMPONENT, ANNOT_ANNOTATED_GO_FUNCTION, ANNOT_ANNOTATED_GO_ID_COMPONENT, ANNOT_ANNOTATED_GO_ID_FUNCTION, ANNOT_ANNOTATED_GO_ID_PROCESS, ANNOT_ANNOTATED_GO_PROCESS, ANNOT_ANTICODON, ANNOT_APOLLO_LINK_OUT, ANNOT_APOLLO_TRANSCRIPT_DESCRIPTION, ANNOT_CDS, ANNOT_CDS_LENGTH, ANNOT_CHROMOSOME, ANNOT_CODING_END, ANNOT_CODING_START, ANNOT_EC_NUMBERS, ANNOT_EC_NUMBERS_DERIVED, ANNOT_END_MAX, ANNOT_EXON_COUNT, ANNOT_EXTERNAL_DB_NAME, ANNOT_EXTERNAL_DB_VERSION, ANNOT_FIVE_PRIME_UTR_LENGTH, ANNOT_GENE_CONTEXT_END, ANNOT_GENE_CONTEXT_START, ANNOT_GENE_END_MAX, ANNOT_GENE_END_MAX_TEXT, ANNOT_GENE_ENTREZ_ID, ANNOT_GENE_ENTREZ_LINK_DISPLAYTEXT, ANNOT_GENE_ENTREZ_LINK_URL, ANNOT_GENE_EXON_COUNT, ANNOT_GENE_HTS_NONCODING_SNPS, ANNOT_GENE_HTS_NONSYN_SYN_RATIO, ANNOT_GENE_HTS_NONSYNONYMOUS_SNPS, ANNOT_GENE_HTS_STOP_CODON_SNPS, ANNOT_GENE_HTS_SYNONYMOUS_SNPS, ANNOT_GENE_LOCATION_TEXT, ANNOT_GENE_NAME, ANNOT_GENE_ORTHOLOG_NUMBER, ANNOT_GENE_ORTHOMCL_NAME, ANNOT_GENE_PARALOG_NUMBER, ANNOT_GENE_PREVIOUS_IDS, ANNOT_GENE_PRODUCT, ANNOT_GENE_START_MIN, ANNOT_GENE_START_MIN_TEXT, ANNOT_GENE_TOTAL_HTS_SNPS, ANNOT_GENE_TRANSCRIPT_COUNT, ANNOT_GENE_TYPE, ANNOT_GENOMIC_SEQUENCE_LENGTH, ANNOT_GENUS_SPECIES, ANNOT_HAS_MISSING_TRANSCRIPTS, ANNOT_INTERPRO_DESCRIPTION, ANNOT_INTERPRO_ID, ANNOT_IS_DEPRECATED, ANNOT_IS_PSEUDO, ANNOT_ISOELECTRIC_POINT, ANNOT_LOCATION_TEXT, ANNOT_MAP_LOCATION, ANNOT_MCMC_LOCATION, ANNOT_MOLECULAR_WEIGHT, ANNOT_NCBI_TAX_ID, ANNOT_ORTHOMCL_LINK, ANNOT_OVERVIEW, ANNOT_PFAM_DESCRIPTION, ANNOT_PFAM_ID, ANNOT_PIRSF_DESCRIPTION, ANNOT_PIRSF_ID, ANNOT_PREDICTED_GO_COMPONENT, ANNOT_PREDICTED_GO_FUNCTION, ANNOT_PREDICTED_GO_ID_COMPONENT, ANNOT_PREDICTED_GO_ID_FUNCTION, ANNOT_PREDICTED_GO_ID_PROCESS, ANNOT_PREDICTED_GO_PROCESS, ANNOT_PRIMARY_KEY, ANNOT_PROB_MAP, ANNOT_PROB_MCMC, ANNOT_PROSITEPROFILES_DESCRIPTION, ANNOT_PROSITEPROFILES_ID, ANNOT_PROTEIN_LENGTH, ANNOT_PROTEIN_SEQUENCE, ANNOT_PROTEIN_SOURCE_ID, ANNOT_PSEUDO_STRING, ANNOT_SEQUENCE_DATABASE_NAME, ANNOT_SEQUENCE_ID, ANNOT_SIGNALP_PEPTIDE, ANNOT_SMART_DESCRIPTION, ANNOT_SMART_ID, ANNOT_SNPOVERVIEW, ANNOT_SO_ID, ANNOT_SO_TERM_DEFINITION, ANNOT_SO_TERM_NAME, ANNOT_SO_VERSION, ANNOT_START_MIN, ANNOT_STRAND, ANNOT_STRAND_PLUS_MINUS, ANNOT_SUPERFAMILY_DESCRIPTION, ANNOT_SUPERFAMILY_ID, ANNOT_THREE_PRIME_UTR_LENGTH, ANNOT_TIGRFAM_DESCRIPTION, ANNOT_TIGRFAM_ID, ANNOT_TM_COUNT, ANNOT_TRANS_FOUND_PER_GENE_INTERNAL, ANNOT_TRANSCRIPT_INDEX_PER_GENE, ANNOT_TRANSCRIPT_LENGTH, ANNOT_TRANSCRIPT_LINK, ANNOT_TRANSCRIPT_PRODUCT, ANNOT_TRANSCRIPT_SEQUENCE, ANNOT_TRANSCRIPTS_FOUND_PER_GENE, ANNOT_UNIPROT_IDS, ANNOT_UNIPROT_LINKS
## 'select()' returned 1:1 mapping between keys and columns
##
## Unable to find CDSNAME, setting it to ANNOT_EXTERNAL_DB_NAME.
## Unable to find CDSCHROM in the db, removing it.
## Unable to find CDSSTRAND in the db, removing it.
## Unable to find CDSSTART in the db, removing it.
## Unable to find CDSEND in the db, removing it.
## Extracted all gene ids.
## Attempting to select: ANNOT_EXTERNAL_DB_NAME, GENE_TYPE, ANNOT_AA_SEQUENCE_ID, ANNOT_ANNOTATED_GO_COMPONENT, ANNOT_ANNOTATED_GO_FUNCTION, ANNOT_ANNOTATED_GO_ID_COMPONENT, ANNOT_ANNOTATED_GO_ID_FUNCTION, ANNOT_ANNOTATED_GO_ID_PROCESS, ANNOT_ANNOTATED_GO_PROCESS, ANNOT_ANTICODON, ANNOT_APOLLO_LINK_OUT, ANNOT_APOLLO_TRANSCRIPT_DESCRIPTION, ANNOT_CDS, ANNOT_CDS_LENGTH, ANNOT_CHROMOSOME, ANNOT_CODING_END, ANNOT_CODING_START, ANNOT_EC_NUMBERS, ANNOT_EC_NUMBERS_DERIVED, ANNOT_END_MAX, ANNOT_EXON_COUNT, ANNOT_EXTERNAL_DB_NAME, ANNOT_EXTERNAL_DB_VERSION, ANNOT_FIVE_PRIME_UTR_LENGTH, ANNOT_GENE_CONTEXT_END, ANNOT_GENE_CONTEXT_START, ANNOT_GENE_END_MAX, ANNOT_GENE_END_MAX_TEXT, ANNOT_GENE_ENTREZ_ID, ANNOT_GENE_ENTREZ_LINK_DISPLAYTEXT, ANNOT_GENE_ENTREZ_LINK_URL, ANNOT_GENE_EXON_COUNT, ANNOT_GENE_HTS_NONCODING_SNPS, ANNOT_GENE_HTS_NONSYN_SYN_RATIO, ANNOT_GENE_HTS_NONSYNONYMOUS_SNPS, ANNOT_GENE_HTS_STOP_CODON_SNPS, ANNOT_GENE_HTS_SYNONYMOUS_SNPS, ANNOT_GENE_LOCATION_TEXT, ANNOT_GENE_NAME, ANNOT_GENE_ORTHOLOG_NUMBER, ANNOT_GENE_ORTHOMCL_NAME, ANNOT_GENE_PARALOG_NUMBER, ANNOT_GENE_PREVIOUS_IDS, ANNOT_GENE_PRODUCT, ANNOT_GENE_START_MIN, ANNOT_GENE_START_MIN_TEXT, ANNOT_GENE_TOTAL_HTS_SNPS, ANNOT_GENE_TRANSCRIPT_COUNT, ANNOT_GENE_TYPE, ANNOT_GENOMIC_SEQUENCE_LENGTH, ANNOT_GENUS_SPECIES, ANNOT_HAS_MISSING_TRANSCRIPTS, ANNOT_INTERPRO_DESCRIPTION, ANNOT_INTERPRO_ID, ANNOT_IS_DEPRECATED, ANNOT_IS_PSEUDO, ANNOT_ISOELECTRIC_POINT, ANNOT_LOCATION_TEXT, ANNOT_MAP_LOCATION, ANNOT_MCMC_LOCATION, ANNOT_MOLECULAR_WEIGHT, ANNOT_NCBI_TAX_ID, ANNOT_ORTHOMCL_LINK, ANNOT_OVERVIEW, ANNOT_PFAM_DESCRIPTION, ANNOT_PFAM_ID, ANNOT_PIRSF_DESCRIPTION, ANNOT_PIRSF_ID, ANNOT_PREDICTED_GO_COMPONENT, ANNOT_PREDICTED_GO_FUNCTION, ANNOT_PREDICTED_GO_ID_COMPONENT, ANNOT_PREDICTED_GO_ID_FUNCTION, ANNOT_PREDICTED_GO_ID_PROCESS, ANNOT_PREDICTED_GO_PROCESS, ANNOT_PRIMARY_KEY, ANNOT_PROB_MAP, ANNOT_PROB_MCMC, ANNOT_PROSITEPROFILES_DESCRIPTION, ANNOT_PROSITEPROFILES_ID, ANNOT_PROTEIN_LENGTH, ANNOT_PROTEIN_SEQUENCE, ANNOT_PROTEIN_SOURCE_ID, ANNOT_PSEUDO_STRING, ANNOT_SEQUENCE_DATABASE_NAME, ANNOT_SEQUENCE_ID, ANNOT_SIGNALP_PEPTIDE, ANNOT_SMART_DESCRIPTION, ANNOT_SMART_ID, ANNOT_SNPOVERVIEW, ANNOT_SO_ID, ANNOT_SO_TERM_DEFINITION, ANNOT_SO_TERM_NAME, ANNOT_SO_VERSION, ANNOT_START_MIN, ANNOT_STRAND, ANNOT_STRAND_PLUS_MINUS, ANNOT_SUPERFAMILY_DESCRIPTION, ANNOT_SUPERFAMILY_ID, ANNOT_THREE_PRIME_UTR_LENGTH, ANNOT_TIGRFAM_DESCRIPTION, ANNOT_TIGRFAM_ID, ANNOT_TM_COUNT, ANNOT_TRANS_FOUND_PER_GENE_INTERNAL, ANNOT_TRANSCRIPT_INDEX_PER_GENE, ANNOT_TRANSCRIPT_LENGTH, ANNOT_TRANSCRIPT_LINK, ANNOT_TRANSCRIPT_PRODUCT, ANNOT_TRANSCRIPT_SEQUENCE, ANNOT_TRANSCRIPTS_FOUND_PER_GENE, ANNOT_UNIPROT_IDS, ANNOT_UNIPROT_LINKS
## 'select()' returned 1:1 mapping between keys and columns
tc_more <- rbind(tc_esmer$genes, tc_nonesmer$genes, tc_unas$genes)
tc_annot <- merge(tc_annot, tc_more, by = "row.names")
rownames(tc_annot) <- tc_annot[["gid"]]
tc_annot[["gid"]] <- NULL
dim(tc_annot)## [1] 23304 135
## This is an orgdb, good.
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## This is an orgdb, good.
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## This is an orgdb, good.
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
I asked for one from Najib/Amalie but unless I am mistaken it has not arrived. That is not a problem, given two helpful things: April provides one, I also named the directories so that the sample IDs are built in; so I will just make a fake one for now and then merge in whatever I get from them…
sample_sheet <- "sample_sheets/all_samples.xlsx"
plot_meta_sankey(as.data.frame(extract_metadata(sample_sheet)),
factors = c("background", "exp_number"))## Did not find the condition column in the sample sheet.
## Filling it in as undefined.
## Did not find the batch column in the sample sheet.
## Filling it in as undefined.
## Checking the state of the condition column.
## Checking the state of the batch column.
## Checking the condition factor.
## Warning: attributes are not identical across measure variables; they will be dropped
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the ggsankey package.
## Please report the issue at <https://github.com/davidsjoberg/ggsankey/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## A sankey plot describing the metadata of 14 samples,
## including 19 out of 0 nodes and traversing metadata factors:
## background, exp_number.
Let us see how well my preprocess gatherer does…
## Did not find the condition column in the sample sheet.
## Filling it in as undefined.
## Did not find the batch column in the sample sheet.
## Filling it in as undefined.
## Checking the state of the condition column.
## Checking the state of the batch column.
## Checking the condition factor.
## Warning in dispatch_regex_search(meta, search, replace, input_file_spec, : NAs introduced by coercion
## Writing new metadata to: sample_sheets/all_samples_modified.xlsx
## Deleting the file sample_sheets/all_samples_modified.xlsx before writing the tables.
## sampleid samplenumber celltype background hpi exp_number amount_in_10ul amount_fact condition batch
## X02_HeLa_control_60h 02_HeLa_control_60h 2 HeLa control t60h e1 183 low undefined undefined
## X04_HeLa_WT_60hpi 04_HeLa_WT_60hpi 4 HeLa wt t60h e1 304 mid undefined undefined
## X06_HeLa_KO7_60hpi 06_HeLa_KO7_60hpi 6 HeLa ko7 t60h e1 298 mid undefined undefined
## X08_HeLa_Cas_60hpi 08_HeLa_Cas_60hpi 8 HeLa cas t60h e1 284 mid undefined undefined
## X18_HeLa_control_60h 18_HeLa_control_60h 18 HeLa control t60h e2 62 low undefined undefined
## X20_HeLa_WT_60hpi 20_HeLa_WT_60hpi 20 HeLa wt t60h e2 228 mid undefined undefined
## sampleid_backup trimomatic_input trimomatic_output trimomatic_percent fastqc_pct_gc kraken_bacterial_classified
## X02_HeLa_control_60h 02_HeLa_control_60h 34421670 31723102 0.922 52 147699
## X04_HeLa_WT_60hpi 04_HeLa_WT_60hpi 33338315 30831462 0.925 50 285754
## X06_HeLa_KO7_60hpi 06_HeLa_KO7_60hpi 36904955 34168992 0.926 50 414463
## X08_HeLa_Cas_60hpi 08_HeLa_Cas_60hpi 34230672 30953413 0.904 50 309973
## X18_HeLa_control_60h 18_HeLa_control_60h 31154298 28104898 0.902 51 147359
## X20_HeLa_WT_60hpi 20_HeLa_WT_60hpi 35726918 32916331 0.921 50 323491
## kraken_bacterial_unclassified kraken_first_bacterial_species kraken_first_bacterial_species_reads
## X02_HeLa_control_60h 418871 Porphyrobacter sp. GA68 34515
## X04_HeLa_WT_60hpi 6263711 Mycoplasmopsis arginini 20649
## X06_HeLa_KO7_60hpi 8109109 Mycoplasmopsis arginini 95574
## X08_HeLa_Cas_60hpi 7277804 Mycoplasmopsis arginini 22086
## X18_HeLa_control_60h 374703 Porphyrobacter sp. GA68 22324
## X20_HeLa_WT_60hpi 8424975 Klebsiella pneumoniae 4599
## kraken_matrix_bacterial
## X02_HeLa_control_60h preprocessing/02_HeLa_control_60h/outputs/20251031kraken_bacteria/kraken_report_matrix.tsv
## X04_HeLa_WT_60hpi preprocessing/04_HeLa_WT_60hpi/outputs/20251031kraken_bacteria/kraken_report_matrix.tsv
## X06_HeLa_KO7_60hpi preprocessing/06_HeLa_KO7_60hpi/outputs/20251031kraken_bacteria/kraken_report_matrix.tsv
## X08_HeLa_Cas_60hpi preprocessing/08_HeLa_Cas_60hpi/outputs/20251031kraken_bacteria/kraken_report_matrix.tsv
## X18_HeLa_control_60h preprocessing/18_HeLa_control_60h/outputs/20251031kraken_bacteria/kraken_report_matrix.tsv
## X20_HeLa_WT_60hpi preprocessing/20_HeLa_WT_60hpi/outputs/20251031kraken_bacteria/kraken_report_matrix.tsv
## hisat_genome_input_reads_hg38_115 hisat_genome_input_reads_tcruzi_all hisat_genome_single_concordant_hg38_115
## X02_HeLa_control_60h 31723102 566570 27374698
## X04_HeLa_WT_60hpi 30831462 6549465 21550886
## X06_HeLa_KO7_60hpi NA 8523572 NA
## X08_HeLa_Cas_60hpi 30953413 7587777 20831115
## X18_HeLa_control_60h 28104898 522062 24646849
## X20_HeLa_WT_60hpi 32916331 8748466 21560373
## hisat_genome_single_concordant_tcruzi_all hisat_genome_multi_concordant_hg38_115 hisat_genome_multi_concordant_tcruzi_all
## X02_HeLa_control_60h 533 3781834 148
## X04_HeLa_WT_60hpi 3958505 2731111 1769606
## X06_HeLa_KO7_60hpi 5187623 NA 2326419
## X08_HeLa_Cas_60hpi 4581882 2534521 2096222
## X18_HeLa_control_60h 1779 2935987 580
## X20_HeLa_WT_60hpi 5359267 2607492 2409017
## hisat_genome_single_all_hg38_115 hisat_genome_single_all_tcruzi_all hisat_genome_multi_all_hg38_115
## X02_HeLa_control_60h 393579 31940 147888
## X04_HeLa_WT_60hpi 386791 191224 125185
## X06_HeLa_KO7_60hpi NA 219958 NA
## X08_HeLa_Cas_60hpi 370232 198309 118754
## X18_HeLa_control_60h 371885 44606 118560
## X20_HeLa_WT_60hpi 394781 254343 124747
## hisat_genome_multi_all_tcruzi_all hisat_unmapped_hg38_115 hisat_unmapped_tcruzi_all hisat_genome_percent_log_hg38_115
## X02_HeLa_control_60h 26257 485321 1073519 99.24
## X04_HeLa_WT_60hpi 98192 12501300 1335114 79.73
## X06_HeLa_KO7_60hpi 120720 NA 1655578 NA
## X08_HeLa_Cas_60hpi 105444 14599664 1495987 76.42
## X18_HeLa_control_60h 22098 474391 972594 99.16
## X20_HeLa_WT_60hpi 122425 16893802 1557970 74.34
## hisat_genome_percent_log_tcruzi_all hisat_alignment_hg38_115
## X02_HeLa_control_60h 5.26 preprocessing/02_HeLa_control_60h/outputs/20251031hisat_hg38_115/hg38_115_genome.bam
## X04_HeLa_WT_60hpi 89.81 preprocessing/04_HeLa_WT_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome.bam
## X06_HeLa_KO7_60hpi 90.29 preprocessing/06_HeLa_KO7_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome.bam
## X08_HeLa_Cas_60hpi 90.14 preprocessing/08_HeLa_Cas_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome.bam
## X18_HeLa_control_60h 6.85 preprocessing/18_HeLa_control_60h/outputs/20251031hisat_hg38_115/hg38_115_genome.bam
## X20_HeLa_WT_60hpi 91.10 preprocessing/20_HeLa_WT_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome.bam
## hisat_alignment_tcruzi_all salmon_percent_hg38_115
## X02_HeLa_control_60h preprocessing/02_HeLa_control_60h/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome.bam 45.40
## X04_HeLa_WT_60hpi preprocessing/04_HeLa_WT_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome.bam 35.05
## X06_HeLa_KO7_60hpi preprocessing/06_HeLa_KO7_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome.bam 33.15
## X08_HeLa_Cas_60hpi preprocessing/08_HeLa_Cas_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome.bam 33.47
## X18_HeLa_control_60h preprocessing/18_HeLa_control_60h/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome.bam 43.10
## X20_HeLa_WT_60hpi preprocessing/20_HeLa_WT_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome.bam 34.33
## salmon_percent_tcruzi_all salmon_observed_genes_hg38_115 salmon_observed_genes_tcruzi_all
## X02_HeLa_control_60h NA 47839 0
## X04_HeLa_WT_60hpi NA 46509 0
## X06_HeLa_KO7_60hpi NA 48117 0
## X08_HeLa_Cas_60hpi NA 46351 0
## X18_HeLa_control_60h NA 47978 0
## X20_HeLa_WT_60hpi NA 47985 0
## input_r1 input_r2
## X02_HeLa_control_60h unprocessed/02_HeLa_control_60h_2_S1_R1_001.fastq.gz unprocessed/02_HeLa_control_60h_2_S1_R2_001.fastq.gz
## X04_HeLa_WT_60hpi unprocessed/04_HeLa_WT_60hpi_2_S2_R1_001.fastq.gz unprocessed/04_HeLa_WT_60hpi_2_S2_R2_001.fastq.gz
## X06_HeLa_KO7_60hpi unprocessed/06_HeLa_KO7_60hpi_2_S3_R1_001.fastq.gz unprocessed/06_HeLa_KO7_60hpi_2_S3_R2_001.fastq.gz
## X08_HeLa_Cas_60hpi unprocessed/08_HeLa_Cas_60hpi_2_S4_R1_001.fastq.gz unprocessed/08_HeLa_Cas_60hpi_2_S4_R2_001.fastq.gz
## X18_HeLa_control_60h unprocessed/18_HeLa_control_60h_2_S5_R1_001.fastq.gz unprocessed/18_HeLa_control_60h_2_S5_R2_001.fastq.gz
## X20_HeLa_WT_60hpi unprocessed/20_HeLa_WT_60hpi_2_S6_R1_001.fastq.gz unprocessed/20_HeLa_WT_60hpi_2_S6_R2_001.fastq.gz
## hisat_count_table_hg38_115
## X02_HeLa_control_60h preprocessing/02_HeLa_control_60h/outputs/20251031hisat_hg38_115/hg38_115_genome-paired_s2_gene_ID_fcounts.csv.xz
## X04_HeLa_WT_60hpi preprocessing/04_HeLa_WT_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome-paired_s2_gene_ID_fcounts.csv.xz
## X06_HeLa_KO7_60hpi preprocessing/06_HeLa_KO7_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome-paired_s2_gene_ID_fcounts.csv.xz
## X08_HeLa_Cas_60hpi preprocessing/08_HeLa_Cas_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome-paired_s2_gene_ID_fcounts.csv.xz
## X18_HeLa_control_60h preprocessing/18_HeLa_control_60h/outputs/20251031hisat_hg38_115/hg38_115_genome-paired_s2_gene_ID_fcounts.csv.xz
## X20_HeLa_WT_60hpi preprocessing/20_HeLa_WT_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome-paired_s2_gene_ID_fcounts.csv.xz
## hisat_count_table_tcruzi_all
## X02_HeLa_control_60h preprocessing/02_HeLa_control_60h/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome-paired_s2_gene_ID_fcounts.csv.xz
## X04_HeLa_WT_60hpi preprocessing/04_HeLa_WT_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome-paired_s2_gene_ID_fcounts.csv.xz
## X06_HeLa_KO7_60hpi preprocessing/06_HeLa_KO7_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome-paired_s2_gene_ID_fcounts.csv.xz
## X08_HeLa_Cas_60hpi preprocessing/08_HeLa_Cas_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome-paired_s2_gene_ID_fcounts.csv.xz
## X18_HeLa_control_60h preprocessing/18_HeLa_control_60h/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome-paired_s2_gene_ID_fcounts.csv.xz
## X20_HeLa_WT_60hpi preprocessing/20_HeLa_WT_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome-paired_s2_gene_ID_fcounts.csv.xz
## salmon_count_table_hg38_115 salmon_count_table_tcruzi_all
## X02_HeLa_control_60h preprocessing/02_HeLa_control_60h/outputs/20251031salmon_hg38_115_CDS/quant.sf NA
## X04_HeLa_WT_60hpi preprocessing/04_HeLa_WT_60hpi/outputs/20251031salmon_hg38_115_CDS/quant.sf NA
## X06_HeLa_KO7_60hpi preprocessing/06_HeLa_KO7_60hpi/outputs/20251031salmon_hg38_115_CDS/quant.sf NA
## X08_HeLa_Cas_60hpi preprocessing/08_HeLa_Cas_60hpi/outputs/20251031salmon_hg38_115_CDS/quant.sf NA
## X18_HeLa_control_60h preprocessing/18_HeLa_control_60h/outputs/20251031salmon_hg38_115_CDS/quant.sf NA
## X20_HeLa_WT_60hpi preprocessing/20_HeLa_WT_60hpi/outputs/20251031salmon_hg38_115_CDS/quant.sf NA
color_choices <- list(
"hs" = list(
"AB10" = "#086448",
"cas" = "#702601",
"control" = "#454178",
"ko7" = "#870649",
"positive" = "#46060E",
"wt" = "#785C01"),
"tc" = list(
"AB10" = "#0DA877",
"cas" = "#BA3F01",
"control" = "#7771D1",
"ko7" = "#BF086A",
"positive" = "#8F0C1E",
"wt" = "#AF8401"))These colors are bad, the human are too dark and lose their contrast with respect to each other. I should get Najib/April/Amalie to help define better.
hs_se <- create_se(new_meta[["new_meta"]], gene_info = hs_annot[["gene_annotations"]],
file_column = "hisat_count_table_hg38_115") %>%
set_conditions(fact = "background") %>%
set_batches(fact = "exp_number") %>%
set_colors(color_choices[["hs"]])## Reading the sample metadata.
## Checking the state of the condition column.
## Checking the state of the batch column.
## Checking the condition factor.
## The sample definitions comprises: 14 rows(samples) and 44 columns(metadata fields).
## Matched 21562 annotations and counts.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the summarized experiment to 'se.rda'.
## The final summarized experiment has 21571 rows and 44 columns.
## The numbers of samples by condition are:
##
## AB10 cas control ko7 positive wt
## 3 1 3 3 1 3
## The number of samples by batch are:
##
## e1 e2 e3 undef
## 4 4 5 1
tc_se <- create_se(new_meta[["new_meta"]], gene_info = tc_annot,
file_column = "hisat_count_table_tcruzi_all") %>%
set_conditions(fact = "background") %>%
set_batches(fact = "exp_number") %>%
set_colors(color_choices[["tc"]])## Reading the sample metadata.
## Checking the state of the condition column.
## Checking the state of the batch column.
## Checking the condition factor.
## The sample definitions comprises: 14 rows(samples) and 44 columns(metadata fields).
## Matched 23304 annotations and counts.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the summarized experiment to 'se.rda'.
## The final summarized experiment has 25100 rows and 44 columns.
## The numbers of samples by condition are:
##
## AB10 cas control ko7 positive wt
## 3 1 3 3 1 3
## The number of samples by batch are:
##
## e1 e2 e3 undef
## 4 4 5 1
I picked off another TODO in here, I changed plot_nonzero to more intelligently set the text annotation.
## The following samples have less than 16315 genes.
## [1] "02_HL_c_60" "18_HL_c_60" "34_HL_c_60" "pos_ctrl"
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the hpgltools package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## A non-zero genes plot of 14 samples.
## These samples have an average 1.475 CPM coverage and 15473 genes observed, ranging from 36 to
## 22625.
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## The samples (and read coverage) removed when filtering 5000 non-zero genes are:
## 02_HeLa_control_60h 18_HeLa_control_60h 34_HeLa_control_60h pos_ctrl
## 122 663 107 48
## 02_HeLa_control_60h 18_HeLa_control_60h 34_HeLa_control_60h pos_ctrl
## 81 523 91 36
## Samples removed: 81, 523, 91, 36
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## A non-zero genes plot of 10 samples.
## These samples have an average 2.064 CPM coverage and 21590 genes observed, ranging from 20421 to
## 22625.
## Library sizes of 14 samples,
## ranging from 14,183,768 to 29,076,224.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## A non-zero genes plot of 14 samples.
## These samples have an average 20.55 CPM coverage and 15472 genes observed, ranging from 15027 to
## 17380.
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## 85373 entries are 0. We are on a log scale, adding 1 to the data.
## Plot describing the gene distribution from a dataset.
## Library sizes of 10 samples,
## ranging from 791,179 to 3,382,177.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## A non-zero genes plot of 10 samples.
## These samples have an average 2.064 CPM coverage and 21590 genes observed, ranging from 20421 to
## 22625.
## 35096 entries are 0. We are on a log scale, adding 1 to the data.
## Plot describing the gene distribution from a dataset.
## Removing samples with less than 3 replicates.
## Removed: 08_HeLa_Cas_60hpi, pos_ctrl.
tc_replicated <- subset_se(tc_se, min_replicates = 3, fact = "condition") %>%
subset_se(nonzero = 10000)## Removing samples with less than 3 replicates.
## Removed: 08_HeLa_Cas_60hpi.
## No samples have fewer than 10000 observed genes.
Here are a couple of variance partition invocations. Once we have other metadata, this will be more useful. Note, this will only work once the non-replicated conditions are removed (control and cas).
## The model of ~ condition + batch has 6 levels and rank 6
##
## The model of ~ condition + batch has 5 levels and rank 5
## Warning in .fitExtractVarPartModel(exprObj, formula, data, REML = REML, : Model failed for 11 responses.
## See errors with attr(., 'errors')
##
I think we probably should not be surprised at the amount of variance attributed to the batch due to the very large difference in coverage between experiment #3 and 1/2.
Perform our default PCA plot along with a combat version.
hs_norm <- normalize(hs_replicated, transform = "log2", convert = "cpm",
norm = "quant", filter = TRUE)## Removing 9809 low-count genes (11762 remaining).
## Setting 1672 entries to zero.
## A heatmap of pairwise sample distances ranging from:
## 16.2110615399822 to 91.122185751637.
## A heatmap of pairwise sample correlations ranging from:
## 0.916078357207824 to 0.997343768762044.
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by AB10, control, ko7, wt
## Shapes are defined by e1, e2, e3.
hs_nb <- normalize(hs_replicated, transform = "log2", convert = "cpm",
filter = TRUE, batch = "svaseq")## Removing 9809 low-count genes (11762 remaining).
## transform_counts: Found 25 values less than 0.
## transform_counts: Found 25 values equal to 0, adding 1 to the matrix.
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by AB10, control, ko7, wt
## Shapes are defined by e1, e2, e3.
hs_cb <- normalize(hs_replicated, transform = "log2", convert = "cpm",
filter = TRUE, batch = "combat")## Removing 9809 low-count genes (11762 remaining).
## transform_counts: Found 84 values less than 0.
## transform_counts: Found 89 values equal to 0, adding 1 to the matrix.
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by AB10, control, ko7, wt
## Shapes are defined by e1, e2, e3.
## Removing 4102 low-count genes (20998 remaining).
## transform_counts: Found 52 values equal to 0, adding 1 to the matrix.
## A heatmap of pairwise sample distances ranging from:
## 62.9109619364137 to 117.665159976867.
## A heatmap of pairwise sample correlations ranging from:
## 0.905660328467753 to 0.973093779177062.
high_genes <- rownames(norm_boxplot[["high_outlier_df"]])
fData(tc_se)[high_genes, ][["annot_transcript_product"]]## [1] "alpha tubulin, putative" "undefined"
## [3] "undefined" "undefined"
## [5] "undefined" "undefined"
## [7] "undefined" "Voltage-dependent calcium channel subunit, putative"
## [9] "enolase" "zinc finger CCCH domain containing protein 11"
## [11] "60S ribosomal protein L6, putative" "clathrin heavy chain, putative"
## [13] "ribosomal protein l35a, putative" "conserved protein"
## [15] "cyclophilin a, putative" "hypothetical protein, conserved"
## [17] "ABC transporter, putative" "trans-sialidase, Group II, putative"
## [19] "ABC transporter, putative" "zinc finger CCCH domain containing protein 11"
## [21] "polyadenylate-binding protein, putative" "metallo-peptidase, Clan MF, Family M17"
## [23] "ubiquitin-protein ligase-like, putative" "60S ribosomal protein L19, putative"
## [25] "60S ribosomal protein L23a, putative" "RNA-binding protein 42 (RNA-binding motif protein 42), putative"
## [27] "CCR4-NOT transcription complex subunit 1" "dynein heavy chain, putative"
## [29] "ABC transporter, putative" "mitochondrial RNA binding complex 1 subunit, putative"
## [31] "60S ribosomal protein L10a, putative" "hypothetical protein"
## [33] "cell division cycle protein 20, putative"
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by AB10, cas, ko7, wt
## Shapes are defined by e1, e2, e3.
tc_rnorm <- normalize(tc_replicated, transform = "log2", convert = "cpm",
norm = "quant", filter = TRUE)## Removing 4164 low-count genes (20936 remaining).
## transform_counts: Found 47 values equal to 0, adding 1 to the matrix.
## A heatmap of pairwise sample distances ranging from:
## 63.0136687923128 to 117.863511218509.
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by AB10, ko7, wt
## Shapes are defined by e1, e2, e3.
tc_rbnorm <- normalize(tc_replicated, transform = "log2", convert = "cpm",
filter = TRUE, batch = "svaseq")## Removing 4164 low-count genes (20936 remaining).
## transform_counts: Found 1252 values less than 0.
## transform_counts: Found 1252 values equal to 0, adding 1 to the matrix.
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by AB10, ko7, wt
## Shapes are defined by e1, e2, e3.
I am not thinking we will see many genes of interest.
hs_keepers <- list(
"ab_vs_control" = c("AB10", "control"),
"ko_vs_control" = c("ko7", "control"),
"ko_vs_wt" = c("ko7", "wt"),
"ab_vs_wt" = c("AB10", "wt"),
"ab_vs_ko" = c("AB10", "ko7"))
hs_de <- all_pairwise(hs_replicated, filter = TRUE, model_fstring = "~ 0 + condition",
model_svs = "svaseq")## AB10 control ko7 wt
## 3 3 3 3
## Removing 9809 low-count genes (11762 remaining).
## Basic step 0/3: Normalizing data.
## Basic step 0/3: Converting data.
## I think this is failing? SummarizedExperiment
## Basic step 0/3: Transforming data.
## Setting 1603 entries to zero.
## This received a matrix of SVs.
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## conditions
## AB10 control ko7 wt
## 3 3 3 3
## conditions
## AB10 control ko7 wt
## 3 3 3 3
## conditions
## AB10 control ko7 wt
## 3 3 3 3
## A pairwise differential expression with results from: basic, deseq, ebseq, edger, limma, noiseq.
## This used a surrogate/batch estimate from: svaseq.
## The primary analysis performed 6 comparisons.
## Deleting the file excel/hs_tables.xlsx before writing the tables.
## Looking for subscript invalid names, start of extract_keepers.
## Looking for subscript invalid names, end of extract_keepers.
## A set of combined differential expression results.
## table deseq_sigup deseq_sigdown edger_sigup edger_sigdown limma_sigup limma_sigdown
## 1 control_vs_AB10-inverted 16 29 25 43 2 1
## 2 ko7_vs_control 82 20 104 28 4 0
## 3 wt_vs_ko7-inverted 0 3 0 4 0 0
## 4 wt_vs_AB10-inverted 0 1 0 1 0 0
## 5 ko7_vs_AB10-inverted 0 9 0 16 0 0
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the UpSetR package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the UpSetR package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Plot describing unique/shared genes in a differential expression table.
## Deleting the file excel/hs_sig.xlsx before writing the tables.
## A set of genes deemed significant according to limma, edger, deseq, ebseq, basic.
## The parameters defining significant were:
## LFC cutoff: 1 adj P cutoff: 0.05
## limma_up limma_down edger_up edger_down deseq_up deseq_down ebseq_up ebseq_down basic_up basic_down
## ab_vs_control 2 1 25 43 16 29 11 0 0 0
## ko_vs_control 4 0 104 28 82 20 25 1 0 0
## ko_vs_wt 0 0 0 4 0 3 0 1 0 0
## ab_vs_wt 0 0 0 1 0 1 0 0 0 0
## ab_vs_ko 0 0 0 16 0 9 0 0 0 0
While it is true there are not a tremendous number of genes, at least some of the groups are interesting.
## BP CC CORUM HP HPA KEGG MIRNA MF REAC TF WP
## ab_vs_control_up 0 5 0 0 0 0 1 1 1 0 0
## ab_vs_control_down 4 1 0 0 0 0 0 4 0 0 0
## ko_vs_control_up 64 2 0 0 0 2 1 15 5 21 4
## ko_vs_control_down 12 5 1 0 0 1 1 8 2 0 0
## Warning in fortify(object, showCategory = showCategory, by = x, ...): Arguments in `...` must be used.
## ✖ Problematic argument:
## • by = x
## ℹ Did you misspell an argument name?
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • as.Date = as.Date
## • yscale_mapping = yscale_mapping
## • show.legend = FALSE
## ℹ Did you misspell an argument name?
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## $bar
##
## $cnet
##
## $dot
##
## $go
## Warning: ggrepel: 13 unlabeled data points (too many overlaps). Consider increasing max.overlaps
##
## $heat
##
## $map
##
## $ss
## NULL
##
## $termsim
## #
## # over-representation test
## #
## #...@organism UNKNOWN
## #...@ontology BP
## #...@gene chr [1:82] "ENSG00000049249" "ENSG00000115008" "ENSG00000128965" "ENSG00000139269" "ENSG00000170909" "ENSG00000130487" "ENSG00000106823" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05
## #...49 enriched terms found
## 'data.frame': 49 obs. of 9 variables:
## $ ID : chr "GO:0034103" "GO:0045071" "GO:0006952" "GO:0043207" ...
## $ Description: chr "regulation of tissue remodeling" "negative regulation of viral genome replication" "defense response" "response to external biotic stimulus" ...
## $ GeneRatio : chr "6/76" "6/57" "23/1853" "21/1595" ...
## $ BgRatio : chr "76/59" "57/80" "1853/80" "1595/80" ...
## $ pvalue : num 0.000126 0.000139 0.000417 0.000616 0.00063 ...
## $ p.adjust : num 0.000126 0.000139 0.000417 0.000616 0.00063 ...
## $ qvalue : num 0.000126 0.000139 0.000417 0.000616 0.00063 ...
## $ geneID : chr "ENSG00000170909/ENSG00000136244/ENSG00000136235/ENSG00000162733/ENSG00000154175/ENSG00000118503" "ENSG00000271503/ENSG00000157601/ENSG00000115267/ENSG00000172183/ENSG00000135114/ENSG00000187608" "ENSG00000115008/ENSG00000271503/ENSG00000124882/ENSG00000073756/ENSG00000157601/ENSG00000176046/ENSG00000136244"| __truncated__ "ENSG00000115008/ENSG00000271503/ENSG00000124882/ENSG00000073756/ENSG00000157601/ENSG00000136244/ENSG00000100342"| __truncated__ ...
## $ Count : int 6 6 23 21 2 2 2 2 16 18 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
##
##
## $tree
##
## $up
##
## $vol
## NULL
## Warning in fortify(object, showCategory = showCategory, by = x, ...): Arguments in `...` must be used.
## ✖ Problematic argument:
## • by = x
## ℹ Did you misspell an argument name?
## Arguments in `...` must be used.
## ✖ Problematic arguments:
## • as.Date = as.Date
## • yscale_mapping = yscale_mapping
## • show.legend = FALSE
## ℹ Did you misspell an argument name?
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## $bar
##
## $cnet
##
## $dot
##
## $go
## Warning: ggrepel: 98 unlabeled data points (too many overlaps). Consider increasing max.overlaps
##
## $heat
##
## $map
##
## $ss
## NULL
##
## $termsim
## #
## # over-representation test
## #
## #...@organism UNKNOWN
## #...@ontology BP
## #...@gene chr [1:20] "ENSG00000174600" "ENSG00000187689" "ENSG00000160201" "ENSG00000118194" "ENSG00000186340" "ENSG00000115457" "ENSG00000150594" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05
## #...11 enriched terms found
## 'data.frame': 11 obs. of 9 variables:
## $ ID : chr "GO:0043567" "GO:0010286" "GO:0070370" "GO:0070434" ...
## $ Description: chr "regulation of insulin-like growth factor receptor signaling pathway" "heat acclimation" "cellular heat acclimation" "positive regulation of nucleotide-binding oligomerization domain containing 2 signaling pathway" ...
## $ GeneRatio : chr "3/23" "2/5" "2/5" "2/6" ...
## $ BgRatio : chr "23/17" "5/14" "5/14" "6/14" ...
## $ pvalue : num 0.0011 0.00587 0.00587 0.00881 0.01233 ...
## $ p.adjust : num 0.0011 0.00587 0.00587 0.00881 0.01233 ...
## $ qvalue : num 0.0011 0.00587 0.00587 0.00881 0.01233 ...
## $ geneID : chr "ENSG00000115457/ENSG00000115461/ENSG00000017427" "ENSG00000204388/ENSG00000204389" "ENSG00000204388/ENSG00000204389" "ENSG00000204388/ENSG00000204389" ...
## $ Count : int 3 2 2 2 2 3 2 2 2 4 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
##
##
## $tree
##
## $up
##
## $vol
## NULL
I think it is noteworthy that there is a difference in T cell proliferation.
## Error: unable to find an inherited method for function 'conditions' for signature 'object = "SummarizedExperiment"'
tc_keepers <- list(
"ab_vs_wt" = c("AB10", "wt"),
"ko_vs_wt" = c("ko7", "wt"),
"ab_vs_ko" = c("AB10", "ko7"))
tc_de <- all_pairwise(tc_replicated, filter = TRUE, model_fstring = "~ 0 + condition",
model_svs = "svaseq")## AB10 ko7 wt
## 3 3 3
## Removing 4164 low-count genes (20936 remaining).
## Basic step 0/3: Normalizing data.
## Basic step 0/3: Converting data.
## I think this is failing? SummarizedExperiment
## Basic step 0/3: Transforming data.
## Setting 6919 entries to zero.
## This received a matrix of SVs.
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## conditions
## AB10 ko7 wt
## 3 3 3
## conditions
## AB10 ko7 wt
## 3 3 3
## conditions
## AB10 ko7 wt
## 3 3 3
## A pairwise differential expression with results from: basic, deseq, ebseq, edger, limma, noiseq.
## This used a surrogate/batch estimate from: svaseq.
## The primary analysis performed 3 comparisons.
## Deleting the file excel/tc_tables.xlsx before writing the tables.
## Looking for subscript invalid names, start of extract_keepers.
## Looking for subscript invalid names, end of extract_keepers.
## A set of combined differential expression results.
## table deseq_sigup deseq_sigdown edger_sigup edger_sigdown limma_sigup limma_sigdown
## 1 wt_vs_AB10-inverted 39 349 101 603 92 300
## 2 wt_vs_ko7-inverted 48 44 115 147 83 60
## 3 ko7_vs_AB10-inverted 11 259 44 428 18 220
## Plot describing unique/shared genes in a differential expression table.
## Deleting the file excel/tc_sig.xlsx before writing the tables.
## A set of genes deemed significant according to limma, edger, deseq, ebseq, basic.
## The parameters defining significant were:
## LFC cutoff: 1 adj P cutoff: 0.05
## limma_up limma_down edger_up edger_down deseq_up deseq_down ebseq_up ebseq_down basic_up basic_down
## ab_vs_wt 92 300 101 603 39 349 51 277 0 0
## ko_vs_wt 83 60 115 147 48 44 114 39 0 0
## ab_vs_ko 18 220 44 428 11 259 8 222 0 0
We cannot use gProfiler2 with the parasite because it is not a reference species; but other ontology methods are not constrained thus. In the case of clusterProfiler, there is another constraint, I do not have a single orgDB object which comprises Esmer/NonEsmer/Unassigned; as a result I must attempt the ontology search on the haplotypes separately.
ko_wt_up <- tc_sig[["deseq"]][["ups"]][["ko_vs_wt"]]
ko_wt_down <- tc_sig[["deseq"]][["downs"]][["ko_vs_wt"]]
ko_wt_all <- tc_tables[["data"]][["ko_vs_wt"]]
tc_esmer_up_cp <- simple_clusterprofiler(
ko_wt_up, de_table = ko_wt_all, orgdb = esmer_db, orgdb_to = "GID",
organism = "tcruzi", excel = "excel/ko_wt_up_cp_esmer.xlsx")## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (31.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : There were 36 pathways for which P-values were not
## calculated properly due to unbalanced (positive and negative) gene-level statistic values. For such pathways pval, padj, NES, log2err are set
## to NA. You can try to increase the value of the argument nPermSimple (for example set it nPermSimple = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some of the pathways the P-values were likely
## overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some pathways, in reality P-values are less than
## 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## Deleting the file excel/ko_wt_up_cp_esmer.xlsx before writing the tables.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 10, TcCLB.409625 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 10, TcCLB.409625 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 10, TcCLB.409625 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 10, TcCLB.409625 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
tc_nonesmer_up_cp <- simple_clusterprofiler(
ko_wt_up, de_table = ko_wt_all, orgdb = nonesmer_db, orgdb_to = "GID",
organism = "tcruzi", excel = "excel/ko_wt_up_cp_nonesmer.xlsx")## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (30.08% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : There were 49 pathways for which P-values were not
## calculated properly due to unbalanced (positive and negative) gene-level statistic values. For such pathways pval, padj, NES, log2err are set
## to NA. You can try to increase the value of the argument nPermSimple (for example set it nPermSimple = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some of the pathways the P-values were likely
## overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some pathways, in reality P-values are less than
## 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : TcCLB.409625.10 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : TcCLB.409625.10 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : TcCLB.409625.10 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : TcCLB.409625.10 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
tc_unas_up_cp <- simple_clusterprofiler(
ko_wt_up, de_table = ko_wt_all, orgdb = unas_db, orgdb_to = "GID",
organism = "tcruzi")## --> No gene can be mapped....
## --> Expected input gene ID: TcCLB.507223.10,TcCLB.508331.30,TcCLB.506189.10,TcCLB.508491.9,TcCLB.508331.50,TcCLB.510783.11
## --> return NULL...
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (67.59% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : There were 22 pathways for which P-values were not
## calculated properly due to unbalanced (positive and negative) gene-level statistic values. For such pathways pval, padj, NES, log2err are set
## to NA. You can try to increase the value of the argument nPermSimple (for example set it nPermSimple = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some of the pathways the P-values were likely
## overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some pathways, in reality P-values are less than
## 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
tc_esmer_up_cp
tc_esmer_down_cp <- simple_clusterprofiler(
ko_wt_down, de_table = ko_wt_all, orgdb = esmer_db, orgdb_to = "GID",
organism = "tcruzi", excel = "excel/ko_wt_down_cp_esmer.xlsx")## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (31.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : There were 26 pathways for which P-values were not
## calculated properly due to unbalanced (positive and negative) gene-level statistic values. For such pathways pval, padj, NES, log2err are set
## to NA. You can try to increase the value of the argument nPermSimple (for example set it nPermSimple = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some of the pathways the P-values were likely
## overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some pathways, in reality P-values are less than
## 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 10, TcCLB.409625 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 10, TcCLB.409625 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 10, TcCLB.409625 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 10, TcCLB.409625 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 671.90, TcCLB.50 ... TcCLB.511921.140 is truncated.
## Number of characters exeed the limit of 32767.
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
tc_nonesmer_down_cp <- simple_clusterprofiler(
ko_wt_down, de_table = ko_wt_all, orgdb = nonesmer_db, orgdb_to = "GID",
organism = "tcruzi", excel = "excel/ko_wt_down_cp_nonesmer.xlsx")## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
##
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (30.08% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : There were 56 pathways for which P-values were not
## calculated properly due to unbalanced (positive and negative) gene-level statistic values. For such pathways pval, padj, NES, log2err are set
## to NA. You can try to increase the value of the argument nPermSimple (for example set it nPermSimple = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some of the pathways the P-values were likely
## overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some pathways, in reality P-values are less than
## 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : TcCLB.409625.10 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : TcCLB.409625.10 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : TcCLB.409625.10 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : TcCLB.409625.10 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : CLB.506965.20, T ... TcCLB.511921.140 is truncated.
## Number of characters exeed the limit of 32767.
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
tc_unas_down_cp <- simple_clusterprofiler(
ko_wt_down, de_table = ko_wt_all, orgdb = unas_db, orgdb_to = "GID",
organism = "tcruzi")## --> No gene can be mapped....
## --> Expected input gene ID: TcCLB.508331.50,TcCLB.509961.90,TcCLB.509367.51,TcCLB.457101.20,TcCLB.507153.10,TcCLB.510783.11
## --> return NULL...
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
##
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (67.59% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : There were 25 pathways for which P-values were not
## calculated properly due to unbalanced (positive and negative) gene-level statistic values. For such pathways pval, padj, NES, log2err are set
## to NA. You can try to increase the value of the argument nPermSimple (for example set it nPermSimple = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some of the pathways the P-values were likely
## overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some pathways, in reality P-values are less than
## 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
tc_esmer_down_cp
length_db <- as.data.frame(rowData(tc_se))
length_db[["gid"]] <- rownames(length_db)
length_db <- length_db[, c("gid", "width")]
tc_up_gs <- simple_goseq(ko_wt_up, go_db = tc_go, length_db = length_db, min_xref = 10)## Found 29 go_db genes and 48 length_db genes out of 48.
## Testing that go categories are defined.
## Removing undefined categories.
## Gathering synonyms.
## Gathering category definitions.
## Warning in fortify(object, showCategory = showCategory, by = x, ...): Arguments in `...` must be used.
## ✖ Problematic argument:
## • by = x
## ℹ Did you misspell an argument name?
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • as.Date = as.Date
## • yscale_mapping = yscale_mapping
## • show.legend = FALSE
## ℹ Did you misspell an argument name?
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## Warning in fortify(object, showCategory = showCategory, by = x, ...): Arguments in `...` must be used.
## ✖ Problematic argument:
## • by = x
## ℹ Did you misspell an argument name?