Let us check out some new cruzi infections following the deletion of a specific gene.
I thought I also did the interrogation of the CLBrener transcriptome, but that appears untrue. I think I may have forgotten to copy the genome in place…
We received an email flagging the following genes as CRISPR/Cas9 targets for the knockouts. I therefore would like to have screenshots of each of these regions to show what differences are observable between the three strains. Note that the lower coverage of the last few samples may mean that we need to stick to the first group.
expected_lower <- c("TcCLB.508173.120", "TcCLB.509495.30", "TcCLB.510055.20", "TcCLB.506961.25",
"TcCLB.510787.10", "TcCLB.511667.30", "TcCLB.507085.30", "TcCLB.507427.10",
"TcCLB.508913.25", "TcCLB.508857.30", "TcCLB.503993.10", "TcCLB.511323.10",
"TcCLB.508089.10", "TcCLB.508717.60", "TcCLB.506975.80", "TcCLB.505931.30",
"TcCLB.507979.30", "TcCLB.509817.50", "TcCLB.506841.20")I have a pretty new genome downloaded (202509), so I will (for now) just let my annotation function grab whatever it thinks is reasonable. It chose the 202410 set. Seems good to me.
## The biomart annotations file already exists, loading from it.
tc_annot <- load_gff_annotations("~/libraries/genome/gff/tcruzi_all.gff",
type = "mRNA", id_col = "Parent")## Returning a df with 24 columns and 23305 rows.
rownames(tc_annot) <- gsub(x = make.names(tc_annot[["Name"]], unique = TRUE),
pattern = "\\.\\d+$", replacement = "")
esmer_db <- "org.Tcruzi.CL.Brener.Esmeraldo.like.v68.eg.db"
library(esmer_db, character.only = TRUE)## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:hpgltools':
##
## annotation<-, conditions, conditions<-
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind, colnames, dirname, do.call,
## duplicated, eval, evalq, Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply,
## Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, saveRDS, setdiff, table, tapply, union, unique, unsplit,
## which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with 'browseVignettes()'. To cite
## Bioconductor, see 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:glue':
##
## trim
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
##
esmer_db <- get0(esmer_db)
all_keytypes <- keytypes(esmer_db)
wanted_idx <- grepl(x = all_keytypes, pattern = "^ANNOT_")
wanted_fields <- all_keytypes[wanted_idx]
nonesmer_db <- "org.Tcruzi.CL.Brener.Non.Esmeraldo.like.v68.eg.db"
unas_db <- "org.Tcruzi.CL.Brener.v68.eg.db"
tc_esmer <- load_orgdb_annotations(esmer_db, keytype = "gid", fields = wanted_fields)## Unable to find CDSNAME, setting it to ANNOT_EXTERNAL_DB_NAME.
## Unable to find CDSCHROM in the db, removing it.
## Unable to find CDSSTRAND in the db, removing it.
## Unable to find CDSSTART in the db, removing it.
## Unable to find CDSEND in the db, removing it.
## Extracted all gene ids.
## Attempting to select: ANNOT_EXTERNAL_DB_NAME, GENE_TYPE, ANNOT_AA_SEQUENCE_ID, ANNOT_ANNOTATED_GO_COMPONENT, ANNOT_ANNOTATED_GO_FUNCTION, ANNOT_ANNOTATED_GO_ID_COMPONENT, ANNOT_ANNOTATED_GO_ID_FUNCTION, ANNOT_ANNOTATED_GO_ID_PROCESS, ANNOT_ANNOTATED_GO_PROCESS, ANNOT_ANTICODON, ANNOT_APOLLO_LINK_OUT, ANNOT_APOLLO_TRANSCRIPT_DESCRIPTION, ANNOT_CDS, ANNOT_CDS_LENGTH, ANNOT_CHROMOSOME, ANNOT_CODING_END, ANNOT_CODING_START, ANNOT_EC_NUMBERS, ANNOT_EC_NUMBERS_DERIVED, ANNOT_END_MAX, ANNOT_EXON_COUNT, ANNOT_EXTERNAL_DB_NAME, ANNOT_EXTERNAL_DB_VERSION, ANNOT_FIVE_PRIME_UTR_LENGTH, ANNOT_GENE_CONTEXT_END, ANNOT_GENE_CONTEXT_START, ANNOT_GENE_END_MAX, ANNOT_GENE_END_MAX_TEXT, ANNOT_GENE_ENTREZ_ID, ANNOT_GENE_ENTREZ_LINK_DISPLAYTEXT, ANNOT_GENE_ENTREZ_LINK_URL, ANNOT_GENE_EXON_COUNT, ANNOT_GENE_HTS_NONCODING_SNPS, ANNOT_GENE_HTS_NONSYN_SYN_RATIO, ANNOT_GENE_HTS_NONSYNONYMOUS_SNPS, ANNOT_GENE_HTS_STOP_CODON_SNPS, ANNOT_GENE_HTS_SYNONYMOUS_SNPS, ANNOT_GENE_LOCATION_TEXT, ANNOT_GENE_NAME, ANNOT_GENE_ORTHOLOG_NUMBER, ANNOT_GENE_ORTHOMCL_NAME, ANNOT_GENE_PARALOG_NUMBER, ANNOT_GENE_PREVIOUS_IDS, ANNOT_GENE_PRODUCT, ANNOT_GENE_START_MIN, ANNOT_GENE_START_MIN_TEXT, ANNOT_GENE_TOTAL_HTS_SNPS, ANNOT_GENE_TRANSCRIPT_COUNT, ANNOT_GENE_TYPE, ANNOT_GENOMIC_SEQUENCE_LENGTH, ANNOT_GENUS_SPECIES, ANNOT_HAS_MISSING_TRANSCRIPTS, ANNOT_INTERPRO_DESCRIPTION, ANNOT_INTERPRO_ID, ANNOT_IS_DEPRECATED, ANNOT_IS_PSEUDO, ANNOT_ISOELECTRIC_POINT, ANNOT_LOCATION_TEXT, ANNOT_MAP_LOCATION, ANNOT_MCMC_LOCATION, ANNOT_MOLECULAR_WEIGHT, ANNOT_NCBI_TAX_ID, ANNOT_ORTHOMCL_LINK, ANNOT_OVERVIEW, ANNOT_PFAM_DESCRIPTION, ANNOT_PFAM_ID, ANNOT_PIRSF_DESCRIPTION, ANNOT_PIRSF_ID, ANNOT_PREDICTED_GO_COMPONENT, ANNOT_PREDICTED_GO_FUNCTION, ANNOT_PREDICTED_GO_ID_COMPONENT, ANNOT_PREDICTED_GO_ID_FUNCTION, ANNOT_PREDICTED_GO_ID_PROCESS, ANNOT_PREDICTED_GO_PROCESS, ANNOT_PRIMARY_KEY, ANNOT_PROB_MAP, ANNOT_PROB_MCMC, ANNOT_PROSITEPROFILES_DESCRIPTION, ANNOT_PROSITEPROFILES_ID, ANNOT_PROTEIN_LENGTH, ANNOT_PROTEIN_SEQUENCE, ANNOT_PROTEIN_SOURCE_ID, ANNOT_PSEUDO_STRING, ANNOT_SEQUENCE_DATABASE_NAME, ANNOT_SEQUENCE_ID, ANNOT_SIGNALP_PEPTIDE, ANNOT_SMART_DESCRIPTION, ANNOT_SMART_ID, ANNOT_SNPOVERVIEW, ANNOT_SO_ID, ANNOT_SO_TERM_DEFINITION, ANNOT_SO_TERM_NAME, ANNOT_SO_VERSION, ANNOT_START_MIN, ANNOT_STRAND, ANNOT_STRAND_PLUS_MINUS, ANNOT_SUPERFAMILY_DESCRIPTION, ANNOT_SUPERFAMILY_ID, ANNOT_THREE_PRIME_UTR_LENGTH, ANNOT_TIGRFAM_DESCRIPTION, ANNOT_TIGRFAM_ID, ANNOT_TM_COUNT, ANNOT_TRANS_FOUND_PER_GENE_INTERNAL, ANNOT_TRANSCRIPT_INDEX_PER_GENE, ANNOT_TRANSCRIPT_LENGTH, ANNOT_TRANSCRIPT_LINK, ANNOT_TRANSCRIPT_PRODUCT, ANNOT_TRANSCRIPT_SEQUENCE, ANNOT_TRANSCRIPTS_FOUND_PER_GENE, ANNOT_UNIPROT_IDS, ANNOT_UNIPROT_LINKS
## 'select()' returned 1:1 mapping between keys and columns
##
## Unable to find CDSNAME, setting it to ANNOT_EXTERNAL_DB_NAME.
## Unable to find CDSCHROM in the db, removing it.
## Unable to find CDSSTRAND in the db, removing it.
## Unable to find CDSSTART in the db, removing it.
## Unable to find CDSEND in the db, removing it.
## Extracted all gene ids.
## Attempting to select: ANNOT_EXTERNAL_DB_NAME, GENE_TYPE, ANNOT_AA_SEQUENCE_ID, ANNOT_ANNOTATED_GO_COMPONENT, ANNOT_ANNOTATED_GO_FUNCTION, ANNOT_ANNOTATED_GO_ID_COMPONENT, ANNOT_ANNOTATED_GO_ID_FUNCTION, ANNOT_ANNOTATED_GO_ID_PROCESS, ANNOT_ANNOTATED_GO_PROCESS, ANNOT_ANTICODON, ANNOT_APOLLO_LINK_OUT, ANNOT_APOLLO_TRANSCRIPT_DESCRIPTION, ANNOT_CDS, ANNOT_CDS_LENGTH, ANNOT_CHROMOSOME, ANNOT_CODING_END, ANNOT_CODING_START, ANNOT_EC_NUMBERS, ANNOT_EC_NUMBERS_DERIVED, ANNOT_END_MAX, ANNOT_EXON_COUNT, ANNOT_EXTERNAL_DB_NAME, ANNOT_EXTERNAL_DB_VERSION, ANNOT_FIVE_PRIME_UTR_LENGTH, ANNOT_GENE_CONTEXT_END, ANNOT_GENE_CONTEXT_START, ANNOT_GENE_END_MAX, ANNOT_GENE_END_MAX_TEXT, ANNOT_GENE_ENTREZ_ID, ANNOT_GENE_ENTREZ_LINK_DISPLAYTEXT, ANNOT_GENE_ENTREZ_LINK_URL, ANNOT_GENE_EXON_COUNT, ANNOT_GENE_HTS_NONCODING_SNPS, ANNOT_GENE_HTS_NONSYN_SYN_RATIO, ANNOT_GENE_HTS_NONSYNONYMOUS_SNPS, ANNOT_GENE_HTS_STOP_CODON_SNPS, ANNOT_GENE_HTS_SYNONYMOUS_SNPS, ANNOT_GENE_LOCATION_TEXT, ANNOT_GENE_NAME, ANNOT_GENE_ORTHOLOG_NUMBER, ANNOT_GENE_ORTHOMCL_NAME, ANNOT_GENE_PARALOG_NUMBER, ANNOT_GENE_PREVIOUS_IDS, ANNOT_GENE_PRODUCT, ANNOT_GENE_START_MIN, ANNOT_GENE_START_MIN_TEXT, ANNOT_GENE_TOTAL_HTS_SNPS, ANNOT_GENE_TRANSCRIPT_COUNT, ANNOT_GENE_TYPE, ANNOT_GENOMIC_SEQUENCE_LENGTH, ANNOT_GENUS_SPECIES, ANNOT_HAS_MISSING_TRANSCRIPTS, ANNOT_INTERPRO_DESCRIPTION, ANNOT_INTERPRO_ID, ANNOT_IS_DEPRECATED, ANNOT_IS_PSEUDO, ANNOT_ISOELECTRIC_POINT, ANNOT_LOCATION_TEXT, ANNOT_MAP_LOCATION, ANNOT_MCMC_LOCATION, ANNOT_MOLECULAR_WEIGHT, ANNOT_NCBI_TAX_ID, ANNOT_ORTHOMCL_LINK, ANNOT_OVERVIEW, ANNOT_PFAM_DESCRIPTION, ANNOT_PFAM_ID, ANNOT_PIRSF_DESCRIPTION, ANNOT_PIRSF_ID, ANNOT_PREDICTED_GO_COMPONENT, ANNOT_PREDICTED_GO_FUNCTION, ANNOT_PREDICTED_GO_ID_COMPONENT, ANNOT_PREDICTED_GO_ID_FUNCTION, ANNOT_PREDICTED_GO_ID_PROCESS, ANNOT_PREDICTED_GO_PROCESS, ANNOT_PRIMARY_KEY, ANNOT_PROB_MAP, ANNOT_PROB_MCMC, ANNOT_PROSITEPROFILES_DESCRIPTION, ANNOT_PROSITEPROFILES_ID, ANNOT_PROTEIN_LENGTH, ANNOT_PROTEIN_SEQUENCE, ANNOT_PROTEIN_SOURCE_ID, ANNOT_PSEUDO_STRING, ANNOT_SEQUENCE_DATABASE_NAME, ANNOT_SEQUENCE_ID, ANNOT_SIGNALP_PEPTIDE, ANNOT_SMART_DESCRIPTION, ANNOT_SMART_ID, ANNOT_SNPOVERVIEW, ANNOT_SO_ID, ANNOT_SO_TERM_DEFINITION, ANNOT_SO_TERM_NAME, ANNOT_SO_VERSION, ANNOT_START_MIN, ANNOT_STRAND, ANNOT_STRAND_PLUS_MINUS, ANNOT_SUPERFAMILY_DESCRIPTION, ANNOT_SUPERFAMILY_ID, ANNOT_THREE_PRIME_UTR_LENGTH, ANNOT_TIGRFAM_DESCRIPTION, ANNOT_TIGRFAM_ID, ANNOT_TM_COUNT, ANNOT_TRANS_FOUND_PER_GENE_INTERNAL, ANNOT_TRANSCRIPT_INDEX_PER_GENE, ANNOT_TRANSCRIPT_LENGTH, ANNOT_TRANSCRIPT_LINK, ANNOT_TRANSCRIPT_PRODUCT, ANNOT_TRANSCRIPT_SEQUENCE, ANNOT_TRANSCRIPTS_FOUND_PER_GENE, ANNOT_UNIPROT_IDS, ANNOT_UNIPROT_LINKS
## 'select()' returned 1:1 mapping between keys and columns
##
## Unable to find CDSNAME, setting it to ANNOT_EXTERNAL_DB_NAME.
## Unable to find CDSCHROM in the db, removing it.
## Unable to find CDSSTRAND in the db, removing it.
## Unable to find CDSSTART in the db, removing it.
## Unable to find CDSEND in the db, removing it.
## Extracted all gene ids.
## Attempting to select: ANNOT_EXTERNAL_DB_NAME, GENE_TYPE, ANNOT_AA_SEQUENCE_ID, ANNOT_ANNOTATED_GO_COMPONENT, ANNOT_ANNOTATED_GO_FUNCTION, ANNOT_ANNOTATED_GO_ID_COMPONENT, ANNOT_ANNOTATED_GO_ID_FUNCTION, ANNOT_ANNOTATED_GO_ID_PROCESS, ANNOT_ANNOTATED_GO_PROCESS, ANNOT_ANTICODON, ANNOT_APOLLO_LINK_OUT, ANNOT_APOLLO_TRANSCRIPT_DESCRIPTION, ANNOT_CDS, ANNOT_CDS_LENGTH, ANNOT_CHROMOSOME, ANNOT_CODING_END, ANNOT_CODING_START, ANNOT_EC_NUMBERS, ANNOT_EC_NUMBERS_DERIVED, ANNOT_END_MAX, ANNOT_EXON_COUNT, ANNOT_EXTERNAL_DB_NAME, ANNOT_EXTERNAL_DB_VERSION, ANNOT_FIVE_PRIME_UTR_LENGTH, ANNOT_GENE_CONTEXT_END, ANNOT_GENE_CONTEXT_START, ANNOT_GENE_END_MAX, ANNOT_GENE_END_MAX_TEXT, ANNOT_GENE_ENTREZ_ID, ANNOT_GENE_ENTREZ_LINK_DISPLAYTEXT, ANNOT_GENE_ENTREZ_LINK_URL, ANNOT_GENE_EXON_COUNT, ANNOT_GENE_HTS_NONCODING_SNPS, ANNOT_GENE_HTS_NONSYN_SYN_RATIO, ANNOT_GENE_HTS_NONSYNONYMOUS_SNPS, ANNOT_GENE_HTS_STOP_CODON_SNPS, ANNOT_GENE_HTS_SYNONYMOUS_SNPS, ANNOT_GENE_LOCATION_TEXT, ANNOT_GENE_NAME, ANNOT_GENE_ORTHOLOG_NUMBER, ANNOT_GENE_ORTHOMCL_NAME, ANNOT_GENE_PARALOG_NUMBER, ANNOT_GENE_PREVIOUS_IDS, ANNOT_GENE_PRODUCT, ANNOT_GENE_START_MIN, ANNOT_GENE_START_MIN_TEXT, ANNOT_GENE_TOTAL_HTS_SNPS, ANNOT_GENE_TRANSCRIPT_COUNT, ANNOT_GENE_TYPE, ANNOT_GENOMIC_SEQUENCE_LENGTH, ANNOT_GENUS_SPECIES, ANNOT_HAS_MISSING_TRANSCRIPTS, ANNOT_INTERPRO_DESCRIPTION, ANNOT_INTERPRO_ID, ANNOT_IS_DEPRECATED, ANNOT_IS_PSEUDO, ANNOT_ISOELECTRIC_POINT, ANNOT_LOCATION_TEXT, ANNOT_MAP_LOCATION, ANNOT_MCMC_LOCATION, ANNOT_MOLECULAR_WEIGHT, ANNOT_NCBI_TAX_ID, ANNOT_ORTHOMCL_LINK, ANNOT_OVERVIEW, ANNOT_PFAM_DESCRIPTION, ANNOT_PFAM_ID, ANNOT_PIRSF_DESCRIPTION, ANNOT_PIRSF_ID, ANNOT_PREDICTED_GO_COMPONENT, ANNOT_PREDICTED_GO_FUNCTION, ANNOT_PREDICTED_GO_ID_COMPONENT, ANNOT_PREDICTED_GO_ID_FUNCTION, ANNOT_PREDICTED_GO_ID_PROCESS, ANNOT_PREDICTED_GO_PROCESS, ANNOT_PRIMARY_KEY, ANNOT_PROB_MAP, ANNOT_PROB_MCMC, ANNOT_PROSITEPROFILES_DESCRIPTION, ANNOT_PROSITEPROFILES_ID, ANNOT_PROTEIN_LENGTH, ANNOT_PROTEIN_SEQUENCE, ANNOT_PROTEIN_SOURCE_ID, ANNOT_PSEUDO_STRING, ANNOT_SEQUENCE_DATABASE_NAME, ANNOT_SEQUENCE_ID, ANNOT_SIGNALP_PEPTIDE, ANNOT_SMART_DESCRIPTION, ANNOT_SMART_ID, ANNOT_SNPOVERVIEW, ANNOT_SO_ID, ANNOT_SO_TERM_DEFINITION, ANNOT_SO_TERM_NAME, ANNOT_SO_VERSION, ANNOT_START_MIN, ANNOT_STRAND, ANNOT_STRAND_PLUS_MINUS, ANNOT_SUPERFAMILY_DESCRIPTION, ANNOT_SUPERFAMILY_ID, ANNOT_THREE_PRIME_UTR_LENGTH, ANNOT_TIGRFAM_DESCRIPTION, ANNOT_TIGRFAM_ID, ANNOT_TM_COUNT, ANNOT_TRANS_FOUND_PER_GENE_INTERNAL, ANNOT_TRANSCRIPT_INDEX_PER_GENE, ANNOT_TRANSCRIPT_LENGTH, ANNOT_TRANSCRIPT_LINK, ANNOT_TRANSCRIPT_PRODUCT, ANNOT_TRANSCRIPT_SEQUENCE, ANNOT_TRANSCRIPTS_FOUND_PER_GENE, ANNOT_UNIPROT_IDS, ANNOT_UNIPROT_LINKS
## 'select()' returned 1:1 mapping between keys and columns
tc_more <- rbind(tc_esmer$genes, tc_nonesmer$genes, tc_unas$genes)
tc_annot <- merge(tc_annot, tc_more, by = "row.names")
rownames(tc_annot) <- tc_annot[["gid"]]
tc_annot[["gid"]] <- NULL
dim(tc_annot)## [1] 23304 135
## This is an orgdb, good.
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## This is an orgdb, good.
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## This is an orgdb, good.
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
I asked for one from Najib/Amalie but unless I am mistaken it has not arrived. That is not a problem, given two helpful things: April provides one, I also named the directories so that the sample IDs are built in; so I will just make a fake one for now and then merge in whatever I get from them…
sample_sheet <- "sample_sheets/all_samples.xlsx"
plot_meta_sankey(as.data.frame(extract_metadata(sample_sheet)),
factors = c("background", "exp_number"))## Did not find the condition column in the sample sheet.
## Filling it in as undefined.
## Did not find the batch column in the sample sheet.
## Filling it in as undefined.
## Checking the state of the condition column.
## Checking the state of the batch column.
## Checking the condition factor.
## Warning: attributes are not identical across measure variables; they will be dropped
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the ggsankey package.
## Please report the issue at <https://github.com/davidsjoberg/ggsankey/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## A sankey plot describing the metadata of 14 samples,
## including 19 out of 0 nodes and traversing metadata factors:
## background, exp_number.
Let us see how well my preprocess gatherer does…
## Did not find the condition column in the sample sheet.
## Filling it in as undefined.
## Did not find the batch column in the sample sheet.
## Filling it in as undefined.
## Checking the state of the condition column.
## Checking the state of the batch column.
## Checking the condition factor.
## Warning in dispatch_regex_search(meta, search, replace, input_file_spec, : NAs introduced by coercion
## Writing new metadata to: sample_sheets/all_samples_modified.xlsx
## Deleting the file sample_sheets/all_samples_modified.xlsx before writing the tables.
## sampleid samplenumber celltype background hpi exp_number amount_in_10ul
## X02_HeLa_control_60h 02_HeLa_control_60h 2 HeLa control t60h e1 183
## X04_HeLa_WT_60hpi 04_HeLa_WT_60hpi 4 HeLa wt t60h e1 304
## X06_HeLa_KO7_60hpi 06_HeLa_KO7_60hpi 6 HeLa ko7 t60h e1 298
## X08_HeLa_Cas_60hpi 08_HeLa_Cas_60hpi 8 HeLa cas t60h e1 284
## X18_HeLa_control_60h 18_HeLa_control_60h 18 HeLa control t60h e2 62
## X20_HeLa_WT_60hpi 20_HeLa_WT_60hpi 20 HeLa wt t60h e2 228
## amount_fact condition batch sampleid_backup trimomatic_input
## X02_HeLa_control_60h low undefined undefined 02_HeLa_control_60h 34421670
## X04_HeLa_WT_60hpi mid undefined undefined 04_HeLa_WT_60hpi 33338315
## X06_HeLa_KO7_60hpi mid undefined undefined 06_HeLa_KO7_60hpi 36904955
## X08_HeLa_Cas_60hpi mid undefined undefined 08_HeLa_Cas_60hpi 34230672
## X18_HeLa_control_60h low undefined undefined 18_HeLa_control_60h 31154298
## X20_HeLa_WT_60hpi mid undefined undefined 20_HeLa_WT_60hpi 35726918
## trimomatic_output trimomatic_percent fastqc_pct_gc kraken_bacterial_classified
## X02_HeLa_control_60h 31723102 0.922 52 147699
## X04_HeLa_WT_60hpi 30831462 0.925 50 285754
## X06_HeLa_KO7_60hpi 34168992 0.926 50 414463
## X08_HeLa_Cas_60hpi 30953413 0.904 50 309973
## X18_HeLa_control_60h 28104898 0.902 51 147359
## X20_HeLa_WT_60hpi 32916331 0.921 50 323491
## kraken_bacterial_unclassified kraken_first_bacterial_species
## X02_HeLa_control_60h 418871 Porphyrobacter sp. GA68
## X04_HeLa_WT_60hpi 6263711 Mycoplasmopsis arginini
## X06_HeLa_KO7_60hpi 8109109 Mycoplasmopsis arginini
## X08_HeLa_Cas_60hpi 7277804 Mycoplasmopsis arginini
## X18_HeLa_control_60h 374703 Porphyrobacter sp. GA68
## X20_HeLa_WT_60hpi 8424975 Klebsiella pneumoniae
## kraken_first_bacterial_species_reads
## X02_HeLa_control_60h 34515
## X04_HeLa_WT_60hpi 20649
## X06_HeLa_KO7_60hpi 95574
## X08_HeLa_Cas_60hpi 22086
## X18_HeLa_control_60h 22324
## X20_HeLa_WT_60hpi 4599
## kraken_matrix_bacterial
## X02_HeLa_control_60h preprocessing/02_HeLa_control_60h/outputs/20251031kraken_bacteria/kraken_report_matrix.tsv
## X04_HeLa_WT_60hpi preprocessing/04_HeLa_WT_60hpi/outputs/20251031kraken_bacteria/kraken_report_matrix.tsv
## X06_HeLa_KO7_60hpi preprocessing/06_HeLa_KO7_60hpi/outputs/20251031kraken_bacteria/kraken_report_matrix.tsv
## X08_HeLa_Cas_60hpi preprocessing/08_HeLa_Cas_60hpi/outputs/20251031kraken_bacteria/kraken_report_matrix.tsv
## X18_HeLa_control_60h preprocessing/18_HeLa_control_60h/outputs/20251031kraken_bacteria/kraken_report_matrix.tsv
## X20_HeLa_WT_60hpi preprocessing/20_HeLa_WT_60hpi/outputs/20251031kraken_bacteria/kraken_report_matrix.tsv
## hisat_genome_input_reads_hg38_115 hisat_genome_input_reads_tcruzi_all
## X02_HeLa_control_60h 31723102 566570
## X04_HeLa_WT_60hpi 30831462 6549465
## X06_HeLa_KO7_60hpi NA 8523572
## X08_HeLa_Cas_60hpi 30953413 7587777
## X18_HeLa_control_60h 28104898 522062
## X20_HeLa_WT_60hpi 32916331 8748466
## hisat_genome_single_concordant_hg38_115 hisat_genome_single_concordant_tcruzi_all
## X02_HeLa_control_60h 27374698 533
## X04_HeLa_WT_60hpi 21550886 3958505
## X06_HeLa_KO7_60hpi NA 5187623
## X08_HeLa_Cas_60hpi 20831115 4581882
## X18_HeLa_control_60h 24646849 1779
## X20_HeLa_WT_60hpi 21560373 5359267
## hisat_genome_multi_concordant_hg38_115 hisat_genome_multi_concordant_tcruzi_all
## X02_HeLa_control_60h 3781834 148
## X04_HeLa_WT_60hpi 2731111 1769606
## X06_HeLa_KO7_60hpi NA 2326419
## X08_HeLa_Cas_60hpi 2534521 2096222
## X18_HeLa_control_60h 2935987 580
## X20_HeLa_WT_60hpi 2607492 2409017
## hisat_genome_single_all_hg38_115 hisat_genome_single_all_tcruzi_all
## X02_HeLa_control_60h 393579 31940
## X04_HeLa_WT_60hpi 386791 191224
## X06_HeLa_KO7_60hpi NA 219958
## X08_HeLa_Cas_60hpi 370232 198309
## X18_HeLa_control_60h 371885 44606
## X20_HeLa_WT_60hpi 394781 254343
## hisat_genome_multi_all_hg38_115 hisat_genome_multi_all_tcruzi_all
## X02_HeLa_control_60h 147888 26257
## X04_HeLa_WT_60hpi 125185 98192
## X06_HeLa_KO7_60hpi NA 120720
## X08_HeLa_Cas_60hpi 118754 105444
## X18_HeLa_control_60h 118560 22098
## X20_HeLa_WT_60hpi 124747 122425
## hisat_unmapped_hg38_115 hisat_unmapped_tcruzi_all hisat_genome_percent_log_hg38_115
## X02_HeLa_control_60h 485321 1073519 99.24
## X04_HeLa_WT_60hpi 12501300 1335114 79.73
## X06_HeLa_KO7_60hpi NA 1655578 NA
## X08_HeLa_Cas_60hpi 14599664 1495987 76.42
## X18_HeLa_control_60h 474391 972594 99.16
## X20_HeLa_WT_60hpi 16893802 1557970 74.34
## hisat_genome_percent_log_tcruzi_all
## X02_HeLa_control_60h 5.26
## X04_HeLa_WT_60hpi 89.81
## X06_HeLa_KO7_60hpi 90.29
## X08_HeLa_Cas_60hpi 90.14
## X18_HeLa_control_60h 6.85
## X20_HeLa_WT_60hpi 91.10
## hisat_alignment_hg38_115
## X02_HeLa_control_60h preprocessing/02_HeLa_control_60h/outputs/20251031hisat_hg38_115/hg38_115_genome.bam
## X04_HeLa_WT_60hpi preprocessing/04_HeLa_WT_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome.bam
## X06_HeLa_KO7_60hpi preprocessing/06_HeLa_KO7_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome.bam
## X08_HeLa_Cas_60hpi preprocessing/08_HeLa_Cas_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome.bam
## X18_HeLa_control_60h preprocessing/18_HeLa_control_60h/outputs/20251031hisat_hg38_115/hg38_115_genome.bam
## X20_HeLa_WT_60hpi preprocessing/20_HeLa_WT_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome.bam
## hisat_alignment_tcruzi_all
## X02_HeLa_control_60h preprocessing/02_HeLa_control_60h/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome.bam
## X04_HeLa_WT_60hpi preprocessing/04_HeLa_WT_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome.bam
## X06_HeLa_KO7_60hpi preprocessing/06_HeLa_KO7_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome.bam
## X08_HeLa_Cas_60hpi preprocessing/08_HeLa_Cas_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome.bam
## X18_HeLa_control_60h preprocessing/18_HeLa_control_60h/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome.bam
## X20_HeLa_WT_60hpi preprocessing/20_HeLa_WT_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome.bam
## salmon_percent_hg38_115 salmon_percent_tcruzi_all salmon_observed_genes_hg38_115
## X02_HeLa_control_60h 45.40 NA 47839
## X04_HeLa_WT_60hpi 35.05 NA 46509
## X06_HeLa_KO7_60hpi 33.15 NA 48117
## X08_HeLa_Cas_60hpi 33.47 NA 46351
## X18_HeLa_control_60h 43.10 NA 47978
## X20_HeLa_WT_60hpi 34.33 NA 47985
## salmon_observed_genes_tcruzi_all
## X02_HeLa_control_60h 0
## X04_HeLa_WT_60hpi 0
## X06_HeLa_KO7_60hpi 0
## X08_HeLa_Cas_60hpi 0
## X18_HeLa_control_60h 0
## X20_HeLa_WT_60hpi 0
## input_r1
## X02_HeLa_control_60h unprocessed/02_HeLa_control_60h_2_S1_R1_001.fastq.gz
## X04_HeLa_WT_60hpi unprocessed/04_HeLa_WT_60hpi_2_S2_R1_001.fastq.gz
## X06_HeLa_KO7_60hpi unprocessed/06_HeLa_KO7_60hpi_2_S3_R1_001.fastq.gz
## X08_HeLa_Cas_60hpi unprocessed/08_HeLa_Cas_60hpi_2_S4_R1_001.fastq.gz
## X18_HeLa_control_60h unprocessed/18_HeLa_control_60h_2_S5_R1_001.fastq.gz
## X20_HeLa_WT_60hpi unprocessed/20_HeLa_WT_60hpi_2_S6_R1_001.fastq.gz
## input_r2
## X02_HeLa_control_60h unprocessed/02_HeLa_control_60h_2_S1_R2_001.fastq.gz
## X04_HeLa_WT_60hpi unprocessed/04_HeLa_WT_60hpi_2_S2_R2_001.fastq.gz
## X06_HeLa_KO7_60hpi unprocessed/06_HeLa_KO7_60hpi_2_S3_R2_001.fastq.gz
## X08_HeLa_Cas_60hpi unprocessed/08_HeLa_Cas_60hpi_2_S4_R2_001.fastq.gz
## X18_HeLa_control_60h unprocessed/18_HeLa_control_60h_2_S5_R2_001.fastq.gz
## X20_HeLa_WT_60hpi unprocessed/20_HeLa_WT_60hpi_2_S6_R2_001.fastq.gz
## hisat_count_table_hg38_115
## X02_HeLa_control_60h preprocessing/02_HeLa_control_60h/outputs/20251031hisat_hg38_115/hg38_115_genome-paired_s2_gene_ID_fcounts.csv.xz
## X04_HeLa_WT_60hpi preprocessing/04_HeLa_WT_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome-paired_s2_gene_ID_fcounts.csv.xz
## X06_HeLa_KO7_60hpi preprocessing/06_HeLa_KO7_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome-paired_s2_gene_ID_fcounts.csv.xz
## X08_HeLa_Cas_60hpi preprocessing/08_HeLa_Cas_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome-paired_s2_gene_ID_fcounts.csv.xz
## X18_HeLa_control_60h preprocessing/18_HeLa_control_60h/outputs/20251031hisat_hg38_115/hg38_115_genome-paired_s2_gene_ID_fcounts.csv.xz
## X20_HeLa_WT_60hpi preprocessing/20_HeLa_WT_60hpi/outputs/20251031hisat_hg38_115/hg38_115_genome-paired_s2_gene_ID_fcounts.csv.xz
## hisat_count_table_tcruzi_all
## X02_HeLa_control_60h preprocessing/02_HeLa_control_60h/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome-paired_s2_gene_ID_fcounts.csv.xz
## X04_HeLa_WT_60hpi preprocessing/04_HeLa_WT_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome-paired_s2_gene_ID_fcounts.csv.xz
## X06_HeLa_KO7_60hpi preprocessing/06_HeLa_KO7_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome-paired_s2_gene_ID_fcounts.csv.xz
## X08_HeLa_Cas_60hpi preprocessing/08_HeLa_Cas_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome-paired_s2_gene_ID_fcounts.csv.xz
## X18_HeLa_control_60h preprocessing/18_HeLa_control_60h/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome-paired_s2_gene_ID_fcounts.csv.xz
## X20_HeLa_WT_60hpi preprocessing/20_HeLa_WT_60hpi/outputs/20251031hisat_tcruzi_all/tcruzi_all_genome-paired_s2_gene_ID_fcounts.csv.xz
## salmon_count_table_hg38_115
## X02_HeLa_control_60h preprocessing/02_HeLa_control_60h/outputs/20251031salmon_hg38_115_CDS/quant.sf
## X04_HeLa_WT_60hpi preprocessing/04_HeLa_WT_60hpi/outputs/20251031salmon_hg38_115_CDS/quant.sf
## X06_HeLa_KO7_60hpi preprocessing/06_HeLa_KO7_60hpi/outputs/20251031salmon_hg38_115_CDS/quant.sf
## X08_HeLa_Cas_60hpi preprocessing/08_HeLa_Cas_60hpi/outputs/20251031salmon_hg38_115_CDS/quant.sf
## X18_HeLa_control_60h preprocessing/18_HeLa_control_60h/outputs/20251031salmon_hg38_115_CDS/quant.sf
## X20_HeLa_WT_60hpi preprocessing/20_HeLa_WT_60hpi/outputs/20251031salmon_hg38_115_CDS/quant.sf
## salmon_count_table_tcruzi_all
## X02_HeLa_control_60h NA
## X04_HeLa_WT_60hpi NA
## X06_HeLa_KO7_60hpi NA
## X08_HeLa_Cas_60hpi NA
## X18_HeLa_control_60h NA
## X20_HeLa_WT_60hpi NA
color_choices <- list(
"hs" = list(
"AB10" = "#086448",
"cas" = "#702601",
"control" = "#454178",
"ko7" = "#870649",
"positive" = "#46060E",
"wt" = "#785C01"),
"tc" = list(
"AB10" = "#0DA877",
"cas" = "#BA3F01",
"control" = "#7771D1",
"ko7" = "#BF086A",
"positive" = "#8F0C1E",
"wt" = "#AF8401"))These colors are bad, the human are too dark and lose their contrast with respect to each other. I should get Najib/April/Amalie to help define better.
hs_se <- create_se(new_meta[["new_meta"]], gene_info = hs_annot[["gene_annotations"]],
file_column = "hisat_count_table_hg38_115") %>%
set_conditions(fact = "background") %>%
set_batches(fact = "exp_number") %>%
set_colors(color_choices[["hs"]])## Reading the sample metadata.
## Checking the state of the condition column.
## Checking the state of the batch column.
## Checking the condition factor.
## The sample definitions comprises: 14 rows(samples) and 44 columns(metadata fields).
## Matched 21562 annotations and counts.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the summarized experiment to 'se.rda'.
## The final summarized experiment has 21571 rows and 44 columns.
## The numbers of samples by condition are:
##
## AB10 cas control ko7 positive wt
## 3 1 3 3 1 3
## The number of samples by batch are:
##
## e1 e2 e3 undef
## 4 4 5 1
tc_se <- create_se(new_meta[["new_meta"]], gene_info = tc_annot,
file_column = "hisat_count_table_tcruzi_all") %>%
set_conditions(fact = "background") %>%
set_batches(fact = "exp_number") %>%
set_colors(color_choices[["tc"]])## Reading the sample metadata.
## Checking the state of the condition column.
## Checking the state of the batch column.
## Checking the condition factor.
## The sample definitions comprises: 14 rows(samples) and 44 columns(metadata fields).
## Matched 23304 annotations and counts.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the summarized experiment to 'se.rda'.
## The final summarized experiment has 25100 rows and 44 columns.
## The numbers of samples by condition are:
##
## AB10 cas control ko7 positive wt
## 3 1 3 3 1 3
## The number of samples by batch are:
##
## e1 e2 e3 undef
## 4 4 5 1
I picked off another TODO in here, I changed plot_nonzero to more intelligently set the text annotation.
## The following samples have less than 16315 genes.
## [1] "02_HL_c_60" "18_HL_c_60" "34_HL_c_60" "pos_ctrl"
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the hpgltools package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## A non-zero genes plot of 14 samples.
## These samples have an average 1.475 CPM coverage and 15473 genes observed, ranging from 36 to
## 22625.
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## The samples (and read coverage) removed when filtering 5000 non-zero genes are:
## 02_HeLa_control_60h 18_HeLa_control_60h 34_HeLa_control_60h pos_ctrl
## 122 663 107 48
## 02_HeLa_control_60h 18_HeLa_control_60h 34_HeLa_control_60h pos_ctrl
## 81 523 91 36
## Samples removed: 81, 523, 91, 36
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## A non-zero genes plot of 10 samples.
## These samples have an average 2.064 CPM coverage and 21590 genes observed, ranging from 20421 to
## 22625.
## Library sizes of 14 samples,
## ranging from 14,183,768 to 29,076,224.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## A non-zero genes plot of 14 samples.
## These samples have an average 20.55 CPM coverage and 15472 genes observed, ranging from 15027 to
## 17380.
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider increasing max.overlaps
## 85373 entries are 0. We are on a log scale, adding 1 to the data.
## Plot describing the gene distribution from a dataset.
## Library sizes of 10 samples,
## ranging from 791,179 to 3,382,177.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## A non-zero genes plot of 10 samples.
## These samples have an average 2.064 CPM coverage and 21590 genes observed, ranging from 20421 to
## 22625.
## 35096 entries are 0. We are on a log scale, adding 1 to the data.
## Plot describing the gene distribution from a dataset.
## Removing samples with less than 3 replicates.
## Removed: 08_HeLa_Cas_60hpi, pos_ctrl.
tc_replicated <- subset_se(tc_se, min_replicates = 3, fact = "condition") %>%
subset_se(nonzero = 10000)## Removing samples with less than 3 replicates.
## Removed: 08_HeLa_Cas_60hpi.
## No samples have fewer than 10000 observed genes.
Here are a couple of variance partition invocations. Once we have other metadata, this will be more useful. Note, this will only work once the non-replicated conditions are removed (control and cas).
## The model of ~ condition + batch has 6 levels and rank 6
##
## The model of ~ condition + batch has 5 levels and rank 5
## Warning in .fitExtractVarPartModel(exprObj, formula, data, REML = REML, : Model failed for 11 responses.
## See errors with attr(., 'errors')
##
I think we probably should not be surprised at the amount of variance attributed to the batch due to the very large difference in coverage between experiment #3 and 1/2.
Perform our default PCA plot along with a combat version.
hs_norm <- normalize(hs_replicated, transform = "log2", convert = "cpm",
norm = "quant", filter = TRUE)## Removing 9809 low-count genes (11762 remaining).
## Setting 1672 entries to zero.
## A heatmap of pairwise sample distances ranging from:
## 16.2110615399822 to 91.122185751637.
## A heatmap of pairwise sample correlations ranging from:
## 0.916078357207824 to 0.997343768762044.
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by AB10, control, ko7, wt
## Shapes are defined by e1, e2, e3.
hs_nb <- normalize(hs_replicated, transform = "log2", convert = "cpm",
filter = TRUE, batch = "svaseq")## Removing 9809 low-count genes (11762 remaining).
## transform_counts: Found 25 values less than 0.
## transform_counts: Found 25 values equal to 0, adding 1 to the matrix.
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by AB10, control, ko7, wt
## Shapes are defined by e1, e2, e3.
hs_cb <- normalize(hs_replicated, transform = "log2", convert = "cpm",
filter = TRUE, batch = "combat")## Removing 9809 low-count genes (11762 remaining).
## transform_counts: Found 84 values less than 0.
## transform_counts: Found 89 values equal to 0, adding 1 to the matrix.
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by AB10, control, ko7, wt
## Shapes are defined by e1, e2, e3.
## Removing 4102 low-count genes (20998 remaining).
## transform_counts: Found 52 values equal to 0, adding 1 to the matrix.
## A heatmap of pairwise sample distances ranging from:
## 62.9109619364137 to 117.665159976867.
## A heatmap of pairwise sample correlations ranging from:
## 0.905660328467753 to 0.973093779177062.
high_genes <- rownames(norm_boxplot[["high_outlier_df"]])
fData(tc_se)[high_genes, ][["annot_transcript_product"]]## [1] "alpha tubulin, putative"
## [2] "undefined"
## [3] "undefined"
## [4] "undefined"
## [5] "undefined"
## [6] "undefined"
## [7] "undefined"
## [8] "Voltage-dependent calcium channel subunit, putative"
## [9] "enolase"
## [10] "zinc finger CCCH domain containing protein 11"
## [11] "60S ribosomal protein L6, putative"
## [12] "clathrin heavy chain, putative"
## [13] "ribosomal protein l35a, putative"
## [14] "conserved protein"
## [15] "cyclophilin a, putative"
## [16] "hypothetical protein, conserved"
## [17] "ABC transporter, putative"
## [18] "trans-sialidase, Group II, putative"
## [19] "ABC transporter, putative"
## [20] "zinc finger CCCH domain containing protein 11"
## [21] "polyadenylate-binding protein, putative"
## [22] "metallo-peptidase, Clan MF, Family M17"
## [23] "ubiquitin-protein ligase-like, putative"
## [24] "60S ribosomal protein L19, putative"
## [25] "60S ribosomal protein L23a, putative"
## [26] "RNA-binding protein 42 (RNA-binding motif protein 42), putative"
## [27] "CCR4-NOT transcription complex subunit 1"
## [28] "dynein heavy chain, putative"
## [29] "ABC transporter, putative"
## [30] "mitochondrial RNA binding complex 1 subunit, putative"
## [31] "60S ribosomal protein L10a, putative"
## [32] "hypothetical protein"
## [33] "cell division cycle protein 20, putative"
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by AB10, cas, ko7, wt
## Shapes are defined by e1, e2, e3.
tc_rnorm <- normalize(tc_replicated, transform = "log2", convert = "cpm",
norm = "quant", filter = TRUE)## Removing 4164 low-count genes (20936 remaining).
## transform_counts: Found 47 values equal to 0, adding 1 to the matrix.
## A heatmap of pairwise sample distances ranging from:
## 63.0136687923128 to 117.863511218509.
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by AB10, ko7, wt
## Shapes are defined by e1, e2, e3.
tc_rbnorm <- normalize(tc_replicated, transform = "log2", convert = "cpm",
filter = TRUE, batch = "svaseq")## Removing 4164 low-count genes (20936 remaining).
## transform_counts: Found 1252 values less than 0.
## transform_counts: Found 1252 values equal to 0, adding 1 to the matrix.
## The result of performing a fast_svd dimension reduction.
## The x-axis is PC1 and the y-axis is PC2
## Colors are defined by AB10, ko7, wt
## Shapes are defined by e1, e2, e3.
I am not thinking we will see many genes of interest.
hs_keepers <- list(
"ab_vs_control" = c("AB10", "control"),
"ko_vs_control" = c("ko7", "control"),
"ko_vs_wt" = c("ko7", "wt"),
"ab_vs_wt" = c("AB10", "wt"),
"ab_vs_ko" = c("AB10", "ko7"))
hs_de <- all_pairwise(hs_replicated, filter = TRUE, model_fstring = "~ 0 + condition",
model_svs = "svaseq")## AB10 control ko7 wt
## 3 3 3 3
## Removing 9809 low-count genes (11762 remaining).
## Basic step 0/3: Normalizing data.
## Basic step 0/3: Converting data.
## I think this is failing? SummarizedExperiment
## Basic step 0/3: Transforming data.
## Setting 1603 entries to zero.
## This received a matrix of SVs.
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## conditions
## AB10 control ko7 wt
## 3 3 3 3
## conditions
## AB10 control ko7 wt
## 3 3 3 3
## conditions
## AB10 control ko7 wt
## 3 3 3 3
## A pairwise differential expression with results from: basic, deseq, ebseq, edger, limma, noiseq.
## This used a surrogate/batch estimate from: svaseq.
## The primary analysis performed 6 comparisons.
## Deleting the file excel/hs_tables.xlsx before writing the tables.
## Looking for subscript invalid names, start of extract_keepers.
## Looking for subscript invalid names, end of extract_keepers.
## A set of combined differential expression results.
## table deseq_sigup deseq_sigdown edger_sigup edger_sigdown limma_sigup limma_sigdown
## 1 control_vs_AB10-inverted 16 29 25 43 2 1
## 2 ko7_vs_control 82 20 104 28 4 0
## 3 wt_vs_ko7-inverted 0 3 0 4 0 0
## 4 wt_vs_AB10-inverted 0 1 0 1 0 0
## 5 ko7_vs_AB10-inverted 0 9 0 16 0 0
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the UpSetR package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## ℹ The deprecated feature was likely used in the UpSetR package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Plot describing unique/shared genes in a differential expression table.
## Deleting the file excel/hs_sig.xlsx before writing the tables.
## A set of genes deemed significant according to limma, edger, deseq, ebseq, basic.
## The parameters defining significant were:
## LFC cutoff: 1 adj P cutoff: 0.05
## limma_up limma_down edger_up edger_down deseq_up deseq_down ebseq_up ebseq_down basic_up
## ab_vs_control 2 1 25 43 16 29 11 0 0
## ko_vs_control 4 0 104 28 82 20 25 1 0
## ko_vs_wt 0 0 0 4 0 3 0 1 0
## ab_vs_wt 0 0 0 1 0 1 0 0 0
## ab_vs_ko 0 0 0 16 0 9 0 0 0
## basic_down
## ab_vs_control 0
## ko_vs_control 0
## ko_vs_wt 0
## ab_vs_wt 0
## ab_vs_ko 0
While it is true there are not a tremendous number of genes, at least some of the groups are interesting.
## BP CC CORUM HP HPA KEGG MIRNA MF REAC TF WP
## ab_vs_control_up 0 5 0 0 0 0 1 1 1 0 0
## ab_vs_control_down 4 1 0 0 0 0 0 4 0 0 0
## ko_vs_control_up 64 2 0 0 0 2 1 15 5 21 4
## ko_vs_control_down 12 5 1 0 0 1 1 8 2 0 0
## Warning in fortify(object, showCategory = showCategory, by = x, ...): Arguments in `...` must be used.
## ✖ Problematic argument:
## • by = x
## ℹ Did you misspell an argument name?
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • as.Date = as.Date
## • yscale_mapping = yscale_mapping
## • show.legend = FALSE
## ℹ Did you misspell an argument name?
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## $bar
##
## $cnet
##
## $dot
##
## $go
## Warning: ggrepel: 13 unlabeled data points (too many overlaps). Consider increasing max.overlaps
##
## $heat
##
## $map
##
## $ss
## NULL
##
## $termsim
## #
## # over-representation test
## #
## #...@organism UNKNOWN
## #...@ontology BP
## #...@gene chr [1:82] "ENSG00000049249" "ENSG00000115008" "ENSG00000128965" "ENSG00000139269" "ENSG00000170909" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05
## #...49 enriched terms found
## 'data.frame': 49 obs. of 9 variables:
## $ ID : chr "GO:0034103" "GO:0045071" "GO:0006952" "GO:0043207" ...
## $ Description: chr "regulation of tissue remodeling" "negative regulation of viral genome replication" "defense response" "response to external biotic stimulus" ...
## $ GeneRatio : chr "6/76" "6/57" "23/1853" "21/1595" ...
## $ BgRatio : chr "76/59" "57/80" "1853/80" "1595/80" ...
## $ pvalue : num 0.000126 0.000139 0.000417 0.000616 0.00063 ...
## $ p.adjust : num 0.000126 0.000139 0.000417 0.000616 0.00063 ...
## $ qvalue : num 0.000126 0.000139 0.000417 0.000616 0.00063 ...
## $ geneID : chr "ENSG00000170909/ENSG00000136244/ENSG00000136235/ENSG00000162733/ENSG00000154175/ENSG00000118503" "ENSG00000271503/ENSG00000157601/ENSG00000115267/ENSG00000172183/ENSG00000135114/ENSG00000187608" "ENSG00000115008/ENSG00000271503/ENSG00000124882/ENSG00000073756/ENSG00000157601/ENSG00000176046/ENSG00000136244"| __truncated__ "ENSG00000115008/ENSG00000271503/ENSG00000124882/ENSG00000073756/ENSG00000157601/ENSG00000136244/ENSG00000100342"| __truncated__ ...
## $ Count : int 6 6 23 21 2 2 2 2 16 18 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
##
##
## $tree
##
## $up
##
## $vol
## NULL
## Warning in fortify(object, showCategory = showCategory, by = x, ...): Arguments in `...` must be used.
## ✖ Problematic argument:
## • by = x
## ℹ Did you misspell an argument name?
## Arguments in `...` must be used.
## ✖ Problematic arguments:
## • as.Date = as.Date
## • yscale_mapping = yscale_mapping
## • show.legend = FALSE
## ℹ Did you misspell an argument name?
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## $bar
##
## $cnet
##
## $dot
##
## $go
## Warning: ggrepel: 98 unlabeled data points (too many overlaps). Consider increasing max.overlaps
##
## $heat
##
## $map
##
## $ss
## NULL
##
## $termsim
## #
## # over-representation test
## #
## #...@organism UNKNOWN
## #...@ontology BP
## #...@gene chr [1:20] "ENSG00000174600" "ENSG00000187689" "ENSG00000160201" "ENSG00000118194" "ENSG00000186340" ...
## #...pvalues adjusted by 'BH' with cutoff <0.05
## #...11 enriched terms found
## 'data.frame': 11 obs. of 9 variables:
## $ ID : chr "GO:0043567" "GO:0010286" "GO:0070370" "GO:0070434" ...
## $ Description: chr "regulation of insulin-like growth factor receptor signaling pathway" "heat acclimation" "cellular heat acclimation" "positive regulation of nucleotide-binding oligomerization domain containing 2 signaling pathway" ...
## $ GeneRatio : chr "3/23" "2/5" "2/5" "2/6" ...
## $ BgRatio : chr "23/17" "5/14" "5/14" "6/14" ...
## $ pvalue : num 0.0011 0.00587 0.00587 0.00881 0.01233 ...
## $ p.adjust : num 0.0011 0.00587 0.00587 0.00881 0.01233 ...
## $ qvalue : num 0.0011 0.00587 0.00587 0.00881 0.01233 ...
## $ geneID : chr "ENSG00000115457/ENSG00000115461/ENSG00000017427" "ENSG00000204388/ENSG00000204389" "ENSG00000204388/ENSG00000204389" "ENSG00000204388/ENSG00000204389" ...
## $ Count : int 3 2 2 2 2 3 2 2 2 4 ...
## #...Citation
## G Yu. Thirteen years of clusterProfiler. The Innovation. 2024, 5(6):100722
##
##
## $tree
##
## $up
##
## $vol
## NULL
I think it is noteworthy that there is a difference in T cell proliferation.
## Error: unable to find an inherited method for function 'conditions' for signature 'object = "SummarizedExperiment"'
tc_keepers <- list(
"ab_vs_wt" = c("AB10", "wt"),
"ko_vs_wt" = c("ko7", "wt"),
"ab_vs_ko" = c("AB10", "ko7"))
tc_de <- all_pairwise(tc_replicated, filter = TRUE, model_fstring = "~ 0 + condition",
model_svs = "svaseq")## AB10 ko7 wt
## 3 3 3
## Removing 4164 low-count genes (20936 remaining).
## Basic step 0/3: Normalizing data.
## Basic step 0/3: Converting data.
## I think this is failing? SummarizedExperiment
## Basic step 0/3: Transforming data.
## Setting 6919 entries to zero.
## This received a matrix of SVs.
## converting counts to integer mode
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## conditions
## AB10 ko7 wt
## 3 3 3
## conditions
## AB10 ko7 wt
## 3 3 3
## conditions
## AB10 ko7 wt
## 3 3 3
## A pairwise differential expression with results from: basic, deseq, ebseq, edger, limma, noiseq.
## This used a surrogate/batch estimate from: svaseq.
## The primary analysis performed 3 comparisons.
## Deleting the file excel/tc_tables.xlsx before writing the tables.
## Looking for subscript invalid names, start of extract_keepers.
## Looking for subscript invalid names, end of extract_keepers.
## A set of combined differential expression results.
## table deseq_sigup deseq_sigdown edger_sigup edger_sigdown limma_sigup limma_sigdown
## 1 wt_vs_AB10-inverted 39 349 101 603 92 300
## 2 wt_vs_ko7-inverted 48 44 115 147 83 60
## 3 ko7_vs_AB10-inverted 11 259 44 428 18 220
## Plot describing unique/shared genes in a differential expression table.
## Deleting the file excel/tc_sig.xlsx before writing the tables.
## A set of genes deemed significant according to limma, edger, deseq, ebseq, basic.
## The parameters defining significant were:
## LFC cutoff: 1 adj P cutoff: 0.05
## limma_up limma_down edger_up edger_down deseq_up deseq_down ebseq_up ebseq_down basic_up
## ab_vs_wt 92 300 101 603 39 349 51 277 0
## ko_vs_wt 83 60 115 147 48 44 114 39 0
## ab_vs_ko 18 220 44 428 11 259 8 222 0
## basic_down
## ab_vs_wt 0
## ko_vs_wt 0
## ab_vs_ko 0
We cannot use gProfiler2 with the parasite because it is not a reference species; but other ontology methods are not constrained thus. In the case of clusterProfiler, there is another constraint, I do not have a single orgDB object which comprises Esmer/NonEsmer/Unassigned; as a result I must attempt the ontology search on the haplotypes separately.
ko_wt_up <- tc_sig[["deseq"]][["ups"]][["ko_vs_wt"]]
ko_wt_down <- tc_sig[["deseq"]][["downs"]][["ko_vs_wt"]]
ko_wt_all <- tc_tables[["data"]][["ko_vs_wt"]]
tc_esmer_up_cp <- simple_clusterprofiler(
ko_wt_up, de_table = ko_wt_all, orgdb = esmer_db, orgdb_to = "GID",
organism = "tcruzi", excel = "excel/ko_wt_up_cp_esmer.xlsx")## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (31.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : There were 25
## pathways for which P-values were not calculated properly due to unbalanced (positive and negative)
## gene-level statistic values. For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some of the
## pathways the P-values were likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some pathways, in
## reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## Deleting the file excel/ko_wt_up_cp_esmer.xlsx before writing the tables.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 10, TcCLB.409625 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 10, TcCLB.409625 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 10, TcCLB.409625 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 10, TcCLB.409625 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
tc_nonesmer_up_cp <- simple_clusterprofiler(
ko_wt_up, de_table = ko_wt_all, orgdb = nonesmer_db, orgdb_to = "GID",
organism = "tcruzi", excel = "excel/ko_wt_up_cp_nonesmer.xlsx")## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (30.08% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : There were 70
## pathways for which P-values were not calculated properly due to unbalanced (positive and negative)
## gene-level statistic values. For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some of the
## pathways the P-values were likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some pathways, in
## reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## Deleting the file excel/ko_wt_up_cp_nonesmer.xlsx before writing the tables.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : TcCLB.409625.10 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : TcCLB.409625.10 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : TcCLB.409625.10 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : TcCLB.409625.10 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
tc_unas_up_cp <- simple_clusterprofiler(
ko_wt_up, de_table = ko_wt_all, orgdb = unas_db, orgdb_to = "GID",
organism = "tcruzi")## --> No gene can be mapped....
## --> Expected input gene ID: TcCLB.457101.20,TcCLB.509961.90,TcCLB.507153.10,TcCLB.510783.11,TcCLB.411235.9,TcCLB.505051.20
## --> return NULL...
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (67.59% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : There were 34
## pathways for which P-values were not calculated properly due to unbalanced (positive and negative)
## gene-level statistic values. For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some of the
## pathways the P-values were likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some pathways, in
## reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
tc_esmer_up_cp
tc_esmer_down_cp <- simple_clusterprofiler(
ko_wt_down, de_table = ko_wt_all, orgdb = esmer_db, orgdb_to = "GID",
organism = "tcruzi", excel = "excel/ko_wt_down_cp_esmer.xlsx")## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (31.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : There were 25
## pathways for which P-values were not calculated properly due to unbalanced (positive and negative)
## gene-level statistic values. For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some of the
## pathways the P-values were likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some pathways, in
## reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## Deleting the file excel/ko_wt_down_cp_esmer.xlsx before writing the tables.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 10, TcCLB.409625 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 10, TcCLB.409625 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 10, TcCLB.409625 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 10, TcCLB.409625 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : 671.90, TcCLB.50 ... TcCLB.511921.140 is truncated.
## Number of characters exeed the limit of 32767.
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
tc_nonesmer_down_cp <- simple_clusterprofiler(
ko_wt_down, de_table = ko_wt_all, orgdb = nonesmer_db, orgdb_to = "GID",
organism = "tcruzi", excel = "excel/ko_wt_down_cp_nonesmer.xlsx")## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
##
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (30.08% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : There were 36
## pathways for which P-values were not calculated properly due to unbalanced (positive and negative)
## gene-level statistic values. For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some of the
## pathways the P-values were likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some pathways, in
## reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
## Deleting the file excel/ko_wt_down_cp_nonesmer.xlsx before writing the tables.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : TcCLB.409625.10 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : TcCLB.409625.10 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : TcCLB.409625.10 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : TcCLB.409625.10 ... TcCLB.511923.60 is truncated.
## Number of characters exeed the limit of 32767.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## Warning in wb$writeData(df = x, colNames = TRUE, sheet = sheet, startRow = startRow, : CLB.506965.20, T ... TcCLB.511921.140 is truncated.
## Number of characters exeed the limit of 32767.
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
tc_unas_down_cp <- simple_clusterprofiler(
ko_wt_down, de_table = ko_wt_all, orgdb = unas_db, orgdb_to = "GID",
organism = "tcruzi")## --> No gene can be mapped....
## --> Expected input gene ID: TcCLB.505051.20,TcCLB.507153.10,TcCLB.506189.10,TcCLB.509961.90,TcCLB.470399.10,TcCLB.507697.10
## --> return NULL...
## using 'fgsea' for GSEA analysis, please cite Korotkevich et al (2019).
##
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (67.59% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : There were 20
## pathways for which P-values were not calculated properly due to unbalanced (positive and negative)
## gene-level statistic values. For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple = 10000)
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some of the
## pathways the P-values were likely overestimated. For such pathways log2err is set to NA.
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize = minSize, : For some pathways, in
## reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## leading edge analysis...
## done...
tc_esmer_down_cp
length_db <- as.data.frame(rowData(tc_se))
length_db[["gid"]] <- rownames(length_db)
length_db <- length_db[, c("gid", "width")]
tc_up_gs <- simple_goseq(ko_wt_up, go_db = tc_go, length_db = length_db, min_xref = 10)## Found 29 go_db genes and 48 length_db genes out of 48.
## Testing that go categories are defined.
## Removing undefined categories.
## Gathering synonyms.
## Gathering category definitions.
## Warning in fortify(object, showCategory = showCategory, by = x, ...): Arguments in `...` must be used.
## ✖ Problematic argument:
## • by = x
## ℹ Did you misspell an argument name?
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • as.Date = as.Date
## • yscale_mapping = yscale_mapping
## • show.legend = FALSE
## ℹ Did you misspell an argument name?
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## Warning in fortify(object, showCategory = showCategory, by = x, ...): Arguments in `...` must be used.
## ✖ Problematic argument:
## • by = x
## ℹ Did you misspell an argument name?
Now check the position of the expected lower expression genes in the context of all genes compared to wt.
## Pull the ko_wt_all table and see where expected_lower compares.