This dataset contains multiple experiments.
load_gff_annotations("reference/paeruginosa_pa14.gff", id_col="gene_id") pa14_gff <-
## Trying attempt: rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo = TRUE)
## Had a successful gff import with rtracklayer::import.gff3(gff, sequenceRegionsAsSeqinfo = TRUE)
## Returning a df with 16 columns and 11946 rows.
rownames(pa14_gff) <- pa14_gff[["gene_id"]]
## The Alias column has PA14_00010
as.data.frame(load_microbesonline_annotations("PA14")) pa14_microbes <-
## Found 1 entry.
## Pseudomonas aeruginosa UCBPP-PA14Proteobacteria2006-11-22yes105972208963
## The species being downloaded is: Pseudomonas aeruginosa UCBPP-PA14
## Downloading: http://www.microbesonline.org/cgi-bin/genomeInfo.cgi?tId=208963;export=tab
## The sysName column has PA14_0010
merge(pa14_gff, pa14_microbes, by.x="Alias", by.y="sysName")
pa14_annot <-rownames(pa14_annot) <- pa14_annot[["gene_id"]]
load_microbesonline_go(species="PA14") pa14_go <-
## Found 1 entry.
## Pseudomonas aeruginosa UCBPP-PA14Proteobacteria2006-11-22yes105972208963
## The species being downloaded is: Pseudomonas aeruginosa UCBPP-PA14 and is being downloaded as 208963.tab.
Given the above annotations, now lets pull in the counts.
I am switching to the sheet all_samples_modified_gcd.xlsx for the moment because I added a space in the strain name for the gcd sampl
create_expt("sample_sheets/all_samples_modified_gcd.xlsx",
pa14_expt <-gene_info=pa14_annot, file_column="hisatcounttable")
## Reading the sample metadata.
## Did not find the condition column in the sample sheet.
## Filling it in as undefined.
## Did not find the batch column in the sample sheet.
## Filling it in as undefined.
## The sample definitions comprises: 105 rows(samples) and 31 columns(metadata fields).
## Matched 5972 annotations and counts.
## Bringing together the count matrix and gene information.
## Some annotations were lost in merging, setting them to 'undefined'.
## Saving the expressionset to 'expt.rda'.
## The final expressionset has 5979 rows and 105 columns.
While we are at it, lets drop the two sad samples.
plot_libsize(pa14_expt)
pa14_libsize <-$plot pa14_libsize
plot_nonzero(pa14_expt)
pa14_nonzero <-$plot pa14_nonzero
## Warning: ggrepel: 100 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
subset_expt(pa14_expt, nonzero=5500) pa14_expt <-
## The samples (and read coverage) removed when filtering 5500 non-zero genes are:
## SM040 SM048
## 43984 316632
## subset_expt(): There were 105, now there are 103 samples.
normalize_expt(sk_expt, transform="log2", convert="cpm", norm="quant", filter=TRUE) sk_norm <-
## Error in normalize_expt(sk_expt, transform = "log2", convert = "cpm", : object 'sk_expt' not found
plot_pca(sk_norm)$plot
## Error in plot_pca(sk_norm): object 'sk_norm' not found
gsub(x=rownames(pData(pa14_expt)), pattern="^(..).*$", replacement="\\1")
initials_factor <-pData(pa14_expt)[["initials"]] <- as.factor(initials_factor)
subset_expt(pa14_expt, subset="initials=='SK'") sk_expt <-
## subset_expt(): There were 103, now there are 45 samples.
subset_expt(sk_expt, subset="media!='LB'") %>%
alginate_expt <- set_expt_batches(fact="bioreplicate") %>%
set_expt_conditions(fact="media")
## subset_expt(): There were 45, now there are 36 samples.
subset_expt(sk_expt, subset="media=='LB'") %>%
rph_expt <- set_expt_batches(fact="bioreplicate") %>%
set_expt_conditions(fact="strains") %>%
sanitize_expt_metadata()
## subset_expt(): There were 45, now there are 9 samples.
There are two media, an inducer, and multiple methods of reducing expression…
normalize_expt(alginate_expt, transform="log2",
alg_norm <-convert="cpm", norm="quant", filter=TRUE)
## Removing 18 low-count genes (5961 remaining).
## transform_counts: Found 7 values equal to 0, adding 1 to the matrix.
plot_pca(alg_norm)$plot
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
pData(alginate_expt)[["condition"]]
conditions <- grepl(pattern="IPTG", x=conditions)
induce_bool <- rep("not_induced", length(induce_bool))
induce_factor <- "IPTG"
induce_factor[induce_bool] <-
grepl(pattern="Eb", x=conditions)
reduce_bool <- rep("not_reduced", length(reduce_bool))
reduce_factor <- grepl(pattern="Eb", x=conditions)
eb_samples <- "Eb"
reduce_factor[eb_samples] <- grepl(pattern="EbO", x=conditions)
ebo_samples <- "EbO"
reduce_factor[ebo_samples] <- grepl(pattern="EbS", x=conditions)
ebs_samples <- "EbS"
reduce_factor[ebs_samples] <-
conditions
media_factor <- grepl(pattern="LB", x=conditions)
lb_bool <- "LB"
media_factor[lb_bool] <-!lb_bool] <- "SCFM"
media_factor[
pData(alginate_expt)[["media_add"]] <- pData(alginate_expt)[["media"]]
pData(alginate_expt)[["media"]] <- media_factor
pData(alginate_expt)[["induced"]] <- as.factor(induce_factor)
pData(alginate_expt)[["reduced"]] <- as.factor(reduce_factor)
set_expt_conditions(alginate_expt, fact="media") %>%
alginate_media <- set_expt_batches(fact="bioreplicate")
normalize_expt(alginate_media, transform="log2",
alginate_media_norm <-convert="cpm", norm="quant", filter=TRUE)
## Removing 18 low-count genes (5961 remaining).
## transform_counts: Found 7 values equal to 0, adding 1 to the matrix.
plot_pca(alginate_media_norm)$plot
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
set_expt_conditions(alginate_expt, fact="induced") %>%
alginate_induce <- set_expt_batches(fact="bioreplicate")
normalize_expt(alginate_induce, transform="log2",
alginate_induce_norm <-convert="cpm", norm="quant", filter=TRUE)
## Removing 18 low-count genes (5961 remaining).
## transform_counts: Found 7 values equal to 0, adding 1 to the matrix.
plot_pca(alginate_induce_norm)$plot
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
set_expt_conditions(alginate_expt, fact="reduced") %>%
alginate_reduce <- set_expt_batches(fact="bioreplicate")
normalize_expt(alginate_reduce, transform="log2",
alginate_reduce_norm <-convert="cpm", norm="quant", filter=TRUE)
## Removing 18 low-count genes (5961 remaining).
## transform_counts: Found 7 values equal to 0, adding 1 to the matrix.
plot_pca(alginate_reduce_norm)$plot
## Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
paste0(pData(alginate_expt)[["induced"]], "_",
both_fact <-pData(alginate_expt)[["reduced"]])
pData(alginate_expt)[["induce_reduce"]] <- both_fact
set_expt_conditions(alginate_expt, fact="induce_reduce") %>%
alginate_both <- set_expt_batches(fact="bioreplicate")
normalize_expt(alginate_both, transform="log2", batch="svaseq",
alginate_both_norm <-convert="cpm", norm="quant", filter=TRUE)
## Warning in normalize_expt(alginate_both, transform = "log2", batch = "svaseq", :
## Quantile normalization and sva do not always play well together.
## Removing 18 low-count genes (5961 remaining).
## batch_counts: Before batch/surrogate estimation, 7 entries are x==0: 0%.
## batch_counts: Before batch/surrogate estimation, 2144 entries are 0<x<1: 1%.
## Setting 60 low elements to zero.
## transform_counts: Found 60 values equal to 0, adding 1 to the matrix.
plot_pca(alginate_both_norm)$plot
normalize_expt(rph_expt, transform="log2",
rph_norm <-convert="cpm", norm="quant", filter=TRUE)
## Removing 71 low-count genes (5908 remaining).
## transform_counts: Found 4 values equal to 0, adding 1 to the matrix.
plot_pca(rph_norm)$plot
all_pairwise(rph_expt, model_batch=TRUE, filter=TRUE) rph_de <-
## Plotting a PCA before surrogate/batch inclusion.
## Using limma's removeBatchEffect to visualize with(out) batch inclusion.
## Finished running DE analyses, collecting outputs.
## Comparing analyses.
combine_de_tables(
rph_tables <-
rph_de,excel=glue::glue("excel/rph_tables-v{ver}.xlsx"))
## Deleting the file excel/rph_tables-v20220131.xlsx before writing the tables.
extract_significant_genes(
rph_sig <-
rph_tables,excel=glue::glue("excel/rph_sig-v{ver}.xlsx"))
## Deleting the file excel/rph_sig-v20220131.xlsx before writing the tables.
::pander(sessionInfo())
pandermessage(paste0("This is hpgltools commit: ", get_git_commit()))
paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
this_save <-message(paste0("Saving to ", this_save))
sm(saveme(filename=this_save)) tmp <-