In my continued effort to better document my work, I am rerunning my 2015 analyses of our S. pyogenes 5448 tnseq. Hopefully little effort will be required. One important caveat: this document used the precursor to the hpgltools: ‘myr’, which was a not-terrible first effort for an R package. But it was of much lower quality than its successor.
Looking further down, fairly significant changes will need to be made to get later analyses to work. So rather than delete what is here, I am going to create new code blocks following every existing code block with material which represents the current state of my work.
Here we can see how I used to load myr and its accessory code. This has been improved.
## Time-stamp: <Tue Dec 2 17:07:18 2014 Ashton Trey Belew (abelew@gmail.com)>
rm(list=ls())
load("RData.0")
library(myr)
autoloads()
save(list = ls(all=TRUE), file="RData.0")
These options have also been significantly improved since I wrote the original document.
opts_knit$set(fig.width=800/192, fig.height=800/192, dpi=192, error=TRUE, stop_on_error=FALSE)
save(list = ls(all=TRUE), file="RData")
render("tnseq_v0.rmd", output_format="html_document")
#knit("rnaseq_mga.rmd")
render("tnseq_v0.rmd", output_format="pdf_document")
## A reminder for emacs (ess-resynch)
This also has been improved, it is now maintained within hpgltools.
theme_set(theme_bw(base_size = 10))
I am super curious to see how much of this old code works! I am thinking that I will just add some library invocations to try to change as little as possible of the original code.
library(rtracklayer) ## import.gff3
library(XLConnect) ## loadWorkBook and such
library(Biobase) ## Creating annotateddataframes
library(Rsamtools) ## FaFile
library(ggplot2)
library(edgeR)
library(limma)
## annotations_5448 = import.gff3("reference/genbank/mgas_5005.gff", asRangedData=FALSE)
annotations_5448 = import.gff3("reference/genbank/mgas_5005.gff") ## asRangedData has been deprecated since I wrote this.
annotation_5448_info = as.data.frame(annotations_5448)
rownames(annotation_5448_info) = make.names(annotation_5448_info$locus_tag, unique=TRUE)
genes_5448 = annotation_5448_info[annotation_5448_info$type=="gene",]
rownames(genes_5448) = make.names(genes_5448$locus_tag, unique=TRUE)
gene_annotations_5448 = subset(genes_5448, select = c("start", "end", "width", "strand", "gene", "locus_tag", "old_locus_tag"))
short_annotations_5448 = gene_annotations_5448[,c("gene", "locus_tag", "old_locus_tag")]
write_xls(short_annotations_5448, "genes", rowname="ID", file="excel/5448_data_v0.xls")
microbes_5448 = read.csv(file="reference/microbesonline/MGAS_5005_annotations.tab.gz", header=1, sep="\t")
microbes_go_5448 = microbes_5448[,c("sysName","GO")]
go_entries_5448 = strsplit(as.character(microbes_go_5448$GO), split=",", perl=TRUE)
microbes_go_oneperrow_5448 = data.frame(name = rep(microbes_go_5448$sysName, sapply(go_entries_5448, length)), GO = unlist(go_entries_5448))
microbes_go_5448 = microbes_go_oneperrow_5448
rm(microbes_go_oneperrow_5448)
rm(go_entries_5448)
## These are used for gene ontology stuff...
microbes_lengths_5448 = microbes_5448[,c("sysName", "start","stop")]
microbes_lengths_5448$length = abs(microbes_5448$start - microbes_5448$stop)
microbes_lengths_5448 = microbes_lengths_5448[,c("sysName","length")]
tooltip_data_5448 = genes_5448
tooltip_data_5448 = tooltip_data_5448[,c("gene", "locus_tag")]
tooltip_data_5448$tooltip = paste(tooltip_data_5448$gene, tooltip_data_5448$locus_tag, sep=": ")
tooltip_data_5448$tooltip = gsub("\\+", " ", tooltip_data_5448$tooltip)
tooltip_data_5448 = tooltip_data_5448[-1]
tooltip_data_5448 = tooltip_data_5448[-1]
head(tooltip_data_5448)
## tooltip
## M5005_Spy0001 dnaA: M5005_Spy0001
## M5005_Spy0002 dnaN: M5005_Spy0002
## M5005_Spy0003 NA: M5005_Spy0003
## M5005_Spy0004 NA: M5005_Spy0004
## M5005_Spy0005 pth: M5005_Spy0005
## M5005_Spy0006 trcF: M5005_Spy0006
library_colors = list("9"="yellow4",
"11"="darkgreen",
"12"="red",
"34"="darkblue")
time_colors = list(t0="white",
t1="lightgray",
t2="darkgray",
t3="black")
sample_definitions_5448 = read.csv(file="all_samples_5448_v0.csv", sep=",")
sample_definitions_5448 = sample_definitions_5448[grepl('^HPGL', sample_definitions_5448$Sample.ID, perl=TRUE),]
sample_definitions_5448$colors = library_colors[ as.character(sample_definitions_5448$Library) ]
sample_definitions_5448$counts = paste("data/count_tables/05v0M1l20gen_", sample_definitions_5448$Strain, "_",sample_definitions_5448$Replicate, sample_definitions_5448$Time, "_genome.count.gz", sep="")
sample_definitions_5448 = as.data.frame(sample_definitions_5448)
rownames(sample_definitions_5448) = sample_definitions_5448$Sample.ID
all_count_tables_5448 = my_read_files(as.character(sample_definitions_5448$Sample.ID), as.character(sample_definitions_5448$counts))
all_count_matrix_5448 = as.matrix(all_count_tables_5448)
gene_info_5448 = all_count_matrix_5448[rownames(all_count_matrix_5448) %in% genes_5448$locus_tag,]
all_count_matrix_5448 = all_count_matrix_5448[rownames(all_count_matrix_5448) %in% genes_5448$locus_tag,]
metadata_5448 = new("AnnotatedDataFrame", data.frame(sample=sample_definitions_5448$Sample.ID,
condition=sample_definitions_5448$Time,
batch=sample_definitions_5448$Library,
strain=sample_definitions_5448$Strain,
color=as.character(sample_definitions_5448$colors),
counts=sample_definitions_5448$counts))
sampleNames(metadata_5448) = colnames(all_count_matrix_5448)
feature_data_5448 = new("AnnotatedDataFrame", as.data.frame(gene_info_5448))
featureNames(feature_data_5448) = rownames(all_count_matrix_5448)
experiment_5448 = new("ExpressionSet", exprs=all_count_matrix_5448,
phenoData=metadata_5448, featureData=feature_data_5448)
print(experiment_5448)
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 1926 features, 16 samples
## element names: exprs
## protocolData: none
## phenoData
## sampleNames: HPGL0259 HPGL0260 ... HPGL0274 (16 total)
## varLabels: sample condition ... counts (6 total)
## varMetadata: labelDescription
## featureData
## featureNames: M5005_Spy0001 M5005_Spy0002 ... M5005_SpyT0067
## (1926 total)
## fvarLabels: HPGL0259 HPGL0260 ... HPGL0274 (16 total)
## fvarMetadata: labelDescription
## experimentData: use 'experimentData(object)'
## Annotation:
summary(exprs(experiment_5448))
## HPGL0259 HPGL0260 HPGL0261 HPGL0262
## Min. : 0 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 111 1st Qu.: 185 1st Qu.: 36 1st Qu.: 0
## Median : 612 Median : 1849 Median : 467 Median : 64
## Mean : 1433 Mean : 5922 Mean : 4840 Mean : 4030
## 3rd Qu.: 1519 3rd Qu.: 4636 3rd Qu.: 1398 3rd Qu.: 262
## Max. :237128 Max. :1402918 Max. :2951209 Max. :5625610
## HPGL0263 HPGL0264 HPGL0265 HPGL0266
## Min. : 0 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 1 1st Qu.: 0 1st Qu.: 0 1st Qu.: 1
## Median : 52 Median : 32 Median : 3 Median : 13
## Mean : 4278 Mean : 4898 Mean : 4314 Mean : 5936
## 3rd Qu.: 1801 3rd Qu.: 1297 3rd Qu.: 945 3rd Qu.: 1062
## Max. :149218 Max. :444931 Max. :288152 Max. :709399
## HPGL0267 HPGL0268 HPGL0269 HPGL0270
## Min. : 0 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 0 1st Qu.: 1 1st Qu.: 0 1st Qu.: 0
## Median : 145 Median : 162 Median : 86 Median : 16
## Mean : 4403 Mean : 5614 Mean : 4890 Mean : 6657
## 3rd Qu.: 1520 3rd Qu.: 1734 3rd Qu.: 957 3rd Qu.: 294
## Max. :1578442 Max. :1915635 Max. :2516667 Max. :6723426
## HPGL0271 HPGL0272 HPGL0273 HPGL0274
## Min. : 0 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 0 1st Qu.: 25 1st Qu.: 0 1st Qu.: 1
## Median : 78 Median : 576 Median : 48 Median : 32
## Mean : 289 Mean : 6819 Mean : 4977 Mean : 5931
## 3rd Qu.: 321 3rd Qu.: 2311 3rd Qu.: 371 3rd Qu.: 150
## Max. :31570 Max. :4770693 Max. :2709879 Max. :2992917
head(fData(experiment_5448))
## HPGL0259 HPGL0260 HPGL0261 HPGL0262 HPGL0263 HPGL0264
## M5005_Spy0001 1 115 34 0 1 0
## M5005_Spy0002 48 102 87 40 0 0
## M5005_Spy0003 169 521 44 0 0 0
## M5005_Spy0004 1165 2126 877 510 3 23
## M5005_Spy0005 0 5 1 0 3 0
## M5005_Spy0006 10774 34771 15317 7244 4341 2834
## HPGL0265 HPGL0266 HPGL0267 HPGL0268 HPGL0269 HPGL0270
## M5005_Spy0001 0 3 2 2 0 0
## M5005_Spy0002 2 2 8 6 1 4
## M5005_Spy0003 4 0 0 1 0 1
## M5005_Spy0004 1 7 768 716 1029 409
## M5005_Spy0005 0 0 2 2 1 1
## M5005_Spy0006 5032 6313 7945 21140 9994 2537
## HPGL0271 HPGL0272 HPGL0273 HPGL0274
## M5005_Spy0001 0 20 1 1
## M5005_Spy0002 0 43 1 1
## M5005_Spy0003 0 0 0 0
## M5005_Spy0004 176 924 261 301
## M5005_Spy0005 39 0 2 0
## M5005_Spy0006 1309 14618 2390 2729
head(pData(experiment_5448))
## sample condition batch strain color
## HPGL0259 HPGL0259 t0 9 5448 yellow4
## HPGL0260 HPGL0260 t1 9 5448 yellow4
## HPGL0261 HPGL0261 t2 9 5448 yellow4
## HPGL0262 HPGL0262 t3 9 5448 yellow4
## HPGL0263 HPGL0263 t0 11 5448 darkgreen
## HPGL0264 HPGL0264 t1 11 5448 darkgreen
## counts
## HPGL0259 data/count_tables/05v0M1l20gen_5448_l1t0_genome.count.gz
## HPGL0260 data/count_tables/05v0M1l20gen_5448_l1t1_genome.count.gz
## HPGL0261 data/count_tables/05v0M1l20gen_5448_l1t2_genome.count.gz
## HPGL0262 data/count_tables/05v0M1l20gen_5448_l1t3_genome.count.gz
## HPGL0263 data/count_tables/05v0M1l20gen_5448_l2t0_genome.count.gz
## HPGL0264 data/count_tables/05v0M1l20gen_5448_l2t1_genome.count.gz
raw_data_5448 = exprs(experiment_5448)
write_xls(raw_data_5448, "raw_data", rowname="row.names", file="excel/5448_data_v0.xls")
gas_rawseq_5448 = FaFile("reference/genbank/mgas_5005.fasta")
cds_entries_5448 = import.gff3("reference/genbank/mgas_5005.gff")
cds_entries_5448 = subset(cds_entries_5448, type=="gene")
names(cds_entries_5448) = rownames(cds_entries_5448)
gas_cds_5448 = getSeq(gas_rawseq_5448, cds_entries_5448)
names(gas_cds_5448) = cds_entries_5448$locus_tag
pattern = "TA"
dict = PDict(pattern, max.mismatch=0)
result_5448 = vcountPDict(dict, gas_cds_5448)
num_tas_5448 = data.frame(name=names(gas_cds_5448), tas=as.data.frame(t(result_5448)))
Wow, so far I only needed to add some library() and remove asRangedData. Interestingly, what is coming has been pulled into hpgltools and standardized.
plot_saturation = function(file) {
## Test arguments
## file = "data/essentiality/mh_ess/5448/tas_t0v0.txt"
## End test arguments
table = read.table(file=file, header=1)
second_file = paste(file, "-inter", sep="")
second_table = read.table(file=second_file, header=1)
table = rbind(table, second_table)
data_list = as.numeric(table$reads)
max_reads = max(data_list, na.rm=TRUE)
log2_data_list = as.numeric(log2(table$reads + 1))
data_plot = my_histogram(log2_data_list, bins=500)
data_plot = data_plot + scale_x_continuous(limits=c(0,6)) + scale_y_continuous(limits=c(0,2))
print(sprintf("The maximum value is: %s ", max_reads))
print(summary(data_list))
raw = table(unlist(data_list))
num_zeros = raw[as.numeric(names(raw)) == 0]
num_zeros
num_gt_ones = raw[as.numeric(names(raw)) >= 1]
num_gt_one = sum(num_gt_ones)
num_gt_one
num_gt_twos = raw[as.numeric(names(raw)) >= 2]
num_gt_two = sum(num_gt_twos)
num_gt_two
num_gt_fours = raw[as.numeric(names(raw)) >= 4]
num_gt_four = sum(num_gt_fours)
num_gt_four
num_gt_eights = raw[as.numeric(names(raw)) >= 8]
num_gt_eight = sum(num_gt_eights)
num_gt_eight
num_gt_sixteens = raw[as.numeric(names(raw)) >= 16]
num_gt_sixteen = sum(num_gt_sixteens)
num_gt_sixteen
num_gt_thirtytwos = raw[as.numeric(names(raw)) >= 32]
num_gt_thirtytwo = sum(num_gt_thirtytwos)
num_gt_thirtytwo
saturation_ratio_1 = num_gt_one / num_zeros
print(sprintf("Saturation ratio of 1s to 0s is: %s", saturation_ratio_1))
saturation_ratio_8 = num_gt_eight / num_zeros
print(sprintf("Saturation ratio of 8 to 0s is: %s", saturation_ratio_8))
saturation_ratio_32 = num_gt_thirtytwo / num_zeros
print(sprintf("Saturation ratio of 32 to 0s is: %s", saturation_ratio_32))
print(sprintf("The number of zeros, ones, twos, fours, eights, sixteens, thirtytwos, saturation1, saturation8, saturation32 are: %s %s %s %s %s %s %s %s %s %s", num_zeros, num_gt_one, num_gt_two, num_gt_four, num_gt_eight, num_gt_sixteen, num_gt_thirtytwo, saturation_ratio_1, saturation_ratio_8, saturation_ratio_32))
return(data_plot)
}
### The following information will be dumped into ta_summary.csv
## Start with the combined libraries
plot_saturation("data/essentiality/mh_ess/5448/tas_t0v0.txt")
## [1] "The maximum value is: 1201324 "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 192 20 1201324
## [1] "Saturation ratio of 1s to 0s is: 0.666616652287533"
## [1] "Saturation ratio of 8 to 0s is: 0.525963714567938"
## [1] "Saturation ratio of 32 to 0s is: 0.338972454580692"
## [1] "The number of zeros, ones, twos, fours, eights, sixteens, thirtytwos, saturation1, saturation8, saturation32 are: 79977 53314 48250 45092 42065 36865 27110 0.666616652287533 0.525963714567938 0.338972454580692"
## Warning: Removed 18489 rows containing non-finite values (stat_bin).
## Warning: Removed 18489 rows containing non-finite values (stat_density).
## Warning: Removed 2 rows containing missing values (geom_bar).
plot_saturation("data/essentiality/mh_ess/5448/tas_t1v0.txt")
## [1] "The maximum value is: 2348282 "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 5 412 67 2348282
## [1] "Saturation ratio of 1s to 0s is: 1.52933697673536"
## [1] "Saturation ratio of 8 to 0s is: 1.18649664123876"
## [1] "Saturation ratio of 32 to 0s is: 0.845060533606589"
## [1] "The number of zeros, ones, twos, fours, eights, sixteens, thirtytwos, saturation1, saturation8, saturation32 are: 52698 80593 74428 69199 62526 54309 44533 1.52933697673536 1.18649664123876 0.845060533606589"
## Warning: Removed 34206 rows containing non-finite values (stat_bin).
## Warning: Removed 34206 rows containing non-finite values (stat_density).
## Warning: Removed 2 rows containing missing values (geom_bar).
plot_saturation("data/essentiality/mh_ess/5448/tas_t2v0.txt")
## [1] "The maximum value is: 2123076 "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 346 16 2123076
## [1] "Saturation ratio of 1s to 0s is: 0.900762923351159"
## [1] "Saturation ratio of 8 to 0s is: 0.612249554367201"
## [1] "Saturation ratio of 32 to 0s is: 0.363065953654189"
## [1] "The number of zeros, ones, twos, fours, eights, sixteens, thirtytwos, saturation1, saturation8, saturation32 are: 70125 63166 56967 51116 42934 33923 25460 0.900762923351159 0.612249554367201 0.363065953654189"
## Warning: Removed 17609 rows containing non-finite values (stat_bin).
## Warning: Removed 17609 rows containing non-finite values (stat_density).
## Warning: Removed 2 rows containing missing values (geom_bar).
plot_saturation("data/essentiality/mh_ess/5448/tas_t3v0.txt")
## [1] "The maximum value is: 6867466 "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 425 0 6867466
## [1] "Saturation ratio of 1s to 0s is: 0.296264599764653"
## [1] "Saturation ratio of 8 to 0s is: 0.182296478551353"
## [1] "Saturation ratio of 32 to 0s is: 0.118898732823091"
## [1] "The number of zeros, ones, twos, fours, eights, sixteens, thirtytwos, saturation1, saturation8, saturation32 are: 102827 30464 24716 21800 18745 15547 12226 0.296264599764653 0.182296478551353 0.118898732823091"
## Warning: Removed 7990 rows containing non-finite values (stat_bin).
## Warning: Removed 7990 rows containing non-finite values (stat_density).
## Warning: Removed 1 rows containing missing values (geom_bar).
plot_saturation("data/essentiality/mh_ess/nz131/tas_t0v0.txt")
## [1] "The maximum value is: 45032 "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 13 10 45032
## [1] "Saturation ratio of 1s to 0s is: 0.487562357891879"
## [1] "Saturation ratio of 8 to 0s is: 0.387552177011572"
## [1] "Saturation ratio of 32 to 0s is: 0.196027194262508"
## [1] "The number of zeros, ones, twos, fours, eights, sixteens, thirtytwos, saturation1, saturation8, saturation32 are: 88401 43101 39254 36826 34260 29125 17329 0.487562357891879 0.387552177011572 0.196027194262508"
## Warning: Removed 6483 rows containing non-finite values (stat_bin).
## Warning: Removed 6483 rows containing non-finite values (stat_density).
## Warning: Removed 1 rows containing missing values (geom_bar).
plot_saturation("data/essentiality/mh_ess/nz131/tas_t1v0.txt")
## [1] "The maximum value is: 658647 "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 9 166 55 658647
## [1] "Saturation ratio of 1s to 0s is: 1.77993404363267"
## [1] "Saturation ratio of 8 to 0s is: 1.45317520717064"
## [1] "Saturation ratio of 32 to 0s is: 0.932606122103839"
## [1] "The number of zeros, ones, twos, fours, eights, sixteens, thirtytwos, saturation1, saturation8, saturation32 are: 47304 84198 79307 75717 68741 57308 44116 1.77993404363267 1.45317520717064 0.932606122103839"
## Warning: Removed 30281 rows containing non-finite values (stat_bin).
## Warning: Removed 30281 rows containing non-finite values (stat_density).
## Warning: Removed 1 rows containing missing values (geom_bar).
plot_saturation("data/essentiality/mh_ess/nz131/tas_t2v0.txt")
## [1] "The maximum value is: 2287096 "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 84 2 2287096
## [1] "Saturation ratio of 1s to 0s is: 0.573197430283889"
## [1] "Saturation ratio of 8 to 0s is: 0.170429123449258"
## [1] "Saturation ratio of 32 to 0s is: 0.0511670195839165"
## [1] "The number of zeros, ones, twos, fours, eights, sixteens, thirtytwos, saturation1, saturation8, saturation32 are: 83589 47913 35787 23800 14246 7879 4277 0.573197430283889 0.170429123449258 0.0511670195839165"
## Warning: Removed 2374 rows containing non-finite values (stat_bin).
## Warning: Removed 2374 rows containing non-finite values (stat_density).
## Warning: Removed 2 rows containing missing values (geom_bar).
plot_saturation("data/essentiality/mh_ess/nz131/tas_t3v0.txt")
## [1] "The maximum value is: 13835514 "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 282 0 13835514
## [1] "Saturation ratio of 1s to 0s is: 0.155157722748794"
## [1] "Saturation ratio of 8 to 0s is: 0.055350099702211"
## [1] "Saturation ratio of 32 to 0s is: 0.020160050597774"
## [1] "The number of zeros, ones, twos, fours, eights, sixteens, thirtytwos, saturation1, saturation8, saturation32 are: 113839 17663 11487 8993 6301 3587 2295 0.155157722748794 0.055350099702211 0.020160050597774"
## Warning: Removed 1579 rows containing non-finite values (stat_bin).
## Warning: Removed 1579 rows containing non-finite values (stat_density).
## Warning: Removed 1 rows containing missing values (geom_bar).
plot_saturation("data/essentiality/mh_ess/alabama/tas_t0v0.txt")
## [1] "The maximum value is: 3253458 "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 93 0 3253458
## [1] "Saturation ratio of 1s to 0s is: 0.0646378350548637"
## [1] "Saturation ratio of 8 to 0s is: 0.0312851948386266"
## [1] "Saturation ratio of 32 to 0s is: 0.0230958586736171"
## [1] "The number of zeros, ones, twos, fours, eights, sixteens, thirtytwos, saturation1, saturation8, saturation32 are: 124308 8035 5194 4433 3889 3340 2871 0.0646378350548637 0.0312851948386266 0.0230958586736171"
## Warning: Removed 2425 rows containing non-finite values (stat_bin).
## Warning: Removed 2425 rows containing non-finite values (stat_density).
## Warning: Removed 2 rows containing missing values (geom_bar).
plot_saturation("data/essentiality/mh_ess/alabama/tas_t1v0.txt")
## [1] "The maximum value is: 983836 "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 89 0 983836
## [1] "Saturation ratio of 1s to 0s is: 0.0528815554989817"
## [1] "Saturation ratio of 8 to 0s is: 0.024296715885947"
## [1] "Saturation ratio of 32 to 0s is: 0.0177571283095723"
## [1] "The number of zeros, ones, twos, fours, eights, sixteens, thirtytwos, saturation1, saturation8, saturation32 are: 125696 6647 4152 3474 3054 2612 2232 0.0528815554989817 0.024296715885947 0.0177571283095723"
## Warning: Removed 1916 rows containing non-finite values (stat_bin).
## Warning: Removed 1916 rows containing non-finite values (stat_density).
## Warning: Removed 2 rows containing missing values (geom_bar).
plot_saturation("data/essentiality/mh_ess/alabama/tas_t2v0.txt")
## [1] "The maximum value is: 4300087 "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 43 0 4300087
## [1] "Saturation ratio of 1s to 0s is: 0.0474648979785668"
## [1] "Saturation ratio of 8 to 0s is: 0.0128773368369398"
## [1] "Saturation ratio of 32 to 0s is: 0.00768524527883748"
## [1] "The number of zeros, ones, twos, fours, eights, sixteens, thirtytwos, saturation1, saturation8, saturation32 are: 126346 5997 2623 1926 1627 1299 971 0.0474648979785668 0.0128773368369398 0.00768524527883748"
## Warning: Removed 691 rows containing non-finite values (stat_bin).
## Warning: Removed 691 rows containing non-finite values (stat_density).
## Warning: Removed 1 rows containing missing values (geom_bar).
plot_saturation("data/essentiality/mh_ess/alabama/tas_t3v0.txt")
## [1] "The maximum value is: 5482386 "
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 62 0 5482386
## [1] "Saturation ratio of 1s to 0s is: 0.10607516861539"
## [1] "Saturation ratio of 8 to 0s is: 0.0133304360180859"
## [1] "Saturation ratio of 32 to 0s is: 0.00526531328614053"
## [1] "The number of zeros, ones, twos, fours, eights, sixteens, thirtytwos, saturation1, saturation8, saturation32 are: 119651 12692 5253 2654 1595 971 630 0.10607516861539 0.0133304360180859 0.00526531328614053"
## Warning: Removed 422 rows containing non-finite values (stat_bin).
## Warning: Removed 422 rows containing non-finite values (stat_density).
## Warning: Removed 1 rows containing missing values (geom_bar).
qqplot.data <- function (vec) {
### following four lines from base R's qqline()
y <- quantile(vec[!is.na(vec)], c(0.25, 0.75))
x <- qnorm(c(0.25, 0.75))
slope <- diff(y)/diff(x)
int <- y[1L] - slope * x[1L]
d <- data.frame(resids = vec)
ggplot(d, aes(sample = resids)) + stat_qq() + geom_abline(slope = slope, intercept = int)
}
ggQQ = function(lm) {
# extract standardized residuals from the fit
d <- data.frame(std.resid = rstandard(lm))
# calculate 1Q/4Q line
y <- quantile(d$std.resid[!is.na(d$std.resid)], c(0.25, 0.75))
x <- qnorm(c(0.25, 0.75))
slope <- diff(y)/diff(x)
int <- y[1L] - slope * x[1L]
p <- ggplot(data=d, aes(sample=std.resid)) +
stat_qq(shape=1, size=3) + # open circles
labs(title="Normal Q-Q", # plot title
x="Theoretical Quantiles", # x-axis label
y="Standardized Residuals") + # y-axis label
geom_abline(slope = slope, intercept = int, linetype="dashed") # dashed reference line
return(p)
}
## This section wants the output from essentiality_tas.pl, which I deleted...
##qq_test = time0_tas[,c("forward","reverse")]
##qq_test$forward = log2(qq_test$forward + 1)
##qq_test$reverse = log2(qq_test$reverse + 1)
##qqplot(time0_tas_5448$forward, time1_tas_5448$forward)
##qqplot(qq_test$forward, qq_test$reverse)
##qq_df = data.frame(t0=time0_tas_5448$forward, t1=time1_tas_5448$forward)
####params = as.list(fitdistr(qq_df, "t")$estimate)
##qqplot.data(qq_df)
##ggQQ(lm(t0~t1,data=qq_df))
##ggQQ(lm(forward~reverse, data=qq_test))
These plotting functions are certain to fail, as they have all been rewritten heavily.
all_expt_5448 = expt_subset(experiment_5448, "")
all_design_5448 = data.frame(all_expt_5448$design)
all_design_5448$longnames = paste(all_design_5448$strain, all_design_5448$batch, all_design_5448$condition, sep="_")
metrics_5448 = graph_metrics(all_expt_5448, out_type="cpm", norm_type="quant", filter="log2", cormethod="spearman", names=all_design_5448$longnames)
metrics_5448$norm_pcaplot
head(exprs(all_expt_5448$expressionset))
head(gene_annotations_5448)
library(hpgltools)
colnames(all_design_5448)[[1]] <- "sampleid"
tt <- plot_pca(all_expt_5448, design=all_design_5448)
correlations_5448 = my_cor(exprs(all_expt_5448$expressionset), method="spearman")
write_xls(correlations_5448, "sp_cor_prenorm", rowname="row.names", file="excel/5448_data_v0.xls")
library(cbcbSEQ)
test2_5448 = my_norm(expt=all_expt_5448, out_type="cp_seq_m", norm_type="quant", filter="none", fasta="reference/genbank/mgas_5005.fasta", gff="reference/genbank/mgas_5005.gff")
head(test2_5448$counts)
id_gene_annotations_5448 = gene_annotations_5448
rownames(id_gene_annotations_5448) = make.names(id_gene_annotations_5448$locus_tag, unique=TRUE)
no_time3_5448 = expt_subset(all_expt_5448, "condition!='t3'")
notime3_metrics_5448 = graph_metrics(no_time3_5448, out_type="rpkm", norm_type="quant", filter="log2", cormethod="spearman", annotations=id_gene_annotations_5448)
notime3_metrics_5448$norm_corheat
notime3_metrics_5448$norm_pcaplot
all_qrpkm_5448 = my_norm(expt=all_expt_5448, norm="quant", filter="log2", filter_low=FALSE, out_type="rpkm", annotations=id_gene_annotations_5448)$counts
##all_qrpkm = my_norm(expt=all_expt, norm="quant", filter="log2", filter_low=FALSE, out_type="rpkm", annotations=id_gene_annotations)$counts
write_xls(all_qrpkm_5448, "norm_data", file="excel/5448_data_v0.xls", rowname="row.names")
all_norm_expt_5448 = all_expt_5448
correlations_5448 = my_cor(all_qrpkm_5448, method="spearman")
write_xls(correlations_5448, "sp_cor_norm", rowname="row.names", file="excel/5448_data_v0.xls")
## I did that so that I can replace the expressionset with the
## normalized data more easily...
normalized_expressionset_5448 = all_norm_expt_5448$expressionset
exprs(normalized_expressionset_5448) = all_qrpkm_5448
all_norm_expt_5448$expressionset = normalized_expressionset_5448
my_boxplot(expt=all_norm_expt_5448, names=all_design_5448$longnames)
my_disheat(expt=all_norm_expt_5448, names=all_design_5448$longnames, row="condition")
my_corheat(expt=all_norm_expt_5448, names=all_design_5448$longnames, row="condition")
In order to redo the above work, I need to generate a valid expressionset. Sadly, my first efforts at this were imperfect – not terrible or anything, but incomplete.
library(hpgltools)
meta <- pData(experiment_5448)
new_5448 <- create_expt(metadata=pData(experiment_5448),
gene_info=annotations_5448$genes,
count_dataframe=exprs(experiment_5448),
sample_column="sample")
## Reading the sample metadata.
## The sample definitions comprises: 16, 7 rows, columns.
## Matched 1926 annotations and counts.
## Bringing together the count matrix and gene information.
new_norm <- normalize_expt(new_5448, normalize="quant", convert="cpm",
filter=TRUE, transform="log2")
## This function will replace the expt$expressionset slot with:
## log2(cpm(hpgl(data)))
## It backs up the current data into a slot named:
## expt$backup_expressionset. It will also save copies of each step along the way
## in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
## when invoking limma. The appropriate libsize is the non-log(cpm(normalized)).
## This is most likely kept at:
## 'new_expt$normalized$intermediate_counts$normalization$libsizes'
## A copy of this may also be found at:
## new_expt$best_libsize
## Leaving the data unnormalized. This is necessary for DESeq, but
## EdgeR/limma might benefit from normalization. Good choices include quantile,
## size-factor, tmm, etc.
## Not correcting the count-data for batch effects. If batch is
## included in EdgerR/limma's model, then this is probably wise; but in extreme
## batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: hpgl
## Removing 273 low-count genes (1653 remaining).
## Step 2: not normalizing the data.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 3984 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
new_metrics <- hpgltools::graph_metrics(new_5448)
## Graphing number of non-zero genes with respect to CPM by library.
## Graphing library sizes.
## The scale difference between the smallest and largest
## libraries is > 10. Assuming a log10 scale is better, set scale=FALSE if not.
## Graphing a boxplot.
## This data will benefit from being displayed on the log scale.
## If this is not desired, set scale='raw'
## Some entries are 0. We are on log scale, adding 1 to the data.
## Changed 7426 zero count features.
## Graphing a correlation heatmap.
## Graphing a standard median correlation.
## Performing correlation.
## Graphing a distance heatmap.
## Graphing a standard median distance.
## Performing distance.
## Graphing a PCA plot.
## Graphing a T-SNE plot.
## Plotting a density plot.
## This data will benefit from being displayed on the log scale.
## If this is not desired, set scale='raw'
## Some entries are 0. We are on log scale, setting them to 0.5.
## Changed 7426 zero count features.
## Plotting the representation of the top-n genes.
## Printing a color to condition legend.
new_norm_metrics <- hpgltools::graph_metrics(new_norm)
## Graphing number of non-zero genes with respect to CPM by library.
## Graphing library sizes.
## Graphing a boxplot.
## Graphing a correlation heatmap.
## Graphing a standard median correlation.
## Performing correlation.
## Graphing a distance heatmap.
## Graphing a standard median distance.
## Performing distance.
## Graphing a PCA plot.
## Graphing a T-SNE plot.
## Plotting a density plot.
## Plotting the representation of the top-n genes.
## Printing a color to condition legend.
new_metrics$libsize
new_metrics$nonzero
new_norm_metrics$pcaplot
new_norm_metrics$tsneplot
## Looks like I can plot pretty much anything for this data once again, yay!
## I am not sure what are good things to plot anymore though, so I might leave
## it at this unless I get to chat with Yoann and see what would be useful to
## revisit.
## Compare library 1, t0 t1
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='9' & condition=='t0')|(batch=='9' & condition=='t1')")$expressionset)
pdf(file="figures/comparisons/intertime/lib1t0_vs_lib1t1_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l1t0t1v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 <- subset_expt(new_norm,
subset="(batch=='9' & condition=='t0')|(batch=='9' & condition=='t1')")
data <- exprs(comp_5448)
sc <- plot_linear_scatter(data)
## Used Bon Ferroni corrected t test(s) between columns.
sc$scatter
sc$correlation
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 90, df = 1700, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9032 0.9195
## sample estimates:
## cor
## 0.9117
## Compare library 1, t0 t2
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='9' & condition=='t0')|(batch=='9' & condition=='t2')")$expressionset)
pdf(file="figures/comparisons/intertime/lib1t0_vs_lib1t2_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l1t0t2v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 <- subset_expt(new_norm,
subset="(batch=='9' & condition=='t0')|(batch=='9' & condition=='t2')")
data <- exprs(comp_5448)
sc <- plot_linear_scatter(data)
## Used Bon Ferroni corrected t test(s) between columns.
sc$scatter
sc$correlation
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 63, df = 1700, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8250 0.8535
## sample estimates:
## cor
## 0.8398
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='9' & condition=='t0')|(batch=='9' & condition=='t3')")$expressionset)
pdf(file="figures/comparisons/intertime/lib1t0_vs_lib1t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l1t0t3v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 <- subset_expt(new_norm,
subset="(batch=='9' & condition=='t0')|(batch=='9' & condition=='t3')")
data <- exprs(comp_5448)
sc <- plot_linear_scatter(data)
## Used Bon Ferroni corrected t test(s) between columns.
sc$scatter
sc$correlation
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 44, df = 1700, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.715 0.759
## sample estimates:
## cor
## 0.7378
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='9' & condition=='t1')|(batch=='9' & condition=='t2')")$expressionset)
pdf(file="figures/comparisons/intertime/lib1t1_vs_lib1t2_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l1t1t2v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
png(file="figures/l1t1t2_hist.png")
sc$both_histogram + scale_y_continuous(limits=c(0,0.25))
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 <- subset_expt(new_norm,
subset="(batch=='9' & condition=='t1')|(batch=='9' & condition=='t2')")
data <- exprs(comp_5448)
sc <- plot_linear_scatter(data)
## Used Bon Ferroni corrected t test(s) between columns.
sc$scatter
sc$correlation
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 120, df = 1700, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9433 0.9530
## sample estimates:
## cor
## 0.9484
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='9' & condition=='t1')|(batch=='9' & condition=='t3')")$expressionset)
pdf(file="figures/comparisons/intertime/lib1t1_vs_lib1t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l1t1t3v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 <- subset_expt(new_norm,
subset="(batch=='9' & condition=='t1')|(batch=='9' & condition=='t3')")
data <- exprs(comp_5448)
sc <- plot_linear_scatter(data)
## Used Bon Ferroni corrected t test(s) between columns.
sc$scatter
sc$correlation
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 53, df = 1700, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7721 0.8083
## sample estimates:
## cor
## 0.7909
Ok, so I demonstrated I can remake all my old plots, but I am not sure how much I care beyond that. I think that demonstrates that I can in fact reproduce my old results.
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='9' & condition=='t2')|(batch=='9' & condition=='t3')")$expressionset)
pdf(file="figures/comparisons/intertime/lib1t2_vs_lib1t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l1t2t3v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='11' & condition=='t0')|(batch=='11' & condition=='t1')")$expressionset)
pdf(file="figures/comparisons/intertime/lib2t0_vs_lib2t1_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l2t0t1v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='11' & condition=='t0')|(batch=='11' & condition=='t2')")$expressionset)
pdf(file="figures/comparisons/intertime/lib2t0_vs_lib2t2_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l2t0t2v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='11' & condition=='t0')|(batch=='11' & condition=='t3')")$expressionset)
pdf(file="figures/comparisons/intertime/lib2t0_vs_lib2t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l2t0t3v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='11' & condition=='t1')|(batch=='11' & condition=='t2')")$expressionset)
pdf(file="figures/comparisons/intertime/lib2t1_vs_lib2t2_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l2t1t2v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='11' & condition=='t1')|(batch=='11' & condition=='t3')")$expressionset)
pdf(file="figures/comparisons/intertime/lib2t1_vs_lib2t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l2t1t3v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='11' & condition=='t2')|(batch=='11' & condition=='t3')")$expressionset)
pdf(file="figures/comparisons/intertime/lib2t2_vs_lib2t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l2t2t3v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='12' & condition=='t0')|(batch=='12' & condition=='t1')")$expressionset)
pdf(file="figures/comparisons/intertime/lib3t0_vs_lib3t1_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l3t0t1v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='12' & condition=='t0')|(batch=='12' & condition=='t2')")$expressionset)
pdf(file="figures/comparisons/intertime/lib3t0_vs_lib3t2_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l3t0t2v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='12' & condition=='t0')|(batch=='12' & condition=='t3')")$expressionset)
pdf(file="figures/comparisons/intertime/lib3t0_vs_lib3t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l3t0t3v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='12' & condition=='t1')|(batch=='12' & condition=='t2')")$expressionset)
pdf(file="figures/comparisons/intertime/lib3t1_vs_lib3t2_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l3t1t2v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='12' & condition=='t1')|(batch=='12' & condition=='t3')")$expressionset)
pdf(file="figures/comparisons/intertime/lib3t1_vs_lib3t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l3t1t3v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='12' & condition=='t2')|(batch=='12' & condition=='t3')")$expressionset)
## Error in expt_subset(all_norm_expt_5448, "(batch=='12' & condition=='t2')|(batch=='12' & condition=='t3')"): object 'all_norm_expt_5448' not found
pdf(file="figures/comparisons/intertime/lib3t2_vs_lib3t3_scatter_v0.pdf")
## Error in Cairo(width, height, file, "pdf", pointsize = pointsize, bg = bg, : could not find function "Cairo"
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l3t2t3v0.html")
## Error in df[, c(1, 2)]: incorrect number of dimensions
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
## $plot
##
## $data_summary
## first second
## Min. : 0.00 Min. : 0.000
## 1st Qu.: 5.94 1st Qu.: 0.331
## Median : 7.74 Median : 3.899
## Mean : 7.18 Mean : 3.483
## 3rd Qu.: 8.85 3rd Qu.: 5.360
## Max. :16.91 Max. :19.467
##
## $uncor_t
##
## Pairwise comparisons using t tests with pooled SD
##
## data: play_all[["expression"]] and play_all[["cond"]]
##
## first
## second <2e-16
##
## P value adjustment method: none
##
## $bon_t
##
## Pairwise comparisons using t tests with pooled SD
##
## data: play_all[["expression"]] and play_all[["cond"]]
##
## first
## second <2e-16
##
## P value adjustment method: bonferroni
dev.off()
## X11cairo
## 2
sc$correlation
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 53, df = 1700, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7721 0.8083
## sample estimates:
## cor
## 0.7909
sc$lm_model
##
## Call:
## robustbase::lmrob(formula = second ~ first, data = df, method = "SMDM")
##
## Coefficients:
## (Intercept) first
## -2.472 0.833
sc$lm_summary
##
## Call:
## robustbase::lmrob(formula = second ~ first, data = df, method = "SMDM")
## \--> method = "SMDM"
## Residuals:
## Min 1Q Median 3Q Max
## -4.869 -0.935 0.140 0.948 7.860
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.4715 0.1184 -20.9 <2e-16 ***
## first 0.8327 0.0156 53.2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Robust residual standard error: 1.53
## Multiple R-squared: 0.641, Adjusted R-squared: 0.641
## Convergence in 8 IRWLS iterations
##
## Robustness weights:
## 1176 weights are ~= 1. The remaining 477 ones are summarized as
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.067 0.763 0.895 0.831 0.966 0.999
## Algorithmic parameters:
## tuning.chi1 tuning.chi2 tuning.chi3 tuning.chi4
## -0.5000000000000 1.5000000000000 NA 0.5000000000000
## bb tuning.psi1 tuning.psi2 tuning.psi3
## 0.5000000000000 -0.5000000000000 1.5000000000000 0.9500000000000
## tuning.psi4 refine.tol rel.tol solve.tol
## NA 0.0000001000000 0.0000001000000 0.0000001000000
## eps.outlier eps.x warn.limit.reject warn.limit.meanrw
## 0.0000604960678 0.0000000000308 0.5000000000000 0.5000000000000
## nResample max.it best.r.s k.fast.s k.max
## 500 50 2 1 200
## maxit.scale trace.lev mts compute.rd numpoints
## 200 0 1000 0 10
## fast.s.large.n
## 2000
## psi subsampling cov
## "lqq" "nonsingular" ".vcov.w"
## compute.outlier.stats
## "SMDM"
## seed : int(0)
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='34' & condition=='t0')|(batch=='34' & condition=='t1')")$expressionset)
## Error in expt_subset(all_norm_expt_5448, "(batch=='34' & condition=='t0')|(batch=='34' & condition=='t1')"): object 'all_norm_expt_5448' not found
pdf(file="figures/comparisons/intertime/lib4t0_vs_lib4t1_scatter_v0.pdf")
## Error in Cairo(width, height, file, "pdf", pointsize = pointsize, bg = bg, : could not find function "Cairo"
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l4t0t1v0.html")
## Error in df[, c(1, 2)]: incorrect number of dimensions
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
## $plot
##
## $data_summary
## first second
## Min. : 0.00 Min. : 0.000
## 1st Qu.: 5.94 1st Qu.: 0.331
## Median : 7.74 Median : 3.899
## Mean : 7.18 Mean : 3.483
## 3rd Qu.: 8.85 3rd Qu.: 5.360
## Max. :16.91 Max. :19.467
##
## $uncor_t
##
## Pairwise comparisons using t tests with pooled SD
##
## data: play_all[["expression"]] and play_all[["cond"]]
##
## first
## second <2e-16
##
## P value adjustment method: none
##
## $bon_t
##
## Pairwise comparisons using t tests with pooled SD
##
## data: play_all[["expression"]] and play_all[["cond"]]
##
## first
## second <2e-16
##
## P value adjustment method: bonferroni
dev.off()
## X11cairo
## 2
sc$correlation
##
## Pearson's product-moment correlation
##
## data: df[, 1] and df[, 2]
## t = 53, df = 1700, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7721 0.8083
## sample estimates:
## cor
## 0.7909
sc$lm_model
##
## Call:
## robustbase::lmrob(formula = second ~ first, data = df, method = "SMDM")
##
## Coefficients:
## (Intercept) first
## -2.472 0.833
sc$lm_summary
##
## Call:
## robustbase::lmrob(formula = second ~ first, data = df, method = "SMDM")
## \--> method = "SMDM"
## Residuals:
## Min 1Q Median 3Q Max
## -4.869 -0.935 0.140 0.948 7.860
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.4715 0.1184 -20.9 <2e-16 ***
## first 0.8327 0.0156 53.2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Robust residual standard error: 1.53
## Multiple R-squared: 0.641, Adjusted R-squared: 0.641
## Convergence in 8 IRWLS iterations
##
## Robustness weights:
## 1176 weights are ~= 1. The remaining 477 ones are summarized as
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.067 0.763 0.895 0.831 0.966 0.999
## Algorithmic parameters:
## tuning.chi1 tuning.chi2 tuning.chi3 tuning.chi4
## -0.5000000000000 1.5000000000000 NA 0.5000000000000
## bb tuning.psi1 tuning.psi2 tuning.psi3
## 0.5000000000000 -0.5000000000000 1.5000000000000 0.9500000000000
## tuning.psi4 refine.tol rel.tol solve.tol
## NA 0.0000001000000 0.0000001000000 0.0000001000000
## eps.outlier eps.x warn.limit.reject warn.limit.meanrw
## 0.0000604960678 0.0000000000308 0.5000000000000 0.5000000000000
## nResample max.it best.r.s k.fast.s k.max
## 500 50 2 1 200
## maxit.scale trace.lev mts compute.rd numpoints
## 200 0 1000 0 10
## fast.s.large.n
## 2000
## psi subsampling cov
## "lqq" "nonsingular" ".vcov.w"
## compute.outlier.stats
## "SMDM"
## seed : int(0)
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='34' & condition=='t0')|(batch=='34' & condition=='t2')")$expressionset)
pdf(file="figures/comparisons/intertime/lib4t0_vs_lib4t2_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l4t0t2v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='34' & condition=='t0')|(batch=='34' & condition=='t3')")$expressionset)
pdf(file="figures/comparisons/intertime/lib4t0_vs_lib4t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l4t0t3v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='34' & condition=='t1')|(batch=='34' & condition=='t2')")$expressionset)
pdf(file="figures/comparisons/intertime/lib4t1_vs_lib4t2_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l4t1t2v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='34' & condition=='t1')|(batch=='34' & condition=='t3')")$expressionset)
pdf(file="figures/comparisons/intertime/lib4t1_vs_lib4t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l4t1t3v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='34' & condition=='t2')|(batch=='34' & condition=='t3')")$expressionset)
pdf(file="figures/comparisons/intertime/lib4t2_vs_lib4t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/l4t2t3v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
## Compare t0, lib1 lib2
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='9' & condition=='t0')|(batch=='11' & condition=='t0')")$expressionset)
pdf(file="figures/comparisons/interlib/lib1t0_vs_lib2t0_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t0l1l2v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='9' & condition=='t0')|(batch=='12' & condition=='t0')")$expressionset)
pdf(file="figures/comparisons/interlib/lib1t0_vs_lib3t0_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t0l1l3v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='9' & condition=='t0')|(batch=='34' & condition=='t0')")$expressionset)
pdf(file="figures/comparisons/interlib/lib1t0_vs_lib4t0_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t0l1l4v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='11' & condition=='t0')|(batch=='12' & condition=='t0')")$expressionset)
pdf(file="figures/comparisons/interlib/lib2t0_vs_lib3t0_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t0l2l3v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='11' & condition=='t0')|(batch=='34' & condition=='t0')")$expressionset)
pdf(file="figures/comparisons/interlib/lib2t0_vs_lib4t0_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t0l2l4v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='12' & condition=='t0')|(batch=='34' & condition=='t0')")$expressionset)
pdf(file="figures/comparisons/interlib/lib3t0_vs_lib4t0_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t0l3l4v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='9' & condition=='t1')|(batch=='11' & condition=='t1')")$expressionset)
pdf(file="figures/comparisons/interlib/lib1t1_vs_lib2t1_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t1l1l2v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='9' & condition=='t1')|(batch=='12' & condition=='t1')")$expressionset)
pdf(file="figures/comparisons/interlib/lib1t1_vs_lib3t1_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t1l1l3v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='9' & condition=='t1')|(batch=='34' & condition=='t1')")$expressionset)
pdf(file="figures/comparisons/interlib/lib1t1_vs_lib4t1_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t1l1l4v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='11' & condition=='t1')|(batch=='12' & condition=='t1')")$expressionset)
pdf(file="figures/comparisons/interlib/lib2t1_vs_lib3t1_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t1l2l3v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='11' & condition=='t1')|(batch=='34' & condition=='t1')")$expressionset)
pdf(file="figures/comparisons/interlib/lib2t1_vs_lib4t1_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t1l2l4v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='12' & condition=='t1')|(batch=='34' & condition=='t1')")$expressionset)
pdf(file="figures/comparisons/interlib/lib3t1_vs_lib4t1_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t1l3l4v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='9' & condition=='t2')|(batch=='11' & condition=='t2')")$expressionset)
pdf(file="figures/comparisons/interlib/lib1t2_vs_lib2t2_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t2l1l2v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='9' & condition=='t2')|(batch=='12' & condition=='t2')")$expressionset)
pdf(file="figures/comparisons/interlib/lib1t2_vs_lib3t2_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t2l1l3v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='9' & condition=='t2')|(batch=='34' & condition=='t2')")$expressionset)
pdf(file="figures/comparisons/interlib/lib1t2_vs_lib4t2_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t2l1l4v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='11' & condition=='t2')|(batch=='12' & condition=='t2')")$expressionset)
pdf(file="figures/comparisons/interlib/lib2t2_vs_lib3t2_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t2l2l3v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='11' & condition=='t2')|(batch=='34' & condition=='t2')")$expressionset)
pdf(file="figures/comparisons/interlib/lib2t2_vs_lib4t2_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t2l2l4v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='12' & condition=='t2')|(batch=='34' & condition=='t2')")$expressionset)
pdf(file="figures/comparisons/interlib/lib3t2_vs_lib4t2_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t2l3l4v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='9' & condition=='t3')|(batch=='11' & condition=='t3')")$expressionset)
pdf(file="figures/comparisons/interlib/lib1t3_vs_lib2t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t3l1l2v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='9' & condition=='t3')|(batch=='12' & condition=='t3')")$expressionset)
pdf(file="figures/comparisons/interlib/lib1t3_vs_lib3t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t3l1l3v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='9' & condition=='t3')|(batch=='34' & condition=='t3')")$expressionset)
pdf(file="figures/comparisons/interlib/lib1t3_vs_lib4t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t3l1l4v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='11' & condition=='t3')|(batch=='12' & condition=='t3')")$expressionset)
pdf(file="figures/comparisons/interlib/lib2t3_vs_lib3t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t3l2l3v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='11' & condition=='t3')|(batch=='34' & condition=='t3')")$expressionset)
pdf(file="figures/comparisons/interlib/lib2t3_vs_lib4t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t3l2l4v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
comp_5448 = exprs(expt_subset(all_norm_expt_5448, "(batch=='12' & condition=='t3')|(batch=='34' & condition=='t3')")$expressionset)
pdf(file="figures/comparisons/interlib/lib3t3_vs_lib4t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_5448, cormethod="spearman", gvis_filename="html/t3l3l4v0.html")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
I want to do some simple voom/limma comparisons of the times: 1. t0 to t1 2. t0 to t2 3. t0 to t3 4. t1 to t2 5. t1 to t3 6. t2 to t3
comp_5448 = expt_subset(all_norm_expt_5448, "condition=='t0' | condition=='t1'")
t0t1 = simple_comparison(comp_5448, sheet="t0t1", workbook="excel/5448_comparisons_v0.xls", basename="html/t0t1v0", tooltip_data=tooltip_data_5448)
pdf(file="figures/comparisons/combined_libs/t0t1v0.pdf")
t0t1$contrast_histogram
t0t1$amean_histogram
t0t1$pvalue_histogram
t0t1$ma_plot
t0t1$volcano_plot
png(file="figures/combined_t0t1v0.png")
t0t1$coefficient_scatter
dev.off()
t0t1$coefficient_x
t0t1$coefficient_y
t0t1$coefficient_both
dev.off()
comp_5448 = expt_subset(all_norm_expt_5448, "condition=='t0' | condition=='t2'")
t0t2 = simple_comparison(comp_5448, sheet="t0t2", workbook="excel/5448_comparisons_v0.xls", basename="html/t0t2v0", tooltip_data=tooltip_data_5448)
pdf(file="figures/comparisons/combined_libs/t0t2v0.pdf")
t0t2$contrast_histogram
t0t2$amean_histogram
t0t2$pvalue_histogram
t0t2$ma_plot
t0t2$volcano_plot
t0t2$coefficient_scatter
t0t2$coefficient_x
t0t2$coefficient_y
t0t2$coefficient_both
dev.off()
comp_5448 = expt_subset(all_norm_expt_5448, "condition=='t0' | condition=='t3'")
t0t3 = simple_comparison(comp_5448, sheet="t0t3", workbook="excel/5448_comparisons_v0.xls", basename="html/t0t3v0", tooltip_data=tooltip_data_5448)
pdf(file="figures/comparisons/combined_libs/t0t3v0.pdf")
t0t3$contrast_histogram
t0t3$amean_histogram
t0t3$pvalue_histogram
t0t3$ma_plot
t0t3$volcano_plot
t0t3$coefficient_scatter
t0t3$coefficient_x
t0t3$coefficient_y
t0t3$coefficient_both
dev.off()
comp_5448 = expt_subset(all_norm_expt_5448, "condition=='t1' | condition=='t2'")
t1t2 = simple_comparison(comp_5448, sheet="t1t2", workbook="excel/5448_comparisons_v0.xls", basename="html/t1t2v0", tooltip_data=tooltip_data_5448)
##t1t2 = simple_comparison(comp_5448, sheet="t1t2", workbook="excel/5448_comparisons_v0.xls")
pdf(file="figures/comparisons/combined_libs/t1t2v0.pdf")
t1t2$contrast_histogram
t1t2$amean_histogram
t1t2$pvalue_histogram
t1t2$ma_plot
t1t2$volcano_plot
t1t2$coefficient_scatter
t1t2$coefficient_x
t1t2$coefficient_y
t1t2$coefficient_both
dev.off()
comp_5448 = expt_subset(all_norm_expt_5448, "condition=='t1' | condition=='t3'")
t1t3 = simple_comparison(comp_5448, sheet="t1t3", workbook="excel/5448_comparisons_v0.xls", basename="html/t1t3v0", tooltip_data=tooltip_data_5448)
pdf(file="figures/comparisons/combined_libs/t1t3v0.pdf")
t1t3$contrast_histogram
t1t3$amean_histogram
t1t3$pvalue_histogram
t1t3$ma_plot
t1t3$volcano_plot
t1t3$coefficient_scatter
t1t3$coefficient_x
t1t3$coefficient_y
t1t3$coefficient_both
dev.off()
comp_5448 = expt_subset(all_norm_expt_5448, "condition=='t2' | condition=='t3'")
t2t3 = simple_comparison(comp_5448, sheet="t2t3", workbook="excel/5448_comparisons_v0.xls", basename="html/t2t3", tooltip_data=tooltip_data_5448)
pdf(file="figures/comparisons/combined_libs/t2t3v0.pdf")
t2t3$contrast_histogram
t2t3$amean_histogram
t2t3$pvalue_histogram
t2t3$ma_plot
t2t3$volcano_plot
t2t3$coefficient_scatter
t2t3$coefficient_x
t2t3$coefficient_y
t2t3$coefficient_both
dev.off()
It seems to me that those genes which go up/down in hits from times 1 to 2 are likely of interest Once simple task to perform is a simple goseq analysis of them.
```{r t1t2goseq, eval=FALSE summary(t1t2$downsignificant) head(annotation_5448_info) head(microbes_5448)
up_significant_table01 = subset(t0t1\(upsignificant, logFC > 0.8) up_significant_table01\)ID = rownames(up_significant_table01) down_significant_table01 = subset(t0t1\(downsignificant, logFC < -0.8) down_significant_table01\)ID = rownames(down_significant_table01)
up_significant_table02 = subset(t0t2\(upsignificant, logFC > 0.8) up_significant_table02\)ID = rownames(up_significant_table02) down_significant_table02 = subset(t0t2\(downsignificant, logFC < -0.8) down_significant_table02\)ID = rownames(down_significant_table02)
up_significant_table03 = subset(t0t3\(upsignificant, logFC > 0.8) up_significant_table03\)ID = rownames(up_significant_table03) down_significant_table03 = subset(t0t3\(downsignificant, logFC < -0.8) down_significant_table03\)ID = rownames(down_significant_table03)
up_significant_table12 = subset(t1t2\(upsignificant, logFC > 0.8) up_significant_table12\)ID = rownames(up_significant_table12) down_significant_table12 = subset(t1t2\(downsignificant, logFC < -0.8) down_significant_table12\)ID = rownames(down_significant_table12)
up_significant_table13 = subset(t1t3\(upsignificant, logFC > 0.8) up_significant_table13\)ID = rownames(up_significant_table13) down_significant_table13 = subset(t1t3\(downsignificant, logFC < -0.8) down_significant_table13\)ID = rownames(down_significant_table13)
gene_lengths_5448 = annotation_5448_info[,c(“locus_tag”,“width”)] colnames(gene_lengths_5448) = c(“ID”,“width”) ## The third item colnames(microbes_go_5448) = c(“ID”,“GO”) microbes_go_5448\(ID = gsub("Spy_", "Spy", microbes_go_5448\)ID)
up_go01 = dirty_go(up_significant_table01, lengths=gene_lengths_5448, goids=microbes_go_5448, adjust=NULL, organism=“gas_5005”, include_cnetplots=FALSE) up_go01\(pvalue_plot up_go01\)mf_group_barplot up_go01\(mf_enriched_barplot down_go01 = dirty_go(down_significant_table01, lengths=gene_lengths_5448, goids=microbes_go_5448, adjust=NULL, organism="gas_5005") down_go01\)pvalue_plot down_go01\(mf_group_barplot down_go01\)mf_enriched_barplot
up_go02 = dirty_go(up_significant_table02, lengths=gene_lengths_5448, goids=microbes_go_5448, adjust=NULL, organism=“gas_5005”, include_cnetplots=FALSE) up_go02\(pvalue_plot up_go02\)mf_group_barplot up_go02\(mf_enriched_barplot down_go02 = dirty_go(down_significant_table02, lengths=gene_lengths_5448, goids=microbes_go_5448, adjust=NULL, organism="gas_5005") down_go02\)pvalue_plot down_go02\(mf_group_barplot down_go02\)mf_enriched_barplot
up_go03 = dirty_go(up_significant_table03, lengths=gene_lengths_5448, goids=microbes_go_5448, adjust=NULL, organism=“gas_5005”, include_cnetplots=FALSE) up_go03\(pvalue_plot up_go03\)mf_group_barplot up_go03\(mf_enriched_barplot down_go03 = dirty_go(down_significant_table03, lengths=gene_lengths_5448, goids=microbes_go_5448, adjust=NULL, organism="gas_5005") down_go03\)pvalue_plot down_go03\(mf_group_barplot down_go03\)mf_enriched_barplot
up_go12 = dirty_go(up_significant_table12, lengths=gene_lengths_5448, goids=microbes_go_5448, adjust=NULL, organism=“gas_5005”, include_cnetplots=FALSE) summary(up_go12) up_go12\(pvalue_plot up_go12\)mf_group_barplot up_go12\(mf_enriched_barplot down_go12 = dirty_go(down_significant_table12, lengths=gene_lengths_5448, goids=microbes_go_5448, adjust=NULL, organism="gas_5005") down_go12\)pvalue_plot down_go12\(mf_group_barplot down_go12\)mf_enriched_barplot
up_go13 = dirty_go(up_significant_table13, lengths=gene_lengths_5448, goids=microbes_go_5448, adjust=NULL, organism=“gas_5005”, include_cnetplots=FALSE) summary(up_go13) up_go13\(pvalue_plot up_go13\)mf_group_barplot up_go13\(mf_enriched_barplot down_go13 = dirty_go(down_significant_table13, lengths=gene_lengths_5448, goids=microbes_go_5448, adjust=NULL, organism="gas_5005") down_go13\)pvalue_plot down_go13\(mf_group_barplot down_go13\)mf_enriched_barplot ```
plot_essentiality = function(file) {
ess = read.csv(file=file, comment.char="#", sep="\t", header=FALSE)
colnames(ess) = c("gene","orf_hits","orf_tas","max_run","max_run_span","posterior_zbar","call")
ess = ess[with(ess, order(posterior_zbar)), ]
ess = subset(ess, posterior_zbar > -1)
ess = transform(ess, rank=ave(posterior_zbar, FUN=function(x) order(x,decreasing=FALSE)))
zbar_plot = ggplot(data=ess, aes(x=rank, y=posterior_zbar)) +
geom_point(stat="identity", size=2) +
geom_hline(color="grey", yintercept=0.0371) +
geom_hline(color="grey", yintercept=0.9902) +
theme_bw()
span_df = ess[,c("max_run","max_run_span")]
span_plot = my_linear_scatter(span_df)
returns = list(zbar=zbar_plot, scatter=span_plot$scatter)
return(returns)
}
plot_essentiality("data/essentiality/mh_ess/5448/t0v0_essentiality_m1.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t1v0_essentiality_m1.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t2v0_essentiality_m1.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t3v0_essentiality_m1.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t0v0_essentiality_m2.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t1v0_essentiality_m2.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t2v0_essentiality_m2.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t3v0_essentiality_m2.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t0v0_essentiality_m4.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t1v0_essentiality_m4.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t2v0_essentiality_m4.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t3v0_essentiality_m4.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t0v0_essentiality_m8.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t1v0_essentiality_m8.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t2v0_essentiality_m8.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t3v0_essentiality_m8.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t0v0_essentiality_m16.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t1v0_essentiality_m16.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t2v0_essentiality_m16.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t3v0_essentiality_m16.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t0v0_essentiality_m32.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t1v0_essentiality_m32.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t2v0_essentiality_m32.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t3v0_essentiality_m32.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t0v0_essentiality_m1.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t1v0_essentiality_m1.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t2v0_essentiality_m1.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t3v0_essentiality_m1.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t0v0_essentiality_m2.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t1v0_essentiality_m2.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t2v0_essentiality_m2.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t3v0_essentiality_m2.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t0v0_essentiality_m4.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t1v0_essentiality_m4.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t2v0_essentiality_m4.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t3v0_essentiality_m4.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t0v0_essentiality_m8.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t1v0_essentiality_m8.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t2v0_essentiality_m8.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t3v0_essentiality_m8.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t0v0_essentiality_m16.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t1v0_essentiality_m16.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t2v0_essentiality_m16.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t3v0_essentiality_m16.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t0v0_essentiality_m32.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t1v0_essentiality_m32.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t2v0_essentiality_m32.csv")
plot_essentiality("data/essentiality/mh_ess/nz131/t3v0_essentiality_m32.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t0v0_essentiality_m2.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t1v0_essentiality_m2.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t2v0_essentiality_m2.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t3v0_essentiality_m2.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t0v0_essentiality_m4.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t1v0_essentiality_m4.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t2v0_essentiality_m4.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t3v0_essentiality_m4.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t0v0_essentiality_m8.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t1v0_essentiality_m8.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t2v0_essentiality_m8.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t3v0_essentiality_m8.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t0v0_essentiality_m16.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t1v0_essentiality_m16.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t2v0_essentiality_m16.csv")
plot_essentiality("data/essentiality/mh_ess/5448/t3v0_essentiality_m16.csv")
zbars_5448 = c("data/essentiality/mh_ess/5448/t0v0_essentiality_m1.csv", "data/essentiality/mh_ess/5448/t0v0_essentiality_m2.csv", "data/essentiality/mh_ess/5448/t0v0_essentiality_m4.csv", "data/essentiality/mh_ess/5448/t0v0_essentiality_m8.csv", "data/essentiality/mh_ess/5448/t0v0_essentiality_m16.csv", "data/essentiality/mh_ess/5448/t0v0_essentiality_m32.csv")
zbar_df = data.frame()
count = 0
for (z in zbars_5448) {
count = count + 1
ess = read.csv(file=z, comment.char="#", sep="\t", header=FALSE)
colnames(ess) = c("gene","orf_hits","orf_tas","max_run","max_run_span","posterior_zbar","call")
print(summary(ess$posterior_zbar))
ess = ess[with(ess, order(posterior_zbar)), ]
ess = subset(ess, posterior_zbar > -1)
ess = transform(ess, rank=ave(posterior_zbar, FUN=function(x) order(x,decreasing=FALSE)))
if (count == 1) {
zbar_df = ess[,c("rank","posterior_zbar")]
print(head(zbar_df))
} else {
zbar_df[, z] = as.vector(ess$posterior_zbar)
}
}
colnames(zbar_df) = c("rank","one","two","four","eight","sixteen","thirtytwo")
ggplot(data=zbar_df) +
geom_point(stat="identity", colour="yellow", size=2, aes(x=rank, y=one)) +
geom_point(stat="identity", colour="orange", size=2, aes(x=rank, y=two)) +
geom_point(stat="identity", colour="red", size=2, aes(x=rank, y=four)) +
geom_point(stat="identity", colour="green", size=2, aes(x=rank, y=eight)) +
geom_point(stat="identity", colour="blue", size=2, aes(x=rank, y=sixteen)) +
geom_point(stat="identity", colour="black", size=2, aes(x=rank, y=thirtytwo)) +
geom_hline(color="grey", yintercept=0.0371) +
geom_hline(color="grey", yintercept=0.9902) +
theme_bw()
zbars_5448 = c("data/essentiality/mh_ess/5448/t1v0_essentiality_m1.csv", "data/essentiality/mh_ess/5448/t1v0_essentiality_m2.csv", "data/essentiality/mh_ess/5448/t1v0_essentiality_m4.csv", "data/essentiality/mh_ess/5448/t1v0_essentiality_m8.csv", "data/essentiality/mh_ess/5448/t1v0_essentiality_m16.csv","data/essentiality/mh_ess/5448/t1v0_essentiality_m32.csv")
zbar_df = data.frame()
count = 0
for (z in zbars_5448) {
count = count + 1
ess = read.csv(file=z, comment.char="#", sep="\t", header=FALSE)
colnames(ess) = c("gene","orf_hits","orf_tas","max_run","max_run_span","posterior_zbar","call")
print(summary(ess$posterior_zbar))
ess = ess[with(ess, order(posterior_zbar)), ]
ess = subset(ess, posterior_zbar > -1)
ess = transform(ess, rank=ave(posterior_zbar, FUN=function(x) order(x,decreasing=FALSE)))
if (count == 1) {
zbar_df = ess[,c("rank","posterior_zbar")]
print(head(zbar_df))
} else {
zbar_df[, z] = as.vector(ess$posterior_zbar)
}
}
colnames(zbar_df) = c("rank","one","two","four","eight","sixteen", "thirtytwo")
ggplot(data=zbar_df) +
geom_point(stat="identity", colour="yellow", size=2, aes(x=rank, y=one)) +
geom_point(stat="identity", colour="orange", size=2, aes(x=rank, y=two)) +
geom_point(stat="identity", colour="red", size=2, aes(x=rank, y=four)) +
geom_point(stat="identity", colour="green", size=2, aes(x=rank, y=eight)) +
geom_point(stat="identity", colour="blue", size=2, aes(x=rank, y=sixteen)) +
geom_point(stat="identity", colour="black", size=2, aes(x=rank, y=thirtytwo)) +
geom_hline(color="grey", yintercept=0.0371) +
geom_hline(color="grey", yintercept=0.9902) +
theme_bw()
zbars_5448 = c("data/essentiality/mh_ess/5448/t2v0_essentiality_m2.csv", "data/essentiality/mh_ess/5448/t2v0_essentiality_m4.csv", "data/essentiality/mh_ess/5448/t2v0_essentiality_m8.csv", "data/essentiality/mh_ess/5448/t2v0_essentiality_m16.csv")
zbar_df = data.frame()
count = 0
for (z in zbars_5448) {
count = count + 1
ess = read.csv(file=z, comment.char="#", sep="\t", header=FALSE)
colnames(ess) = c("gene","orf_hits","orf_tas","max_run","max_run_span","posterior_zbar","call")
print(summary(ess$posterior_zbar))
ess = ess[with(ess, order(posterior_zbar)), ]
ess = subset(ess, posterior_zbar > -1)
ess = transform(ess, rank=ave(posterior_zbar, FUN=function(x) order(x,decreasing=FALSE)))
if (count == 1) {
zbar_df = ess[,c("rank","posterior_zbar")]
print(head(zbar_df))
} else {
zbar_df[, z] = as.vector(ess$posterior_zbar)
}
}
colnames(zbar_df) = c("rank","two","four","eight","sixteen")
ggplot(data=zbar_df) +
geom_point(stat="identity", colour="red", size=2, aes(x=rank, y=two)) +
geom_point(stat="identity", colour="blue", size=2, aes(x=rank, y=four)) +
geom_point(stat="identity", colour="black", size=2, aes(x=rank, y=eight)) +
geom_point(stat="identity", colour="green", size=2, aes(x=rank, y=sixteen)) +
geom_hline(color="grey", yintercept=0.0371) +
geom_hline(color="grey", yintercept=0.9902) +
theme_bw()
zbars_5448 = c("data/essentiality/mh_ess/5448/t3v0_essentiality_m2.csv", "data/essentiality/mh_ess/5448/t3v0_essentiality_m4.csv", "data/essentiality/mh_ess/5448/t3v0_essentiality_m8.csv", "data/essentiality/mh_ess/5448/t3v0_essentiality_m16.csv")
zbar_df = data.frame()
count = 0
for (z in zbars_5448) {
count = count + 1
ess = read.csv(file=z, comment.char="#", sep="\t", header=FALSE)
colnames(ess) = c("gene","orf_hits","orf_tas","max_run","max_run_span","posterior_zbar","call")
print(summary(ess$posterior_zbar))
ess = ess[with(ess, order(posterior_zbar)), ]
ess = subset(ess, posterior_zbar > -1)
ess = transform(ess, rank=ave(posterior_zbar, FUN=function(x) order(x,decreasing=FALSE)))
if (count == 1) {
zbar_df = ess[,c("rank","posterior_zbar")]
print(head(zbar_df))
} else {
zbar_df[, z] = as.vector(ess$posterior_zbar)
}
}
colnames(zbar_df) = c("rank","two","four","eight","sixteen")
ggplot(data=zbar_df) +
geom_point(stat="identity", colour="red", size=2, aes(x=rank, y=two)) +
geom_point(stat="identity", colour="blue", size=2, aes(x=rank, y=four)) +
geom_point(stat="identity", colour="black", size=2, aes(x=rank, y=eight)) +
geom_point(stat="identity", colour="green", size=2, aes(x=rank, y=sixteen)) +
geom_hline(color="grey", yintercept=0.0371) +
geom_hline(color="grey", yintercept=0.9902) +
theme_bw()
## Repeat for NZ131
zbars_nz131 = c("data/essentiality/mh_ess/nz131/t0v0_essentiality_m1.csv", "data/essentiality/mh_ess/nz131/t0v0_essentiality_m2.csv", "data/essentiality/mh_ess/nz131/t0v0_essentiality_m4.csv", "data/essentiality/mh_ess/nz131/t0v0_essentiality_m8.csv", "data/essentiality/mh_ess/nz131/t0v0_essentiality_m16.csv", "data/essentiality/mh_ess/nz131/t0v0_essentiality_m32.csv")
zbar_df = data.frame()
count = 0
for (z in zbars_nz131) {
count = count + 1
ess = read.csv(file=z, comment.char="#", sep="\t", header=FALSE)
colnames(ess) = c("gene","orf_hits","orf_tas","max_run","max_run_span","posterior_zbar","call")
print(summary(ess$posterior_zbar))
ess = ess[with(ess, order(posterior_zbar)), ]
ess = subset(ess, posterior_zbar > -1)
ess = transform(ess, rank=ave(posterior_zbar, FUN=function(x) order(x,decreasing=FALSE)))
if (count == 1) {
zbar_df = ess[,c("rank","posterior_zbar")]
print(head(zbar_df))
} else {
zbar_df[, z] = as.vector(ess$posterior_zbar)
}
}
colnames(zbar_df) = c("rank","one","two","four","eight","sixteen","thirtytwo")
ggplot(data=zbar_df) +
geom_point(stat="identity", colour="yellow", size=2, aes(x=rank, y=one)) +
geom_point(stat="identity", colour="orange", size=2, aes(x=rank, y=two)) +
geom_point(stat="identity", colour="red", size=2, aes(x=rank, y=four)) +
geom_point(stat="identity", colour="green", size=2, aes(x=rank, y=eight)) +
geom_point(stat="identity", colour="blue", size=2, aes(x=rank, y=sixteen)) +
geom_point(stat="identity", colour="black", size=2, aes(x=rank, y=thirtytwo)) +
geom_hline(color="grey", yintercept=0.0371) +
geom_hline(color="grey", yintercept=0.9902) +
theme_bw()
zbars_nz131 = c("data/essentiality/mh_ess/nz131/t1v0_essentiality_m1.csv", "data/essentiality/mh_ess/nz131/t1v0_essentiality_m2.csv", "data/essentiality/mh_ess/nz131/t1v0_essentiality_m4.csv", "data/essentiality/mh_ess/nz131/t1v0_essentiality_m8.csv", "data/essentiality/mh_ess/nz131/t1v0_essentiality_m16.csv", "data/essentiality/mh_ess/nz131/t1v0_essentiality_m32.csv")
zbar_df = data.frame()
count = 0
for (z in zbars_nz131) {
count = count + 1
ess = read.csv(file=z, comment.char="#", sep="\t", header=FALSE)
colnames(ess) = c("gene","orf_hits","orf_tas","max_run","max_run_span","posterior_zbar","call")
print(summary(ess$posterior_zbar))
ess = ess[with(ess, order(posterior_zbar)), ]
ess = subset(ess, posterior_zbar > -1)
ess = transform(ess, rank=ave(posterior_zbar, FUN=function(x) order(x,decreasing=FALSE)))
if (count == 1) {
zbar_df = ess[,c("rank","posterior_zbar")]
print(head(zbar_df))
} else {
zbar_df[, z] = as.vector(ess$posterior_zbar)
}
}
colnames(zbar_df) = c("rank","one","two","four","eight","sixteen","thirtytwo")
ggplot(data=zbar_df) +
geom_point(stat="identity", colour="yellow", size=2, aes(x=rank, y=one)) +
geom_point(stat="identity", colour="orange", size=2, aes(x=rank, y=two)) +
geom_point(stat="identity", colour="red", size=2, aes(x=rank, y=four)) +
geom_point(stat="identity", colour="green", size=2, aes(x=rank, y=eight)) +
geom_point(stat="identity", colour="blue", size=2, aes(x=rank, y=sixteen)) +
geom_point(stat="identity", colour="black", size=2, aes(x=rank, y=thirtytwo)) +
geom_hline(color="grey", yintercept=0.0371) +
geom_hline(color="grey", yintercept=0.9902) +
theme_bw()
annotations_nz = import.gff3("reference/genbank/mgas_nz131.gff", asRangedData=FALSE)
annotation_info_nz = as.data.frame(annotations_nz)
rownames(annotation_info_nz) = make.names(annotations_nz$locus_tag, unique=TRUE)
genes_nz = annotation_info_nz[annotation_info_nz$type=="gene",]
gene_annotations_nz = subset(genes_nz, select = c("start", "end", "width", "strand", "gene", "locus_tag"))
short_annotations_nz = gene_annotations_nz[,c("gene", "locus_tag")]
write_xls(gene_annotations_nz, "annotations", rowname="ID", file="excel/nz131_data_v0.xls")
microbes_nz = read.csv(file="reference/microbesonline/nz131_annotations.tab.gz", header=1, sep="\t")
microbes_go_nz = microbes_nz[,c("sysName","GO")]
go_entries_nz = strsplit(as.character(microbes_go_nz$GO), split=",", perl=TRUE)
microbes_go_oneperrow_nz = data.frame(name = rep(microbes_go_nz$sysName, sapply(go_entries_nz, length)), GO = unlist(go_entries_nz))
microbes_go_nz = microbes_go_oneperrow_nz
rm(microbes_go_oneperrow_nz)
rm(go_entries_nz)
## These are used for gene ontology stuff...
microbes_lengths_nz = microbes_nz[,c("sysName", "start","stop")]
microbes_lengths_nz$length = abs(microbes_nz$start - microbes_nz$stop)
microbes_lengths_nz = microbes_lengths_nz[,c("sysName","length")]
tooltip_data_nz = annotation_info_nz
## Error in eval(expr, envir, enclos): object 'annotation_info_nz' not found
tooltip_data_nz = tooltip_data_nz[,c("gene", "locus_tag")]
## Error in eval(expr, envir, enclos): object 'tooltip_data_nz' not found
tooltip_data_nz$tooltip = paste(tooltip_data_nz$gene, tooltip_data_nz$locus_tag, sep=": ")
## Error in eval(quote(list(...)), env): object 'tooltip_data_nz' not found
tooltip_data_nz$tooltip = gsub("\\+", " ", tooltip_data_nz$tooltip)
## Error in gsub("\\+", " ", tooltip_data_nz$tooltip): object 'tooltip_data_nz' not found
head(tooltip_data_nz)
## Error in head(tooltip_data_nz): object 'tooltip_data_nz' not found
## The all_samples.csv controls the experimental settings
## all the following lines manipulate the information therein
## in order to set up media types, replicates, etc
sample_definitions_nz = read.csv(file="all_samples_nz131_v0.csv", sep=",")
## If I have summary lines in the csv, they will not start with 'HPGL' and so should be dropped.
sample_definitions_nz = sample_definitions_nz[grepl('^HPGL', sample_definitions_nz$Sample.ID, perl=TRUE),]
## Pre set the color scheme
sample_definitions_nz$colors = library_colors
## This long statement just writes out a computer path name containing the count files to read by sample name.
sample_definitions_nz$counts = paste("data/count_tables/05v0M1l20gen_", sample_definitions_nz$Strain, "_", sample_definitions_nz$Time, "_genome.count.gz", sep="")
sample_definitions_nz = as.data.frame(sample_definitions_nz)
rownames(sample_definitions_nz) = sample_definitions_nz$Sample.ID
## my_read_files does just that, it reads the count tables and makes a large raw count table from them.
all_count_tables_nz = my_read_files(as.character(sample_definitions_nz$Sample.ID), as.character(sample_definitions_nz$counts))
## make it into a matrix for use as an expressionset
all_count_matrix_nz = as.matrix(all_count_tables_nz)
gene_info_nz = all_count_matrix_nz[rownames(all_count_matrix_nz) %in% annotations_nz$locus_tag,]
## Error in rownames(all_count_matrix_nz) %in% annotations_nz$locus_tag: object 'annotations_nz' not found
all_count_matrix_nz = all_count_matrix_nz[rownames(all_count_matrix_nz) %in% annotations_nz$locus_tag,]
## Error in rownames(all_count_matrix_nz) %in% annotations_nz$locus_tag: object 'annotations_nz' not found
metadata_nz = new("AnnotatedDataFrame", data.frame(sample=sample_definitions_nz$Sample.ID,
condition=sample_definitions_nz$Time,
batch=sample_definitions_nz$Library,
strain=sample_definitions_nz$Strain,
color=as.character(sample_definitions_nz$colors),
counts=sample_definitions_nz$counts))
## Now generate the expressionset object
sampleNames(metadata_nz) = colnames(all_count_matrix_nz)
feature_data_nz = new("AnnotatedDataFrame", as.data.frame(gene_info_nz))
## Error in value[[3L]](cond): object 'gene_info_nz' not found
## AnnotatedDataFrame 'initialize' could not update varMetadata:
## perhaps pData and varMetadata are inconsistent?
featureNames(feature_data_nz) = rownames(all_count_matrix_nz)
## Error in featureNames(feature_data_nz) = rownames(all_count_matrix_nz): object 'feature_data_nz' not found
experiment_nz = new("ExpressionSet", exprs=all_count_matrix_nz,
phenoData=metadata_nz, featureData=feature_data_nz)
## Error in is(object, expected): object 'feature_data_nz' not found
## print some information to see that it worked
print(experiment_nz)
## Error in print(experiment_nz): object 'experiment_nz' not found
summary(exprs(experiment_nz))
## Error in exprs(experiment_nz): object 'experiment_nz' not found
head(fData(experiment_nz))
## Error in fData(experiment_nz): object 'experiment_nz' not found
head(pData(experiment_nz))
## Error in pData(experiment_nz): object 'experiment_nz' not found
dim(fData(experiment_nz))
## Error in fData(experiment_nz): object 'experiment_nz' not found
raw_data_nz = exprs(experiment_nz)
## Error in exprs(experiment_nz): object 'experiment_nz' not found
write_xls(raw_data_nz, "raw_data", file="excel/nz131_data_v0.xls")
## Error in writeWorksheet(xls, data, sheet = sheet): object 'raw_data_nz' not found
all_expt_nz = expt_subset(experiment_nz, "")
head(exprs(all_expt_nz$expressionset))
all_qrpkm_nz = my_norm(expt=all_expt_nz, norm="quant", filter="log2", filter_low=FALSE, out_type="rpkm", annotations=annotation_info_nz)$counts
write_xls(all_qrpkm_nz, "norm_data", file="excel/nz131_data_v0.xls", rowname="row.names")
all_norm_expt_nz = all_expt_nz
## I did that so that I can replace the expressionset with the
## normalized data more easily...
normalized_expressionset_nz = all_norm_expt_nz$expressionset
exprs(normalized_expressionset_nz) = all_qrpkm_nz
all_norm_expt_nz$expressionset = normalized_expressionset_nz
all_design_nz = data.frame(all_norm_expt_nz$design)
all_design_nz$longnames = paste(all_design_nz$strain, all_design_nz$batch, all_design_nz$condition, sep="_")
my_boxplot(expt=all_norm_expt_nz, names=all_design_nz$longnames)
my_disheat(expt=all_norm_expt_nz, names=all_design_nz$longnames)
my_corheat(expt=all_norm_expt_nz, names=all_design_nz$longnames)
correlations_nz = my_cor(exprs(all_expt_nz$expressionset), method="spearman")
write_xls(correlations_nz, "sp_cor", file="excel/nz131_data_v0.xls")
## Compare library t0 t1
comp_nz = exprs(expt_subset(all_norm_expt_nz, "condition=='t0'|condition=='t1'")$expressionset)
pdf(file="figures/comparisons/nz131/t0_vs_t1_scatter_v0.pdf")
sc = my_linear_scatter(comp_nz, cormethod="spearman")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
## Compare library t0 t2
comp_nz = exprs(expt_subset(all_norm_expt_nz, "condition=='t0'|condition=='t2'")$expressionset)
pdf(file="figures/comparisons/nz131/t0_vs_t2_scatter_v0.pdf")
sc = my_linear_scatter(comp_nz, cormethod="spearman")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
## Compare library t0 t3
comp_nz = exprs(expt_subset(all_norm_expt_nz, "condition=='t0'|condition=='t3'")$expressionset)
pdf(file="figures/comparisons/nz131/t0_vs_t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_nz, cormethod="spearman")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
## Compare library t1 t2
comp_nz = exprs(expt_subset(all_norm_expt_nz, "condition=='t1'|condition=='t2'")$expressionset)
pdf(file="figures/comparisons/nz131/t1_vs_t2_scatter_v0.pdf")
sc = my_linear_scatter(comp_nz, cormethod="spearman")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
## Compare library t1 t3
comp_nz = exprs(expt_subset(all_norm_expt_nz, "condition=='t1'|condition=='t3'")$expressionset)
pdf(file="figures/comparisons/nz131/t1_vs_t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_nz, cormethod="spearman")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
## Compare library t2 t3
comp_nz = exprs(expt_subset(all_norm_expt_nz, "condition=='t2'|condition=='t3'")$expressionset)
pdf(file="figures/comparisons/nz131/t2_vs_t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_nz, cormethod="spearman")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
annotations_ala = import.gff3("reference/genbank/mgas_alabama.gff", asRangedData=FALSE)
annotation_info_ala = as.data.frame(annotations_ala)
rownames(annotation_info_ala) = make.names(annotations_ala$locus_tag, unique=TRUE)
genes_ala = annotation_info_ala[annotation_info_ala$type=="gene",]
gene_annotations_ala = subset(genes_ala, select = c("start", "end", "width", "strand", "gene", "locus_tag"))
short_annotations_ala = gene_annotations_ala[,c("gene", "locus_tag")]
write_xls(gene_annotations_ala, "annotations", rowname="ID", file="excel/alabama_data_v0.xls")
##microbes_ala = read.csv(file="reference/microbesonline/alabama_annotations.tab.gz", header=1, sep="\t")
##microbes_go_ala = microbes_ala[,c("sysName","GO")]
##go_entries_ala = strsplit(as.character(microbes_go_ala$GO), split=",", perl=TRUE)
##microbes_go_oneperrow_ala = data.frame(name = rep(microbes_go_ala$sysName, sapply(go_entries_ala, length)), GO = unlist(go_entries_ala))
##microbes_go_ala = microbes_go_oneperrow_ala
##rm(microbes_go_oneperrow_ala)
##rm(go_entries_ala)
## These are used for gene ontology stuff...
##microbes_lengths_ala = microbes_ala[,c("sysName", "start","stop")]
##microbes_lengths_ala$length = abs(microbes_ala$start - microbes_ala$stop)
##microbes_lengths_ala = microbes_lengths_ala[,c("sysName","length")]
tooltip_data_ala = annotation_info_ala
tooltip_data_ala = tooltip_data_ala[,c("gene", "locus_tag")]
tooltip_data_ala$tooltip = paste(tooltip_data_ala$gene, tooltip_data_ala$locus_tag, sep=": ")
tooltip_data_ala$tooltip = gsub("\\+", " ", tooltip_data_ala$tooltip)
head(tooltip_data_ala)
## The all_samples.csv controls the experimental settings
## all the following lines manipulate the information therein
## in order to set up media types, replicates, etc
sample_definitions_ala = read.csv(file="all_samples_alabama_v0.csv", sep=",")
## If I have summary lines in the csv, they will not start with 'HPGL' and so should be dropped.
sample_definitions_ala = sample_definitions_ala[grepl('^HPGL', sample_definitions_ala$Sample.ID, perl=TRUE),]
## Pre set the color scheme
sample_definitions_ala$colors = library_colors
## This long statement just writes out a computer path name containing the count files to read by sample name.
sample_definitions_ala$counts = paste("data/count_tables/05v0M1l20gen_", "alab49", "_", sample_definitions_ala$Time, "_genome.count.gz", sep="")
sample_definitions_ala = as.data.frame(sample_definitions_ala)
rownames(sample_definitions_ala) = sample_definitions_ala$Sample.ID
## my_read_files does just that, it reads the count tables and makes a large raw count table from them.
all_count_tables_ala = my_read_files(as.character(sample_definitions_ala$Sample.ID), as.character(sample_definitions_ala$counts))
## make it into a matrix for use as an expressionset
all_count_matrix_ala = as.matrix(all_count_tables_ala)
gene_info_ala = all_count_matrix_ala[rownames(all_count_matrix_ala) %in% annotations_ala$locus_tag,]
all_count_matrix_ala = all_count_matrix_ala[rownames(all_count_matrix_ala) %in% annotations_ala$locus_tag,]
metadata_ala = new("AnnotatedDataFrame", data.frame(sample=sample_definitions_ala$Sample.ID,
condition=sample_definitions_ala$Time,
batch=sample_definitions_ala$Library,
strain=sample_definitions_ala$Strain,
color=as.character(sample_definitions_ala$colors),
counts=sample_definitions_ala$counts))
## Now generate the expressionset object
sampleNames(metadata_ala) = colnames(all_count_matrix_ala)
feature_data_ala = new("AnnotatedDataFrame", as.data.frame(gene_info_ala))
featureNames(feature_data_ala) = rownames(all_count_matrix_ala)
experiment_ala = new("ExpressionSet", exprs=all_count_matrix_ala,
phenoData=metadata_ala, featureData=feature_data_ala)
## print some information to see that it worked
print(experiment_ala)
summary(exprs(experiment_ala))
head(fData(experiment_ala))
head(pData(experiment_ala))
dim(fData(experiment_ala))
raw_data_ala = exprs(experiment_ala)
write_xls(raw_data_ala, "raw_data", file="excel/alabama_data_v0.xls")
all_expt_ala = expt_subset(experiment_ala, "")
head(exprs(all_expt_ala$expressionset))
all_qrpkm_ala = my_norm(expt=all_expt_ala, norm="quant", filter="log2", filter_low=FALSE, out_type="rpkm", annotations=annotation_info_ala)$counts
write_xls(all_qrpkm_ala, "norm_data", file="excel/alabama_data_v0.xls", rowname="row.names")
all_norm_expt_ala = all_expt_ala
## I did that so that I can replace the expressionset with the
## normalized data more easily...
normalized_expressionset_ala = all_norm_expt_ala$expressionset
exprs(normalized_expressionset_ala) = all_qrpkm_ala
all_norm_expt_ala$expressionset = normalized_expressionset_ala
all_design_ala = data.frame(all_norm_expt_ala$design)
all_design_ala$longnames = paste(all_design_ala$strain, all_design_ala$batch, all_design_ala$condition, sep="_")
my_boxplot(expt=all_norm_expt_ala, names=all_design_ala$longnames)
my_disheat(expt=all_norm_expt_ala, names=all_design_ala$longnames)
my_corheat(expt=all_norm_expt_ala, names=all_design_ala$longnames)
correlations_ala = my_cor(exprs(all_expt_ala$expressionset), method="spearman")
write_xls(correlations_ala, "sp_cor", file="excel/alabama_data_v0.xls")
## Compare library t0 t1
comp_ala = exprs(expt_subset(all_norm_expt_ala, "condition=='t0'|condition=='t1'")$expressionset)
pdf(file="figures/comparisons/alabama/t0_vs_t1_scatter_v0.pdf")
sc = my_linear_scatter(comp_ala, cormethod="spearman")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
## Compare library t0 t2
comp_ala = exprs(expt_subset(all_norm_expt_ala, "condition=='t0'|condition=='t2'")$expressionset)
pdf(file="figures/comparisons/alabama/t0_vs_t2_scatter_v0.pdf")
sc = my_linear_scatter(comp_ala, cormethod="spearman")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
## Compare library t0 t3
comp_ala = exprs(expt_subset(all_norm_expt_ala, "condition=='t0'|condition=='t3'")$expressionset)
pdf(file="figures/comparisons/alabama/t0_vs_t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_ala, cormethod="spearman")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
## Compare library t1 t2
comp_ala = exprs(expt_subset(all_norm_expt_ala, "condition=='t1'|condition=='t2'")$expressionset)
pdf(file="figures/comparisons/alabama/t1_vs_t2_scatter_v0.pdf")
sc = my_linear_scatter(comp_ala, cormethod="spearman")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
## Compare library t1 t3
comp_ala = exprs(expt_subset(all_norm_expt_ala, "condition=='t1'|condition=='t3'")$expressionset)
pdf(file="figures/comparisons/alabama/t1_vs_t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_ala, cormethod="spearman")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
## Compare library t2 t3
comp_ala = exprs(expt_subset(all_norm_expt_ala, "condition=='t2'|condition=='t3'")$expressionset)
pdf(file="figures/comparisons/alabama/t2_vs_t3_scatter_v0.pdf")
sc = my_linear_scatter(comp_ala, cormethod="spearman")
sc$scatter
sc$x_histogram
sc$y_histogram
sc$both_histogram
dev.off()
sc$correlation
sc$lm_model
sc$lm_summary
all_tas = read.table(file="data/essentiality/mh_ess/5448/tas_t0v0.txt", header=TRUE)
all_tas = all_tas[,c("Start", "reads", "TAs")]
head(all_tas)
by_thousands = group_by(all_tas, floor(Start / 1000))
setnames(by_thousands, "floor(Start/1000)", "thousands")
## The last row is broken
by_thousands = by_thousands[-nrow(by_thousands),]
by_thousands = aggregate(. ~ thousands, data=by_thousands, FUN=sum)
by_thousands = as.data.frame(by_thousands)
by_thousands = by_thousands[,c("thousands","reads", "TAs")]
by_thousands$log_reads = log2(by_thousands$reads + 1)
by_thousands$norm = by_thousands$reads / by_thousands$TAs
by_thousands$log_norm = log2(by_thousands$norm + 1)
scatter_df = by_thousands[,c("thousands","log_reads")]
thousands_plot = my_scatter(scatter_df)
thousands_plot + stat_smooth(method="loess", size=1.5)
scatter_df = by_thousands[,c("thousands","log_norm")]
thousands_plot = my_scatter(scatter_df)
thousands_plot + stat_smooth(method="loess", size=1.5)
make_circos = function(csv, annotations=gene_annotations_5448, offset=0, chr="chr1") {
## Testing parameters
##write.table(make_circos("data/essentiality/mh_ess/nz131/time0_essentiality_m2.csv", gene_annotations_nz), file="circos/data/essentiality_nz131_t0.txt", quote=FALSE, row.names=FALSE, col.names=FALSE)
#csv = "data/essentiality/mh_ess/nz131/t0v0_essentiality_m2.csv"
#annotations = gene_annotations_nz
## End testing parameters
data = read.csv(file=csv, sep="\t", comment.char="#", header=FALSE)
colnames(data) = c("gene","orf_hits","orf_tas","max_run","max_run_span","posterior_zbar","call")
data$gene = gsub("^cds_", "", data$gene, perl=TRUE)
rownames(data) = make.names(data$gene, unique=TRUE)
data = merge(data, annotations, by="row.names")
data[, chr ] = chr
data = data[,c(chr, "start", "end", "posterior_zbar")]
data$posterior_zbar = paste("value=", data$posterior_zbar, sep="")
data$start = data$start + offset
data$end = data$end + offset
return(data)
}
write.table(make_circos("data/essentiality/mh_ess/5448/t0v0_essentiality_m4.csv", gene_annotations_5448), file="circos/data/essentiality_5448_t1.txt", quote=FALSE, row.names=FALSE, col.names=FALSE)
write.table(make_circos("data/essentiality/mh_ess/5448/t1v0_essentiality_m4.csv", gene_annotations_5448), file="circos/data/essentiality_5448_t1.txt", quote=FALSE, row.names=FALSE, col.names=FALSE)
write.table(make_circos("data/essentiality/mh_ess/5448/t2v0_essentiality_m4.csv", gene_annotations_5448), file="circos/data/essentiality_5448_t2.txt", quote=FALSE, row.names=FALSE, col.names=FALSE)
write.table(make_circos("data/essentiality/mh_ess/5448/t3v0_essentiality_m4.csv", gene_annotations_5448), file="circos/data/essentiality_5448_t3.txt", quote=FALSE, row.names=FALSE, col.names=FALSE)
write.table(make_circos("data/essentiality/mh_ess/nz131/t0v0_essentiality_m2.csv", gene_annotations_nz), file="circos/data/essentiality_nz131_t0.txt", quote=FALSE, row.names=FALSE, col.names=FALSE)
write.table(make_circos("data/essentiality/mh_ess/nz131/t1v0_essentiality_m2.csv", gene_annotations_nz), file="circos/data/essentiality_nz131_t1.txt", quote=FALSE, row.names=FALSE, col.names=FALSE)
write.table(make_circos("data/essentiality/mh_ess/nz131/t2v0_essentiality_m2.csv", gene_annotations_nz), file="circos/data/essentiality_nz131_t2.txt", quote=FALSE, row.names=FALSE, col.names=FALSE)
write.table(make_circos("data/essentiality/mh_ess/nz131/t3v0_essentiality_m2.csv", gene_annotations_nz), file="circos/data/essentiality_nz131_t3.txt", quote=FALSE, row.names=FALSE, col.names=FALSE)
gas_plus = read.table(file="circos/data/gas_plus.txt")
nz131_plus = read.table(file="circos/data/nz131_plus.txt")
gas_minus = read.table(file="circos/data/gas_minus.txt")
nz131_minus = read.table(file="circos/data/nz131_minus.txt")
nz131_plus_offset = nz131_plus
colnames(nz131_plus_offset) = c("chr","start","end","value")
nz131_plus_offset$chr = "chr2"
nz131_minus_offset = nz131_minus
colnames(nz131_minus_offset) = c("chr","start","end","value")
nz131_minus_offset$chr = "chr2"
write.table(nz131_plus_offset, file="circos/data/nz131_offset_plus.txt", quote=FALSE, row.names=FALSE, col.names=FALSE)
write.table(nz131_minus_offset, file="circos/data/nz131_offset_minus.txt", quote=FALSE, row.names=FALSE, col.names=FALSE)
essential_cross = read.csv(file="reference/core_genome/reduced.csv")
head(gene_annotations_5448)
head(gene_annotations_nz)
dim(essential_cross)
essential_cross$Spy_5448 = gsub("_Spy_", "_Spy", essential_cross$Spy_5448)
essential_cross = merge(essential_cross, gene_annotations_5448, by.x="Spy_5448", by.y="row.names", all.x=TRUE)
essential_cross = essential_cross[,c("Spy_5448","State_5448","Spy_NZ131","State_NZ131","start","end")]
setnames(essential_cross, "start", "5448_start")
setnames(essential_cross, "end", "5448_end")
essential_cross = merge(essential_cross, gene_annotations_nz, by.x="Spy_NZ131", by.y="row.names", all.x=TRUE)
essential_cross = essential_cross[,c("Spy_5448","State_5448","Spy_NZ131","State_NZ131","5448_start","5448_end","start","end")]
setnames(essential_cross, "start", "nz131_start")
setnames(essential_cross, "end", "nz131_end")
essential_cross = essential_cross[complete.cases(essential_cross),]
print_arc = function(x) {
cat(x[5], " chr1 ", x[1], " ", x[2], "\n", x[5], " chr2 ", x[3], " ", x[4], "\n\n", file="circos/data/5448_nz131_arcs.txt", append=TRUE, sep="")
}
apply(essential_cross[,c("5448_start","5448_end","nz131_start","nz131_end","Spy_5448")], 1, print_arc)
if (!isTRUE(get0("skip_load"))) {
pander::pander(sessionInfo())
message(paste0("This is hpgltools commit: ", get_git_commit()))
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
tmp <- sm(saveme(filename=this_save))
}
## If you wish to reproduce this exact build of hpgltools, invoke the following:
## > git clone http://github.com/abelew/hpgltools.git
## > git reset 5d8c266e48bb9f73cdac8300e5c7c9f5baf003dc
## R> packrat::restore()
## This is hpgltools commit: Wed Mar 21 15:55:32 2018 -0400: 5d8c266e48bb9f73cdac8300e5c7c9f5baf003dc
## Saving to tnseq_v0.rmd-v201605.rda.xz