Upon completion of the following tasks, I modified the sample sheet for the panamensis samples to include 5 new columns. They are named according to the clustering colors in the plot at the end of this file and named as follows:
One of the comments in our most recent meeting was to try making use of the bioconductor vcf/bcf libraries rather than my abbreviated ‘percentage’ metric. Barring that, at least include a filter on the extant set of percentages to include only those >90%.
I will attempt some of these ideas at the end of this document under the heading ‘variantAnnotation’, as that is the library used.
In an attempt to precisely quantify the genotypes of all the parasite samples in this work, the following script was applied to all samples (now included in CYOA):
samtools sort -l 9 -@ 4 ${query} outputs/${new} 2>outputs/samtools_sort.out 1>&2
if [ \!-r "${genome}.fai" ]; then
samtools faidx ${genome}
fi
samtools mpileup -u -f ${genome} outputs/${new}.bam 2>outputs/${new}_pileup.err |\
bcftools view -bvcg - 1>outputs/${name}_raw.bcf 2>outputs/${new}_bcf.err &&\
bcftools view outputs/${name}_raw.bcf |\
vcfutils_hacked.pl varFilter -d10 1>outputs/${name}_filtered.vcf 2>outputs/${new}_filter.err
It results in a base call format file which includes a bunch of interesting columns including lines which look like:
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT outputs/hpgl0242_accepted_paired.bam LPAL13_SCAF000003 2964 . T C 222 . DP=858;VDB=5.874922e-02;RPB=-1.035971e-01;AF1=1;AC1=2;DP4=4,1,419,393;MQ=50;FQ=-282;PV4=0.38,0.42,1,1
The first 7 columns are pretty self-explanatory, describing the location and type of difference from the reference. The last is a bit more interesting, as it contains the following information (I will use the 2nd example above to describe each field):
In any event, I followed the above script with a short parser that extracts the position information + SNP (non-indels) and lays the %quality reads next to it, this for the example above:
LPAL13_SCAF000003_4872_T_A 0.712952158693115519 ...
In later iterations of this script, I also include the counts of each snp with “good” reads as >90% of the total. This leads to results which look like:
LPAL13_SCAF000003_4872_T_A 10 LPAL13_SCAF000003_5321_C_G 90 ...
This egregiously boils down the data to a simplistic: “Scaffold 3 position 4872 has a T to A 71.3% of the time.” This simplification was repeated for all samples and the results are processed below:
Note that the merges are likely to take a really long time at least for the first set of heterologous samples because there will be many mismatches in the merge. As a result I save the result so that I may reload it.
library(hpgltools)
library(data.table)
## data.table 1.10.0
## The fastest way to learn (by data.table authors): https://www.datacamp.com/courses/data-analysis-the-data-table-way
## Documentation: ?data.table, example(data.table) and browseVignettes("data.table")
## Release notes, videos and slides: http://r-datatable.com
snp_dt <- NULL
snp_dir <- "preprocessing/outputs/"
snp_dt <- as.data.table(read.table(paste0(snp_dir, "hpgl0631", "_parsed_count.txt")))
rownames(snp_dt) <- snp_dt$V1
snp_dt <- snp_dt[-1]
colnames(snp_dt) <- c("rownames","hpgl0631")
add_file <- function(id) {
tmp_dt <- as.data.table(read.table(paste0(snp_dir, id, "_parsed_count.txt")))
rownames(tmp_dt) <- tmp_dt$V1
tmp_dt <- tmp_dt[-1]
colnames(tmp_dt) <- c("rownames", id)
snp_dt <- merge(snp_dt, tmp_dt, by="rownames", all.x=TRUE, all.y=TRUE)
rownames(snp_dt) <- snp_dt$Row.names
snp_dt <- snp_dt[-1]
return(snp_dt)
}
snp_dt <- add_file("hpgl0242")
snp_dt <- add_file("hpgl0243")
snp_dt <- add_file("hpgl0244")
snp_dt <- add_file("hpgl0245")
snp_dt <- add_file("hpgl0246")
snp_dt <- add_file("hpgl0247")
snp_dt <- add_file("hpgl0248")
snp_dt <- add_file("hpgl0316")
snp_dt <- add_file("hpgl0318")
snp_dt <- add_file("hpgl0320")
snp_dt <- add_file("hpgl0322")
## snp_dt <- add_file("hpgl0631") ## This was the starting table.
snp_dt <- add_file("hpgl0632")
snp_dt <- add_file("hpgl0633")
snp_dt <- add_file("hpgl0634")
snp_dt <- add_file("hpgl0635")
snp_dt <- add_file("hpgl0636")
snp_dt <- add_file("hpgl0638")
snp_dt <- add_file("hpgl0639")
snp_dt <- add_file("hpgl0641")
snp_dt <- add_file("hpgl0643")
snp_dt <- add_file("hpgl0651")
snp_dt <- add_file("hpgl0652")
snp_dt <- add_file("hpgl0653")
snp_dt <- add_file("hpgl0654")
snp_dt <- add_file("hpgl0655")
snp_dt <- add_file("hpgl0656")
snp_dt <- add_file("hpgl0658")
snp_dt <- add_file("hpgl0659")
snp_dt <- add_file("hpgl0660")
snp_dt <- add_file("hpgl0661")
snp_dt <- add_file("hpgl0662")
snp_dt <- add_file("hpgl0663")
snp_dt[is.na(snp_dt)] <- 0
snp_norm <- hpgl_norm(snp_dt, transform="log2", convert="cpm", norm="quant", filter=TRUE)
## Step 1: performing count filter with option: cbcb
## Removing 284964 low-count genes (154735 remaining).
## Step 2: normalizing the data with quant.
## The experimental design is null. Some normalizations will therefore fail.
## If you receive an error about an object with no dimensions, that is likely why.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 1019227 values equal to 0, adding 1 to the matrix.
## Step : not doing batch correction.
summary(snp_norm)
## Length Class Mode
## actions 5 -none- list
## intermediate_counts 7 -none- list
## count_table 5106255 -none- numeric
## libsize 33 -none- numeric
snp_norm <- snp_norm$count_table
testing <- hpgl_cor(snp_norm)
plot_heatmap(testing)
## $map
## $map$rowInd
## [1] 4 16 24 14 6 26 19 30 32 5 1 7 28 22 10 21 12 9 11 20 8 29 23 13 18 17 27 33
## [29] 2 25 15 31 3
##
## $map$colInd
## [1] 4 16 24 14 6 26 19 30 32 5 1 7 28 22 10 21 12 9 11 20 8 29 23 13 18 17 27 33
## [29] 2 25 15 31 3
##
## $map$call
## heatmap.3(x = heatmap_data, scale = "none", trace = "none", margins = margin_list,
## ColSideColors = expt_colors, RowSideColors = row_colors,
## labRow = expt_names, labCol = expt_names, keysize = keysize,
## main = title, linewidth = 0.5)
##
## $map$carpet
## hpgl0244 hpgl0635 hpgl0653 hpgl0633 hpgl0246 hpgl0655 hpgl0639 hpgl0660 hpgl0662
## hpgl0244 1.0000 0.9976 0.9958 0.9964 0.9980 0.9982 0.9982 0.9970 0.9983
## hpgl0635 0.9976 1.0000 0.9942 0.9952 0.9963 0.9994 0.9994 0.9956 0.9996
## hpgl0653 0.9958 0.9942 1.0000 0.9982 0.9986 0.9951 0.9943 0.9986 0.9948
## hpgl0633 0.9964 0.9952 0.9982 1.0000 0.9991 0.9956 0.9953 0.9991 0.9957
## hpgl0246 0.9980 0.9963 0.9986 0.9991 1.0000 0.9970 0.9968 0.9996 0.9971
## hpgl0655 0.9982 0.9994 0.9951 0.9956 0.9970 1.0000 0.9996 0.9962 0.9998
## hpgl0639 0.9982 0.9994 0.9943 0.9953 0.9968 0.9996 1.0000 0.9959 0.9998
## hpgl0660 0.9970 0.9956 0.9986 0.9991 0.9996 0.9962 0.9959 1.0000 0.9963
## hpgl0662 0.9983 0.9996 0.9948 0.9957 0.9971 0.9998 0.9998 0.9963 1.0000
## hpgl0245 0.4857 0.4746 0.4714 0.4735 0.4825 0.4772 0.4791 0.4768 0.4802
## hpgl0631 -0.8630 -0.8628 -0.8623 -0.8634 -0.8688 -0.8650 -0.8652 -0.8659 -0.8668
## hpgl0247 -0.8630 -0.8630 -0.8624 -0.8636 -0.8689 -0.8651 -0.8653 -0.8660 -0.8669
## hpgl0658 -0.8624 -0.8622 -0.8616 -0.8627 -0.8682 -0.8643 -0.8646 -0.8652 -0.8662
## hpgl0651 -0.8594 -0.8592 -0.8586 -0.8598 -0.8653 -0.8613 -0.8617 -0.8623 -0.8633
## hpgl0318 -0.2095 -0.2050 -0.2044 -0.2030 -0.2008 -0.2041 -0.2030 -0.2018 -0.2025
## hpgl0643 -0.2832 -0.2794 -0.2790 -0.2771 -0.2731 -0.2784 -0.2767 -0.2755 -0.2756
## hpgl0322 -0.3457 -0.3461 -0.3458 -0.3450 -0.3368 -0.3434 -0.3424 -0.3415 -0.3400
## hpgl0316 -0.3046 -0.3022 -0.3021 -0.3006 -0.2950 -0.3003 -0.2995 -0.2981 -0.2979
## hpgl0320 -0.3191 -0.3172 -0.3169 -0.3158 -0.3095 -0.3153 -0.3141 -0.3132 -0.3124
## hpgl0641 -0.3259 -0.3242 -0.3241 -0.3228 -0.3163 -0.3223 -0.3211 -0.3201 -0.3193
## hpgl0248 -0.2675 -0.2715 -0.2718 -0.2711 -0.2624 -0.2682 -0.2679 -0.2677 -0.2655
## hpgl0659 -0.2658 -0.2694 -0.2698 -0.2690 -0.2606 -0.2663 -0.2659 -0.2657 -0.2635
## hpgl0652 -0.2634 -0.2672 -0.2672 -0.2666 -0.2583 -0.2639 -0.2637 -0.2633 -0.2613
## hpgl0632 -0.2642 -0.2677 -0.2680 -0.2672 -0.2590 -0.2646 -0.2642 -0.2639 -0.2619
## hpgl0638 -0.3309 -0.3320 -0.3317 -0.3311 -0.3232 -0.3290 -0.3284 -0.3274 -0.3260
## hpgl0636 -0.3198 -0.3200 -0.3196 -0.3188 -0.3120 -0.3173 -0.3167 -0.3154 -0.3145
## hpgl0656 -0.3230 -0.3233 -0.3226 -0.3220 -0.3151 -0.3204 -0.3200 -0.3185 -0.3177
## hpgl0663 -0.3264 -0.3268 -0.3265 -0.3258 -0.3186 -0.3241 -0.3235 -0.3223 -0.3212
## hpgl0242 -0.3163 -0.3186 -0.3178 -0.3174 -0.3091 -0.3153 -0.3150 -0.3136 -0.3120
## hpgl0654 -0.3037 -0.3048 -0.3034 -0.3031 -0.2966 -0.3017 -0.3016 -0.2997 -0.2990
## hpgl0634 -0.3048 -0.3058 -0.3046 -0.3041 -0.2975 -0.3029 -0.3026 -0.3006 -0.2999
## hpgl0661 -0.3094 -0.3109 -0.3099 -0.3094 -0.3024 -0.3078 -0.3075 -0.3058 -0.3048
## hpgl0243 -0.3133 -0.3152 -0.3142 -0.3137 -0.3063 -0.3120 -0.3116 -0.3101 -0.3089
## hpgl0245 hpgl0631 hpgl0247 hpgl0658 hpgl0651 hpgl0318 hpgl0643 hpgl0322 hpgl0316
## hpgl0244 0.48570 -0.86303 -0.86303 -0.86237 -0.85942 -0.20955 -0.28321 -0.3457 -0.30457
## hpgl0635 0.47456 -0.86282 -0.86298 -0.86217 -0.85923 -0.20503 -0.27939 -0.3461 -0.30216
## hpgl0653 0.47142 -0.86230 -0.86241 -0.86162 -0.85860 -0.20444 -0.27896 -0.3458 -0.30205
## hpgl0633 0.47351 -0.86338 -0.86356 -0.86274 -0.85982 -0.20295 -0.27710 -0.3450 -0.30059
## hpgl0246 0.48250 -0.86883 -0.86889 -0.86818 -0.86529 -0.20080 -0.27307 -0.3368 -0.29498
## hpgl0655 0.47721 -0.86498 -0.86508 -0.86432 -0.86133 -0.20415 -0.27842 -0.3434 -0.30032
## hpgl0639 0.47907 -0.86522 -0.86530 -0.86458 -0.86168 -0.20304 -0.27673 -0.3424 -0.29947
## hpgl0660 0.47683 -0.86589 -0.86601 -0.86524 -0.86228 -0.20182 -0.27550 -0.3415 -0.29807
## hpgl0662 0.48016 -0.86684 -0.86695 -0.86619 -0.86327 -0.20252 -0.27558 -0.3400 -0.29785
## hpgl0245 1.00000 -0.62892 -0.62841 -0.62932 -0.62827 -0.07451 0.02617 0.1802 0.08816
## hpgl0631 -0.62892 1.00000 0.99910 0.99938 0.99591 -0.02369 -0.07202 -0.1257 -0.09866
## hpgl0247 -0.62841 0.99910 1.00000 0.99923 0.99599 -0.02337 -0.07233 -0.1257 -0.09858
## hpgl0658 -0.62932 0.99938 0.99923 1.00000 0.99605 -0.02430 -0.07300 -0.1270 -0.09967
## hpgl0651 -0.62827 0.99591 0.99599 0.99605 1.00000 -0.02603 -0.07573 -0.1299 -0.10230
## hpgl0318 -0.07451 -0.02369 -0.02337 -0.02430 -0.02603 1.00000 0.49858 0.3975 0.44292
## hpgl0643 0.02617 -0.07202 -0.07233 -0.07300 -0.07573 0.49858 1.00000 0.7238 0.74851
## hpgl0322 0.18024 -0.12573 -0.12569 -0.12699 -0.12994 0.39750 0.72384 1.0000 0.85079
## hpgl0316 0.08816 -0.09866 -0.09858 -0.09967 -0.10230 0.44292 0.74851 0.8508 1.00000
## hpgl0320 0.10803 -0.10165 -0.10164 -0.10279 -0.10559 0.45609 0.76488 0.9057 0.86079
## hpgl0641 0.12011 -0.10865 -0.10874 -0.10984 -0.11268 0.44813 0.77080 0.9340 0.87729
## hpgl0248 0.32215 -0.16781 -0.16777 -0.16903 -0.17089 0.15925 0.40106 0.6897 0.51637
## hpgl0659 0.31662 -0.16816 -0.16816 -0.16938 -0.17121 0.16162 0.40558 0.6815 0.51887
## hpgl0652 0.31527 -0.16821 -0.16818 -0.16941 -0.17120 0.15934 0.40291 0.6758 0.51780
## hpgl0632 0.31386 -0.16828 -0.16828 -0.16950 -0.17138 0.16410 0.40683 0.6759 0.52143
## hpgl0638 0.20806 -0.12207 -0.12201 -0.12327 -0.12604 0.20089 0.48206 0.8075 0.60841
## hpgl0636 0.18755 -0.12048 -0.12048 -0.12163 -0.12440 0.20321 0.48591 0.7711 0.60705
## hpgl0656 0.18817 -0.12049 -0.12047 -0.12165 -0.12435 0.20573 0.48881 0.7796 0.61275
## hpgl0663 0.19245 -0.12028 -0.12024 -0.12145 -0.12422 0.20533 0.48501 0.7873 0.60928
## hpgl0242 0.26126 -0.14399 -0.14387 -0.14528 -0.14785 0.21754 0.50671 0.8480 0.62786
## hpgl0654 0.23020 -0.13778 -0.13745 -0.13895 -0.14124 0.23415 0.50858 0.7815 0.62362
## hpgl0634 0.23069 -0.13835 -0.13807 -0.13952 -0.14189 0.23518 0.51358 0.7834 0.62617
## hpgl0661 0.24034 -0.14113 -0.14083 -0.14233 -0.14472 0.22941 0.51264 0.8008 0.62626
## hpgl0243 0.25134 -0.14246 -0.14214 -0.14368 -0.14607 0.22972 0.50914 0.8155 0.62659
## hpgl0320 hpgl0641 hpgl0248 hpgl0659 hpgl0652 hpgl0632 hpgl0638 hpgl0636 hpgl0656
## hpgl0244 -0.3191 -0.3259 -0.2675 -0.2658 -0.2634 -0.2642 -0.3309 -0.3198 -0.3230
## hpgl0635 -0.3172 -0.3242 -0.2715 -0.2694 -0.2672 -0.2677 -0.3320 -0.3200 -0.3233
## hpgl0653 -0.3169 -0.3241 -0.2718 -0.2698 -0.2672 -0.2680 -0.3317 -0.3196 -0.3226
## hpgl0633 -0.3158 -0.3228 -0.2711 -0.2690 -0.2666 -0.2672 -0.3311 -0.3188 -0.3220
## hpgl0246 -0.3095 -0.3163 -0.2624 -0.2606 -0.2583 -0.2590 -0.3232 -0.3120 -0.3151
## hpgl0655 -0.3153 -0.3223 -0.2682 -0.2663 -0.2639 -0.2646 -0.3290 -0.3173 -0.3204
## hpgl0639 -0.3141 -0.3211 -0.2679 -0.2659 -0.2637 -0.2642 -0.3284 -0.3167 -0.3200
## hpgl0660 -0.3132 -0.3201 -0.2677 -0.2657 -0.2633 -0.2639 -0.3274 -0.3154 -0.3185
## hpgl0662 -0.3124 -0.3193 -0.2655 -0.2635 -0.2613 -0.2619 -0.3260 -0.3145 -0.3177
## hpgl0245 0.1080 0.1201 0.3222 0.3166 0.3153 0.3139 0.2081 0.1876 0.1882
## hpgl0631 -0.1016 -0.1087 -0.1678 -0.1682 -0.1682 -0.1683 -0.1221 -0.1205 -0.1205
## hpgl0247 -0.1016 -0.1087 -0.1678 -0.1682 -0.1682 -0.1683 -0.1220 -0.1205 -0.1205
## hpgl0658 -0.1028 -0.1098 -0.1690 -0.1694 -0.1694 -0.1695 -0.1233 -0.1216 -0.1217
## hpgl0651 -0.1056 -0.1127 -0.1709 -0.1712 -0.1712 -0.1714 -0.1260 -0.1244 -0.1244
## hpgl0318 0.4561 0.4481 0.1593 0.1616 0.1593 0.1641 0.2009 0.2032 0.2057
## hpgl0643 0.7649 0.7708 0.4011 0.4056 0.4029 0.4068 0.4821 0.4859 0.4888
## hpgl0322 0.9057 0.9340 0.6897 0.6815 0.6758 0.6759 0.8075 0.7711 0.7796
## hpgl0316 0.8608 0.8773 0.5164 0.5189 0.5178 0.5214 0.6084 0.6071 0.6128
## hpgl0320 1.0000 0.9123 0.5565 0.5558 0.5541 0.5557 0.6540 0.6453 0.6479
## hpgl0641 0.9123 1.0000 0.5882 0.5888 0.5854 0.5894 0.6917 0.6843 0.6877
## hpgl0248 0.5565 0.5882 1.0000 0.9970 0.9955 0.9944 0.7080 0.6657 0.6764
## hpgl0659 0.5558 0.5888 0.9970 1.0000 0.9962 0.9957 0.7000 0.6631 0.6729
## hpgl0652 0.5541 0.5854 0.9955 0.9962 1.0000 0.9947 0.6917 0.6573 0.6671
## hpgl0632 0.5557 0.5894 0.9944 0.9957 0.9947 1.0000 0.6928 0.6596 0.6681
## hpgl0638 0.6540 0.6917 0.7080 0.7000 0.6917 0.6928 1.0000 0.9745 0.9813
## hpgl0636 0.6453 0.6843 0.6657 0.6631 0.6573 0.6596 0.9745 1.0000 0.9780
## hpgl0656 0.6479 0.6877 0.6764 0.6729 0.6671 0.6681 0.9813 0.9780 1.0000
## hpgl0663 0.6500 0.6866 0.6847 0.6801 0.6734 0.6749 0.9884 0.9791 0.9842
## hpgl0242 0.6830 0.7183 0.7129 0.7003 0.6920 0.6911 0.8684 0.8240 0.8282
## hpgl0654 0.6616 0.6990 0.6359 0.6333 0.6309 0.6316 0.8002 0.7879 0.7860
## hpgl0634 0.6611 0.6995 0.6402 0.6381 0.6338 0.6360 0.8073 0.7937 0.7907
## hpgl0661 0.6681 0.7058 0.6649 0.6611 0.6562 0.6578 0.8268 0.8057 0.8043
## hpgl0243 0.6719 0.7082 0.6830 0.6764 0.6709 0.6715 0.8409 0.8116 0.8122
## hpgl0663 hpgl0242 hpgl0654 hpgl0634 hpgl0661 hpgl0243
## hpgl0244 -0.3264 -0.3163 -0.3037 -0.3048 -0.3094 -0.3133
## hpgl0635 -0.3268 -0.3186 -0.3048 -0.3058 -0.3109 -0.3152
## hpgl0653 -0.3265 -0.3178 -0.3034 -0.3046 -0.3099 -0.3142
## hpgl0633 -0.3258 -0.3174 -0.3031 -0.3041 -0.3094 -0.3137
## hpgl0246 -0.3186 -0.3091 -0.2966 -0.2975 -0.3024 -0.3063
## hpgl0655 -0.3241 -0.3153 -0.3017 -0.3029 -0.3078 -0.3120
## hpgl0639 -0.3235 -0.3150 -0.3016 -0.3026 -0.3075 -0.3116
## hpgl0660 -0.3223 -0.3136 -0.2997 -0.3006 -0.3058 -0.3101
## hpgl0662 -0.3212 -0.3120 -0.2990 -0.2999 -0.3048 -0.3089
## hpgl0245 0.1925 0.2613 0.2302 0.2307 0.2403 0.2513
## hpgl0631 -0.1203 -0.1440 -0.1378 -0.1384 -0.1411 -0.1425
## hpgl0247 -0.1202 -0.1439 -0.1375 -0.1381 -0.1408 -0.1421
## hpgl0658 -0.1214 -0.1453 -0.1390 -0.1395 -0.1423 -0.1437
## hpgl0651 -0.1242 -0.1478 -0.1412 -0.1419 -0.1447 -0.1461
## hpgl0318 0.2053 0.2175 0.2341 0.2352 0.2294 0.2297
## hpgl0643 0.4850 0.5067 0.5086 0.5136 0.5126 0.5091
## hpgl0322 0.7873 0.8480 0.7815 0.7834 0.8008 0.8155
## hpgl0316 0.6093 0.6279 0.6236 0.6262 0.6263 0.6266
## hpgl0320 0.6500 0.6830 0.6616 0.6611 0.6681 0.6719
## hpgl0641 0.6866 0.7183 0.6990 0.6995 0.7058 0.7082
## hpgl0248 0.6847 0.7129 0.6359 0.6402 0.6649 0.6830
## hpgl0659 0.6801 0.7003 0.6333 0.6381 0.6611 0.6764
## hpgl0652 0.6734 0.6920 0.6309 0.6338 0.6562 0.6709
## hpgl0632 0.6749 0.6911 0.6316 0.6360 0.6578 0.6715
## hpgl0638 0.9884 0.8684 0.8002 0.8073 0.8268 0.8409
## hpgl0636 0.9791 0.8240 0.7879 0.7937 0.8057 0.8116
## hpgl0656 0.9842 0.8282 0.7860 0.7907 0.8043 0.8122
## hpgl0663 1.0000 0.8423 0.7936 0.7997 0.8149 0.8241
## hpgl0242 0.8423 1.0000 0.8814 0.8830 0.9030 0.9213
## hpgl0654 0.7936 0.8814 1.0000 0.9849 0.9876 0.9834
## hpgl0634 0.7997 0.8830 0.9849 1.0000 0.9896 0.9848
## hpgl0661 0.8149 0.9030 0.9876 0.9896 1.0000 0.9952
## hpgl0243 0.8241 0.9213 0.9834 0.9848 0.9952 1.0000
##
## $map$rowDendrogram
## 'dendrogram' with 2 branches and 33 members total, at height 6.853
##
## $map$colDendrogram
## 'dendrogram' with 2 branches and 33 members total, at height 6.853
##
## $map$breaks
## [1] -0.86889 -0.74430 -0.61970 -0.49511 -0.37052 -0.24593 -0.12133 0.00326 0.12785
## [10] 0.25244 0.37704 0.50163 0.62622 0.75081 0.87541 1.00000
##
## $map$col
## [1] "#FF0000FF" "#FF1700FF" "#FF2E00FF" "#FF4600FF" "#FF5D00FF" "#FF7400FF" "#FF8B00FF"
## [8] "#FFA200FF" "#FFB900FF" "#FFD100FF" "#FFE800FF" "#FFFF00FF" "#FFFF2AFF" "#FFFF80FF"
## [15] "#FFFFD5FF"
##
## $map$colorTable
## low high color
## 1 -0.86889 -0.74430 #FF0000FF
## 2 -0.74430 -0.61970 #FF1700FF
## 3 -0.61970 -0.49511 #FF2E00FF
## 4 -0.49511 -0.37052 #FF4600FF
## 5 -0.37052 -0.24593 #FF5D00FF
## 6 -0.24593 -0.12133 #FF7400FF
## 7 -0.12133 0.00326 #FF8B00FF
## 8 0.00326 0.12785 #FFA200FF
## 9 0.12785 0.25244 #FFB900FF
## 10 0.25244 0.37704 #FFD100FF
## 11 0.37704 0.50163 #FFE800FF
## 12 0.50163 0.62622 #FFFF00FF
## 13 0.62622 0.75081 #FFFF2AFF
## 14 0.75081 0.87541 #FFFF80FF
## 15 0.87541 1.00000 #FFFFD5FF
##
##
## $plot
##
## $data
## hpgl0631 hpgl0242 hpgl0243 hpgl0244 hpgl0245 hpgl0246 hpgl0247
## hpgl0631 1.00000000 -0.1439927 -0.1424585 -0.8630325 -0.62891556 -0.8688344 0.99910002
## hpgl0242 -0.14399269 1.0000000 0.9212703 -0.3163018 0.26126031 -0.3090881 -0.14386520
## hpgl0243 -0.14245846 0.9212703 1.0000000 -0.3132536 0.25134401 -0.3062878 -0.14214276
## hpgl0244 -0.86303251 -0.3163018 -0.3132536 1.0000000 0.48569685 0.9979998 -0.86302687
## hpgl0245 -0.62891556 0.2612603 0.2513440 0.4856968 1.00000000 0.4824960 -0.62840958
## hpgl0246 -0.86883440 -0.3090881 -0.3062878 0.9979998 0.48249600 1.0000000 -0.86888804
## hpgl0247 0.99910002 -0.1438652 -0.1421428 -0.8630269 -0.62840958 -0.8688880 1.00000000
## hpgl0248 -0.16781343 0.7129310 0.6829772 -0.2674829 0.32215447 -0.2624485 -0.16776667
## hpgl0316 -0.09865628 0.6278563 0.6265857 -0.3045748 0.08816311 -0.2949836 -0.09857606
## hpgl0318 -0.02368881 0.2175357 0.2297202 -0.2095491 -0.07450799 -0.2008007 -0.02337350
## hpgl0320 -0.10164802 0.6829555 0.6719337 -0.3191003 0.10803016 -0.3094761 -0.10163958
## hpgl0322 -0.12572669 0.8479996 0.8154971 -0.3457498 0.18023611 -0.3367553 -0.12569293
## hpgl0632 -0.16828279 0.6911082 0.6715040 -0.2641616 0.31386181 -0.2589515 -0.16828407
## hpgl0633 -0.86338300 -0.3174015 -0.3137477 0.9963938 0.47351492 0.9991362 -0.86356290
## hpgl0634 -0.13835227 0.8830252 0.9847839 -0.3047689 0.23069485 -0.2975241 -0.13806757
## hpgl0635 -0.86282242 -0.3185764 -0.3151608 0.9975744 0.47455672 0.9963488 -0.86297863
## hpgl0636 -0.12048480 0.8240209 0.8115699 -0.3198467 0.18755180 -0.3120171 -0.12048193
## hpgl0638 -0.12206546 0.8684183 0.8408949 -0.3308692 0.20805541 -0.3232434 -0.12200761
## hpgl0639 -0.86522161 -0.3150297 -0.3116011 0.9981695 0.47907414 0.9968313 -0.86530404
## hpgl0641 -0.10865006 0.7183139 0.7082462 -0.3259055 0.12010978 -0.3163320 -0.10874022
## hpgl0643 -0.07201944 0.5067110 0.5091431 -0.2832131 0.02617430 -0.2730743 -0.07232716
## hpgl0651 0.99591437 -0.1478450 -0.1460675 -0.8594247 -0.62826940 -0.8652870 0.99599255
## hpgl0652 -0.16820881 0.6920415 0.6708572 -0.2634470 0.31526723 -0.2583383 -0.16817852
## hpgl0653 -0.86229817 -0.3178483 -0.3142255 0.9958067 0.47142406 0.9985582 -0.86240891
## hpgl0654 -0.13777691 0.8813858 0.9833788 -0.3036780 0.23020176 -0.2965996 -0.13745264
## hpgl0655 -0.86498327 -0.3153210 -0.3119866 0.9981519 0.47720805 0.9969504 -0.86508439
## hpgl0656 -0.12048846 0.8281539 0.8122216 -0.3229633 0.18816943 -0.3150883 -0.12046881
## hpgl0658 0.99938298 -0.1452800 -0.1436813 -0.8623692 -0.62931754 -0.8681846 0.99922645
## hpgl0659 -0.16816401 0.7003267 0.6764295 -0.2657559 0.31662147 -0.2605941 -0.16815562
## hpgl0660 -0.86588858 -0.3135962 -0.3100903 0.9970331 0.47683048 0.9995595 -0.86600899
## hpgl0661 -0.14113054 0.9030144 0.9952472 -0.3094320 0.24033819 -0.3023653 -0.14083203
## hpgl0662 -0.86683568 -0.3119878 -0.3089203 0.9982751 0.48015963 0.9971211 -0.86694864
## hpgl0663 -0.12027583 0.8423169 0.8240721 -0.3264250 0.19245387 -0.3186159 -0.12023902
## hpgl0248 hpgl0316 hpgl0318 hpgl0320 hpgl0322 hpgl0632 hpgl0633
## hpgl0631 -0.1678134 -0.09865628 -0.02368881 -0.1016480 -0.1257267 -0.1682828 -0.8633830
## hpgl0242 0.7129310 0.62785628 0.21753570 0.6829555 0.8479996 0.6911082 -0.3174015
## hpgl0243 0.6829772 0.62658574 0.22972022 0.6719337 0.8154971 0.6715040 -0.3137477
## hpgl0244 -0.2674829 -0.30457478 -0.20954908 -0.3191003 -0.3457498 -0.2641616 0.9963938
## hpgl0245 0.3221545 0.08816311 -0.07450799 0.1080302 0.1802361 0.3138618 0.4735149
## hpgl0246 -0.2624485 -0.29498364 -0.20080074 -0.3094761 -0.3367553 -0.2589515 0.9991362
## hpgl0247 -0.1677667 -0.09857606 -0.02337350 -0.1016396 -0.1256929 -0.1682841 -0.8635629
## hpgl0248 1.0000000 0.51637263 0.15925463 0.5565269 0.6897244 0.9943616 -0.2711360
## hpgl0316 0.5163726 1.00000000 0.44292374 0.8607897 0.8507941 0.5214300 -0.3005881
## hpgl0318 0.1592546 0.44292374 1.00000000 0.4560886 0.3974960 0.1641036 -0.2029510
## hpgl0320 0.5565269 0.86078972 0.45608859 1.0000000 0.9057467 0.5557278 -0.3157655
## hpgl0322 0.6897244 0.85079409 0.39749597 0.9057467 1.0000000 0.6759099 -0.3449732
## hpgl0632 0.9943616 0.52143004 0.16410356 0.5557278 0.6759099 1.0000000 -0.2671845
## hpgl0633 -0.2711360 -0.30058809 -0.20295102 -0.3157655 -0.3449732 -0.2671845 1.0000000
## hpgl0634 0.6401540 0.62616793 0.23518166 0.6610625 0.7834133 0.6359795 -0.3040882
## hpgl0635 -0.2715415 -0.30215539 -0.20502515 -0.3171962 -0.3461199 -0.2676919 0.9951531
## hpgl0636 0.6656917 0.60705470 0.20321208 0.6453147 0.7710753 0.6596438 -0.3188029
## hpgl0638 0.7079818 0.60840872 0.20088884 0.6540479 0.8075005 0.6927583 -0.3310506
## hpgl0639 -0.2679025 -0.29946646 -0.20303822 -0.3141222 -0.3423713 -0.2642135 0.9953019
## hpgl0641 0.5881895 0.87728984 0.44812555 0.9122701 0.9339538 0.5894443 -0.3228032
## hpgl0643 0.4010602 0.74851423 0.49858476 0.7648834 0.7238375 0.4068322 -0.2771020
## hpgl0651 -0.1708902 -0.10229619 -0.02603215 -0.1055924 -0.1299369 -0.1713827 -0.8598165
## hpgl0652 0.9955452 0.51779646 0.15933815 0.5541263 0.6758204 0.9946520 -0.2666361
## hpgl0653 -0.2718027 -0.30205283 -0.20444083 -0.3168701 -0.3458104 -0.2680258 0.9982222
## hpgl0654 0.6359458 0.62362208 0.23414896 0.6616301 0.7815401 0.6316060 -0.3031121
## hpgl0655 -0.2682460 -0.30032206 -0.20414513 -0.3152601 -0.3433696 -0.2645759 0.9956118
## hpgl0656 0.6763592 0.61275019 0.20572565 0.6478958 0.7795898 0.6681331 -0.3220115
## hpgl0658 -0.1690302 -0.09967276 -0.02430376 -0.1027901 -0.1269868 -0.1694966 -0.8627359
## hpgl0659 0.9970474 0.51887480 0.16162341 0.5557638 0.6814606 0.9956734 -0.2690213
## hpgl0660 -0.2677160 -0.29806906 -0.20182437 -0.3131909 -0.3415139 -0.2639441 0.9990696
## hpgl0661 0.6649311 0.62626255 0.22941251 0.6681313 0.8008463 0.6577516 -0.3093527
## hpgl0662 -0.2654621 -0.29785027 -0.20252094 -0.3124102 -0.3400293 -0.2618529 0.9957100
## hpgl0663 0.6847361 0.60927771 0.20532641 0.6500336 0.7872665 0.6749190 -0.3257893
## hpgl0634 hpgl0635 hpgl0636 hpgl0638 hpgl0639 hpgl0641 hpgl0643
## hpgl0631 -0.1383523 -0.8628224 -0.1204848 -0.1220655 -0.8652216 -0.1086501 -0.07201944
## hpgl0242 0.8830252 -0.3185764 0.8240209 0.8684183 -0.3150297 0.7183139 0.50671097
## hpgl0243 0.9847839 -0.3151608 0.8115699 0.8408949 -0.3116011 0.7082462 0.50914307
## hpgl0244 -0.3047689 0.9975744 -0.3198467 -0.3308692 0.9981695 -0.3259055 -0.28321313
## hpgl0245 0.2306949 0.4745567 0.1875518 0.2080554 0.4790741 0.1201098 0.02617430
## hpgl0246 -0.2975241 0.9963488 -0.3120171 -0.3232434 0.9968313 -0.3163320 -0.27307430
## hpgl0247 -0.1380676 -0.8629786 -0.1204819 -0.1220076 -0.8653040 -0.1087402 -0.07232716
## hpgl0248 0.6401540 -0.2715415 0.6656917 0.7079818 -0.2679025 0.5881895 0.40106017
## hpgl0316 0.6261679 -0.3021554 0.6070547 0.6084087 -0.2994665 0.8772898 0.74851423
## hpgl0318 0.2351817 -0.2050252 0.2032121 0.2008888 -0.2030382 0.4481256 0.49858476
## hpgl0320 0.6610625 -0.3171962 0.6453147 0.6540479 -0.3141222 0.9122701 0.76488338
## hpgl0322 0.7834133 -0.3461199 0.7710753 0.8075005 -0.3423713 0.9339538 0.72383749
## hpgl0632 0.6359795 -0.2676919 0.6596438 0.6927583 -0.2642135 0.5894443 0.40683220
## hpgl0633 -0.3040882 0.9951531 -0.3188029 -0.3310506 0.9953019 -0.3228032 -0.27710196
## hpgl0634 1.0000000 -0.3057560 0.7936778 0.8073121 -0.3025608 0.6994753 0.51357798
## hpgl0635 -0.3057560 1.0000000 -0.3199824 -0.3319763 0.9994164 -0.3241617 -0.27938841
## hpgl0636 0.7936778 -0.3199824 1.0000000 0.9744600 -0.3167287 0.6843080 0.48591266
## hpgl0638 0.8073121 -0.3319763 0.9744600 1.0000000 -0.3283832 0.6916972 0.48205726
## hpgl0639 -0.3025608 0.9994164 -0.3167287 -0.3283832 1.0000000 -0.3210869 -0.27673063
## hpgl0641 0.6994753 -0.3241617 0.6843080 0.6916972 -0.3210869 1.0000000 0.77079548
## hpgl0643 0.5135780 -0.2793884 0.4859127 0.4820573 -0.2767306 0.7707955 1.00000000
## hpgl0651 -0.1418889 -0.8592347 -0.1244034 -0.1260378 -0.8616787 -0.1126750 -0.07573223
## hpgl0652 0.6338359 -0.2671891 0.6572950 0.6916800 -0.2636988 0.5853767 0.40291152
## hpgl0653 -0.3046302 0.9941969 -0.3196337 -0.3316821 0.9942545 -0.3240839 -0.27895535
## hpgl0654 0.9849010 -0.3047554 0.7879096 0.8002429 -0.3015706 0.6989946 0.50858472
## hpgl0655 -0.3028744 0.9994182 -0.3172832 -0.3290399 0.9995785 -0.3223467 -0.27842429
## hpgl0656 0.7907004 -0.3232631 0.9780352 0.9812822 -0.3200011 0.6877345 0.48881425
## hpgl0658 -0.1395231 -0.8621729 -0.1216303 -0.1232707 -0.8645787 -0.1098390 -0.07300446
## hpgl0659 0.6380757 -0.2694445 0.6631165 0.7000396 -0.2659331 0.5888330 0.40558167
## hpgl0660 -0.3006270 0.9956390 -0.3154138 -0.3273655 0.9959355 -0.3200565 -0.27550291
## hpgl0661 0.9895706 -0.3108585 0.8056617 0.8267883 -0.3074516 0.7058225 0.51264317
## hpgl0662 -0.2999498 0.9995616 -0.3145283 -0.3260069 0.9998284 -0.3193101 -0.27557657
## hpgl0663 0.7996842 -0.3268162 0.9790830 0.9884240 -0.3235070 0.6865637 0.48500771
## hpgl0651 hpgl0652 hpgl0653 hpgl0654 hpgl0655 hpgl0656 hpgl0658
## hpgl0631 0.99591437 -0.1682088 -0.8622982 -0.1377769 -0.8649833 -0.1204885 0.99938298
## hpgl0242 -0.14784503 0.6920415 -0.3178483 0.8813858 -0.3153210 0.8281539 -0.14528000
## hpgl0243 -0.14606748 0.6708572 -0.3142255 0.9833788 -0.3119866 0.8122216 -0.14368128
## hpgl0244 -0.85942468 -0.2634470 0.9958067 -0.3036780 0.9981519 -0.3229633 -0.86236919
## hpgl0245 -0.62826940 0.3152672 0.4714241 0.2302018 0.4772080 0.1881694 -0.62931754
## hpgl0246 -0.86528697 -0.2583383 0.9985582 -0.2965996 0.9969504 -0.3150883 -0.86818458
## hpgl0247 0.99599255 -0.1681785 -0.8624089 -0.1374526 -0.8650844 -0.1204688 0.99922645
## hpgl0248 -0.17089020 0.9955452 -0.2718027 0.6359458 -0.2682460 0.6763592 -0.16903022
## hpgl0316 -0.10229619 0.5177965 -0.3020528 0.6236221 -0.3003221 0.6127502 -0.09967276
## hpgl0318 -0.02603215 0.1593382 -0.2044408 0.2341490 -0.2041451 0.2057257 -0.02430376
## hpgl0320 -0.10559240 0.5541263 -0.3168701 0.6616301 -0.3152601 0.6478958 -0.10279014
## hpgl0322 -0.12993690 0.6758204 -0.3458104 0.7815401 -0.3433696 0.7795898 -0.12698676
## hpgl0632 -0.17138267 0.9946520 -0.2680258 0.6316060 -0.2645759 0.6681331 -0.16949657
## hpgl0633 -0.85981645 -0.2666361 0.9982222 -0.3031121 0.9956118 -0.3220115 -0.86273593
## hpgl0634 -0.14188885 0.6338359 -0.3046302 0.9849010 -0.3028744 0.7907004 -0.13952309
## hpgl0635 -0.85923472 -0.2671891 0.9941969 -0.3047554 0.9994182 -0.3232631 -0.86217286
## hpgl0636 -0.12440337 0.6572950 -0.3196337 0.7879096 -0.3172832 0.9780352 -0.12163028
## hpgl0638 -0.12603779 0.6916800 -0.3316821 0.8002429 -0.3290399 0.9812822 -0.12327067
## hpgl0639 -0.86167871 -0.2636988 0.9942545 -0.3015706 0.9995785 -0.3200011 -0.86457873
## hpgl0641 -0.11267501 0.5853767 -0.3240839 0.6989946 -0.3223467 0.6877345 -0.10983902
## hpgl0643 -0.07573223 0.4029115 -0.2789553 0.5085847 -0.2784243 0.4888143 -0.07300446
## hpgl0651 1.00000000 -0.1712009 -0.8586017 -0.1412378 -0.8613280 -0.1243519 0.99605191
## hpgl0652 -0.17120089 1.0000000 -0.2672362 0.6308908 -0.2638992 0.6671142 -0.16941499
## hpgl0653 -0.85860169 -0.2672362 1.0000000 -0.3034394 0.9950619 -0.3226153 -0.86162222
## hpgl0654 -0.14123783 0.6308908 -0.3034394 1.0000000 -0.3017235 0.7859987 -0.13895393
## hpgl0655 -0.86132797 -0.2638992 0.9950619 -0.3017235 1.0000000 -0.3204215 -0.86432224
## hpgl0656 -0.12435191 0.6671142 -0.3226153 0.7859987 -0.3204215 1.0000000 -0.12165359
## hpgl0658 0.99605191 -0.1694150 -0.8616222 -0.1389539 -0.8643222 -0.1216536 1.00000000
## hpgl0659 -0.17121249 0.9961935 -0.2697681 0.6333482 -0.2662544 0.6729304 -0.16937587
## hpgl0660 -0.86227895 -0.2632841 0.9986289 -0.2996522 0.9962426 -0.3185260 -0.86523691
## hpgl0661 -0.14472021 0.6561760 -0.3098758 0.9876432 -0.3078161 0.8043251 -0.14233303
## hpgl0662 -0.86327331 -0.2613085 0.9947930 -0.2990133 0.9997579 -0.3177209 -0.86618774
## hpgl0663 -0.12421795 0.6734497 -0.3265100 0.7936198 -0.3240722 0.9842054 -0.12144937
## hpgl0659 hpgl0660 hpgl0661 hpgl0662 hpgl0663
## hpgl0631 -0.1681640 -0.8658886 -0.1411305 -0.8668357 -0.1202758
## hpgl0242 0.7003267 -0.3135962 0.9030144 -0.3119878 0.8423169
## hpgl0243 0.6764295 -0.3100903 0.9952472 -0.3089203 0.8240721
## hpgl0244 -0.2657559 0.9970331 -0.3094320 0.9982751 -0.3264250
## hpgl0245 0.3166215 0.4768305 0.2403382 0.4801596 0.1924539
## hpgl0246 -0.2605941 0.9995595 -0.3023653 0.9971211 -0.3186159
## hpgl0247 -0.1681556 -0.8660090 -0.1408320 -0.8669486 -0.1202390
## hpgl0248 0.9970474 -0.2677160 0.6649311 -0.2654621 0.6847361
## hpgl0316 0.5188748 -0.2980691 0.6262625 -0.2978503 0.6092777
## hpgl0318 0.1616234 -0.2018244 0.2294125 -0.2025209 0.2053264
## hpgl0320 0.5557638 -0.3131909 0.6681313 -0.3124102 0.6500336
## hpgl0322 0.6814606 -0.3415139 0.8008463 -0.3400293 0.7872665
## hpgl0632 0.9956734 -0.2639441 0.6577516 -0.2618529 0.6749190
## hpgl0633 -0.2690213 0.9990696 -0.3093527 0.9957100 -0.3257893
## hpgl0634 0.6380757 -0.3006270 0.9895706 -0.2999498 0.7996842
## hpgl0635 -0.2694445 0.9956390 -0.3108585 0.9995616 -0.3268162
## hpgl0636 0.6631165 -0.3154138 0.8056617 -0.3145283 0.9790830
## hpgl0638 0.7000396 -0.3273655 0.8267883 -0.3260069 0.9884240
## hpgl0639 -0.2659331 0.9959355 -0.3074516 0.9998284 -0.3235070
## hpgl0641 0.5888330 -0.3200565 0.7058225 -0.3193101 0.6865637
## hpgl0643 0.4055817 -0.2755029 0.5126432 -0.2755766 0.4850077
## hpgl0651 -0.1712125 -0.8622789 -0.1447202 -0.8632733 -0.1242180
## hpgl0652 0.9961935 -0.2632841 0.6561760 -0.2613085 0.6734497
## hpgl0653 -0.2697681 0.9986289 -0.3098758 0.9947930 -0.3265100
## hpgl0654 0.6333482 -0.2996522 0.9876432 -0.2990133 0.7936198
## hpgl0655 -0.2662544 0.9962426 -0.3078161 0.9997579 -0.3240722
## hpgl0656 0.6729304 -0.3185260 0.8043251 -0.3177209 0.9842054
## hpgl0658 -0.1693759 -0.8652369 -0.1423330 -0.8661877 -0.1214494
## hpgl0659 1.0000000 -0.2656806 0.6610817 -0.2634967 0.6801180
## hpgl0660 -0.2656806 1.0000000 -0.3058258 0.9963101 -0.3222930
## hpgl0661 0.6610817 -0.3058258 1.0000000 -0.3048359 0.8149287
## hpgl0662 -0.2634967 0.9963101 -0.3048359 1.0000000 -0.3212230
## hpgl0663 0.6801180 -0.3222930 0.8149287 -0.3212230 1.0000000
Lets try a fancier heatmap.
loadme(filename="annotation.rda.xz")
## Loading the savefile: /cbcb/nelsayed-scratch/atb/rnaseq/lpanamensis/savefiles/annotation.rda.xz
## Command run: load('/cbcb/nelsayed-scratch/atb/rnaseq/lpanamensis/savefiles/annotation.rda.xz', envir=globalenv())
samples <- as.data.frame(toupper(colnames(snp_df)))
## Error in is.data.frame(x): object 'snp_df' not found
colnames(samples) <- c("ID")
## Error in colnames(samples) <- c("ID"): object 'samples' not found
## There a cheesy way to extract the annotation information I want.
library(Heatplus)
snp_logit <- car::logit(snp_norm)
## Warning in car::logit(snp_norm): proportions remapped to (0.025, 0.975)
snp_sample_pca <- plot_pca(snp_logit, design=parasite_expt$design)
## Warning in RColorBrewer::brewer.pal(12, "Dark2"): n too large, allowed maximum for palette Dark2 is 8
## Returning the palette you asked for with that many colors
correlations <- hpgl_cor(snp_norm)
rownames(correlations) <- toupper(rownames(correlations))
distances <- as.matrix(dist(t(snp_norm)))
rownames(distances) <- toupper(rownames(distances))
expt_design <- parasite_expt$design
expt_design["HPGL0631","strain"]
## NULL
## Make certain that all orders are maintained between the correlations and design!!!
design_corr <- merge(expt_design, correlations, by="row.names")
rownames(design_corr) <- design_corr$Row.names
design_corr <- design_corr[rownames(correlations), ]
corr <- design_corr[, colnames(correlations)]
des <- design_corr[, colnames(expt_design)]
col_data <- as.data.frame(des[, c("strain","donor")])
## Error in `[.data.frame`(des, , c("strain", "donor")): undefined columns selected
row_data <- as.data.frame(des[, c("healing","condition")])
## Error in `[.data.frame`(des, , c("healing", "condition")): undefined columns selected
myannot <- list(Col=list(data=col_data), Row=list(data=row_data))
## Error in eval(expr, envir, enclos): object 'col_data' not found
##mylabels <- list(Row=list(nrow=nrow(row_data)), Col=list(nrow=nrow(row_data)))
mylabels <- list(Row=list(nrow=4), Col=list(nrow=4))
library(Heatplus)
map1 <- annHeatmap2(correlations,
cluster=list(cuth=1.0),
ann=myannot,
labels=mylabels)
## Error in modifyExistingList(deflist, arglist): object 'myannot' not found
plot(map1)
## Error in plot(map1): object 'map1' not found
Now the snp clustering heatmap includes all samples.
Let see if there are more fine-tunable parameters when using the variantAnnotation R package. Caveat: the compressed indexed vcf files I created appear to actually be supported by the Rsamtools package…
The following large block was used to calculate the coverage per position of all samples for every potential snp position. It took about a week to complete.
library(VariantAnnotation)
library(Rsamtools)
row = snp_dt[1,]
row
samples
the_samples = samples
indir=input_dir
suffix=bam_suffix
bam_suffix="_accepted_paired.bam"
suffix=bam_suffix
sample_num = 1
sample <- the_samples[sample_num]
if (isTRUE(tolower)) {
sample <- tolower(sample)
}
sample
filename <- paste0(indir, "/", sample, suffix)
filename
scanned <- Rsamtools::scanBam(file=filename)
rm(scanned)
filenames <- NULL
filenames <- c()
for (sample_num in 1:length(the_samples)) {
sample <- the_samples[sample_num]
if (isTRUE(tolower)) {
sample <- tolower(sample)
}
filename <- paste0(indir, "/", sample, suffix)
filenames <- append(filenames, filename)
}
filenames
file_list <- function(row, the_samples=samples, indir=input_dir, suffix=bam_suffix) {
filenames <- c()
for (sample_num in 1:length(the_samples)) {
sample <- the_samples[sample_num]
if (isTRUE(tolower)) {
sample <- tolower(sample)
}
filename <- paste0(indir, "/", sample, suffix)
filenames <- append(filenames, filename)
}
return(filenames)
}
pileup_files <- file_list(samples)
pileup_files
pileup_info <- Rsamtools::PileupFiles(pileup_files)
pileup_info
which <- GenomicRanges::GRanges(paste0(snp_dt[["species"]], "_", snp_dt[["chromosome"]], ":", snp_dt[["position"]]),
paste0(snp_dt[["species"]], "_", snp_dt[["chromosome"]], ":", snp_dt[["position"]] + 1))
queries <- paste0(snp_dt[["species"]], "_",
snp_dt[["chromosome"]], ":", snp_dt[["position"]], "-",
as.numeric(snp_dt[["position"]]) + 1)
which <- GenomicRanges::GRanges(queries)
what <- "seq"
pileups_params <- Rsamtools::ApplyPileupsParam(which=which, what=what)
calcInfo <- function(x) {
info <- apply(x[["seq"]], 2, function(y) {
y <- y[c("A", "C", "G", "T"), , drop=FALSE]
y <- y + 1L ## continuity
cvg <- colSums(y)
p <- y / cvg[col(y)]
h <- -colSums(p * log(p))
ifelse(cvg == 4L, NA, h)
})
retlist <- list(seqnames=x[["seqnames"]], pos=x[["pos"]], info=info)
return(retlist)
}
pileups_params <- Rsamtools::ApplyPileupsParam(which=which, what=what)
result <- Rsamtools::applyPileups(pileup_files, calcInfo, param=pileups_params)
calcInfo <- function(x) {
info <- apply(x[["seq"]], 2, function(y) {
y <- y[c("A", "C", "G", "T"), , drop=FALSE]
y <- y + 1L ## continuity
cvg <- colSums(y)
})
retlist <- list(seqnames=x[["seqnames"]], pos=x[["pos"]], info=info)
return(retlist)
}
queries <- paste0(snp_dt[["species"]], "_",
snp_dt[["chromosome"]], ":", snp_dt[["position"]], "-",
as.numeric(snp_dt[["position"]]) + 1)
queries <- head(queries, n=10)
which <- GenomicRanges::GRanges(queries)
what <- "seq"
pileups_params <- Rsamtools::ApplyPileupsParam(which=which, what=what)
result <- Rsamtools::applyPileups(pileup_info, calcInfo, param=pileups_params)
result
pileup_files <- file_list(samples)
pileup_info <- Rsamtools::PileupFiles(pileup_files)
calcInfo <- function(x) {
info <- apply(x[["seq"]], 2, function(y) {
y <- y[c("A", "C", "G", "T"), , drop=FALSE]
y <- y + 1L ## continuity
cvg <- colSums(y)
})
retlist <- list(seqnames=x[["seqnames"]], pos=x[["pos"]], info=info)
return(retlist)
}
queries <- paste0(snp_dt[["species"]], "_",
snp_dt[["chromosome"]], ":", snp_dt[["position"]], "-",
as.numeric(snp_dt[["position"]]) + 1)
queries <- head(queries, n=10)
which <- GenomicRanges::GRanges(queries)
what <- "seq"
pileups_params <- Rsamtools::ApplyPileupsParam(which=which, what=what)
result <- Rsamtools::applyPileups(pileup_info, calcInfo, param=pileups_params)
result
make_cov_cols <- function(element) {
row <- element[["info"]][2, ]
names(row) <- samples
}
new_columns <- lapply(result, make_cov_cols)
new_columns
lapply(result, make_cov_cols)
element = result[[1]]
element
row <- element[["info"]][2, ]
row
new_dt <- NULL
make_cov_cols <- function(element) {
row <- element[["info"]][2, ]
names(row) <- samples
new_dt <<- rbind(new_dt, row)
}
new_columns <- lapply(result, make_cov_cols)
rownames(new_dt) <- head(snp_dt[["rownames"]], n=10)
new_dt
snp_dt[, c("species", "chromosome", "position", "original", "new") := tstrsplit(rownames, "_", fixed=TRUE)]
pileup_files <- file_list(samples)
pileup_info <- Rsamtools::PileupFiles(pileup_files)
calc_coverage <- function(x) {
qme <- function(y) {
y <- y[c("A", "C", "G", "T"), , drop=FALSE]
y <- y + 1L
result <- colSums(y)
return(result)
}
info <- apply(x[["seq"]], 2, qme)
retlist <- list(seqnames=x[["seqnames"]], pos=x[["pos"]], info=info)
return(retlist)
}
queries <- paste0(snp_dt[["species"]], "_",
snp_dt[["chromosome"]], ":",
as.numeric(snp_dt[["position"]]) - 1, "-",
as.numeric(snp_dt[["position"]]) + 1)
which <- GenomicRanges::GRanges(queries)
what <- "seq"
pileups_params <- Rsamtools::ApplyPileupsParam(which=which, what=what)
result <- Rsamtools::applyPileups(pileup_info, calc_coverage, param=pileups_params)
names(result) <- snp_dt[["rownames"]]
new_dt <- NULL
make_cov_cols <- function(element) {
row <- element[["info"]][2, ]
names(row) <- paste0(samples, "_cov")
new_dt <<- rbind(new_dt, row)
}
new_columns <- lapply(result, make_cov_cols)
new_dt <- as.data.table(new_dt)
new_dt[["rownames"]] <- snp_dt[["rownames"]]
final_dt <- merge(snp_dt, new_dt, by="rownames")
snp_dt[, c("species", "chromosome", "position", "original", "new") := tstrsplit(rownames, "_", fixed=TRUE)]
snp_file_list <- function(row, the_samples=samples, indir=input_dir, suffix=bam_suffix) {
filenames <- c()
for (sample_num in 1:length(the_samples)) {
sample <- the_samples[sample_num]
filename <- paste0(indir, "/", sample, suffix)
filenames <- append(filenames, filename)
}
return(filenames)
}
pileup_files <- file_list(samples)
pileup_info <- Rsamtools::PileupFiles(pileup_files)
queries <- paste0(snp_dt[["species"]], "_",
snp_dt[["chromosome"]], ":",
as.numeric(snp_dt[["position"]]) - 1, "-",
as.numeric(snp_dt[["position"]]) + 1)
which <- GenomicRanges::GRanges(queries)
what <- "seq"
pileups_params <- Rsamtools::ApplyPileupsParam(which=which, what=what)
snp_calc_coverage <- function(x) {
qme <- function(y) {
y <- y[c("A", "C", "G", "T"), , drop=FALSE]
y <- y + 1L
result <- colSums(y)
return(result)
}
info <- apply(x[["seq"]], 2, qme)
retlist <- list(seqnames=x[["seqnames"]], pos=x[["pos"]], info=info)
return(retlist)
}
result <- Rsamtools::applyPileups(pileup_info, snp_calc_coverage, param=pileups_params)
names(result) <- snp_dt[["rownames"]]
saveme(filename="rsamtools_result.rda.xz")
tt <- loadme(filename="rsamtools_result.rda.xz")
## Loading the savefile: /cbcb/nelsayed-scratch/atb/rnaseq/lpanamensis/savefiles/rsamtools_result.rda.xz
## Command run: load('/cbcb/nelsayed-scratch/atb/rnaseq/lpanamensis/savefiles/rsamtools_result.rda.xz', envir=globalenv())
col_data <- as.data.frame(des[, c("strain")])
row_data <- as.data.frame(des[, c("condition")])
colnames(col_data) <- c("strain")
colnames(row_data) <- c("condition")
myannot <- list(Col=list(data=col_data), Row=list(data=row_data))
##mylabels <- list(Row=list(nrow=nrow(row_data)), Col=list(nrow=nrow(row_data)))
mylabels <- list(Row=list(nrow=4), Col=list(nrow=4))
library(Heatplus)
map2 <- annHeatmap2(correlations,
cluster=list(cuth=1.0),
ann=myannot,
labels=mylabels)
plot(map2)
design <- parasite_expt$design
macrophage_design <- design[ design$experimentname == "macrophage", ]
macrophage_samples <- tolower(rownames(macrophage_design))
snp_macrophage <- snp_norm[, macrophage_samples]
infection_design <- design[ design$experimentname == "infection", ]
infection_samples <- tolower(rownames(infection_design))
snp_infection <- snp_norm[, infection_samples]
macro_correlations <- hpgl_cor(snp_macrophage)
rownames(macro_correlations) <- toupper(rownames(macro_correlations))
macro_distances <- as.matrix(dist(t(snp_macrophage)))
rownames(macro_distances) <- toupper(rownames(macro_distances))
## Make certain that all orders are maintained between the correlations and design!!!
design_corr <- merge(macrophage_design, correlations, by="row.names")
rownames(design_corr) <- design_corr$Row.names
design_corr <- design_corr[rownames(macro_correlations), ]
corr <- design_corr[, colnames(macro_correlations)]
des <- design_corr[, colnames(macrophage_design)]
col_data <- as.data.frame(des[, c("strain")])
colnames(col_data) <- c("strain")
row_data <- as.data.frame(des[, c("condition")])
colnames(row_data) <- c("condition")
myannot <- list(Col=list(data=col_data), Row=list(data=row_data))
mylabels <- list(Row=list(nrow=4), Col=list(nrow=4))
library(Heatplus)
cluster_list <- list(
"cuth" = 0.5,
"col" = c("#B3CDE3","#FBB4AE","#CCEBC5","#FFFFCC","#FDDAEC","#F2F2F2"))
map3 <- annHeatmap2(macro_correlations,
cluster=cluster_list,
ann=myannot,
labels=mylabels)
png(file="images/macrophage_only_snp.png")
plot(map3)
dev.off()
## png
## 2
infect_correlations <- hpgl_cor(snp_infection)
rownames(infect_correlations) <- toupper(rownames(infect_correlations))
infect_distances <- as.matrix(dist(t(snp_infection)))
rownames(infect_distances) <- toupper(rownames(infect_distances))
## Make certain that all orders are maintained between the correlations and design!!!
design_corr <- merge(infection_design, correlations, by="row.names")
rownames(design_corr) <- design_corr$Row.names
design_corr <- design_corr[rownames(infect_correlations), ]
corr <- design_corr[, colnames(infect_correlations)]
des <- design_corr[, colnames(infection_design)]
col_data <- as.data.frame(des[, c("strain")])
colnames(col_data) <- c("strain")
row_data <- as.data.frame(des[, c("condition")])
colnames(row_data) <- c("condition")
myannot <- list(Col=list(data=col_data), Row=list(data=row_data))
mylabels <- list(Row=list(nrow=4), Col=list(nrow=4))
#cluster_list <- list(
# "cuth" = 1.0)
cluster_list <- list(
"cuth" = 0.5,
"col" = c("#FBB4AE", "#B3CDE3", "#FFFFCC", "#FDDAEC", "#F2F2F2"))
library(Heatplus)
map4 <- annHeatmap2(infect_correlations,
cluster=cluster_list,
ann=myannot,
labels=mylabels)
plot(map4)
figure_colors <- function (n, name = "Pastel1") {
qualpal = subset(RColorBrewer::brewer.pal.info, category == "qual")
name = match.arg(name, rownames(qualpal))
nmax = qualpal[name, "maxcolors"]
cols = RColorBrewer::brewer.pal(nmax, name)
ndx = rep(1:nmax, length = n)
cols[ndx]
}
print(figure_colors(10))
## [1] "#FBB4AE" "#B3CDE3" "#CCEBC5" "#DECBE4" "#FED9A6" "#FFFFCC" "#E5D8BD" "#FDDAEC"
## [9] "#F2F2F2" "#FBB4AE"