1 Sample Estimation: 20180410

2 M. musculus sample estimation

3 Creating expressionset(s)

## Preview the first rows of the annotation table stored in mm_annotv2.
head(mm_annotv2$annotation)

##                    ensembl_transcript_id    ensembl_gene_id version transcript_version
## ENSMUST00000000001    ENSMUST00000000001 ENSMUSG00000000001       4                  4
## ENSMUST00000000003    ENSMUST00000000003 ENSMUSG00000000003      15                 13
## ENSMUST00000000010    ENSMUST00000000010 ENSMUSG00000020875       9                  8
## ENSMUST00000000028    ENSMUST00000000028 ENSMUSG00000000028      14                 13
## ENSMUST00000000033    ENSMUST00000000033 ENSMUSG00000048583      16                 11
## ENSMUST00000000049    ENSMUST00000000049 ENSMUSG00000000049      11                  5
##                    hgnc_symbol
## ENSMUST00000000001            
## ENSMUST00000000003            
## ENSMUST00000000010            
## ENSMUST00000000028            
## ENSMUST00000000033            
## ENSMUST00000000049            
##                                                                                                             description
## ENSMUST00000000001 guanine nucleotide binding protein (G protein), alpha inhibiting 3 [Source:MGI Symbol;Acc:MGI:95773]
## ENSMUST00000000003                                                         probasin [Source:MGI Symbol;Acc:MGI:1860484]
## ENSMUST00000000010                                                        homeobox B9 [Source:MGI Symbol;Acc:MGI:96190]
## ENSMUST00000000028                                           cell division cycle 45 [Source:MGI Symbol;Acc:MGI:1338073]
## ENSMUST00000000033                                       insulin-like growth factor 2 [Source:MGI Symbol;Acc:MGI:96434]
## ENSMUST00000000049                                                   apolipoprotein H [Source:MGI Symbol;Acc:MGI:88058]
##                      gene_biotype cds_length chromosome_name strand start_position
## ENSMUST00000000001 protein_coding       1065               3      -      108107280
## ENSMUST00000000003 protein_coding        525               X      -       77837901
## ENSMUST00000000010 protein_coding        753              11      +       96271457
## ENSMUST00000000028 protein_coding       1701              16      -       18780447
## ENSMUST00000000033 protein_coding        543               7      -      142650766
## ENSMUST00000000049 protein_coding       1038              11      +      108343354
##                    end_position
## ENSMUST00000000001    108146146
## ENSMUST00000000003     77853623
## ENSMUST00000000010     96276595
## ENSMUST00000000028     18811987
## ENSMUST00000000033    142666816
## ENSMUST00000000049    108414396

## Build the mouse expressionset from the sample sheet and the annotation data
## frame (the log below shows salmon counts being read via tximport).
mm_expt <- create_expt(
  metadata = "sample_sheets/all_samples_201804.xlsx",
  gene_info = mm_annotv2_df,
  file_column = "mmusculusfile"
)

## Reading the sample metadata.

## The sample definitions comprises: 24, 22 rows, columns.

## Reading count tables.

## Reading salmon data with tximport.

## Finished reading count tables.

## Matched 49753 annotations and counts.

## Bringing together the count matrix and gene information.

## Some annotations were lost in merging, setting them to 'undefined'.

## One color per condition; the trailing labels reproduce the original column
## annotation (pro, ama, 12hinf, 12huninf, 24dinf, 24duninf).
mm_expt <- set_expt_colors(
  expt = mm_expt,
  colors = c(
    "gray",        ## pro
    "darkgreen",   ## ama
    "darkblue",    ## 12hinf
    "dodgerblue",  ## 12huninf
    "darkred",     ## 24dinf
    "#EE9999"      ## 24duninf
  )
)

4 Visualizing raw data

This file was copied from 02_sample_estimation_lmajor_201814.Rmd, so it might be a bit repetitive, but I suspect that there will be more interesting things going on in the host data. I will be sad to not make ‘paranormal’ jokes in my variable names. Also, these colors suck, let's fix that now.

There are lots of toys we have learned to use to play with raw data and explore stuff like batch effects or non-canonical distributions or skewed counts. hpgltools provides some functionality to make this process easier. The graphs shown below and many more are generated with the wrapper ‘graph_metrics()’ but that takes away the chance to explain the graphs as I generate them.

## Intended to keep only the features annotated as protein_coding.
## NOTE(review): the log printed for this call reports that the Type column is
## null and nothing was done, so no filtering happens here.  The annotation
## preview earlier in this report has a gene_biotype column -- confirm which
## column name was intended.
mm_expt <- exclude_genes_expt(expt=mm_expt, column="Type", method="keep", patterns=c("protein_coding"))

## The Type column is null, doing nothing.

## Generate the QC plot collection for the raw data (per the log: library sizes,
## boxplot, correlation/distance heatmaps, PCA, t-SNE, density, top-n genes).
mm_metrics <- graph_metrics(mm_expt)

## Graphing number of non-zero genes with respect to CPM by library.

## Graphing library sizes.

## The scale difference between the smallest and largest
## libraries is > 10. Assuming a log10 scale is better, set scale=FALSE if not.

## Graphing a boxplot.

## This data will benefit from being displayed on the log scale.

## If this is not desired, set scale='raw'

## Some entries are 0.  We are on log scale, adding 1 to the data.

## Changed 984972 zero count features.

## Graphing a correlation heatmap.

## Graphing a standard median correlation.

## Performing correlation.

## Graphing a distance heatmap.

## Graphing a standard median distance.

## Performing distance.

## Graphing a PCA plot.

## Graphing a T-SNE plot.

## Plotting a density plot.

## This data will benefit from being displayed on the log scale.

## If this is not desired, set scale='raw'

## Some entries are 0.  We are on log scale, setting them to 0.5.

## Changed 984972 zero count features.

## Plotting the representation of the top-n genes.

## Printing a color to condition legend.

## Low-count filter, quantile normalization, CPM conversion and log2 transform;
## the log reports the result as log2(cpm(quant(hpgl(data)))).
mm_norm <- normalize_expt(
  mm_expt,
  filter = TRUE,
  norm = "quant",
  convert = "cpm",
  transform = "log2"
)

## This function will replace the expt$expressionset slot with:

## log2(cpm(quant(hpgl(data))))

## It backs up the current data into a slot named:
##  expt$backup_expressionset. It will also save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
##  when invoking limma.  The appropriate libsize is the non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize

## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.

## Step 1: performing count filter with option: hpgl

## Removing 34796 low-count genes (21024 remaining).

## Step 2: normalizing the data with quant.

## Step 3: converting the data with cpm.

## Step 4: transforming the data with log2.

## transform_counts: Found 46655 values equal to 0, adding 1 to the matrix.

## Step 5: not doing batch correction.

## Repeat the QC plot collection on the normalized data.
mm_normmetrics <- graph_metrics(mm_norm)

## Graphing number of non-zero genes with respect to CPM by library.

## Graphing library sizes.

## Graphing a boxplot.

## Graphing a correlation heatmap.

## Graphing a standard median correlation.

## Performing correlation.

## Graphing a distance heatmap.

## Graphing a standard median distance.

## Performing distance.

## Graphing a PCA plot.

## Graphing a T-SNE plot.

## Plotting a density plot.

## Plotting the representation of the top-n genes.

## Printing a color to condition legend.

4.1 Look at metrics

Ok, so this time I expect to see very few reads in the promastigote/amastigote samples.

Is this true?

mm_metrics$legend

## pink and green are the uninfected samples
## NOTE(review): the color vector given to set_expt_colors() lists darkgreen
## under 'ama' and dodgerblue/#EE9999 under the uninfected conditions --
## confirm this description against the rendered legend.
mm_metrics$libsize

## Still _WAY_ more reads than ideal for promastigote/amastigote.
## I wonder if this is not an artifact of me using salmon.

mm_normmetrics$pcaplot

## It looks to me that the primary principal component is coverage?
## If it is the only factor, when I remove the pro/ama samples, then 1075
## should stick way out to the side with the other samples appearing in order:
## 1076, 1085, 1092, 1083, 1077, 1073, ...
## That does not seem likely.

4.2 Drop extracellular samples

Now let's get rid of the extracellular (pro/ama) samples and try again and see if I am right.

## Keep only the samples whose 'hostp' metadata value is 'yes' (the log reports
## 24 samples before and 16 after).
mm_only_expt <- subset_expt(mm_expt, subset="hostp=='yes'")

## There were 24, now there are 16 samples.

## QC plot collection for the un-normalized hostp=='yes' subset.
mm_only_metrics <- graph_metrics(mm_only_expt)

## Graphing number of non-zero genes with respect to CPM by library.

## Graphing library sizes.

## Graphing a boxplot.

## This data will benefit from being displayed on the log scale.

## If this is not desired, set scale='raw'

## Some entries are 0.  We are on log scale, adding 1 to the data.

## Changed 571038 zero count features.

## Graphing a correlation heatmap.

## Graphing a standard median correlation.

## Performing correlation.

## Graphing a distance heatmap.

## Graphing a standard median distance.

## Performing distance.

## Graphing a PCA plot.

## Graphing a T-SNE plot.

## Plotting a density plot.

## This data will benefit from being displayed on the log scale.

## If this is not desired, set scale='raw'

## Some entries are 0.  We are on log scale, setting them to 0.5.

## Changed 571038 zero count features.

## Plotting the representation of the top-n genes.

## Printing a color to condition legend.

## Normalize the subset; the log reports the result as
## log2(cpm(quant(hpgl(data)))) with no batch correction.
mm_only_norm <- normalize_expt(
  mm_only_expt,
  transform = "log2",
  convert = "cpm",
  norm = "quant",
  filter = TRUE
)

## This function will replace the expt$expressionset slot with:

## log2(cpm(quant(hpgl(data))))

## It backs up the current data into a slot named:
##  expt$backup_expressionset. It will also save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
##  when invoking limma.  The appropriate libsize is the non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize

## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.

## Step 1: performing count filter with option: hpgl

## Removing 35767 low-count genes (20053 remaining).

## Step 2: normalizing the data with quant.

## Step 3: converting the data with cpm.

## Step 4: transforming the data with log2.

## transform_counts: Found 39120 values equal to 0, adding 1 to the matrix.

## Step 5: not doing batch correction.

## QC plot collection for the normalized subset (no batch adjustment).
mm_nonly_metrics <- graph_metrics(mm_only_norm)

## Graphing number of non-zero genes with respect to CPM by library.

## Graphing library sizes.

## Graphing a boxplot.

## Graphing a correlation heatmap.

## Graphing a standard median correlation.

## Performing correlation.

## Graphing a distance heatmap.

## Graphing a standard median distance.

## Performing distance.

## Graphing a PCA plot.

## Graphing a T-SNE plot.

## Plotting a density plot.

## Plotting the representation of the top-n genes.

## Printing a color to condition legend.

## Same normalization as mm_only_norm plus svaseq adjustment; the log reports
## log2(svaseq(cpm(quant(hpgl(data))))) and warns that quantile normalization
## and sva do not always play well together.
mm_only_batch <- normalize_expt(
  mm_only_expt,
  transform = "log2",
  convert = "cpm",
  norm = "quant",
  filter = TRUE,
  batch = "svaseq"
)

## This function will replace the expt$expressionset slot with:

## log2(svaseq(cpm(quant(hpgl(data)))))

## It backs up the current data into a slot named:
##  expt$backup_expressionset. It will also save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep the libsizes in mind
##  when invoking limma.  The appropriate libsize is the non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize

## Warning in normalize_expt(mm_only_expt, transform = "log2", convert = "cpm", : Quantile
## normalization and sva do not always play well together.

## Step 1: performing count filter with option: hpgl

## Removing 35767 low-count genes (20053 remaining).

## Step 2: normalizing the data with quant.

## Step 3: converting the data with cpm.

## Step 4: transforming the data with log2.

## transform_counts: Found 39120 values equal to 0, adding 1 to the matrix.

## Step 5: doing batch correction with svaseq.

## In norm_batch, after testing logic of surrogate method/number, the
## number of surrogates is:  and the method is: be.

## Note to self:  If you get an error like 'x contains missing values'; I think this
##  means that the data has too many 0's and needs to have a better low-count filter applied.

## batch_counts: Before batch correction, 49686 entries 0<x<1.

## batch_counts: Before batch correction, 39120 entries are >= 0.

## After checking/setting the number of surrogates, it is: 4.

## batch_counts: Using sva::svaseq for batch correction.

## Note to self:  If you feed svaseq a data frame you will get an error like:

## data %*% (Id - mod %*% blah blah requires numeric/complex arguments.

## The number of elements which are < 0 after batch correction is: 12079

## The variable low_to_zero sets whether to change <0 values to 0 and is: FALSE

## QC plot collection for the normalized, svaseq-adjusted subset.
mm_bonly_metrics <- graph_metrics(mm_only_batch)

## Graphing number of non-zero genes with respect to CPM by library.

## Graphing library sizes.

## Graphing a boxplot.

## Graphing a correlation heatmap.

## Graphing a standard median correlation.

## Performing correlation.

## Graphing a distance heatmap.

## Graphing a standard median distance.

## Performing distance.

## Graphing a PCA plot.

## Graphing a T-SNE plot.

## Plotting a density plot.

## Plotting the representation of the top-n genes.

## Printing a color to condition legend.

4.3 Check coverage

I added a column to the sample design where bigger coverages (according to the libsize plot above) get bigger numbers.

## PCA of the normalized subset, passing the 'mmcoverage' design column as
## size_column (presumably scaling point size by coverage -- TODO confirm).
cov_test <- plot_pca(mm_only_norm, size_column="mmcoverage")
cov_test$plot

## Phew, that is good, no obvious clustering by coverage.

## Library sizes and count densities of the un-normalized subset.
mm_only_metrics$libsize

mm_only_metrics$density

## Correlation heatmaps: normalized only, then normalized + svaseq.
mm_nonly_metrics$corheat

mm_bonly_metrics$corheat

## PCA after svaseq adjustment, then t-SNE of the un-normalized subset.
mm_bonly_metrics$pcaplot

mm_only_metrics$tsneplot

## Run pca_information() on the un-normalized subset against the listed design
## columns (presumably relating each factor to the components -- TODO confirm).
test <- pca_information(
  mm_only_expt,
  expt_factors = c("mmcoverage", "time", "parasitesp", "condition")
)

## More shallow curves in these plots suggest more genes in this principle component.

## Show the cor_heatmap element of the pca_information() result.
test$cor_heatmap

## Variance partitioning over the listed design factors; the log shows the
## model ~ (1|time) + (1|parasitesp) + (1|batch) + (1|coveragefactor) and a
## projected run time of roughly 45 minutes.
mm_varpart <- varpart(
  mm_only_expt,
  predictor = NULL,
  factors = c("time", "parasitesp", "batch", "coveragefactor")
)

## Attempting mixed linear model with: ~  (1|time) + (1|parasitesp) + (1|batch) + (1|coveragefactor)

## Fitting the expressionset to the model, this is slow.

## Projected run time: ~ 45 min

## Placing factor: time at the beginning of the model.

## Show the partition_plot element of the varpart() result.
mm_varpart$partition_plot

5 Oh, wait these are transcripts!

I need to redo this from the perspective of genes and see if these patterns hold.

## Record the R version, platform, locale and package versions of this run.
pander::pander(sessionInfo())

R version 3.4.4 (2018-03-15)

**Platform:** x86_64-pc-linux-gnu (64-bit)

locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C

attached base packages: stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: hpgltools(v.2018.03)

loaded via a namespace (and not attached): Biobase(v.2.38.0), edgeR(v.3.20.9), bit64(v.0.9-7), splines(v.3.4.4), foreach(v.1.4.4), gtools(v.3.5.0), stats4(v.3.4.4), pander(v.0.6.1), blob(v.1.1.0), yaml(v.2.1.18), ggrepel(v.0.7.0), pillar(v.1.2.1), RSQLite(v.2.0), backports(v.1.1.2), lattice(v.0.20-35), limma(v.3.34.9), quadprog(v.1.5-5), digest(v.0.6.15), RColorBrewer(v.1.1-2), minqa(v.1.2.4), colorspace(v.1.3-2), preprocessCore(v.1.40.0), htmltools(v.0.3.6), Matrix(v.1.2-12), plyr(v.1.8.4), XML(v.3.98-1.10), pkgconfig(v.2.0.1), devtools(v.1.13.5), genefilter(v.1.60.0), xtable(v.1.8-2), corpcor(v.1.6.9), scales(v.0.5.0.9000), gdata(v.2.18.0), Rtsne(v.0.13), openxlsx(v.4.0.17), BiocParallel(v.1.12.0), lme4(v.1.1-15), annotate(v.1.56.1), tibble(v.1.4.2), mgcv(v.1.8-23), IRanges(v.2.12.0), ggplot2(v.2.2.1), withr(v.2.1.2), BiocGenerics(v.0.24.0), lazyeval(v.0.2.1), pbkrtest(v.0.4-7), survival(v.2.41-3), magrittr(v.1.5), memoise(v.1.1.0), evaluate(v.0.10.1), doParallel(v.1.0.11), nlme(v.3.1-131.1), MASS(v.7.3-49), gplots(v.3.0.1), xml2(v.1.2.0), tools(v.3.4.4), directlabels(v.2017.03.31), data.table(v.1.10.4-3), hms(v.0.4.2), matrixStats(v.0.53.1), stringr(v.1.3.0), S4Vectors(v.0.16.0), locfit(v.1.5-9.1), munsell(v.0.4.3), AnnotationDbi(v.1.40.0), colorRamps(v.2.3), compiler(v.3.4.4), caTools(v.1.17.1), rlang(v.0.2.0.9001), RCurl(v.1.95-4.10), grid(v.3.4.4), nloptr(v.1.0.4), iterators(v.1.0.9), tximport(v.1.6.0), rjson(v.0.2.15), labeling(v.0.3), bitops(v.1.0-6), base64enc(v.0.1-3), rmarkdown(v.1.9), variancePartition(v.1.8.1), gtable(v.0.2.0), codetools(v.0.2-15), DBI(v.0.8), roxygen2(v.6.0.1), reshape2(v.1.4.3), R6(v.2.2.2), knitr(v.1.20), bit(v.1.1-12), commonmark(v.1.4), rprojroot(v.1.3-2), KernSmooth(v.2.23-15), readr(v.1.1.1), stringi(v.1.1.7), sva(v.3.26.0), parallel(v.3.4.4) and Rcpp(v.0.12.16)

## Name the savefile after this Rmd (extension stripped) plus the version tag.
## sub() with an end-anchored pattern removes only the trailing ".Rmd", and the
## 'replacement' argument is spelled out instead of relying on partial matching
## of 'replace'.
this_save <- paste0(sub(pattern="\\.Rmd$", replacement="", x=rmd_file),
                    "-v", ver, ".rda.xz")
## message() concatenates its arguments itself, so no paste0() is needed.
message("Saving to ", this_save)

## Saving to 02_sample_estimation_mmusculus_201804-v20180410.rda.xz

## Save via saveme() to the file named in this_save; sm() presumably silences
## the call's output -- TODO confirm against hpgltools.
tt <- sm(saveme(filename=this_save))

LS0tCnRpdGxlOiAiTC4gbWFqb3IgMjAxNzogc2FtcGxlIGVzdGltYXRpb24gb2YgbmV3IE1vdXNlIHNhbXBsZXMgKDIwMTgwMykuIgphdXRob3I6ICJhdGIgYWJlbGV3QGdtYWlsLmNvbSIKZGF0ZTogImByIFN5cy5EYXRlKClgIgpvdXRwdXQ6CiBodG1sX2RvY3VtZW50OgogIGNvZGVfZG93bmxvYWQ6IHRydWUKICBjb2RlX2ZvbGRpbmc6IHNob3cKICBmaWdfY2FwdGlvbjogdHJ1ZQogIGZpZ19oZWlnaHQ6IDcKICBmaWdfd2lkdGg6IDcKICBoaWdobGlnaHQ6IGRlZmF1bHQKICBrZWVwX21kOiBmYWxzZQogIG1vZGU6IHNlbGZjb250YWluZWQKICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICBzZWxmX2NvbnRhaW5lZDogdHJ1ZQogIHRoZW1lOiByZWFkYWJsZQogIHRvYzogdHJ1ZQogIHRvY19mbG9hdDoKICAgIGNvbGxhcHNlZDogZmFsc2UKICAgIHNtb290aF9zY3JvbGw6IGZhbHNlCi0tLQoKPHN0eWxlPgogIGJvZHkgLm1haW4tY29udGFpbmVyIHsKICAgIG1heC13aWR0aDogMTYwMHB4OwogIH0KPC9zdHlsZT4KCmBgYHtyIG9wdGlvbnMsIGluY2x1ZGU9RkFMU0V9CmlmICghaXNUUlVFKGdldDAoInNraXBfbG9hZCIpKSkgewogIGxpYnJhcnkoaHBnbHRvb2xzKQogIHR0IDwtIGRldnRvb2xzOjpsb2FkX2FsbCgifi9ocGdsdG9vbHMiKQogIGtuaXRyOjpvcHRzX2tuaXQkc2V0KHByb2dyZXNzPVRSVUUsCiAgICAgICAgICAgICAgICAgICAgICAgdmVyYm9zZT1UUlVFLAogICAgICAgICAgICAgICAgICAgICAgIHdpZHRoPTkwLAogICAgICAgICAgICAgICAgICAgICAgIGVjaG89VFJVRSkKICBrbml0cjo6b3B0c19jaHVuayRzZXQoZXJyb3I9VFJVRSwKICAgICAgICAgICAgICAgICAgICAgICAgZmlnLndpZHRoPTgsCiAgICAgICAgICAgICAgICAgICAgICAgIGZpZy5oZWlnaHQ9OCwKICAgICAgICAgICAgICAgICAgICAgICAgZHBpPTk2KQogIG9sZF9vcHRpb25zIDwtIG9wdGlvbnMoZGlnaXRzPTQsCiAgICAgICAgICAgICAgICAgICAgICAgICBzdHJpbmdzQXNGYWN0b3JzPUZBTFNFLAogICAgICAgICAgICAgICAgICAgICAgICAga25pdHIuZHVwbGljYXRlLmxhYmVsPSJhbGxvdyIpCiAgZ2dwbG90Mjo6dGhlbWVfc2V0KGdncGxvdDI6OnRoZW1lX2J3KGJhc2Vfc2l6ZT0xMCkpCiAgdmVyIDwtICIyMDE4MDQxMCIKICBwcmV2aW91c19maWxlIDwtICIwMV9hbm5vdGF0aW9uX21tdXNjdWx1c18yMDE4MDQuUm1kIgoKICB0bXAgPC0gdHJ5KHNtKGxvYWRtZShmaWxlbmFtZT1wYXN0ZTAoZ3N1YihwYXR0ZXJuPSJcXC5SbWQiLCByZXBsYWNlPSIiLCB4PXByZXZpb3VzX2ZpbGUpLCAiLXYiLCB2ZXIsICIucmRhLnh6IikpKSkKICBybWRfZmlsZSA8LSAiMDJfc2FtcGxlX2VzdGltYXRpb25fbW11c2N1bHVzXzIwMTgwNC5SbWQiCn0KYGBgCgojIFNhbXBsZSBFc3RpbWF0aW9uOiBgciB2ZXJgCgpNLiBtdXNjdWx1cyBzYW1wbGUgZXN0aW1hdGlvbgo9PT09PT09PT09PT09PT09PT09PT09PT09PT09PQoKIyBDcmVhdGluZyBleHByZXNzaW9uc2V0
KHMpCgpgYGB7ciBjcmVhdGVfZXhwdH0KaGVhZChtbV9hbm5vdHYyJGFubm90YXRpb24pCgptbV9leHB0IDwtIGNyZWF0ZV9leHB0KG1ldGFkYXRhPSJzYW1wbGVfc2hlZXRzL2FsbF9zYW1wbGVzXzIwMTgwNC54bHN4IiwKICAgICAgICAgICAgICAgICAgICAgICBnZW5lX2luZm89bW1fYW5ub3R2Ml9kZiwKICAgICAgICAgICAgICAgICAgICAgICBmaWxlX2NvbHVtbj0ibW11c2N1bHVzZmlsZSIpCiMjICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcHJvICAgICBhbWEgICAgICAgICAgMTJoaW5mICAgICAgMTJodW5pbmYgICAgICAgMjRkaW5mICAgIDI0ZHVuaW5mCm1tX2V4cHQgPC0gc2V0X2V4cHRfY29sb3JzKGV4cHQ9bW1fZXhwdCwgY29sb3JzPWMoImdyYXkiLCAiZGFya2dyZWVuIiwgImRhcmtibHVlIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiZG9kZ2VyYmx1ZSIsICJkYXJrcmVkIiwgIiNFRTk5OTkiKSkKYGBgCgojIFZpc3VhbGl6aW5nIHJhdyBkYXRhCgpUaGlzIGZpbGUgd2FzIGNvcGVkIGZyb20gMDJfc2FtcGxlX2VzdGltYXRpb25fbG1ham9yXzIwMTgxNC5SbWQsIHNvIGl0IG1pZ2h0IGJlCmEgYml0IHJlcGV0aXRpdmUsIGJ1dCBJIHN1c3BlY3QgdGhhdCB0aGVyZSB3aWxsIGJlIG1vcmUgaW50ZXJlc3RpbmcgdGhpbmdzIGdvaW5nCm9uIGluIHRoZSBob3N0IGRhdGEuICBJIHdpbGwgYmUgc2FkIHRvIG5vdCBtYWtlICdwYXJhbm9ybWFsJyBqb2tlcyBpbiBteQp2YXJpYWJsZSBuYW1lcy4gIEFsc28sIHRoZXNlIGNvbG9ycyBzdWNrLCBsZXRzIGZpeCB0aGF0IG5vdy4KClRoZXJlIGFyZSBsb3RzIG9mIHRveXMgd2UgaGF2ZSBsZWFybmVkIHRvIHVzZSB0byBwbGF5IHdpdGggd2l0aCByYXcgZGF0YSBhbmQgZXhwbG9yZSBzdHVmZiBsaWtlCmJhdGNoIGVmZmVjdHMgb3Igbm9uLWNhbm9uaWNhbCBkaXN0cmlidXRpb25zIG9yIHNrZXdlZCBjb3VudHMuICBocGdsdG9vbHMgcHJvdmlkZXMgc29tZSBmdW5jdGlvbmFsaXR5CnRvIG1ha2UgdGhpcyBwcm9jZXNzIGVhc2llci4gIFRoZSBncmFwaHMgc2hvd24gYmVsb3cgYW5kIG1hbnkgbW9yZSBhcmUgZ2VuZXJhdGVkIHdpdGggdGhlIHdyYXBwZXIKJ2dyYXBoX21ldHJpY3MoKScgYnV0IHRoYXQgdGFrZXMgYXdheSB0aGUgY2hhbmNlIHRvIGV4cGxhaW4gdGhlIGdyYXBocyBhcyBJIGdlbmVyYXRlIHRoZW0uCgpgYGB7ciByYXdfZXhwbG9yZSwgZmlnLnNob3c9ImhpZGUifQptbV9leHB0IDwtIGV4Y2x1ZGVfZ2VuZXNfZXhwdChleHB0PW1tX2V4cHQsIGNvbHVtbj0iVHlwZSIsIG1ldGhvZD0ia2VlcCIsIHBhdHRlcm5zPWMoInByb3RlaW5fY29kaW5nIikpCm1tX21ldHJpY3MgPC0gZ3JhcGhfbWV0cmljcyhtbV9leHB0KQptbV9ub3JtIDwtIG5vcm1hbGl6ZV9leHB0KG1tX2V4cHQsIGZpbHRlcj1UUlVFLCBub3JtPSJxdWFudCIsIGNvbnZlcnQ9ImNwbSIsIHRyYW5zZm9ybT0ibG9nMiIpCm1t
X25vcm1tZXRyaWNzIDwtIGdyYXBoX21ldHJpY3MobW1fbm9ybSkKYGBgCgojIyBMb29rIGF0IG1ldHJpY3MKCk9rLCBzbyB0aGlzIHRpbWUgSSBleHBlY3QgdG8gc2VlIHZlcnkgZmV3IHJlYWRzIGluIHRoZSBwcm9tYXN0aWdvdGUvYW1hc3RpZ290ZQpzYW1wbGVzLgoKSXMgdGhpcyB0cnVlPwoKYGBge3Igc2hvd19wbG90c30KbW1fbWV0cmljcyRsZWdlbmQKIyMgcGluayBhbmQgZ3JlZW4gYXJlIHRoZSB1bmluZmVjdGVkIHNhbXBsZXMKbW1fbWV0cmljcyRsaWJzaXplCiMjIFN0aWxsIF9XQVlfIG1vcmUgcmVhZHMgdGhhbiBpZGVhbCBmb3IgcHJvbWFzdGlnb3RlL2FtYXN0aWdvdGUuCiMjIEkgd29uZGVyIGlmIHRoaXMgaXMgbm90IGFuIGFydGlmYWN0IG9mIG1lIHVzaW5nIHNhbG1vbi4KCm1tX25vcm1tZXRyaWNzJHBjYXBsb3QKIyMgSXQgbG9va3MgdG8gbWUgdGhhdCB0aGUgcHJpbWFyeSBwcmluY2lwbGUgY29tcG9uZW50IGlzIGNvdmVyYWdlPwojIyBJZiBpdCBpcyB0aGUgb25seSBmYWN0b3IsIHdoZW4gSSByZW1vdmUgdGhlIHByby9hbWEgc2FtcGxlcywgdGhlbiAxMDc1CiMjIHNob3VsZCBzdGljayB3YXkgb3V0IHRvIHRoZSBzaWRlIHdpdGggdGhlIG90aGVyIHNhbXBsZXMgYXBwZWFyaW5nIGluIG9yZGVyOgojIyAxMDc2LCAxMDg1LCAxMDkyLCAxMDgzLCAxMDc3LCAxMDczLCAuLi4KIyMgVGhhdCBkb2VzIG5vdCBzZWVtIGxpa2VseS4KYGBgCgojIyBEcm9wIGV4dHJhY2VsbHVsYXIgc2FtcGxlcwoKTm93IGxldHMgZ2V0IHJpZCBvZiB0aGUgdW5pbmZlY3RlZCBzYW1wbGVzIGFuZCB0cnkgYWdhaW4gYW5kIHNlZSBpZiBJIGFtIHJpZ2h0LgoKYGBge3IgZHJvcF9leHRyYWNlbGx1bGFyLCBmaWcuc2hvdz0iaGlkZSJ9Cm1tX29ubHlfZXhwdCA8LSBzdWJzZXRfZXhwdChtbV9leHB0LCBzdWJzZXQ9Imhvc3RwPT0neWVzJyIpCm1tX29ubHlfbWV0cmljcyA8LSBncmFwaF9tZXRyaWNzKG1tX29ubHlfZXhwdCkKbW1fb25seV9ub3JtIDwtIG5vcm1hbGl6ZV9leHB0KG1tX29ubHlfZXhwdCwgdHJhbnNmb3JtPSJsb2cyIiwgY29udmVydD0iY3BtIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG5vcm09InF1YW50IiwgZmlsdGVyPVRSVUUpCm1tX25vbmx5X21ldHJpY3MgPC0gZ3JhcGhfbWV0cmljcyhtbV9vbmx5X25vcm0pCm1tX29ubHlfYmF0Y2ggPC0gbm9ybWFsaXplX2V4cHQobW1fb25seV9leHB0LCB0cmFuc2Zvcm09ImxvZzIiLCBjb252ZXJ0PSJjcG0iLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG5vcm09InF1YW50IiwgZmlsdGVyPVRSVUUsIGJhdGNoPSJzdmFzZXEiKQptbV9ib25seV9tZXRyaWNzIDwtIGdyYXBoX21ldHJpY3MobW1fb25seV9iYXRjaCkKYGBgCgojIyBDaGVjayBjb3ZlcmFnZQoKSSBhZGRlZCBhIGNvbHVtbiB0byB0aGUgc2FtcGxlIGRlc2lnbiB3aGVyZSBiaWdnZXIgY292ZXJhZ2VzIChhY2NvcmRpbmcgdG8gdGhlCmxpYnNpemUgcGxvdCBhYm92ZSBnZXQgYmlnZ2VyIG51
bWJlcnMuCgpgYGB7ciBwY2FfY292ZXJhZ2V9CmNvdl90ZXN0IDwtIHBsb3RfcGNhKG1tX29ubHlfbm9ybSwgc2l6ZV9jb2x1bW49Im1tY292ZXJhZ2UiKQpjb3ZfdGVzdCRwbG90CiMjIFBoZXcsIHRoYXQgaXMgZ29vZCwgbm8gb2J2aW91cyBjbHVzdGVyaW5nIGJ5IGNvdmVyYWdlLgpgYGAKCgpgYGB7ciBsb29rX21ldHJpY3N9Cm1tX29ubHlfbWV0cmljcyRsaWJzaXplCgptbV9vbmx5X21ldHJpY3MkZGVuc2l0eQoKbW1fbm9ubHlfbWV0cmljcyRjb3JoZWF0Cm1tX2Jvbmx5X21ldHJpY3MkY29yaGVhdAoKbW1fYm9ubHlfbWV0cmljcyRwY2FwbG90Cm1tX29ubHlfbWV0cmljcyR0c25lcGxvdApgYGAKCmBgYHtyIHBjYV9pbmZvfQp0ZXN0IDwtIHBjYV9pbmZvcm1hdGlvbihtbV9vbmx5X2V4cHQsCiAgICAgICAgICAgICAgICAgICAgICAgIGV4cHRfZmFjdG9ycz1jKCJtbWNvdmVyYWdlIiwgInRpbWUiLCAicGFyYXNpdGVzcCIsICJjb25kaXRpb24iKSkKdGVzdCRjb3JfaGVhdG1hcApgYGAKCmBgYHtyIHZhcnBhcnR9Cm1tX3ZhcnBhcnQgPC0gdmFycGFydChtbV9vbmx5X2V4cHQsIHByZWRpY3Rvcj1OVUxMLAogICAgICAgICAgICAgICAgICAgICAgZmFjdG9ycz1jKCJ0aW1lIiwgInBhcmFzaXRlc3AiLCAiYmF0Y2giLCAiY292ZXJhZ2VmYWN0b3IiKSkKbW1fdmFycGFydCRwYXJ0aXRpb25fcGxvdApgYGAKCiMgT2gsIHdhaXQgdGhlc2UgYXJlIHRyYW5zY3JpcHRzIQoKSSBuZWVkIHRvIHJlZG8gdGhpcyBmcm9tIHRoZSBwZXJzcGVjdGl2ZSBvZiBnZW5lcyBhbmQgc2VlIGlmIHRoZXNlIHBhdHRlcm5zIGhvbGQuCgpgYGB7ciBzYXZlbWV9CnBhbmRlcjo6cGFuZGVyKHNlc3Npb25JbmZvKCkpCnRoaXNfc2F2ZSA8LSBwYXN0ZTAoZ3N1YihwYXR0ZXJuPSJcXC5SbWQiLCByZXBsYWNlPSIiLCB4PXJtZF9maWxlKSwgIi12IiwgdmVyLCAiLnJkYS54eiIpCm1lc3NhZ2UocGFzdGUwKCJTYXZpbmcgdG8gIiwgdGhpc19zYXZlKSkKdHQgPC0gc20oc2F2ZW1lKGZpbGVuYW1lPXRoaXNfc2F2ZSkpCmBgYAo=

L. major 2017: sample estimation of new Mouse samples (201803).

atb abelew@gmail.com

2018-04-16