1 Extract some count tables

I will use this sheet for exporting data to collaborators. Currently, that is only the folks interested in doing some analyses with the first PLOS NTD paper.

It is worth noting that I recently found some errors in my accounting for these and the plosntd2 samples (I had the pathogen strains reversed).

hs_annot <- load_biomart_annotations()$annotation

## The biomart annotations file already exists, loading from it.

rownames(hs_annot) <- make.names(
  paste0(hs_annot[["ensembl_transcript_id"]], ".",
         hs_annot[["transcript_version"]]),
  unique=TRUE)
hs_tx_gene <- hs_annot[, c("ensembl_gene_id", "ensembl_transcript_id")]
hs_tx_gene[["id"]] <- rownames(hs_tx_gene)
hs_tx_gene <- hs_tx_gene[, c("id", "ensembl_gene_id")]
new_hs_annot <- hs_annot
rownames(new_hs_annot) <- make.names(hs_annot[["ensembl_gene_id"]], unique=TRUE)

sample_sheet <- "sample_sheets/UMD_leishmania_host_metasheet_20200227.xlsx"
## As of 20200222, I have only performed hisat2 mapping for the plos ntd2 data.
prefix <- "excel/plos_ntd_host_hisat2-v"
plosntd1_expt <- create_expt(sample_sheet,
                             file_column="hsapienshisatfile",
                             gene_info=new_hs_annot,
                             savefile=glue::glue("{prefix}{ver}.rda"))

## Reading the sample metadata.

## The sample definitions comprises: 441 rows(samples) and 68 columns(metadata fields).

## Reading count tables.

## Reading count tables with read.table().

## /mnt/sshfs/cbcbsub/fs/cbcb-lab/nelsayed/scratch/atb/rnaseq/multiple_leishmania_2018/preprocessing/hpgl1108/outputs/hisat2_hg38_91/E1.count.xz contains 58307 rows.

## preprocessing/hpgl1109/outputs/hisat2_hg38_91/E2.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1110/outputs/hisat2_hg38_91/E3.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1111/outputs/hisat2_hg38_91/E4.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1112/outputs/hisat2_hg38_91/E5.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1113/outputs/hisat2_hg38_91/E6.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1114/outputs/hisat2_hg38_91/E7.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1115/outputs/hisat2_hg38_91/E8.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1116/outputs/hisat2_hg38_91/L1.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1117/outputs/hisat2_hg38_91/L2.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1118/outputs/hisat2_hg38_91/L3.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1119/outputs/hisat2_hg38_91/L4.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1120/outputs/hisat2_hg38_91/L5.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1121/outputs/hisat2_hg38_91/L6.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1122/outputs/hisat2_hg38_91/L7.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1123/outputs/hisat2_hg38_91/L8.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1124/outputs/hisat2_hg38_91/L9.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1125/outputs/hisat2_hg38_91/L10.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1126/outputs/hisat2_hg38_91/L11.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1127/outputs/hisat2_hg38_91/L12.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1128/outputs/hisat2_hg38_91/L13.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1129/outputs/hisat2_hg38_91/L14.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1130/outputs/hisat2_hg38_91/L15.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1131/outputs/hisat2_hg38_91/L16.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1132/outputs/hisat2_hg38_91/L17.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1133/outputs/hisat2_hg38_91/N1.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1134/outputs/hisat2_hg38_91/N2.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1135/outputs/hisat2_hg38_91/N3.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1136/outputs/hisat2_hg38_91/N4.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1137/outputs/hisat2_hg38_91/N5.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1138/outputs/hisat2_hg38_91/N6.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1139/outputs/hisat2_hg38_91/N7.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1140/outputs/hisat2_hg38_91/N8.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1141/outputs/hisat2_hg38_91/N9.count.xz contains 58307 rows and merges to 58307 rows.

## preprocessing/hpgl1142/outputs/hisat2_hg38_91/N10.count.xz contains 58307 rows and merges to 58307 rows.

## Finished reading count tables.

## Matched 57645 annotations and counts.

## Bringing together the count matrix and gene information.

## Some annotations were lost in merging, setting them to 'undefined'.

## The final expressionset has 58302 rows and 35 columns.

written_csv <- readr::write_csv(x=as.data.frame(exprs(plosntd1_expt)),
                                path=glue::glue("{prefix}{ver}.csv"))
written_xls <- write_expt(plosntd1_expt,
                          excel=glue::glue("{prefix}{ver}.xlsx"))

## Writing the first sheet, containing a legend and some summary data.

## Writing the raw reads.

## Graphing the raw reads.

## varpart sees only 1 batch, adjusting the model accordingly.

## Attempting mixed linear model with: ~  (1|condition)

## Fitting the expressionset to the model, this is slow.

## A couple of common errors:
## An error like 'vtv downdated' may be because there are too many 0s, filter the data and rerun.
## An error like 'number of levels of each grouping factor must be < number of observations' means
## that the factor used is not appropriate for the analysis - it really only works for factors
## which are shared among multiple samples.

## Retrying with only condition in the model.

## Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) : 
##   contrasts can be applied only to factors with 2 or more levels

## Attempting again with only condition failed.

## Error in simple_varpart(expt, predictor = NULL, factors = c("condition",  : 
##

## Writing the normalized reads.

## Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) : 
##   contrasts can be applied only to factors with 2 or more levels

## Graphing the normalized reads.

## varpart sees only 1 batch, adjusting the model accordingly.

## Attempting mixed linear model with: ~  (1|condition)

## Fitting the expressionset to the model, this is slow.

## A couple of common errors:
## An error like 'vtv downdated' may be because there are too many 0s, filter the data and rerun.
## An error like 'number of levels of each grouping factor must be < number of observations' means
## that the factor used is not appropriate for the analysis - it really only works for factors
## which are shared among multiple samples.

## Retrying with only condition in the model.

## Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) : 
##   contrasts can be applied only to factors with 2 or more levels

## Attempting again with only condition failed.

## Error in simple_varpart(norm_data, predictor = NULL, factors = c("condition",  : 
##

## Writing the median reads by factor.

## Don't forget, using salmon, _every_ sample will be included!
salmon_prefix <- "excel/plos_ntd_host_salmon-v"
salmon_plosntd1_expt <- create_expt(sample_sheet,
                                    file_column="hsapiensfile",
                                    gene_info=new_hs_annot,
                                    tx_gene_map=hs_tx_gene,
                                    savefile=glue::glue("{salmon_prefix}{ver}.rda"))

## Reading the sample metadata.

## The sample definitions comprises: 441 rows(samples) and 68 columns(metadata fields).

## Reading count tables.

## Using the transcript to gene mapping.

## Reading salmon data with tximport.

## Finished reading count tables.

## Matched 19629 annotations and counts.

## Bringing together the count matrix and gene information.

## The mapped IDs are not the rownames of your gene information, changing them now.

## Some annotations were lost in merging, setting them to 'undefined'.

## Warning in create_expt(sample_sheet, file_column = "hsapiensfile",
## gene_info = new_hs_annot, : The following samples have no counts!
## hpgl0721hpgl0722hpgl0723hpgl0724

## The final expressionset has 19629 rows and 271 columns.

salmon_plosntd1_expt <- subset_expt(salmon_plosntd1_expt, subset="study=='plosntd1'")

## Using a subset expression.

## There were 271, now there are 35 samples.

save_result <- try(save(salmon_plosntd1_expt, file=paste0(salmon_prefix, ver, ".rda")))

salmon_written_csv <- readr::write_csv(x=as.data.frame(exprs(salmon_plosntd1_expt)),
                                       path=glue::glue("{salmon_prefix}{ver}.csv"))
salmon_written_xls <- write_expt(salmon_plosntd1_expt,
                                 excel=glue::glue("{salmon_prefix}{ver}.xlsx"))

## Writing the first sheet, containing a legend and some summary data.

## Writing the raw reads.

## Graphing the raw reads.

## varpart sees only 1 batch, adjusting the model accordingly.

## Attempting mixed linear model with: ~  (1|condition)

## Fitting the expressionset to the model, this is slow.

## A couple of common errors:
## An error like 'vtv downdated' may be because there are too many 0s, filter the data and rerun.
## An error like 'number of levels of each grouping factor must be < number of observations' means
## that the factor used is not appropriate for the analysis - it really only works for factors
## which are shared among multiple samples.

## Retrying with only condition in the model.

## Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) : 
##   contrasts can be applied only to factors with 2 or more levels

## Attempting again with only condition failed.

## Error in simple_varpart(expt, predictor = NULL, factors = c("condition",  : 
##

## Writing the normalized reads.

## Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) : 
##   contrasts can be applied only to factors with 2 or more levels

## Graphing the normalized reads.

## varpart sees only 1 batch, adjusting the model accordingly.

## Attempting mixed linear model with: ~  (1|condition)

## Fitting the expressionset to the model, this is slow.

## A couple of common errors:
## An error like 'vtv downdated' may be because there are too many 0s, filter the data and rerun.
## An error like 'number of levels of each grouping factor must be < number of observations' means
## that the factor used is not appropriate for the analysis - it really only works for factors
## which are shared among multiple samples.

## Retrying with only condition in the model.

## Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) : 
##   contrasts can be applied only to factors with 2 or more levels

## Attempting again with only condition failed.

## Error in simple_varpart(norm_data, predictor = NULL, factors = c("condition",  : 
##

## Writing the median reads by factor.

fun <- load("excel/plos_ntd_host_hisat2-v20200227.rda")
ls()

##  [1] "expt"                 "fun"                  "hs_annot"            
##  [4] "hs_tx_gene"           "new_hs_annot"         "old_options"         
##  [7] "plosntd1_expt"        "prefix"               "previous_file"       
## [10] "rundate"              "salmon_plosntd1_expt" "salmon_prefix"       
## [13] "salmon_written_csv"   "salmon_written_xls"   "sample_sheet"        
## [16] "save_result"          "tt"                   "ver"                 
## [19] "written_csv"          "written_xls"

summary(expt)

##                  Length Class         Mode     
## title             1     -none-        character
## notes             1     -none-        character
## initial_metadata 69     data.frame    list     
## expressionset     1     ExpressionSet S4       
## design           69     data.frame    list     
## conditions       35     factor        numeric  
## batches          35     factor        numeric  
## samplenames      35     -none-        character
## colors           35     -none-        character
## state             5     -none-        list     
## libsize          35     -none-        numeric

salmon_plosntd1_exprs <- expt[["expressionset"]]
head(exprs(salmon_plosntd1_exprs), n=1)

##                 hpgl1108 hpgl1109 hpgl1110 hpgl1111 hpgl1112 hpgl1113 hpgl1114
## ENSG00000000003     1017      742      742     1585      871     1697      835
##                 hpgl1115 hpgl1116 hpgl1117 hpgl1118 hpgl1119 hpgl1120 hpgl1121
## ENSG00000000003      383      652      934      445      428      424      592
##                 hpgl1122 hpgl1123 hpgl1124 hpgl1125 hpgl1126 hpgl1127 hpgl1128
## ENSG00000000003      192      117      439     1009      664      971      889
##                 hpgl1129 hpgl1130 hpgl1131 hpgl1132 hpgl1133 hpgl1134 hpgl1135
## ENSG00000000003      396      653      408      445     4579     3436      954
##                 hpgl1136 hpgl1137 hpgl1138 hpgl1139 hpgl1140 hpgl1141 hpgl1142
## ENSG00000000003     1021     1126     1126     1120      687      900     1508

head(pData(salmon_plosntd1_exprs), n=1)

##          sampleid    study    lab         host hoststrain hostcelltype
## hpgl1108 hpgl1108 plosntd1 mosser homo_sapiens       <NA>         skin
##          hostcellsource infectstate differentiationmethod stimulation
## hpgl1108           <NA>         yes                  <NA>        <NA>
##          pathogenspecies pathogenstrain pathogenstage expttime moi
## hpgl1108   lbraziliensis           <NA>    amastigote     t21d  NA
##          numberparasitecells numberhostcells infectstateold donor celltype
## hpgl1108                  NA              NA       infected  <NA>     skin
##                state studybatch skipped        host1 pathogenspecies1
## hpgl1108 lb_infected       <NA>      no homo_sapiens    lbraziliensis
##                                                    hsapiensfile
## hpgl1108 preprocessing/hpgl1108/outputs/salmon_hg38_91/quant.sf
##                                                  hsapienshisatfile
## hpgl1108 preprocessing/hpgl1108/outputs/hisat2_hg38_91/E1.count.xz
##          mmusculusfile lmajorfile
## hpgl1108          <NA>       <NA>
##                                                         lmexicanafile
## hpgl1108 preprocessing/hpgl1108/outputs/salmon_lmexicana_v36/quant.sf
##          lpanamensisfile
## hpgl1108            <NA>
##                                                         lbraziliensisfile
## hpgl1108 preprocessing/hpgl1108/outputs/salmon_lbraziliensis_v37/quant.sf
##          lamazonensisfile   samplename infectionstatus samplealias
## hpgl1108             <NA> 9224523042_I early infection          E1
##          parasitedetectionstatus lesionsizemm illnessdurationdays patientage
## hpgl1108                negative           36                  21         44
##          patientsex experimentalias tubelabel tubealias expperson exptdate
## hpgl1108          F            <NA>      <NA>      <NA>      <NA>       NA
##          cellsperwell infectionperiod media parasiteenrichment
## hpgl1108           NA            <NA>  <NA>               <NA>
##          parasitesperinfectedcell parasitesper100cells percentinfectedcells
## hpgl1108                     <NA>                   NA                 <NA>
##          parasitecellrange rnangul libraryconstruction sraaccession condition
## hpgl1108              <NA>      NA                <NA>   SRR3162852      skin
##              batch notes totalreads trimmedreads mappedhost mappedparasite
## hpgl1108 medimmune  <NA>         NA     35419185   12379219         399150
##                 host2 percentmappedhost percentmappedparasite sampleid.1 file
## hpgl1108 homo_sapiens            0.3495               0.01127   hpgl1108 null

head(fData(salmon_plosntd1_exprs), n=1)

##                 ensembl_transcript_id ensembl_gene_id version
## ENSG00000000003       ENST00000373020 ENSG00000000003      14
##                 transcript_version hgnc_symbol
## ENSG00000000003                  8      TSPAN6
##                                                       description
## ENSG00000000003 tetraspanin 6 [Source:HGNC Symbol;Acc:HGNC:11858]
##                   gene_biotype cds_length chromosome_name strand start_position
## ENSG00000000003 protein_coding        738               X      -      100627109
##                 end_position
## ENSG00000000003    100639991

pander::pander(sessionInfo())
message(paste0("This is hpgltools commit: ", get_git_commit()))
this_save <- paste0(gsub(pattern="\\.Rmd", replace="", x=rmd_file), "-v", ver, ".rda.xz")
message(paste0("Saving to ", this_save))
tmp <- sm(saveme(filename=this_save))

LS0tCnRpdGxlOiAiVXNlIHRoaXMgc2hlZXQgdG8gZXh0cmFjdCBzdWJzZXRzIG9mIHRoZSBkYXRhLiIKYXV0aG9yOiAiYXRiIGFiZWxld0BnbWFpbC5jb20iCmRhdGU6ICJgciBTeXMuRGF0ZSgpYCIKb3V0cHV0OgogIGh0bWxfZG9jdW1lbnQ6CiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlCiAgICBjb2RlX2ZvbGRpbmc6IHNob3cKICAgIGZpZ19jYXB0aW9uOiB0cnVlCiAgICBmaWdfaGVpZ2h0OiA3CiAgICBmaWdfd2lkdGg6IDcKICAgIGhpZ2hsaWdodDogdGFuZ28KICAgIGtlZXBfbWQ6IGZhbHNlCiAgICBtb2RlOiBzZWxmY29udGFpbmVkCiAgICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICAgIHNlbGZfY29udGFpbmVkOiB0cnVlCiAgICB0aGVtZTogcmVhZGFibGUKICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OgogICAgICBjb2xsYXBzZWQ6IGZhbHNlCiAgICAgIHNtb290aF9zY3JvbGw6IGZhbHNlCiAgcm1kZm9ybWF0czo6cmVhZHRoZWRvd246CiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlCiAgICBjb2RlX2ZvbGRpbmc6IHNob3cKICAgIGRmX3ByaW50OiBwYWdlZAogICAgZmlnX2NhcHRpb246IHRydWUKICAgIGZpZ19oZWlnaHQ6IDcKICAgIGZpZ193aWR0aDogNwogICAgaGlnaGxpZ2h0OiB0YW5nbwogICAgd2lkdGg6IDMwMAogICAga2VlcF9tZDogZmFsc2UKICAgIG1vZGU6IHNlbGZjb250YWluZWQKICAgIHRvY19mbG9hdDogdHJ1ZQogIEJpb2NTdHlsZTo6aHRtbF9kb2N1bWVudDoKICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKICAgIGNvZGVfZm9sZGluZzogc2hvdwogICAgZmlnX2NhcHRpb246IHRydWUKICAgIGZpZ19oZWlnaHQ6IDcKICAgIGZpZ193aWR0aDogNwogICAgaGlnaGxpZ2h0OiB0YW5nbwogICAga2VlcF9tZDogZmFsc2UKICAgIG1vZGU6IHNlbGZjb250YWluZWQKICAgIHRvY19mbG9hdDogdHJ1ZQotLS0KCjxzdHlsZSB0eXBlPSJ0ZXh0L2NzcyI+CmJvZHksIHRkIHsKICBmb250LXNpemU6IDE2cHg7Cn0KY29kZS5yewogIGZvbnQtc2l6ZTogMTZweDsKfQpwcmUgewogZm9udC1zaXplOiAxNnB4Cn0KPC9zdHlsZT4KCmBgYHtyIG9wdGlvbnMsIGluY2x1ZGU9RkFMU0V9CmxpYnJhcnkoImhwZ2x0b29scyIpCnR0IDwtIGRldnRvb2xzOjpsb2FkX2FsbCgiL2RhdGEvaHBnbHRvb2xzIikKa25pdHI6Om9wdHNfa25pdCRzZXQod2lkdGg9MTIwLAogICAgICAgICAgICAgICAgICAgICBwcm9ncmVzcz1UUlVFLAogICAgICAgICAgICAgICAgICAgICB2ZXJib3NlPVRSVUUsCiAgICAgICAgICAgICAgICAgICAgIGVjaG89VFJVRSkKa25pdHI6Om9wdHNfY2h1bmskc2V0KGVycm9yPVRSVUUsCiAgICAgICAgICAgICAgICAgICAgICBkcGk9OTYpCm9sZF9vcHRpb25zIDwtIG9wdGlvbnMoZGlnaXRzPTQsCiAgICAgICAgICAgICAgICAgICAgICAgc3RyaW5nc0FzRmFjdG9ycz1GQUxTRSwKICAgICAgICAgICAgICAgICAgICAgICBrbml0ci5kdXBsaWNhdGUubGFiZWw9ImFsbG93IikKZ2dwbG90Mjo6dGhlbWVfc2V0KGdncGxvdDI6OnRoZW1lX2J3KGJhc2Vfc2l6ZT0xMCkpCnJ1bmRhdGUgPC0gZm9ybWF0KFN5cy5EYXRlKCksIGZvcm1hdD0iJVklbSVkIikKcHJldmlvdXNfZmlsZSA8LSAiMDJfZXN0aW1hdGlvbl9pbmZlY3Rpb25fMjAxODA4MjIuUm1kIgp2ZXIgPC0gIjIwMjAwMjI3IgoKIyN0bXAgPC0gc20obG9hZG1lKGZpbGVuYW1lPXBhc3RlMChnc3ViKHBhdHRlcm49IlxcLlJtZCIsIHJlcGxhY2U9IiIsIHg9cHJldmlvdXNfZmlsZSksICItdiIsIHZlciwgIi5yZGEueHoiKSkpCiMjcm1kX2ZpbGUgPC0gIjAzX2V4cHJlc3Npb25faW5mZWN0aW9uXzIwMTgwODIyLlJtZCIKYGBgCgojIEV4dHJhY3Qgc29tZSBjb3VudCB0YWJsZXMKCkkgd2lsbCB1c2UgdGhpcyBzaGVldCBmb3IgZXhwb3J0aW5nIGRhdGEgdG8gY29sbGFib3JhdG9ycy4gIEN1cnJlbnRseSwgdGhhdCBpcwpvbmx5IHRoZSBmb2xrcyBpbnRlcmVzdGVkIGluIGRvaW5nIHNvbWUgYW5hbHlzZXMgd2l0aCB0aGUgZmlyc3QgUExPUyBOVEQgcGFwZXIuCgpJdCBpcyB3b3J0aCBub3RpbmcgdGhhdCBJIHJlY2VudGx5IGZvdW5kIHNvbWUgZXJyb3JzIGluIG15IGFjY291bnRpbmcgZm9yIHRoZXNlCmFuZCB0aGUgcGxvc250ZDIgc2FtcGxlcyAoSSBoYWQgdGhlIHBhdGhvZ2VuIHN0cmFpbnMgcmV2ZXJzZWQpLgoKYGBge3IgZXh0cmFjdF9wbG9zbnRkMSwgZmlnLnNob3c9ImhpZGUifQpoc19hbm5vdCA8LSBsb2FkX2Jpb21hcnRfYW5ub3RhdGlvbnMoKSRhbm5vdGF0aW9uCnJvd25hbWVzKGhzX2Fubm90KSA8LSBtYWtlLm5hbWVzKAogIHBhc3RlMChoc19hbm5vdFtbImVuc2VtYmxfdHJhbnNjcmlwdF9pZCJdXSwgIi4iLAogICAgICAgICBoc19hbm5vdFtbInRyYW5zY3JpcHRfdmVyc2lvbiJdXSksCiAgdW5pcXVlPVRSVUUpCmhzX3R4X2dlbmUgPC0gaHNfYW5ub3RbLCBjKCJlbnNlbWJsX2dlbmVfaWQiLCAiZW5zZW1ibF90cmFuc2NyaXB0X2lkIildCmhzX3R4X2dlbmVbWyJpZCJdXSA8LSByb3duYW1lcyhoc190eF9nZW5lKQpoc190eF9nZW5lIDwtIGhzX3R4X2dlbmVbLCBjKCJpZCIsICJlbnNlbWJsX2dlbmVfaWQiKV0KbmV3X2hzX2Fubm90IDwtIGhzX2Fubm90CnJvd25hbWVzKG5ld19oc19hbm5vdCkgPC0gbWFrZS5uYW1lcyhoc19hbm5vdFtbImVuc2VtYmxfZ2VuZV9pZCJdXSwgdW5pcXVlPVRSVUUpCgpzYW1wbGVfc2hlZXQgPC0gInNhbXBsZV9zaGVldHMvVU1EX2xlaXNobWFuaWFfaG9zdF9tZXRhc2hlZXRfMjAyMDAyMjcueGxzeCIKIyMgQXMgb2YgMjAyMDAyMjIsIEkgaGF2ZSBvbmx5IHBlcmZvcm1lZCBoaXNhdDIgbWFwcGluZyBmb3IgdGhlIHBsb3MgbnRkMiBkYXRhLgpwcmVmaXggPC0gImV4Y2VsL3Bsb3NfbnRkX2hvc3RfaGlzYXQyLXYiCnBsb3NudGQxX2V4cHQgPC0gY3JlYXRlX2V4cHQoc2FtcGxlX3NoZWV0LAogICAgICAgICAgICAgICAgICAgICAgICAgICAgIGZpbGVfY29sdW1uPSJoc2FwaWVuc2hpc2F0ZmlsZSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZ2VuZV9pbmZvPW5ld19oc19hbm5vdCwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzYXZlZmlsZT1nbHVlOjpnbHVlKCJ7cHJlZml4fXt2ZXJ9LnJkYSIpKQoKd3JpdHRlbl9jc3YgPC0gcmVhZHI6OndyaXRlX2Nzdih4PWFzLmRhdGEuZnJhbWUoZXhwcnMocGxvc250ZDFfZXhwdCkpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHBhdGg9Z2x1ZTo6Z2x1ZSgie3ByZWZpeH17dmVyfS5jc3YiKSkKd3JpdHRlbl94bHMgPC0gd3JpdGVfZXhwdChwbG9zbnRkMV9leHB0LAogICAgICAgICAgICAgICAgICAgICAgICAgIGV4Y2VsPWdsdWU6OmdsdWUoIntwcmVmaXh9e3Zlcn0ueGxzeCIpKQoKCiMjIERvbid0IGZvcmdldCwgdXNpbmcgc2FsbW9uLCBfZXZlcnlfIHNhbXBsZSB3aWxsIGJlIGluY2x1ZGVkIQpzYWxtb25fcHJlZml4IDwtICJleGNlbC9wbG9zX250ZF9ob3N0X3NhbG1vbi12IgpzYWxtb25fcGxvc250ZDFfZXhwdCA8LSBjcmVhdGVfZXhwdChzYW1wbGVfc2hlZXQsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGZpbGVfY29sdW1uPSJoc2FwaWVuc2ZpbGUiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBnZW5lX2luZm89bmV3X2hzX2Fubm90LAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB0eF9nZW5lX21hcD1oc190eF9nZW5lLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzYXZlZmlsZT1nbHVlOjpnbHVlKCJ7c2FsbW9uX3ByZWZpeH17dmVyfS5yZGEiKSkKc2FsbW9uX3Bsb3NudGQxX2V4cHQgPC0gc3Vic2V0X2V4cHQoc2FsbW9uX3Bsb3NudGQxX2V4cHQsIHN1YnNldD0ic3R1ZHk9PSdwbG9zbnRkMSciKQpzYXZlX3Jlc3VsdCA8LSB0cnkoc2F2ZShzYWxtb25fcGxvc250ZDFfZXhwdCwgZmlsZT1wYXN0ZTAoc2FsbW9uX3ByZWZpeCwgdmVyLCAiLnJkYSIpKSkKCnNhbG1vbl93cml0dGVuX2NzdiA8LSByZWFkcjo6d3JpdGVfY3N2KHg9YXMuZGF0YS5mcmFtZShleHBycyhzYWxtb25fcGxvc250ZDFfZXhwdCkpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBwYXRoPWdsdWU6OmdsdWUoIntzYWxtb25fcHJlZml4fXt2ZXJ9LmNzdiIpKQpzYWxtb25fd3JpdHRlbl94bHMgPC0gd3JpdGVfZXhwdChzYWxtb25fcGxvc250ZDFfZXhwdCwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZXhjZWw9Z2x1ZTo6Z2x1ZSgie3NhbG1vbl9wcmVmaXh9e3Zlcn0ueGxzeCIpKQpgYGAKCmBgYHtyIGV4YW1wbGVfZXhwcmVzc2lvbnNldF9leHRyYWN0aW9ufQpmdW4gPC0gbG9hZCgiZXhjZWwvcGxvc19udGRfaG9zdF9oaXNhdDItdjIwMjAwMjI3LnJkYSIpCmxzKCkKc3VtbWFyeShleHB0KQpzYWxtb25fcGxvc250ZDFfZXhwcnMgPC0gZXhwdFtbImV4cHJlc3Npb25zZXQiXV0KaGVhZChleHBycyhzYWxtb25fcGxvc250ZDFfZXhwcnMpLCBuPTEpCmhlYWQocERhdGEoc2FsbW9uX3Bsb3NudGQxX2V4cHJzKSwgbj0xKQpoZWFkKGZEYXRhKHNhbG1vbl9wbG9zbnRkMV9leHBycyksIG49MSkKYGBgCgpgYGB7ciBzYXZlbWUsIGV2YWw9RkFMU0V9CnBhbmRlcjo6cGFuZGVyKHNlc3Npb25JbmZvKCkpCm1lc3NhZ2UocGFzdGUwKCJUaGlzIGlzIGhwZ2x0b29scyBjb21taXQ6ICIsIGdldF9naXRfY29tbWl0KCkpKQp0aGlzX3NhdmUgPC0gcGFzdGUwKGdzdWIocGF0dGVybj0iXFwuUm1kIiwgcmVwbGFjZT0iIiwgeD1ybWRfZmlsZSksICItdiIsIHZlciwgIi5yZGEueHoiKQptZXNzYWdlKHBhc3RlMCgiU2F2aW5nIHRvICIsIHRoaXNfc2F2ZSkpCnRtcCA8LSBzbShzYXZlbWUoZmlsZW5hbWU9dGhpc19zYXZlKSkKYGBgCg==

Use this sheet to extract subsets of the data.

atb abelew@gmail.com

2020-02-28

1 Extract some count tables