1 Use EncyclopeDIA data including more samples.

I think I will just load an existing expressionset generated from OpenSWATH.

##  [1] "s2018_0817BrikenTrypsinDIA01" "s2018_0817BrikenTrypsinDIA02"
##  [3] "s2018_0817BrikenTrypsinDIA03" "s2018_0817BrikenTrypsinDIA11"
##  [5] "s2018_0817BrikenTrypsinDIA12" "s2018_0817BrikenTrypsinDIA13"
##  [7] "s2018_0817BrikenTrypsinDIA07" "s2018_0817BrikenTrypsinDIA08"
##  [9] "s2018_0817BrikenTrypsinDIA09" "s2018_0817BrikenTrypsinDIA17"
## [11] "s2018_0817BrikenTrypsinDIA18" "s2018_0817BrikenTrypsinDIA19"
##  [1] "X2018_0502BrikenDIA01.mzML"       
##  [2] "X2018_0502BrikenDIA02.mzML"       
##  [3] "X2018_0502BrikenDIA03.mzML"       
##  [4] "X2018_0502BrikenDIA04.mzML"       
##  [5] "X2018_0502BrikenDIA05.mzML"       
##  [6] "X2018_0502BrikenDIA06.mzML"       
##  [7] "X2018_0502BrikenDIA07.mzML"       
##  [8] "X2018_0502BrikenDIA08.mzML"       
##  [9] "X2018_0502BrikenDIA09.mzML"       
## [10] "X2018_0502BrikenDIA10.mzML"       
## [11] "X2018_0502BrikenDIA11.mzML"       
## [12] "X2018_0502BrikenDIA12.mzML"       
## [13] "X2018_0726Briken01.mzML"          
## [14] "X2018_0726Briken02.mzML"          
## [15] "X2018_0726Briken03.mzML"          
## [16] "X2018_0726Briken04.mzML"          
## [17] "X2018_0726Briken05.mzML"          
## [18] "X2018_0726Briken06.mzML"          
## [19] "X2018_0726Briken07.mzML"          
## [20] "X2018_0726Briken08.mzML"          
## [21] "X2018_0726Briken09.mzML"          
## [22] "X2018_0726Briken11.mzML"          
## [23] "X2018_0726Briken12.mzML"          
## [24] "X2018_0726Briken13.mzML"          
## [25] "X2018_0726Briken14.mzML"          
## [26] "X2018_0726Briken15.mzML"          
## [27] "X2018_0726Briken16.mzML"          
## [28] "X2018_0726Briken17.mzML"          
## [29] "X2018_0726Briken18.mzML"          
## [30] "X2018_0726Briken19.mzML"          
## [31] "X2018_0817BrikenTrypsinDIA01.mzML"
## [32] "X2018_0817BrikenTrypsinDIA02.mzML"
## [33] "X2018_0817BrikenTrypsinDIA03.mzML"
## [34] "X2018_0817BrikenTrypsinDIA04.mzML"
## [35] "X2018_0817BrikenTrypsinDIA05.mzML"
## [36] "X2018_0817BrikenTrypsinDIA06.mzML"
## [37] "X2018_0817BrikenTrypsinDIA07.mzML"
## [38] "X2018_0817BrikenTrypsinDIA08.mzML"
## [39] "X2018_0817BrikenTrypsinDIA09.mzML"
## [40] "X2018_0817BrikenTrypsinDIA11.mzML"
## [41] "X2018_0817BrikenTrypsinDIA12.mzML"
## [42] "X2018_0817BrikenTrypsinDIA13.mzML"
## [43] "X2018_0817BrikenTrypsinDIA14.mzML"
## [44] "X2018_0817BrikenTrypsinDIA15.mzML"
## [45] "X2018_0817BrikenTrypsinDIA16.mzML"
## [46] "X2018_0817BrikenTrypsinDIA17.mzML"
## [47] "X2018_0817BrikenTrypsinDIA18.mzML"
## [48] "X2018_0817BrikenTrypsinDIA19.mzML"
##  [1] "s2018_0502BrikenDIA01"        "s2018_0502BrikenDIA02"       
##  [3] "s2018_0502BrikenDIA03"        "s2018_0502BrikenDIA04"       
##  [5] "s2018_0502BrikenDIA05"        "s2018_0502BrikenDIA06"       
##  [7] "s2018_0502BrikenDIA07"        "s2018_0502BrikenDIA08"       
##  [9] "s2018_0502BrikenDIA09"        "s2018_0502BrikenDIA10"       
## [11] "s2018_0502BrikenDIA11"        "s2018_0502BrikenDIA12"       
## [13] "s2018_0726Briken01"           "s2018_0726Briken02"          
## [15] "s2018_0726Briken03"           "s2018_0726Briken04"          
## [17] "s2018_0726Briken05"           "s2018_0726Briken06"          
## [19] "s2018_0726Briken07"           "s2018_0726Briken08"          
## [21] "s2018_0726Briken09"           "s2018_0726Briken11"          
## [23] "s2018_0726Briken12"           "s2018_0726Briken13"          
## [25] "s2018_0726Briken14"           "s2018_0726Briken15"          
## [27] "s2018_0726Briken16"           "s2018_0726Briken17"          
## [29] "s2018_0726Briken18"           "s2018_0726Briken19"          
## [31] "s2018_0817BrikenTrypsinDIA01" "s2018_0817BrikenTrypsinDIA02"
## [33] "s2018_0817BrikenTrypsinDIA03" "s2018_0817BrikenTrypsinDIA04"
## [35] "s2018_0817BrikenTrypsinDIA05" "s2018_0817BrikenTrypsinDIA06"
## [37] "s2018_0817BrikenTrypsinDIA07" "s2018_0817BrikenTrypsinDIA08"
## [39] "s2018_0817BrikenTrypsinDIA09" "s2018_0817BrikenTrypsinDIA11"
## [41] "s2018_0817BrikenTrypsinDIA12" "s2018_0817BrikenTrypsinDIA13"
## [43] "s2018_0817BrikenTrypsinDIA14" "s2018_0817BrikenTrypsinDIA15"
## [45] "s2018_0817BrikenTrypsinDIA16" "s2018_0817BrikenTrypsinDIA17"
## [47] "s2018_0817BrikenTrypsinDIA18" "s2018_0817BrikenTrypsinDIA19"
##  [1] "s2018_0502BrikenDIA01"        "s2018_0502BrikenDIA02"       
##  [3] "s2018_0502BrikenDIA03"        "s2018_0502BrikenDIA04"       
##  [5] "s2018_0502BrikenDIA05"        "s2018_0502BrikenDIA06"       
##  [7] "s2018_0502BrikenDIA07"        "s2018_0502BrikenDIA08"       
##  [9] "s2018_0502BrikenDIA09"        "s2018_0502BrikenDIA10"       
## [11] "s2018_0502BrikenDIA11"        "s2018_0502BrikenDIA12"       
## [13] "s2018_0726Briken01"           "s2018_0726Briken02"          
## [15] "s2018_0726Briken03"           "s2018_0726Briken04"          
## [17] "s2018_0726Briken05"           "s2018_0726Briken06"          
## [19] "s2018_0726Briken07"           "s2018_0726Briken08"          
## [21] "s2018_0726Briken09"           "s2018_0726Briken11"          
## [23] "s2018_0726Briken12"           "s2018_0726Briken13"          
## [25] "s2018_0726Briken14"           "s2018_0726Briken15"          
## [27] "s2018_0726Briken16"           "s2018_0726Briken17"          
## [29] "s2018_0726Briken18"           "s2018_0726Briken19"          
## [31] "s2018_0817BrikenTrypsinDIA01" "s2018_0817BrikenTrypsinDIA02"
## [33] "s2018_0817BrikenTrypsinDIA03" "s2018_0817BrikenTrypsinDIA04"
## [35] "s2018_0817BrikenTrypsinDIA05" "s2018_0817BrikenTrypsinDIA06"
## [37] "s2018_0817BrikenTrypsinDIA07" "s2018_0817BrikenTrypsinDIA08"
## [39] "s2018_0817BrikenTrypsinDIA09" "s2018_0817BrikenTrypsinDIA11"
## [41] "s2018_0817BrikenTrypsinDIA12" "s2018_0817BrikenTrypsinDIA13"
## [43] "s2018_0817BrikenTrypsinDIA14" "s2018_0817BrikenTrypsinDIA15"
## [45] "s2018_0817BrikenTrypsinDIA16" "s2018_0817BrikenTrypsinDIA17"
## [47] "s2018_0817BrikenTrypsinDIA18" "s2018_0817BrikenTrypsinDIA19"
## Reading the sample metadata.
## The sample definitions comprises: 48 rows(samples) and 28 columns(metadata fields).
## Matched 2632 annotations and counts.
## Bringing together the count matrix and gene information.
## The final expressionset has 2632 rows and 48 columns.

OK, so that was 100% weird. Next, let us replace all of the entries which are currently 0 with NA.

I suspect that somewhere along the way some set of samples got mis-ordered.

1.1 NA comparisons

## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(pofa(data))))
## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: pofa
## Removing 2551 low-count genes (81 remaining).
## Removing 0 low-count genes (2632 remaining).
## Step 2: normalizing the data with quant.
## Using normalize.quantiles.robust due to a thread error in preprocessCore.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 2353 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure

## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
## Warning in grid.Call.graphics(C_polygon, x$x, x$y, index): semi-
## transparency is not supported on this device: reported only once per page

## The original expressionset has 48 samples.
## The final expressionset has 18 samples.
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(pofa(data))))
## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: pofa
## Removing 1555 low-count genes (1077 remaining).
## Removing 0 low-count genes (2632 remaining).
## Step 2: normalizing the data with quant.
## Using normalize.quantiles.robust due to a thread error in preprocessCore.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 692 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
## Warning in grid.Call.graphics(C_segments, x$x0, x$y0, x$x1, x$y1, x$arrow):
## semi-transparency is not supported on this device: reported only once per
## page

## This data will benefit from being displayed on the log scale.
## If this is not desired, set scale='raw'
## Some entries are 0.  We are on log scale, setting them to 0.5.
## Changed 9744 zero count features.
## Warning in grid.Call.graphics(C_polygon, x$x, x$y, index): semi-
## transparency is not supported on this device: reported only once per page

## There were 18, now there are 9 samples.
## This function will replace the expt$expressionset slot with:
## pofa(data)
## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
##  some metrics are easier to see when the data is log2 transformed, but
##  EdgeR/DESeq do not accept transformed data.
## Leaving the data unconverted.  It is often advisable to cpm/rpkm
##  the data to normalize for sampling differences, keep in mind though that rpkm
##  has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
##  will try to detect this).
## Leaving the data unnormalized.  This is necessary for DESeq, but
##  EdgeR/limma might benefit from normalization.  Good choices include quantile,
##  size-factor, tmm, etc.
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: pofa
## Removing 352 low-count genes (2280 remaining).
## Removing 21 low-count genes (2611 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
## Warning in grid.Call.graphics(C_segments, x$x0, x$y0, x$x1, x$y1, x$arrow):
## semi-transparency is not supported on this device: reported only once per
## page

## There were 18, now there are 9 samples.
## This function will replace the expt$expressionset slot with:
## pofa(data)
## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Leaving the data in its current base format, keep in mind that
##  some metrics are easier to see when the data is log2 transformed, but
##  EdgeR/DESeq do not accept transformed data.
## Leaving the data unconverted.  It is often advisable to cpm/rpkm
##  the data to normalize for sampling differences, keep in mind though that rpkm
##  has some annoying biases, and voom() by default does a cpm (though hpgl_voom()
##  will try to detect this).
## Leaving the data unnormalized.  This is necessary for DESeq, but
##  EdgeR/limma might benefit from normalization.  Good choices include quantile,
##  size-factor, tmm, etc.
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: pofa
## Removing 1524 low-count genes (1108 remaining).
## Removing 71 low-count genes (2561 remaining).
## Step 2: not normalizing the data.
## Step 3: not converting the data.
## Step 4: not transforming the data.
## Step 5: not doing batch correction.
## Warning in grid.Call.graphics(C_segments, x$x0, x$y0, x$x1, x$y1, x$arrow):
## semi-transparency is not supported on this device: reported only once per
## page

## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 165 low-count genes (2467 remaining).
## Removing 102 low-count genes (2530 remaining).
## Step 2: normalizing the data with quant.
## Using normalize.quantiles.robust due to a thread error in preprocessCore.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 102 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
## Plotting a PCA before surrogates/batch inclusion.
## Assuming no batch in model for testing pca.
## Starting basic_pairwise().
## Starting basic pairwise comparison.
## Leaving the data alone, regardless of normalization state.
## Basic step 0/3: Transforming data.
## Basic step 1/3: Creating median and variance tables.
## Basic step 2/3: Performing 6 comparisons.
## Basic step 3/3: Creating faux DE Tables.
## Basic: Returning tables.
## Starting deseq_pairwise().
## Starting DESeq2 pairwise comparisons.
## About to round the data, this is a pretty terrible thing to do. But if you, like me, want to see what happens when you put non-standard data into deseq, then here you go.
## Warning in choose_binom_dataset(input, force = force): This data was
## inappropriately forced into integers.
## Choosing the non-intercept containing model.
## DESeq2 step 1/5: Including only condition in the deseq model.
## Warning in import_deseq(data, column_data, model_string, tximport =
## input[["tximport"]][["raw"]]): Converted down 1619 elements because they
## are larger than the maximum integer size.
## converting counts to integer mode
## DESeq2 step 2/5: Estimate size factors.
## DESeq2 step 3/5: Estimate dispersions.
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## Using a parametric fitting seems to have worked.
## DESeq2 step 4/5: nbinomWaldTest.
## Starting ebseq_pairwise().
## The data should be suitable for EdgeR/DESeq/EBSeq. If they freak out, check the state of the count table and ensure that it is in integer counts.
## Starting EBSeq pairwise subset.
## Choosing the non-intercept containing model.
## Starting EBTest of comp_whole vs. delta_whole.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of comp_whole vs. wt_whole.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of delta_whole vs. wt_whole.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting edger_pairwise().
## Starting edgeR pairwise comparisons.
## About to round the data, this is a pretty terrible thing to do. But if you, like me, want to see what happens when you put non-standard data into deseq, then here you go.
## Warning in choose_binom_dataset(input, force = force): This data was
## inappropriately forced into integers.
## Choosing the non-intercept containing model.
## EdgeR step 1/9: Importing and normalizing data.
## EdgeR step 2/9: Estimating the common dispersion.
## EdgeR step 3/9: Estimating dispersion across genes.
## EdgeR step 4/9: Estimating GLM Common dispersion.
## EdgeR step 5/9: Estimating GLM Trended dispersion.
## EdgeR step 6/9: Estimating GLM Tagged dispersion.
## EdgeR step 7/9: Running glmFit, switch to glmQLFit by changing the argument 'edger_test'.
## EdgeR step 8/9: Making pairwise contrasts.

## Starting limma_pairwise().
## Starting limma pairwise comparison.
## Leaving the data alone, regardless of normalization state.
## libsize was not specified, this parameter has profound effects on limma's result.
## Using the libsize from expt$libsize.
## Limma step 1/6: choosing model.
## Choosing the non-intercept containing model.
## Limma step 2/6: running limma::voom(), switch with the argument 'which_voom'.
## Using normalize.method=quantile for voom.

## Limma step 3/6: running lmFit with method: ls.
## Limma step 4/6: making and fitting contrasts with no intercept. (~ 0 + factors)
## Limma step 5/6: Running eBayes with robust=FALSE and trend=FALSE.
## Limma step 6/6: Writing limma outputs.
## Limma step 6/6: 1/3: Creating table: delta_whole_vs_comp_whole.  Adjust=BH
## Limma step 6/6: 2/3: Creating table: wt_whole_vs_comp_whole.  Adjust=BH
## Limma step 6/6: 3/3: Creating table: wt_whole_vs_delta_whole.  Adjust=BH
## Limma step 6/6: 1/3: Creating table: comp_whole.  Adjust=BH
## Limma step 6/6: 2/3: Creating table: delta_whole.  Adjust=BH
## Limma step 6/6: 3/3: Creating table: wt_whole.  Adjust=BH
## Comparing analyses.

## Deleting the file excel/enc_whole_combined_de-v20190310.xlsx before writing the tables.
## Writing a legend of columns.
## Printing a pca plot before/after surrogates/batch estimation.
## Warning in grid.Call.graphics(C_segments, x$x0, x$y0, x$x1, x$y1, x$arrow):
## semi-transparency is not supported on this device: reported only once per
## page
## Warning in grid.Call.graphics(C_segments, x$x0, x$y0, x$x1, x$y1, x$arrow):
## semi-transparency is not supported on this device: reported only once per
## page
## The keepers has no elements in the coefficients.
## Here are the keepers: delta_filtrate, wt_filtrate
## Here are the coefficients: delta_whole, comp_whole, wt_whole, comp_whole, wt_whole, delta_whole
## Error in combine_de_tables(enc_whole_de, keepers = keeper, excel = glue::glue("excel/enc_whole_combined_de-v{ver}.xlsx")): Unable to find the set of contrasts to keep, fix this and try again.
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 1172 low-count genes (1460 remaining).
## Removing 1018 low-count genes (1614 remaining).
## Step 2: normalizing the data with quant.
## Using normalize.quantiles.robust due to a thread error in preprocessCore.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## transform_counts: Found 6 values equal to 0, adding 1 to the matrix.
## Step 5: not doing batch correction.
## Plotting a PCA before surrogates/batch inclusion.
## Assuming no batch in model for testing pca.
## Starting basic_pairwise().
## Starting basic pairwise comparison.
## Leaving the data alone, regardless of normalization state.
## Basic step 0/3: Transforming data.
## Basic step 1/3: Creating median and variance tables.
## Basic step 2/3: Performing 6 comparisons.
## Basic step 3/3: Creating faux DE Tables.
## Basic: Returning tables.
## Starting deseq_pairwise().
## Starting DESeq2 pairwise comparisons.
## About to round the data, this is a pretty terrible thing to do. But if you, like me, want to see what happens when you put non-standard data into deseq, then here you go.
## Warning in choose_binom_dataset(input, force = force): This data was
## inappropriately forced into integers.
## Choosing the non-intercept containing model.
## DESeq2 step 1/5: Including only condition in the deseq model.
## Warning in import_deseq(data, column_data, model_string, tximport =
## input[["tximport"]][["raw"]]): Converted down 425 elements because they are
## larger than the maximum integer size.
## converting counts to integer mode
## DESeq2 step 2/5: Estimate size factors.
## DESeq2 step 3/5: Estimate dispersions.
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## Using a parametric fitting seems to have worked.
## DESeq2 step 4/5: nbinomWaldTest.
## Starting ebseq_pairwise().
## The data should be suitable for EdgeR/DESeq/EBSeq. If they freak out, check the state of the count table and ensure that it is in integer counts.
## Starting EBSeq pairwise subset.
## Choosing the non-intercept containing model.
## Starting EBTest of comp_filtrate vs. delta_filtrate.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of comp_filtrate vs. wt_filtrate.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of delta_filtrate vs. wt_filtrate.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting edger_pairwise().
## Starting edgeR pairwise comparisons.
## About to round the data, this is a pretty terrible thing to do. But if you, like me, want to see what happens when you put non-standard data into deseq, then here you go.
## Warning in choose_binom_dataset(input, force = force): This data was
## inappropriately forced into integers.
## Choosing the non-intercept containing model.
## EdgeR step 1/9: Importing and normalizing data.
## EdgeR step 2/9: Estimating the common dispersion.
## EdgeR step 3/9: Estimating dispersion across genes.
## EdgeR step 4/9: Estimating GLM Common dispersion.
## EdgeR step 5/9: Estimating GLM Trended dispersion.
## EdgeR step 6/9: Estimating GLM Tagged dispersion.
## EdgeR step 7/9: Running glmFit, switch to glmQLFit by changing the argument 'edger_test'.
## EdgeR step 8/9: Making pairwise contrasts.

## Starting limma_pairwise().
## Starting limma pairwise comparison.
## Leaving the data alone, regardless of normalization state.
## libsize was not specified, this parameter has profound effects on limma's result.
## Using the libsize from expt$libsize.
## Limma step 1/6: choosing model.
## Choosing the non-intercept containing model.
## Limma step 2/6: running limma::voom(), switch with the argument 'which_voom'.
## Using normalize.method=quantile for voom.
## Warning in t(counts + 0.5)/(lib.size + 1): longer object length is not a
## multiple of shorter object length

## Warning in t(fitted.cpm) * (lib.size + 1): longer object length is not a
## multiple of shorter object length
## Limma step 3/6: running lmFit with method: ls.
## Limma step 4/6: making and fitting contrasts with no intercept. (~ 0 + factors)
## Limma step 5/6: Running eBayes with robust=FALSE and trend=FALSE.
## Limma step 6/6: Writing limma outputs.
## Limma step 6/6: 1/3: Creating table: delta_filtrate_vs_comp_filtrate.  Adjust=BH
## Limma step 6/6: 2/3: Creating table: wt_filtrate_vs_comp_filtrate.  Adjust=BH
## Limma step 6/6: 3/3: Creating table: wt_filtrate_vs_delta_filtrate.  Adjust=BH
## Limma step 6/6: 1/3: Creating table: comp_filtrate.  Adjust=BH
## Limma step 6/6: 2/3: Creating table: delta_filtrate.  Adjust=BH
## Limma step 6/6: 3/3: Creating table: wt_filtrate.  Adjust=BH
## Comparing analyses.

## Deleting the file excel/enc_filtrate_combined_de-v20190310.xlsx before writing the tables.
## Writing a legend of columns.
## Printing a pca plot before/after surrogates/batch estimation.
## Warning in grid.Call.graphics(C_segments, x$x0, x$y0, x$x1, x$y1, x$arrow):
## semi-transparency is not supported on this device: reported only once per
## page

## Warning in grid.Call.graphics(C_segments, x$x0, x$y0, x$x1, x$y1, x$arrow):
## semi-transparency is not supported on this device: reported only once per
## page
## Working on 1/1: delta_wt which is: delta_filtrate/wt_filtrate.
## Found inverse table with wt_filtrate_vs_delta_filtrate
## 20181210 a pthread error in normalize.quantiles leads me to robust.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Adding venn plots for delta_wt.

## Limma expression coefficients for delta_wt; R^2: 0.799; equation: y = 0.927x - 0.661
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): semi-
## transparency is not supported on this device: reported only once per page
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): semi-
## transparency is not supported on this device: reported only once per page

## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): semi-
## transparency is not supported on this device: reported only once per page
## Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
## resolveHJust(x$just, : semi-transparency is not supported on this device:
## reported only once per page
## Edger expression coefficients for delta_wt; R^2: 0.557; equation: y = 0.198x + 16
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): semi-
## transparency is not supported on this device: reported only once per page
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): semi-
## transparency is not supported on this device: reported only once per page

## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): semi-
## transparency is not supported on this device: reported only once per page
## Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
## resolveHJust(x$just, : semi-transparency is not supported on this device:
## reported only once per page
## DESeq2 expression coefficients for delta_wt; R^2: 0.523; equation: y = 0.133x + 21.6
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): semi-
## transparency is not supported on this device: reported only once per page
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): semi-
## transparency is not supported on this device: reported only once per page

## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): semi-
## transparency is not supported on this device: reported only once per page
## Warning in grid.Call.graphics(C_rect, x$x, x$y, x$width, x$height,
## resolveHJust(x$just, : semi-transparency is not supported on this device:
## reported only once per page
## Writing summary information.
## Attempting to add the comparison plot to pairwise_summary at row: 23 and column: 1
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): semi-
## transparency is not supported on this device: reported only once per page
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): semi-
## transparency is not supported on this device: reported only once per page

## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): semi-
## transparency is not supported on this device: reported only once per page
## Performing save of the workbook.
## Note: zip::zip() is deprecated, please use zip::zipr() instead

2 Perform OpenSWATH analyses

I am going to mostly copy/paste some of the material from 03_swath2stats_20190327.Rmd here.

## Loading SWATH2stats
## Error in sample_annotation(data = tric_data, sample_annotation = sample_annot, : unused arguments (sample_annotation = sample_annot, fullpeptidename_column = "fullpeptidename")
## Error in is.data.frame(x): object 's2s_exp' not found
## Error in is.data.frame(x): object 's2s_exp' not found
## Error in is.data.frame(x): object 's2s_exp' not found
## Error in is.data.frame(x): object 's2s_exp' not found
## Error in is.data.frame(x): object 's2s_exp' not found
## Error in is.data.frame(x): object 's2s_exp' not found
## Error in is.data.frame(x): object 's2s_exp' not found
## Error in is.data.frame(x): object 'filtered_ms' not found
## Error in unifyProteinGroupLabels(data): object 'filtered_ms_fdr' not found
## Error in gsub("^[[:digit:]]\\/", "", data$ProteinName): object 'filtered_ms_fdr_pr' not found
## Error in unifyProteinGroupLabels(data): object 'filtered_ms_fdr_pr_all' not found
## Error in unifyProteinGroupLabels(data): object 'filtered_ms_fdr_pr_all_str' not found
## Error in aggregate(data[, "Intensity"], by = list(data$ProteinName, data$run_id), : object 'filtered_all_filters' not found
## Error in eval(expr, envir, enclos): object 'protein_matrix_filtered' not found
## Error in write_matrix_peptides(filtered_all_filters, write.csv = TRUE, : object 'filtered_all_filters' not found
## Error in eval(expr, envir, enclos): object 'peptide_matrix_filtered' not found
## Error in is.data.frame(x): object 'filtered_all_filters' not found
## Error in is.data.frame(x): object 'filtered_all_filters' not found
## Error in colnames(filtered_all_filters): object 'filtered_all_filters' not found
## Error in lapply(as.character(data$aggr_Fragment_Annotation), function(x) strsplit(x, : object 'filtered_all_filters' not found
## Error in convert_MSstats(disaggregated): could not find function "convert_MSstats"
## There were 43, now there are 17 samples.
## There were 43, now there are 26 samples.
## The factor comp_filtrate_01 has 2 rows.
## The factor comp_filtrate_02 has 3 rows.
## The factor comp_filtrate_03 has 3 rows.
## The factor delta_filtrate_01 has 2 rows.
## The factor delta_filtrate_02 has 3 rows.
## The factor delta_filtrate_03 has 2 rows.
## The factor wt_filtrate_01 has 4 rows.
## The factor wt_filtrate_02 has 3 rows.
## The factor wt_filtrate_03 has 4 rows.
## Reading the sample metadata.
## Warning in `[<-.factor`(`*tmp*`, iseq, value = c("undefined",
## "undefined", : invalid factor level, NA generated
## The sample definitions comprises: 9 rows(samples) and 29 columns(metadata fields).
## Matched 663 annotations and counts.
## Bringing together the count matrix and gene information.
## The final expressionset has 663 rows and 9 columns.
## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 0 low-count genes (663 remaining).
## Removing 0 low-count genes (663 remaining).
## Step 2: normalizing the data with quant.
## Using normalize.quantiles.robust due to a thread error in preprocessCore.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## Step 5: not doing batch correction.
## Warning in grid.Call.graphics(C_segments, x$x0, x$y0, x$x1, x$y1, x$arrow):
## semi-transparency is not supported on this device: reported only once per
## page

## This function will replace the expt$expressionset slot with:
## log2(cpm(quant(cbcb(data))))
## It will save copies of each step along the way
##  in expt$normalized with the corresponding libsizes. Keep libsizes in mind
##  when invoking limma.  The appropriate libsize is non-log(cpm(normalized)).
##  This is most likely kept at:
##  'new_expt$normalized$intermediate_counts$normalization$libsizes'
##  A copy of this may also be found at:
##  new_expt$best_libsize
## Not correcting the count-data for batch effects.  If batch is
##  included in EdgerR/limma's model, then this is probably wise; but in extreme
##  batch effects this is a good parameter to play with.
## Step 1: performing count filter with option: cbcb
## Removing 0 low-count genes (663 remaining).
## Removing 0 low-count genes (663 remaining).
## Step 2: normalizing the data with quant.
## Using normalize.quantiles.robust due to a thread error in preprocessCore.
## Step 3: converting the data with cpm.
## Step 4: transforming the data with log2.
## Step 5: not doing batch correction.
## Plotting a PCA before surrogates/batch inclusion.
## Using limma's removeBatchEffect to visualize with(out) batch inclusion.
## Starting basic_pairwise().
## Starting basic pairwise comparison.
## Leaving the data alone, regardless of normalization state.
## Basic step 0/3: Transforming data.
## Basic step 1/3: Creating median and variance tables.
## Basic step 2/3: Performing 6 comparisons.
## Basic step 3/3: Creating faux DE Tables.
## Basic: Returning tables.
## Starting deseq_pairwise().
## Starting DESeq2 pairwise comparisons.
## About to round the data, this is a pretty terrible thing to do. But if you, like me, want to see what happens when you put non-standard data into deseq, then here you go.
## Warning in choose_binom_dataset(input, force = force): This data was
## inappropriately forced into integers.
## The condition+batch model failed. Does your experimental design support both condition and batch? Using only a conditional model.
## Choosing the non-intercept containing model.
## DESeq2 step 1/5: Including batch and condition in the deseq model.
## converting counts to integer mode
## DESeq2 step 2/5: Estimate size factors.
## DESeq2 step 3/5: Estimate dispersions.
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## Using a parametric fitting seems to have worked.
## DESeq2 step 4/5: nbinomWaldTest.
## Starting ebseq_pairwise().
## The data should be suitable for EdgeR/DESeq/EBSeq. If they freak out, check the state of the count table and ensure that it is in integer counts.
## Starting EBSeq pairwise subset.
## Choosing the non-intercept containing model.
## Starting EBTest of comp_filtrate vs. delta_filtrate.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of comp_filtrate vs. wt_filtrate.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting EBTest of delta_filtrate vs. wt_filtrate.
## Copying ppee values as ajusted p-values until I figure out how to deal with them.
## Starting edger_pairwise().
## Starting edgeR pairwise comparisons.
## About to round the data, this is a pretty terrible thing to do. But if you, like me, want to see what happens when you put non-standard data into deseq, then here you go.
## Warning in choose_binom_dataset(input, force = force): This data was
## inappropriately forced into integers.
## The condition+batch model failed. Does your experimental design support both condition and batch? Using only a conditional model.
## Choosing the non-intercept containing model.
## EdgeR step 1/9: Importing and normalizing data.
## EdgeR step 2/9: Estimating the common dispersion.
## EdgeR step 3/9: Estimating dispersion across genes.
## EdgeR step 4/9: Estimating GLM Common dispersion.
## EdgeR step 5/9: Estimating GLM Trended dispersion.
## EdgeR step 6/9: Estimating GLM Tagged dispersion.
## EdgeR step 7/9: Running glmFit, switch to glmQLFit by changing the argument 'edger_test'.
## EdgeR step 8/9: Making pairwise contrasts.

## Starting limma_pairwise().
## Starting limma pairwise comparison.
## Leaving the data alone, regardless of normalization state.
## libsize was not specified, this parameter has profound effects on limma's result.
## Using the libsize from expt$libsize.
## Limma step 1/6: choosing model.
## The condition+batch model failed. Does your experimental design support both condition and batch? Using only a conditional model.
## Choosing the non-intercept containing model.
## Limma step 2/6: running limma::voom(), switch with the argument 'which_voom'.
## Using normalize.method=quantile for voom.

## Limma step 3/6: running lmFit with method: ls.
## Limma step 4/6: making and fitting contrasts with no intercept. (~ 0 + factors)
## Limma step 5/6: Running eBayes with robust=FALSE and trend=FALSE.
## Limma step 6/6: Writing limma outputs.
## Limma step 6/6: 1/3: Creating table: delta_filtrate_vs_comp_filtrate.  Adjust=BH
## Limma step 6/6: 2/3: Creating table: wt_filtrate_vs_comp_filtrate.  Adjust=BH
## Limma step 6/6: 3/3: Creating table: wt_filtrate_vs_delta_filtrate.  Adjust=BH
## Limma step 6/6: 1/3: Creating table: comp_filtrate.  Adjust=BH
## Limma step 6/6: 2/3: Creating table: delta_filtrate.  Adjust=BH
## Limma step 6/6: 3/3: Creating table: wt_filtrate.  Adjust=BH
## Comparing analyses.

## Writing a legend of columns.
## Working on 1/1: delta_wt which is: delta_filtrate/wt_filtrate.
## Found inverse table with wt_filtrate_vs_delta_filtrate
## 20181210 a pthread error in normalize.quantiles leads me to robust.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.
## Used Bon Ferroni corrected t test(s) between columns.

3 Compare tables: OpenSWATH (OSW) vs. EncyclopeDIA (Enc)

## 
##  Pearson's product-moment correlation
## 
## data:  merged_table[["edger_logfc.x"]] and merged_table[["edger_logfc.y"]]
## t = 16, df = 650, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4682 0.5796
## sample estimates:
##    cor 
## 0.5262
## Used Bon Ferroni corrected t test(s) between columns.
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): semi-
## transparency is not supported on this device: reported only once per page

Row.names seqnames start end width strand source type score phase id locustag gene description function. deseq_logfc.x deseq_adjp.x edger_logfc.x edger_adjp.x limma_logfc.x limma_adjp.x basic_nummed.x basic_denmed.x basic_numvar.x basic_denvar.x basic_logfc.x basic_t.x basic_p.x basic_adjp.x deseq_basemean.x deseq_lfcse.x deseq_stat.x deseq_p.x ebseq_fc.x ebseq_logfc.x ebseq_postfc.x ebseq_mean.x ebseq_ppee.x ebseq_ppde.x ebseq_adjp.x edger_logcpm.x edger_lr.x edger_p.x limma_ave.x limma_t.x limma_b.x limma_p.x limma_adjp_fdr.x deseq_adjp_fdr.x edger_adjp_fdr.x basic_adjp_fdr.x lfc_meta.x lfc_var.x lfc_varbymed.x p_meta.x p_var.x deseq_logfc.y deseq_adjp.y edger_logfc.y edger_adjp.y limma_logfc.y limma_adjp.y basic_nummed.y basic_denmed.y basic_numvar.y basic_denvar.y basic_logfc.y basic_t.y basic_p.y basic_adjp.y deseq_basemean.y deseq_lfcse.y deseq_stat.y deseq_p.y ebseq_fc.y ebseq_logfc.y ebseq_postfc.y ebseq_mean.y ebseq_ppee.y ebseq_ppde.y ebseq_adjp.y edger_logcpm.y edger_lr.y edger_p.y limma_ave.y limma_t.y limma_b.y limma_p.y limma_adjp_fdr.y deseq_adjp_fdr.y edger_adjp_fdr.y basic_adjp_fdr.y lfc_meta.y lfc_var.y lfc_varbymed.y p_meta.y p_var.y
4 Rv0007 Mtb_R24 9914 10828 915 + artemis gene undefined undefined Rv0007 Rv0007 Rv0007 conserved membrane protein Unknown 2.606 0.0000 2.607 0.0000 2.348 0.0168 4.703 7.715 3.349e-01 1.227e+00 3.0120 -3.429 4.122e-02 3.253e-01 131.40 0.5030 5.182 0.0000 0.1632 2.615 0.1678 103.66 0.0001 9.999e-01 0.0001 7.058 25.360 0.0000 6.589 -4.622 -0.3206 0.0007 1.676e-02 9.087e-06 2.247e-05 3.253e-01 2.549 1.048e-02 4.112e-03 2.430e-04 1.767e-07 2.818 0.0017 2.687 0.0720 2.5010 0.1356 21.18 25.35 5.718e+00 3.183e+00 4.180 -3.257 3.493e-02 3.495e-01 3.183e+07 0.7824 3.602 0.0003 0.2043 2.2910 0.2060 1.789e+07 0.5439 4.561e-01 0.5439 4.890 6.423 0.0113 4.8370 -2.9010 -2.9610 0.0182 1.356e-01 3.088e-03 7.193e-02 3.496e-01 11.380 2.228e+02 1.958e+01 9.932e-03 8.146e-05
14 Rv0056 Mtb_R24 59409 59867 459 + artemis gene undefined undefined Rv0056 Rv0056 rplI 50S ribosomal protein L9 Binds to the 23S rRNA. 1.774 0.0185 1.796 0.0137 2.040 0.0455 6.023 7.996 2.578e+00 4.433e-02 1.9730 -2.302 1.438e-01 4.928e-01 224.40 0.5850 3.032 0.0024 0.2791 1.841 0.2819 160.66 0.0028 9.972e-01 0.0028 7.823 10.460 0.0012 7.407 -3.612 -2.0680 0.0041 4.552e-02 1.897e-02 1.373e-02 4.927e-01 1.930 5.847e-02 3.030e-02 2.568e-03 2.015e-06 1.633 0.0004 1.567 0.0687 2.0250 0.0125 23.75 27.41 1.073e+00 1.844e+00 3.662 -3.860 2.058e-02 2.896e-01 1.414e+08 0.4090 3.993 0.0001 0.4298 1.2181 0.4301 9.572e+07 0.7659 2.341e-01 0.7659 7.062 6.528 0.0106 8.1950 -5.9370 0.9284 0.0003 1.253e-02 7.210e-04 6.868e-02 2.897e-01 10.520 2.380e+02 2.263e+01 3.646e-03 3.649e-05
30 Rv0175 Mtb_R24 206814 207455 642 + artemis gene undefined undefined Rv0175 Rv0175 Rv0175 MCE-associated membrane protein Unknown 2.447 0.0073 2.414 0.0066 2.051 0.0908 2.620 4.655 5.162e-01 2.629e+00 2.0350 -2.040 1.421e-01 4.928e-01 22.64 0.7257 3.372 0.0007 0.1855 2.430 0.2024 27.34 0.3154 6.846e-01 0.3154 4.606 12.160 0.0005 3.914 -2.869 -3.1890 0.0152 9.079e-02 7.508e-03 6.622e-03 4.927e-01 2.373 9.919e-03 4.180e-03 5.479e-03 7.089e-05 2.138 0.0000 1.966 0.6547 -0.2713 0.9267 19.76 22.97 1.003e-01 4.670e+00 3.202 -3.318 7.563e-02 4.542e-01 6.079e+06 0.4753 4.500 0.0000 0.3651 1.4537 0.3705 4.822e+06 0.5149 4.851e-01 0.5149 2.449 1.052 0.3050 2.4640 0.1894 -5.4990 0.8542 9.267e-01 8.496e-05 6.548e-01 4.543e-01 2.156 3.966e-02 1.840e-02 3.864e-01 1.874e-01
31 Rv0178 Mtb_R24 208938 209672 735 + artemis gene undefined undefined Rv0178 Rv0178 Rv0178 MCE-associated membrane protein Unknown 3.164 0.0001 3.153 0.0002 2.826 0.0432 3.314 6.634 5.785e-01 3.423e+00 3.3200 -2.404 1.067e-01 4.424e-01 41.88 0.6953 4.550 0.0000 0.1134 3.141 0.1257 38.58 0.0042 9.958e-01 0.0042 5.453 20.650 0.0000 4.701 -3.683 -1.7950 0.0036 4.322e-02 1.366e-04 1.824e-04 4.423e-01 3.067 2.530e-02 8.250e-03 1.197e-03 4.257e-06 2.574 0.0000 2.439 0.1437 2.1930 0.1299 20.49 24.97 1.197e+00 3.489e+00 4.482 -3.834 2.745e-02 3.181e-01 2.086e+07 0.4699 5.477 0.0000 0.2479 2.0120 0.2499 1.485e+07 0.5556 4.444e-01 0.5556 4.262 4.723 0.0298 5.2900 -3.3080 -2.3910 0.0096 1.299e-01 6.210e-07 1.437e-01 3.181e-01 11.170 2.243e+02 2.009e+01 1.311e-02 2.308e-04
41 Rv0216 Mtb_R24 258913 259926 1014 + artemis gene undefined undefined Rv0216 Rv0216 Rv0216 conserved hypothetical protein Unknown 1.663 0.0012 1.665 0.0079 1.601 0.0262 4.291 6.347 2.796e-02 4.818e-01 2.0560 -3.889 5.034e-02 3.374e-01 39.12 0.4224 3.936 0.0001 0.3094 1.692 0.3196 42.72 0.0000 1.000e+00 0.0000 5.352 11.680 0.0006 5.092 -4.203 -0.9701 0.0015 2.620e-02 1.276e-03 7.901e-03 3.374e-01 1.658 2.376e-04 1.433e-04 7.255e-04 4.822e-07 2.317 0.0014 2.218 0.3099 1.1830 0.5137 20.54 24.61 2.507e+00 2.529e+00 4.070 -3.640 2.197e-02 2.948e-01 1.522e+07 0.6335 3.658 0.0003 0.2775 1.8493 0.2801 1.084e+07 0.6073 3.927e-01 0.6073 3.832 2.842 0.0918 3.9240 -1.1240 -5.1900 0.2910 5.137e-01 2.562e-03 3.099e-01 2.947e-01 10.280 1.921e+02 1.868e+01 1.277e-01 2.210e-02
53 Rv0270 Mtb_R24 324567 326249 1683 + artemis gene undefined undefined Rv0270 Rv0270 fadD2 fatty-acid-CoA ligase Function unknown, but involved in lipid degradation. 1.723 0.0000 1.734 0.0012 1.560 0.0157 5.446 7.434 1.244e-01 1.190e-01 1.9880 -5.974 3.951e-03 2.873e-01 111.00 0.3492 4.933 0.0000 0.2959 1.757 0.3001 105.35 0.0000 1.000e+00 0.0000 6.816 16.470 0.0000 6.631 -4.752 -0.1286 0.0006 1.570e-02 2.329e-05 1.172e-03 2.873e-01 1.685 5.885e-03 3.492e-03 2.132e-04 1.066e-07 2.139 1.0000 2.163 0.0518 1.9630 0.2710 23.74 25.40 4.988e-01 1.256e+01 1.658 -1.856 1.953e-01 5.188e-01 7.688e+07 0.6985 3.062 1.0000 0.4005 1.3202 0.4009 5.835e+07 0.5208 4.792e-01 0.5208 6.221 7.154 0.0075 6.5870 -1.8680 -4.8400 0.0959 2.710e-01 1.000e+00 5.180e-02 5.188e-01 10.870 2.282e+02 2.101e+01 3.678e-01 3.017e-01
69 Rv0357c Mtb_R24 435471 436769 1299 - artemis gene undefined undefined Rv0357c Rv0357c purA adenylosuccinate synthetase Involved in AMP biosynthesis (first committed step). Plays an important role in the de novo pathway of purine nucleotide biosynthesis [catalytic activity: GTP imp L-aspartate = GDP phosphate adenylosuccinate]. 2.743 0.0000 2.742 0.0000 2.619 0.0023 3.519 6.367 3.606e-01 3.074e-01 2.8480 -5.728 4.687e-03 2.873e-01 54.15 0.4420 6.206 0.0000 0.1509 2.728 0.1608 47.71 0.0000 1.000e+00 0.0000 5.807 27.810 0.0000 5.376 -6.913 3.0280 0.0000 2.346e-03 2.773e-08 7.381e-06 2.873e-01 2.739 1.408e-05 5.142e-06 8.301e-06 2.034e-10 2.769 0.0000 2.640 0.0988 2.2990 0.1095 20.47 25.01 7.060e-01 3.442e+00 4.540 -4.179 2.880e-02 3.211e-01 2.138e+07 0.3844 7.203 0.0000 0.2167 2.2063 0.2186 1.511e+07 0.5098 4.902e-01 0.5098 4.296 5.619 0.0178 5.3150 -3.5390 -2.0700 0.0067 1.095e-01 9.634e-12 9.883e-02 3.212e-01 11.310 2.215e+02 1.958e+01 8.153e-03 8.044e-05
70 Rv0360c Mtb_R24 438302 438739 438 - artemis gene undefined undefined Rv0360c Rv0360c Rv0360c conserved hypothetical protein Function unknown 1.678 0.0182 1.688 0.0260 1.765 0.0572 3.705 5.643 9.667e-01 8.947e-02 1.9380 -3.044 7.549e-02 3.997e-01 41.18 0.5501 3.050 0.0023 0.2946 1.763 0.3086 31.25 0.0030 9.970e-01 0.0030 5.431 8.820 0.0030 5.066 -3.332 -2.4290 0.0066 5.722e-02 1.864e-02 2.600e-02 3.996e-01 1.707 1.752e-03 1.026e-03 3.971e-03 5.484e-06 2.534 0.3561 2.378 0.4824 5.6180 0.1833 21.22 25.15 1.620e+02 3.628e+00 3.938 -1.523 2.620e-01 5.188e-01 2.504e+07 1.9320 1.312 0.1897 0.2558 1.9667 0.2575 1.713e+07 0.5844 4.156e-01 0.5844 4.525 1.752 0.1857 4.4620 -2.5070 -3.4070 0.0344 1.833e-01 6.577e-01 4.823e-01 5.188e-01 11.740 2.578e+02 2.196e+01 1.366e-01 7.838e-03
71 Rv0361 Mtb_R24 438822 439649 828 + artemis gene undefined undefined Rv0361 Rv0361 Rv0361 conserved membrane protein Unknown 2.208 0.0162 2.207 0.0143 1.655 0.2201 4.076 6.696 4.724e-01 3.939e+00 2.6200 -1.456 2.596e-01 5.555e-01 62.57 0.7083 3.117 0.0018 0.2139 2.225 0.2238 46.61 0.0771 9.229e-01 0.0771 6.016 10.330 0.0013 5.423 -2.014 -4.6690 0.0690 2.201e-01 1.657e-02 1.429e-02 5.556e-01 2.033 9.170e-02 4.511e-02 2.406e-02 1.518e-03 1.889 0.0088 1.839 0.4372 -2.0820 0.6276 20.66 24.30 1.764e+00 1.582e+00 3.643 -3.959 1.678e-02 2.718e-01 1.474e+07 0.6096 3.099 0.0019 0.3551 1.4935 0.3578 1.006e+07 0.6334 3.666e-01 0.6334 3.803 2.003 0.1570 3.5910 0.8942 -5.1380 0.3953 6.275e-01 1.622e-02 4.372e-01 2.718e-01 1.369 7.603e-01 5.552e-01 1.847e-01 3.926e-02
141 Rv0709 Mtb_R24 805526 805759 234 + artemis gene undefined undefined Rv0709 Rv0709 rpmC 50S ribosomal protein L29 Involved in translation mechanisms. 1.773 0.0002 1.787 0.0009 1.932 0.0059 8.188 10.240 5.338e-01 1.647e-01 2.0540 -3.784 3.009e-02 3.118e-01 924.60 0.3945 4.493 0.0000 0.2850 1.811 0.2855 802.22 0.0000 1.000e+00 0.0000 9.852 17.120 0.0000 9.657 -5.707 1.2680 0.0001 5.913e-03 1.673e-04 8.607e-04 3.118e-01 1.821 6.543e-03 3.592e-03 5.869e-05 4.450e-09 1.286 0.0084 1.277 0.1282 1.8160 0.0145 26.03 29.05 3.525e-01 6.870e-01 3.027 -5.502 7.059e-03 2.064e-01 3.981e+08 0.4129 3.114 0.0018 0.5118 0.9662 0.5119 3.098e+08 0.7708 2.292e-01 0.7708 8.574 4.974 0.0257 9.7340 -5.7850 0.5963 0.0003 1.451e-02 1.553e-02 1.282e-01 2.064e-01 10.270 2.420e+02 2.355e+01 9.293e-03 2.032e-04
155 Rv0800 Mtb_R24 893318 894619 1302 + artemis gene undefined undefined Rv0800 Rv0800 pepC aminopeptidase Function unknown; possibly hydrolyzes peptides and/or proteins. 1.416 0.0052 1.416 0.0137 1.323 0.0649 7.170 8.143 3.191e-01 4.669e-01 0.9732 -2.678 5.739e-02 3.625e-01 251.60 0.4027 3.516 0.0004 0.3711 1.430 0.3729 236.40 0.0041 9.959e-01 0.0041 7.979 10.480 0.0012 7.817 -3.193 -2.8500 0.0085 6.488e-02 5.290e-03 1.373e-02 3.625e-01 1.420 4.069e-05 2.866e-05 3.387e-03 1.987e-05 1.041 0.0350 1.029 0.3523 1.1310 0.2648 24.38 27.16 1.798e-01 8.370e-01 2.773 -5.163 1.637e-02 2.718e-01 1.088e+08 0.4038 2.577 0.0100 0.6131 0.7058 0.6133 8.367e+07 0.6714 3.286e-01 0.6714 6.699 2.524 0.1122 7.8540 -1.9830 -4.7050 0.0798 2.648e-01 6.460e-02 3.524e-01 2.718e-01 9.328 2.067e+02 2.216e+01 6.734e-02 2.730e-03
191 Rv0991c Mtb_R24 1108172 1108504 333 - artemis gene undefined undefined Rv0991c Rv0991c Rv0991c conserved serine rich protein Function unknown 2.209 0.0005 2.208 0.0014 2.121 0.0238 3.408 5.896 4.595e-01 5.285e-01 2.4880 -3.855 1.838e-02 3.118e-01 41.84 0.5232 4.222 0.0000 0.2097 2.254 0.2210 40.60 0.0000 1.000e+00 0.0000 5.447 16.060 0.0001 5.048 -4.276 -0.8326 0.0013 2.381e-02 4.856e-04 1.360e-03 3.118e-01 2.274 1.294e-02 5.688e-03 4.596e-04 5.213e-07 2.208 0.0012 2.178 0.3166 1.1340 0.5744 20.42 24.85 1.214e+00 1.022e+00 4.431 -5.020 7.530e-03 2.086e-01 1.516e+07 0.5953 3.709 0.0002 0.2741 1.8670 0.2762 1.389e+07 0.6381 3.619e-01 0.6381 3.851 2.783 0.0952 4.6700 -1.0060 -5.3360 0.3415 5.743e-01 2.143e-03 3.166e-01 2.086e-01 10.110 1.883e+02 1.862e+01 1.457e-01 3.103e-02
221 Rv1187 Mtb_R24 1329390 1331021 1632 + artemis gene undefined undefined Rv1187 Rv1187 rocA pyrroline-5-carboxylate dehydrogenase Involved in the arginase pathway [catalytic activity: 1-pyrroline-5-carboxylate NAD( ) H(2)O = L-glutamate NADH] 1.708 0.0169 1.703 0.0200 1.905 0.0352 3.117 4.563 4.884e-01 4.563e-01 1.4460 -3.097 3.638e-02 3.226e-01 27.25 0.5516 3.097 0.0020 0.2889 1.791 0.3100 20.60 0.0066 9.934e-01 0.0066 4.870 9.508 0.0020 4.443 -3.865 -1.4770 0.0026 3.525e-02 1.730e-02 2.005e-02 3.226e-01 1.748 5.547e-03 3.172e-03 2.203e-03 1.234e-07 2.353 1.0000 2.262 0.6680 1.2290 0.9290 17.29 23.02 1.248e+02 2.171e+00 5.733 -1.657 2.352e-01 5.188e-01 7.297e+06 2.0350 1.156 1.0000 0.2627 1.9285 0.2708 3.456e+06 0.5666 4.334e-01 0.5666 2.787 1.002 0.3169 -0.7276 -0.1804 -4.9950 0.8610 9.290e-01 1.000e+00 6.680e-01 5.188e-01 10.480 1.994e+02 1.903e+01 7.260e-01 1.303e-01
246 Rv1323 Mtb_R24 1485862 1487031 1170 + artemis gene undefined undefined Rv1323 Rv1323 fadA4 acetyl-CoA acetyltransferase Function unknown, but supposed involvement in lipid degradation [catalytic activity: 2 acetyl-CoA = CoA acetoacetyl-CoA]. 1.747 0.0306 1.762 0.0271 1.793 0.0908 7.421 9.440 1.727e+00 1.584e+00 2.0180 -1.705 1.636e-01 4.936e-01 445.90 0.6184 2.825 0.0047 0.2947 1.763 0.2959 385.26 0.2159 7.841e-01 0.2159 8.806 8.698 0.0032 8.471 -2.871 -3.4380 0.0151 9.079e-02 3.137e-02 2.708e-02 4.936e-01 1.762 2.991e-04 1.698e-04 7.686e-03 4.227e-05 1.092 0.2490 1.061 0.3894 1.9950 0.0827 24.67 28.99 3.225e+00 8.507e-01 4.316 -3.054 5.557e-02 4.248e-01 3.496e+08 0.6975 1.566 0.1174 0.5837 0.7767 0.5838 2.649e+08 0.7038 2.962e-01 0.7038 8.388 2.292 0.1300 9.3870 -3.8810 -1.9960 0.0040 8.271e-02 4.598e-01 3.893e-01 4.248e-01 10.160 2.473e+02 2.435e+01 8.380e-02 4.816e-03
255 Rv1380 Mtb_R24 1553232 1554191 960 + artemis gene undefined undefined Rv1380 Rv1380 pyrB aspartate carbamoyltransferase Involved in pyrimidine biosynthesis (second step) [catalytic activity: carbamoyl phosphate L-aspartate = phosphate N-carbamoyl-L-aspartate] 2.263 0.0012 2.284 0.0015 2.414 0.0168 4.406 6.799 2.046e+00 4.353e-02 2.3930 -3.097 8.576e-02 4.212e-01 87.64 0.5714 3.961 0.0001 0.2001 2.321 0.2063 75.39 0.0000 1.000e+00 0.0000 6.484 15.750 0.0001 6.072 -4.653 -0.2470 0.0007 1.676e-02 1.235e-03 1.464e-03 4.212e-01 2.352 1.920e-02 8.165e-03 2.794e-04 1.273e-07 1.402 0.2406 1.372 0.4519 0.8510 0.7456 23.06 25.80 7.955e+00 1.230e+00 2.740 -2.525 9.857e-02 4.812e-01 4.522e+07 0.8792 1.595 0.1108 0.4875 1.0365 0.4882 3.275e+07 0.6809 3.191e-01 0.6809 5.426 1.895 0.1687 5.8680 -0.6295 -5.8780 0.5453 7.456e-01 4.446e-01 4.520e-01 4.812e-01 8.271 1.416e+02 1.712e+01 2.749e-01 5.566e-02
284 Rv1536 Mtb_R24 1736519 1739644 3126 + artemis gene undefined undefined Rv1536 Rv1536 ileS isoleucyl-tRNA synthetase Charging ILE tRNA [catalytic activity: ATP L-isoleucine tRNA(ILE) = AMP diphosphate L-isoleucyl-tRNA(ILE)]. 1.731 0.0298 1.758 0.0218 2.017 0.0498 5.362 7.262 2.933e+00 8.755e-02 1.9010 -2.188 1.530e-01 4.928e-01 143.40 0.6097 2.838 0.0045 0.2875 1.798 0.2914 113.46 0.0017 9.983e-01 0.0017 7.184 9.237 0.0024 6.806 -3.463 -2.3030 0.0053 4.975e-02 3.051e-02 2.183e-02 4.927e-01 1.898 7.189e-02 3.787e-02 4.058e-03 2.270e-06 1.490 0.0080 1.445 0.1262 1.8250 0.0482 23.97 27.68 1.294e+00 1.427e+00 3.711 -3.863 1.819e-02 2.792e-01 1.310e+08 0.4757 3.133 0.0017 0.4623 1.1131 0.4625 1.042e+08 0.7568 2.432e-01 0.7568 6.958 5.008 0.0252 8.1170 -4.4510 -1.0010 0.0017 4.820e-02 1.479e-02 1.262e-01 2.791e-01 10.410 2.392e+02 2.299e+01 9.569e-03 1.840e-04
298 Rv1612 Mtb_R24 1811127 1812359 1233 + artemis gene undefined undefined Rv1612 Rv1612 trpB tryptophan synthase, beta subunit Tryptophan biosynthesis pathway (fifth last step). The beta subunit is responsible for the synthesis of L-tryptophan from indole and L-serine. [catalytic activity: L-serine 1-(indol-3-YL)glycerol 3-phosphate = L-tryptophan glyceraldehyde 3-phosphate H(2)O] 1.412 0.0251 1.433 0.0273 1.266 0.0905 6.310 7.049 3.744e-01 4.176e-01 0.7386 -2.646 5.741e-02 3.625e-01 150.60 0.4839 2.917 0.0035 0.3679 1.443 0.3713 122.77 0.0794 9.206e-01 0.0794 7.254 8.639 0.0033 7.009 -2.898 -3.3140 0.0144 9.051e-02 2.571e-02 2.727e-02 3.625e-01 1.405 9.151e-04 6.514e-04 7.086e-03 4.057e-05 1.477 0.0039 1.322 0.1987 1.9250 0.0646 23.74 26.63 1.102e+00 4.079e+00 2.890 -2.793 6.807e-02 4.400e-01 9.194e+07 0.4395 3.362 0.0008 0.5512 0.8594 0.5515 6.685e+07 0.7528 2.472e-01 0.7528 6.400 3.923 0.0476 7.6910 -4.2010 -1.3190 0.0025 6.460e-02 7.253e-03 1.987e-01 4.400e-01 10.360 2.412e+02 2.327e+01 1.697e-02 7.062e-04
328 Rv1809 Mtb_R24 2051282 2052688 1407 + artemis gene undefined undefined Rv1809 Rv1809 PPE33 PPE family protein Function unknown 3.240 0.0001 3.249 0.0001 2.990 0.0207 4.417 7.851 6.100e-01 1.434e+00 3.4350 -3.807 2.497e-02 3.118e-01 84.78 0.6968 4.650 0.0000 0.1060 3.238 0.1103 109.89 0.0002 9.998e-01 0.0002 6.429 21.680 0.0000 5.415 -4.431 -0.6185 0.0010 2.067e-02 9.188e-05 1.125e-04 3.118e-01 3.216 2.465e-03 7.666e-04 3.348e-04 3.297e-07 1.355 0.0844 1.384 0.3767 0.1836 0.9422 23.28 26.69 7.717e-02 4.310e-01 3.414 -7.761 6.447e-03 2.064e-01 4.510e+07 0.6204 2.185 0.0289 0.4751 1.0736 0.4757 4.408e+07 0.6403 3.597e-01 0.6403 5.443 2.369 0.1238 5.5280 -0.1476 -6.1290 0.8860 9.422e-01 1.558e-01 3.767e-01 2.064e-01 2.190 1.956e+00 8.932e-01 3.462e-01 2.208e-01
331 Rv1827 Mtb_R24 2072596 2073084 489 + artemis gene undefined undefined Rv1827 Rv1827 cfp17 hypothetical protein CDS 1.201 0.0292 1.220 0.0357 1.300 0.0511 9.564 10.740 2.231e-01 1.522e-01 1.1710 -3.280 3.211e-02 3.118e-01 1519.00 0.4211 2.853 0.0043 0.4250 1.234 0.4253 1471.05 0.0071 9.929e-01 0.0071 10.570 7.976 0.0047 10.470 -3.425 -2.5400 0.0056 5.114e-02 2.992e-02 3.568e-02 3.118e-01 1.269 1.015e-02 8.001e-03 4.902e-03 4.406e-07 1.008 0.2109 1.086 0.3202 1.7720 0.0157 27.42 30.56 8.455e-01 6.902e-01 3.141 -4.344 1.249e-02 2.420e-01 9.521e+08 0.6012 1.677 0.0936 0.5806 0.7844 0.5806 8.325e+08 0.7828 2.172e-01 0.7828 9.868 2.755 0.0970 10.3200 -5.6500 0.1798 0.0004 1.572e-02 3.895e-01 3.202e-01 2.421e-01 10.100 2.456e+02 2.433e+01 6.365e-02 3.007e-03
340 Rv1872c Mtb_R24 2121907 2123151 1245 - artemis gene undefined undefined Rv1872c Rv1872c lldD2 L-lactate dehydrogenase Involved in respiration; catalyzes conversion of lactate into pyruvate [catalytic activity: (S)-lactate 2 ferricytochrome C = pyruvate 2 ferrocytochrome C]. 1.928 0.0020 1.915 0.0043 1.724 0.0498 4.900 6.275 2.001e-01 9.669e-01 1.3750 -2.871 6.968e-02 3.997e-01 79.00 0.5073 3.800 0.0001 0.2614 1.935 0.2672 77.40 0.0825 9.175e-01 0.0825 6.329 13.150 0.0003 5.997 -3.478 -2.2470 0.0051 4.975e-02 2.001e-03 4.340e-03 3.996e-01 1.858 1.132e-02 6.091e-03 1.854e-03 8.053e-06 2.154 1.0000 2.666 0.0296 1.2080 0.6186 23.42 24.88 5.901e-02 1.746e+01 1.469 -1.575 2.551e-01 5.188e-01 6.222e+07 0.7866 2.738 1.0000 0.3103 1.6882 0.3107 6.475e+07 0.4012 5.988e-01 0.4012 6.245 8.478 0.0036 6.6950 -0.9210 -5.9780 0.3819 6.186e-01 1.000e+00 2.956e-02 5.188e-01 10.490 1.946e+02 1.855e+01 4.618e-01 2.530e-01
377 Rv2185c Mtb_R24 2447066 2447500 435 - artemis gene undefined undefined Rv2185c Rv2185c TB16.3 conserved hypothetical protein Unknown 3.805 0.0000 3.831 0.0000 3.634 0.0048 6.444 9.617 9.787e-01 1.211e+00 3.1730 -4.281 1.315e-02 3.118e-01 714.40 0.5990 6.351 0.0000 0.0702 3.833 0.0709 704.71 0.0255 9.745e-01 0.0255 9.489 36.870 0.0000 8.731 -6.001 1.7390 0.0001 4.819e-03 1.537e-08 1.673e-07 3.118e-01 3.782 3.816e-03 1.009e-03 2.908e-05 2.536e-09 1.349 0.0035 1.383 0.0817 1.9350 0.0157 26.66 29.63 1.184e-01 1.228e+00 2.973 -5.115 2.480e-02 3.093e-01 5.838e+08 0.3970 3.397 0.0007 0.4863 1.0400 0.4864 4.233e+08 0.8200 1.800e-01 0.8200 9.140 6.115 0.0134 9.5300 -5.6620 0.3662 0.0004 1.572e-02 6.443e-03 8.170e-02 3.093e-01 10.350 2.416e+02 2.333e+01 4.814e-03 5.544e-05
380 Rv2194 Mtb_R24 2457553 2458395 843 + artemis gene undefined undefined Rv2194 Rv2194 qcrC ubiquinol-cytochrome C reductase qcrC cytochrome C subunit Respiration 1.524 0.0033 1.537 0.0072 1.529 0.0335 6.249 8.150 7.393e-01 1.923e-01 1.9010 -2.898 6.324e-02 3.777e-01 231.30 0.4184 3.643 0.0003 0.3389 1.561 0.3411 195.77 0.0000 1.000e+00 0.0000 7.862 11.930 0.0006 7.675 -3.947 -1.5140 0.0023 3.352e-02 3.431e-03 7.180e-03 3.777e-01 1.547 8.066e-04 5.213e-04 1.028e-03 1.162e-06 1.071 0.0958 1.036 0.4025 0.9077 0.3800 23.49 26.99 1.234e+00 1.227e+00 3.498 -3.542 2.398e-02 3.065e-01 9.836e+07 0.5046 2.122 0.0338 0.6084 0.7168 0.6087 6.658e+07 0.6670 3.330e-01 0.6670 6.553 2.211 0.1370 6.8330 -1.4700 -5.3410 0.1767 3.800e-01 1.769e-01 4.024e-01 3.065e-01 8.231 1.545e+02 1.878e+01 1.158e-01 5.441e-03
393 Rv2240c Mtb_R24 2511690 2512487 798 - artemis gene undefined undefined Rv2240c Rv2240c Rv2240c hypothetical protein Unknown 1.565 0.0002 1.566 0.0043 1.571 0.0106 8.140 9.504 5.546e-02 1.857e-01 1.3640 -5.380 1.159e-02 3.118e-01 526.40 0.3484 4.492 0.0000 0.3339 1.583 0.3346 595.73 0.0000 1.000e+00 0.0000 9.035 13.210 0.0003 8.848 -5.103 0.3389 0.0003 1.064e-02 1.673e-04 4.302e-03 3.118e-01 1.580 6.601e-04 4.177e-04 2.077e-04 3.101e-08 2.050 0.0077 2.083 0.0363 1.3060 0.4289 23.98 27.96 7.248e-01 9.279e-03 3.979 -7.610 1.567e-02 2.712e-01 1.401e+08 0.6506 3.151 0.0016 0.2846 1.8131 0.2848 1.174e+08 0.7299 2.701e-01 0.7299 7.086 8.022 0.0046 7.0810 -1.3160 -5.5810 0.2218 4.289e-01 1.413e-02 3.630e-02 2.712e-01 10.430 2.099e+02 2.013e+01 7.602e-02 1.594e-02
405 Rv2301 Mtb_R24 2573015 2573707 693 + artemis gene undefined undefined Rv2301 Rv2301 cut2 cutinase CDS 1.499 0.0237 1.517 0.0258 1.486 0.0624 7.569 9.726 8.359e-01 3.124e-01 2.1570 -2.443 8.420e-02 4.197e-01 481.40 0.5101 2.939 0.0033 0.3389 1.561 0.3398 529.44 0.3472 6.528e-01 0.3472 8.908 8.858 0.0029 8.671 -3.226 -2.8450 0.0080 6.235e-02 2.430e-02 2.580e-02 4.197e-01 1.516 2.831e-04 1.868e-04 4.747e-03 8.098e-06 1.321 0.1437 1.364 0.2279 1.3530 0.2613 26.93 29.89 7.308e-01 4.516e-02 2.965 -5.746 2.206e-02 2.948e-01 5.651e+08 0.6918 1.909 0.0563 0.4672 1.0978 0.4673 5.719e+08 0.6821 3.179e-01 0.6821 9.103 3.585 0.0583 10.0500 -2.0300 -5.0910 0.0741 2.613e-01 2.654e-01 2.279e-01 2.947e-01 10.010 2.247e+02 2.244e+01 6.289e-02 9.528e-05
414 Rv2376c Mtb_R24 2655609 2656115 507 - artemis gene undefined undefined Rv2376c Rv2376c cfp2 low molecular weight protein antigen CDS 3.079 0.0052 3.063 0.0340 3.062 0.0707 12.280 16.310 2.084e+00 6.481e+00 4.0300 -1.611 2.007e-01 5.043e-01 43070.00 0.8779 3.508 0.0005 0.1211 3.046 0.1211 32472.56 0.3348 6.652e-01 0.3348 15.390 8.087 0.0045 14.300 -3.095 -3.0230 0.0101 7.070e-02 5.347e-03 3.397e-02 5.043e-01 3.129 1.011e-02 3.232e-03 5.013e-03 2.365e-05 -1.630 0.0325 2.534 0.0998 3.0240 0.0003 31.30 36.08 8.686e-01 7.910e-02 4.783 -7.694 1.007e-02 2.297e-01 2.750e+09 0.6248 -2.608 0.0091 0.2082 2.2638 0.2082 3.033e+10 0.9352 6.479e-02 0.9352 15.380 5.592 0.0180 15.5600 -11.0100 4.6310 0.0000 2.575e-04 6.001e-02 9.981e-02 2.298e-01 10.030 2.751e+02 2.744e+01 9.050e-03 8.143e-05
436 Rv2477c Mtb_R24 2782366 2784042 1677 - artemis gene undefined undefined Rv2477c Rv2477c Rv2477c macrolide-transport ATP-binding protein ABC transporter Thought to be involved in active transport of macrolide across the membrane (export): macrolide antibiotics resistance by an export mechanism. Responsible for energy coupling to the transport system. 2.412 0.0005 2.403 0.0008 2.225 0.0275 3.665 5.933 7.501e-01 1.290e+00 2.2690 -2.816 5.191e-02 3.374e-01 64.12 0.5693 4.236 0.0000 0.1835 2.446 0.1926 51.42 0.0625 9.375e-01 0.0625 6.042 17.270 0.0000 5.522 -4.129 -1.0830 0.0017 2.753e-02 4.718e-04 8.453e-04 3.374e-01 2.408 5.333e-06 2.215e-06 5.711e-04 8.861e-07 3.526 1.0000 3.151 0.0131 3.2750 0.1808 20.96 25.04 2.362e+00 1.253e+01 4.077 -2.331 1.107e-01 4.999e-01 6.384e+07 0.8900 3.962 1.0000 0.1987 2.3313 0.1995 3.557e+07 0.3153 6.847e-01 0.3153 5.721 10.300 0.0013 6.5610 -2.5290 -3.6550 0.0332 1.808e-01 1.000e+00 1.309e-02 4.998e-01 11.930 2.212e+02 1.855e+01 3.448e-01 3.222e-01
514 Rv3001c Mtb_R24 3359585 3360586 1002 - artemis gene undefined undefined Rv3001c Rv3001c ilvC ketol-acid reductoisomerase Involved in valine and isoleucine biosynthesis (at the second step) [catalytic activity: (R)-2,3-dihydroxy-3-methylbutanoate NADP( ) = (S)-2-hydroxy-2-methyl-3-oxobutanoate NADPH]. 1.806 0.0522 1.791 0.0496 1.204 0.2682 4.058 5.035 3.796e-01 3.029e+00 0.9770 -1.188 3.355e-01 6.208e-01 48.75 0.6974 2.590 0.0096 0.2836 1.818 0.2929 47.71 0.6138 3.862e-01 0.6138 5.653 7.118 0.0076 5.181 -1.791 -5.0170 0.1007 2.681e-01 5.354e-02 4.961e-02 6.207e-01 1.610 1.029e-01 6.392e-02 3.931e-02 2.827e-03 3.424 1.0000 2.994 0.0416 2.7020 0.2648 21.23 24.17 1.830e+00 1.597e+01 2.946 -1.873 1.780e-01 5.188e-01 5.078e+07 0.9886 3.463 1.0000 0.2369 2.0775 0.2379 3.046e+07 0.3278 6.722e-01 0.3278 5.314 7.711 0.0055 6.0770 -1.9760 -4.4530 0.0808 2.648e-01 1.000e+00 4.163e-02 5.188e-01 11.740 2.187e+02 1.863e+01 3.621e-01 3.066e-01
541 Rv3194c Mtb_R24 3563264 3564286 1023 - artemis gene undefined undefined Rv3194c Rv3194c Rv3194c conserved secreted protein Function unknown 2.891 0.0000 2.875 0.0000 2.913 0.0059 2.602 5.332 6.047e-01 6.239e-01 2.7300 -4.494 1.087e-02 3.118e-01 29.77 0.5771 5.009 0.0000 0.1303 2.940 0.1453 31.39 0.0002 9.998e-01 0.0002 4.974 24.570 0.0000 4.383 -5.711 1.4130 0.0001 5.913e-03 1.728e-05 2.796e-05 3.118e-01 2.947 1.229e-02 4.170e-03 4.485e-05 5.867e-09 8.481 0.0038 8.450 0.0001 12.5500 0.1299 0.00 23.79 7.890e+01 2.365e+00 23.790 -3.752 5.870e-02 4.280e-01 6.902e+06 2.5180 3.368 0.0008 0.0039 7.9871 0.0091 5.852e+06 0.0000 0.000e+00 0.0000 2.672 20.960 0.0000 -1.8180 -2.9850 -3.1970 0.0160 1.299e-01 7.106e-03 1.224e-04 4.280e-01 16.500 1.932e+02 1.171e+01 5.570e-03 8.095e-05
549 Rv3237c Mtb_R24 3613121 3613603 483 - artemis gene undefined undefined Rv3237c Rv3237c Rv3237c conserved hypothetical protein Function unknown 1.829 0.0012 1.826 0.0055 1.690 0.0346 4.287 5.878 2.225e-01 3.996e-01 1.5910 -3.937 1.975e-02 3.118e-01 50.84 0.4646 3.936 0.0001 0.2762 1.856 0.2865 42.81 0.0000 1.000e+00 0.0000 5.719 12.570 0.0004 5.424 -3.900 -1.4790 0.0025 3.460e-02 1.276e-03 5.537e-03 3.118e-01 1.794 3.497e-03 1.949e-03 9.761e-04 1.660e-06 3.641 0.3759 3.131 0.6001 13.2900 0.1299 0.00 20.36 1.452e+02 2.011e+01 20.360 -2.147 1.371e-01 5.188e-01 8.668e+06 2.8810 1.264 0.2063 0.2203 2.1823 0.2287 3.428e+06 0.2294 7.706e-01 0.2294 2.825 1.234 0.2667 -1.7850 -3.0440 -3.1810 0.0145 1.299e-01 6.943e-01 6.002e-01 5.188e-01 13.150 2.856e+02 2.172e+01 1.625e-01 1.734e-02
582 Rv3459c Mtb_R24 3879273 3879692 420 - artemis gene undefined undefined Rv3459c Rv3459c rpsK 30S ribosomal protein S11 S11 plays an essential role for the selection of the correct tRNA in protein biosynthesis. It is located on the large lobe of the small subunit. 1.503 0.0032 1.502 0.0179 1.459 0.0435 4.608 6.265 5.838e-02 3.609e-01 1.6570 -3.903 3.772e-02 3.248e-01 38.71 0.4107 3.661 0.0003 0.3535 1.500 0.3628 45.05 0.0000 1.000e+00 0.0000 5.334 9.788 0.0018 5.046 -3.668 -1.8850 0.0037 4.349e-02 3.269e-03 1.791e-02 3.248e-01 1.509 1.470e-04 9.738e-05 1.894e-03 2.941e-06 1.369 0.1024 1.056 0.6680 0.9426 0.4246 22.24 23.71 8.699e-02 8.847e+00 1.467 -1.774 2.157e-01 5.188e-01 1.857e+07 0.6565 2.086 0.0370 0.7889 0.3421 0.7896 1.571e+07 0.6504 3.496e-01 0.6504 3.984 0.780 0.3771 5.3160 -1.3270 -5.2200 0.2183 4.247e-01 1.892e-01 6.680e-01 5.188e-01 8.675 1.673e+02 1.928e+01 2.108e-01 2.896e-02
586 Rv3484 Mtb_R24 3903078 3904616 1539 + artemis gene undefined undefined Rv3484 Rv3484 cpsA hypothetical protein Unknown 1.914 0.0000 1.915 0.0004 1.773 0.0157 6.068 7.806 3.225e-01 4.045e-01 1.7380 -3.856 1.865e-02 3.118e-01 131.20 0.3873 4.941 0.0000 0.2627 1.929 0.2660 137.13 0.0000 1.000e+00 0.0000 7.049 18.870 0.0000 6.834 -4.775 -0.1039 0.0006 1.570e-02 2.329e-05 3.876e-04 3.118e-01 1.864 8.351e-03 4.481e-03 1.941e-04 1.047e-07 2.220 0.0001 2.231 0.0436 1.6910 0.2643 23.06 26.78 1.178e-01 3.880e-01 3.724 -9.939 1.852e-03 1.185e-01 6.512e+07 0.5103 4.351 0.0000 0.2623 1.9305 0.2629 5.031e+07 0.7036 2.964e-01 0.7036 5.967 7.603 0.0058 6.8770 -1.9900 -4.5090 0.0790 2.643e-01 1.657e-04 4.356e-02 1.186e-01 10.830 2.222e+02 2.051e+01 2.829e-02 1.939e-03
591 Rv3520c Mtb_R24 3956325 3957368 1044 - artemis gene undefined undefined Rv3520c Rv3520c Rv3520c coenzyme F420-dependent oxidoreductase Function unknown; probably involved in cellular metabolism. 2.621 0.0000 2.631 0.0001 2.702 0.0059 3.274 6.975 1.304e+00 3.021e-01 3.7010 -3.895 3.231e-02 3.118e-01 70.97 0.5085 5.155 0.0000 0.1561 2.680 0.1636 62.45 0.0000 1.000e+00 0.0000 6.186 23.290 0.0000 5.684 -5.761 1.4650 0.0001 5.913e-03 9.544e-06 5.120e-05 3.118e-01 2.668 5.376e-03 2.015e-03 4.185e-05 5.050e-09 1.572 0.0443 1.263 0.4518 1.3430 0.2846 22.56 24.80 4.084e-01 8.661e+00 2.240 -1.904 1.862e-01 5.188e-01 3.533e+07 0.6335 2.482 0.0130 0.6743 0.5684 0.6750 2.585e+07 0.6578 3.422e-01 0.6578 4.932 1.901 0.1680 6.2660 -1.8040 -4.7430 0.1060 2.847e-01 8.178e-02 4.517e-01 5.188e-01 10.040 2.232e+02 2.223e+01 9.568e-02 6.082e-03
597 Rv3583c Mtb_R24 4025056 4025544 489 - artemis gene undefined undefined Rv3583c Rv3583c Rv3583c transcriptional regulator Involved in transcriptional mechanism. 2.731 0.0000 2.743 0.0000 2.688 0.0025 4.007 7.510 7.557e-01 9.109e-02 3.5030 -5.351 2.050e-02 3.118e-01 114.00 0.4275 6.387 0.0000 0.1457 2.779 0.1507 94.98 0.0000 1.000e+00 0.0000 6.856 32.560 0.0000 6.401 -6.676 2.7180 0.0000 2.503e-03 1.404e-08 1.098e-06 3.118e-01 2.741 1.408e-05 5.138e-06 1.133e-05 3.847e-10 1.750 0.0000 1.643 0.0809 0.9757 0.2710 23.22 26.64 4.765e-01 2.956e+00 3.416 -3.619 4.495e-02 3.891e-01 8.411e+07 0.2555 6.848 0.0000 0.4272 1.2269 0.4277 5.581e+07 0.7396 2.604e-01 0.7396 6.293 6.144 0.0132 7.1740 -1.8770 -4.7980 0.0945 2.710e-01 1.180e-10 8.086e-02 3.892e-01 9.367 1.767e+02 1.886e+01 3.589e-02 2.619e-03
606 Rv3627c Mtb_R24 4065900 4067285 1386 - artemis gene undefined undefined Rv3627c Rv3627c Rv3627c conserved hypothetical protein Function unknown (possibly involved in cell wall biosynthesis). 2.859 0.0000 2.852 0.0000 2.726 0.0045 4.226 6.634 2.400e-01 3.649e-01 2.4080 -6.299 3.731e-03 2.873e-01 67.65 0.4602 6.213 0.0000 0.1355 2.884 0.1425 67.64 0.0000 1.000e+00 0.0000 6.114 30.260 0.0000 5.621 -6.169 2.0400 0.0001 4.490e-03 2.773e-08 2.788e-06 2.873e-01 2.832 1.704e-03 6.018e-04 2.289e-05 1.570e-09 1.654 0.0021 1.553 0.5303 0.5826 0.7456 21.16 24.30 6.880e-01 2.829e+00 3.149 -3.518 4.070e-02 3.758e-01 1.502e+07 0.4679 3.536 0.0004 0.4523 1.1447 0.4543 1.218e+07 0.6602 3.398e-01 0.6602 3.802 1.506 0.2198 4.1020 -0.6315 -5.6570 0.5440 7.456e-01 3.951e-03 5.303e-01 3.759e-01 3.498 1.066e+01 3.049e+00 2.547e-01 7.479e-02
642 Rv3879c Mtb_R24 4357593 4359782 2190 - artemis gene undefined undefined Rv3879c Rv3879c Rv3879c hypothetical alanine and proline rich protein Unknown 1.255 0.0225 1.279 0.0382 1.249 0.0619 7.523 8.620 5.198e-01 1.848e-01 1.0970 -2.571 7.580e-02 3.997e-01 361.50 0.4242 2.958 0.0031 0.4064 1.299 0.4077 303.58 0.0882 9.118e-01 0.0882 8.506 7.773 0.0053 8.349 -3.245 -2.7770 0.0078 6.191e-02 2.309e-02 3.822e-02 3.996e-01 1.282 7.053e-04 5.500e-04 5.384e-03 5.413e-06 2.032 0.0023 2.050 0.0185 2.3110 0.0663 25.29 29.36 7.199e-01 9.995e-02 4.062 -7.336 8.937e-03 2.259e-01 3.757e+08 0.5784 3.513 0.0004 0.2917 1.7775 0.2918 3.135e+08 0.7899 2.101e-01 0.7899 8.505 9.506 0.0020 9.4660 -4.1480 -1.6120 0.0027 6.629e-02 4.274e-03 1.846e-02 2.259e-01 10.860 2.333e+02 2.148e+01 1.729e-03 1.344e-06
Row.names seqnames start end width strand source type score phase id locustag gene description function. deseq_logfc.x deseq_adjp.x edger_logfc.x edger_adjp.x limma_logfc.x limma_adjp.x basic_nummed.x basic_denmed.x basic_numvar.x basic_denvar.x basic_logfc.x basic_t.x basic_p.x basic_adjp.x deseq_basemean.x deseq_lfcse.x deseq_stat.x deseq_p.x ebseq_fc.x ebseq_logfc.x ebseq_postfc.x ebseq_mean.x ebseq_ppee.x ebseq_ppde.x ebseq_adjp.x edger_logcpm.x edger_lr.x edger_p.x limma_ave.x limma_t.x limma_b.x limma_p.x limma_adjp_fdr.x deseq_adjp_fdr.x edger_adjp_fdr.x basic_adjp_fdr.x lfc_meta.x lfc_var.x lfc_varbymed.x p_meta.x p_var.x deseq_logfc.y deseq_adjp.y edger_logfc.y edger_adjp.y limma_logfc.y limma_adjp.y basic_nummed.y basic_denmed.y basic_numvar.y basic_denvar.y basic_logfc.y basic_t.y basic_p.y basic_adjp.y deseq_basemean.y deseq_lfcse.y deseq_stat.y deseq_p.y ebseq_fc.y ebseq_logfc.y ebseq_postfc.y ebseq_mean.y ebseq_ppee.y ebseq_ppde.y ebseq_adjp.y edger_logcpm.y edger_lr.y edger_p.y limma_ave.y limma_t.y limma_b.y limma_p.y limma_adjp_fdr.y deseq_adjp_fdr.y edger_adjp_fdr.y basic_adjp_fdr.y lfc_meta.y lfc_var.y lfc_varbymed.y p_meta.y p_var.y
37 Rv0203 Mtb_R24 241514 241924 411 + artemis gene undefined undefined Rv0203 Rv0203 Rv0203 hypothetical exported protein Unknown -1.740 0.0000 -1.734 0.0004 -1.370 0.0103 12.310 10.300 2.518e-02 1.683e-01 -2.0120 7.058 9.364e-03 3.118e-01 2493.00 0.2745 -6.339 0.0000 3.285 -1.716 3.284 3229.43 0.0000 1.000e+00 0.0000 11.270 19.140 0.0000 10.830 5.157 0.4089 0.0003 1.026e-02 1.537e-08 3.659e-04 3.118e-01 -1.579 7.505e-02 -4.754e-02 1.072e-04 3.071e-08 -2.9840 0.0002 -2.926 0.0018 -3.8080 0.1356 28.38 26.81 1.050e-01 1.177e-01 -1.5720 5.2890 6.188e-03 2.064e-01 3.442e+08 0.7139 -4.1790 0.0000 9.265 -3.212 9.260 5.155e+08 0.0000 1.000e+00 0.0000 8.363 14.8400 0.0001 8.374 2.8890 -3.2640 0.0186 1.356e-01 3.403e-04 1.804e-03 2.064e-01 -2.0620 2.611e+00 -1.266e+00 6.242e-03 1.142e-04
58 Rv0288 Mtb_R24 351848 352138 291 + artemis gene undefined undefined Rv0288 Rv0288 esxH low molecular weight protein antigen 7 CDS -4.142 0.0000 -4.131 0.0000 -3.695 0.0008 13.270 9.015 1.115e-01 1.201e-01 -4.2500 15.190 1.106e-04 7.336e-02 3470.00 0.4798 -8.632 0.0000 17.253 -4.109 17.224 5056.95 0.0000 1.000e+00 0.0000 11.750 54.020 0.0000 9.602 8.198 4.5740 0.0000 8.289e-04 9.970e-16 6.564e-11 7.333e-02 -4.102 3.605e-03 -8.790e-04 1.667e-06 8.337e-12 -6.0110 0.0000 -5.964 0.0000 -11.9300 0.2601 27.31 23.20 4.284e-01 1.691e-01 -4.1080 9.2890 1.632e-03 1.160e-01 1.357e+08 0.8283 -7.2570 0.0000 77.034 -6.267 76.252 2.286e+08 0.0000 1.000e+00 0.0000 7.008 35.9900 0.0000 3.256 2.1300 -3.8650 0.0631 2.601e-01 6.494e-12 1.088e-07 1.160e-01 -4.8100 4.161e+00 -8.650e-01 2.105e-02 1.329e-03
76 Rv0397A undefined undefined undefined undefined undefined undefined undefined undefined undefined undefined undefined undefined undefined undefined -4.053 1.0000 -4.012 0.0022 -2.149 0.2556 6.778 5.850 9.813e+00 2.857e-02 -0.9271 1.207 3.501e-01 6.301e-01 365.40 1.3170 -3.078 1.0000 15.720 -3.974 15.486 503.93 0.0000 0.000e+00 0.0000 8.480 14.810 0.0001 6.188 1.849 -5.0610 0.0914 2.556e-01 1.000e+00 2.188e-03 6.301e-01 -3.447 1.030e+00 -2.987e-01 3.638e-01 3.056e-01 -1.8420 0.0350 -1.988 0.1135 -1.5290 0.2648 31.22 30.68 1.786e-01 4.490e-03 -0.5449 1.5440 2.567e-01 5.188e-01 2.342e+09 0.7146 -2.5780 0.0099 4.831 -2.272 4.831 3.562e+09 0.0000 1.000e+00 0.0000 11.260 5.2840 0.0215 11.850 1.9680 -5.5690 0.0818 2.648e-01 6.460e-02 1.135e-01 5.188e-01 -1.0470 2.258e+00 -2.156e+00 3.776e-02 1.490e-03
218 Rv1174c Mtb_R24 1305669 1306001 333 - artemis gene undefined undefined Rv1174c Rv1174c TB8.4 low molecular weight T-cell antigen Unknown function (secreted protein) -4.715 0.0000 -4.700 0.0000 -4.965 0.0002 16.800 11.840 5.686e-02 2.252e-01 -4.9580 15.830 6.024e-04 1.997e-01 40910.00 0.4377 -10.770 0.0000 25.527 -4.674 25.522 58062.68 0.0000 1.000e+00 0.0000 15.300 29.260 0.0000 13.770 10.070 6.5010 0.0000 2.179e-04 3.085e-24 4.203e-06 1.997e-01 -4.708 0.000e+00 0.000e+00 2.402e-07 1.315e-13 -2.8120 0.0001 -4.834 0.0001 -5.6870 0.0006 32.96 29.76 4.472e-02 2.252e-02 -3.2010 21.9600 5.678e-05 4.981e-02 2.184e+09 0.6574 -4.2770 0.0000 34.803 -5.121 34.800 1.242e+10 0.0000 1.000e+00 0.0000 12.820 20.8300 0.0000 11.730 9.6350 4.4350 0.0000 5.943e-04 2.266e-04 1.295e-04 4.981e-02 -2.7360 3.787e+00 -1.384e+00 1.009e-05 5.913e-11
245 Rv1311 Mtb_R24 1467315 1467680 366 + artemis gene undefined undefined Rv1311 Rv1311 atpC ATP synthase epsilon chain Produces ATP from ADP in the presence of a proton gradient across the membrane [catalytic activity: ATP H(2)O H( )(in) = ADP phosphate H( )(out)] -1.829 0.0005 -1.810 0.0030 -1.892 0.0157 9.900 8.083 1.474e-02 4.552e-01 -1.8170 5.044 3.259e-02 3.118e-01 501.60 0.4360 -4.195 0.0000 3.471 -1.795 3.462 584.20 0.0000 1.000e+00 0.0000 8.966 14.070 0.0002 8.658 4.725 -0.2664 0.0006 1.570e-02 5.166e-04 2.987e-03 3.118e-01 -1.776 5.677e-03 -3.196e-03 2.729e-04 9.361e-08 -1.8530 0.0000 -1.900 0.0101 -2.6830 0.0453 26.81 26.43 6.281e-02 1.759e+00 -0.3822 -0.2826 8.025e-01 8.647e-01 1.498e+08 0.2878 -6.4410 0.0000 4.837 -2.274 4.833 1.892e+08 0.0000 1.000e+00 0.0000 7.154 10.9600 0.0009 7.780 4.5130 -0.8926 0.0016 4.534e-02 1.818e-09 1.013e-02 8.647e-01 -1.2010 1.422e+00 -1.185e+00 8.445e-04 6.473e-07
260 Rv1390 Mtb_R24 1565093 1565425 333 + artemis gene undefined undefined Rv1390 Rv1390 rpoZ DNA-directed RNA polymerase omega chain Promotes RNA polymerase assembly. Latches the N-and C-terminal regions of the beta’ subunit thereby faciltating its interaction with the beta and alpha subunits [catalytic activity: N nucleoside triphosphate = N diphosphate {RNA}N]. -1.330 0.0073 -1.325 0.0211 -1.398 0.0457 7.948 6.293 7.131e-03 2.681e-01 -1.6550 4.589 4.018e-02 3.253e-01 145.50 0.3928 -3.385 0.0007 2.478 -1.309 2.463 166.81 0.0000 1.000e+00 0.0000 7.191 9.352 0.0022 6.992 3.593 -2.1220 0.0042 4.569e-02 7.489e-03 2.110e-02 3.253e-01 -1.311 8.168e-04 -6.230e-04 2.375e-03 3.032e-06 -1.2740 0.0077 -1.317 0.2774 -1.5580 0.2710 25.54 25.62 4.953e-02 1.613e+00 0.0766 -1.0730 3.901e-01 5.188e-01 6.637e+07 0.4049 -3.1460 0.0017 3.209 -1.682 3.206 7.964e+07 0.0000 1.000e+00 0.0000 5.979 3.1080 0.0779 6.131 1.8840 -4.6590 0.0935 2.710e-01 1.425e-02 2.774e-01 5.188e-01 -0.6444 1.267e+00 -1.965e+00 5.767e-02 2.414e-03
265 Rv1411c Mtb_R24 1587772 1588482 711 - artemis gene undefined undefined Rv1411c Rv1411c lprG lipoprotein CDS -1.901 0.0019 -1.878 0.0038 -1.970 0.0238 7.992 5.978 3.916e-01 5.469e-01 -2.0130 3.628 2.326e-02 3.118e-01 134.80 0.4995 -3.805 0.0001 3.635 -1.862 3.598 154.99 0.0000 1.000e+00 0.0000 7.084 13.490 0.0002 6.742 4.287 -0.9120 0.0013 2.381e-02 1.996e-03 3.795e-03 3.118e-01 -1.839 7.752e-03 -4.216e-03 5.500e-04 3.891e-07 -2.1450 0.0000 -2.216 0.0126 -2.4250 0.1285 25.81 25.21 1.191e-01 2.231e+00 -0.5950 0.0699 9.501e-01 9.864e-01 7.966e+07 0.2201 -9.7470 0.0000 6.088 -2.606 6.078 1.002e+08 0.0000 1.000e+00 0.0000 6.244 10.3900 0.0013 6.247 3.3780 -2.3880 0.0086 1.285e-01 5.533e-21 1.260e-02 9.865e-01 -1.3830 1.961e+00 -1.418e+00 3.286e-03 2.153e-05
277 Rv1477 Mtb_R24 1666990 1668408 1419 + artemis gene undefined undefined Rv1477 Rv1477 Rv1477 invasion-associated protein Unknown, but supposed involvement in virulence. -1.541 0.0011 -1.531 0.0052 -1.349 0.0287 10.750 9.190 3.780e-02 7.502e-03 -1.5580 12.930 1.489e-03 2.469e-01 954.90 0.3850 -4.002 0.0001 2.853 -1.512 2.850 1191.84 0.0000 1.000e+00 0.0000 9.891 12.710 0.0004 9.555 4.089 -1.3710 0.0018 2.870e-02 1.096e-03 5.241e-03 2.468e-01 -1.440 2.736e-02 -1.899e-02 7.338e-04 8.357e-07 -1.1410 0.1693 -1.105 0.3820 -0.7287 0.4838 29.50 30.04 7.567e-02 1.609e-02 0.5475 -2.9380 6.557e-02 4.400e-01 1.080e+09 0.6275 -1.8190 0.0689 2.612 -1.385 2.611 1.423e+09 0.0009 9.991e-01 0.0009 10.020 2.3360 0.1264 10.910 1.1900 -6.4530 0.2656 4.838e-01 3.128e-01 3.820e-01 4.400e-01 -0.2437 2.333e+00 -9.574e+00 1.536e-01 1.023e-02
315 Rv1712 Mtb_R24 1939599 1940291 693 + artemis gene undefined undefined Rv1712 Rv1712 cmk cytidylate kinase Catalyzes the transfer of a phosphate group from ATP to either CMP or dCMP to form CDP or dCDP and ADP [catalytic activity: ATP CMP = ADP CDP]. -1.685 0.0120 -1.683 0.0211 -1.832 0.0498 6.322 4.590 2.252e-01 1.601e+00 -1.7320 2.415 1.096e-01 4.456e-01 41.77 0.5232 -3.220 0.0013 3.320 -1.731 3.216 44.73 0.0225 9.775e-01 0.0225 5.441 9.368 0.0022 5.127 3.485 -2.1900 0.0051 4.975e-02 1.232e-02 2.110e-02 4.455e-01 -1.674 2.823e-04 -1.686e-04 2.852e-03 3.889e-06 -1.3390 0.1904 -1.614 0.5242 -2.0220 0.3211 23.07 22.79 4.283e-01 8.370e+00 -0.2798 -0.3414 7.626e-01 8.364e-01 1.504e+07 0.7671 -1.7450 0.0810 4.888 -2.289 4.848 1.739e+07 0.0001 9.999e-01 0.0001 3.788 1.5300 0.2161 4.724 1.6640 -4.5810 0.1318 3.212e-01 3.517e-01 5.242e-01 8.364e-01 -0.8488 1.209e+00 -1.425e+00 1.430e-01 4.658e-03
347 Rv1910c Mtb_R24 2156706 2157299 594 - artemis gene undefined undefined Rv1910c Rv1910c Rv1910c hypothetical exported protein Unknown -1.282 0.0036 -1.273 0.0215 -0.965 0.0823 11.950 10.560 1.229e-01 1.873e-01 -1.3860 4.062 1.667e-02 3.118e-01 2225.00 0.3544 -3.618 0.0003 2.412 -1.270 2.412 2906.00 0.0535 9.465e-01 0.0535 11.110 9.289 0.0023 10.710 3.005 -3.2740 0.0119 8.225e-02 3.709e-03 2.152e-02 3.118e-01 -1.146 5.146e-02 -4.489e-02 4.837e-03 3.853e-05 -1.8340 0.0077 -1.802 0.0523 -2.7230 0.0858 28.96 28.87 1.226e-01 8.045e-02 -0.0936 0.3641 7.349e-01 8.152e-01 6.052e+08 0.5831 -3.1460 0.0017 4.257 -2.090 4.256 8.347e+08 0.0000 1.000e+00 0.0000 9.185 7.1270 0.0076 9.560 3.8370 -2.1610 0.0043 8.584e-02 1.425e-02 5.233e-02 8.151e-01 -1.1630 1.332e+00 -1.146e+00 4.504e-03 8.865e-06
349 Rv1926c Mtb_R24 2178957 2179436 480 - artemis gene undefined undefined Rv1926c Rv1926c mpt63 immunogenic protein CDS -2.740 0.0000 -2.738 0.0004 -2.301 0.0059 15.010 12.120 3.252e-02 7.358e-01 -2.8950 5.641 2.472e-02 3.118e-01 14200.00 0.3648 -7.509 0.0000 6.625 -2.728 6.624 18077.15 0.0000 1.000e+00 0.0000 13.780 18.880 0.0000 13.270 5.665 1.2470 0.0001 5.913e-03 7.892e-12 3.876e-04 3.118e-01 -2.739 0.000e+00 0.000e+00 5.222e-05 6.189e-09 -1.6300 0.0325 -2.501 0.0596 -1.8750 0.3147 33.34 32.55 3.162e-02 4.996e-02 -0.7930 5.2140 7.383e-03 2.086e-01 2.750e+09 0.6248 -2.6080 0.0091 6.906 -2.788 6.906 1.610e+10 0.0000 1.000e+00 0.0000 13.380 6.8390 0.0089 13.980 1.6900 -6.4890 0.1265 3.146e-01 6.001e-02 5.957e-02 2.086e-01 -1.1730 2.185e+00 -1.863e+00 4.817e-02 4.602e-03
350 Rv1932 Mtb_R24 2183372 2183869 498 + artemis gene undefined undefined Rv1932 Rv1932 tpx thiol peroxidase CDS -2.289 0.0070 -2.261 0.0072 -2.007 0.0908 10.380 7.725 1.836e+00 2.041e-01 -2.6590 2.496 1.074e-01 4.424e-01 560.60 0.6733 -3.400 0.0007 4.740 -2.245 4.724 656.94 0.1908 8.092e-01 0.1908 9.117 11.940 0.0005 8.504 2.870 -3.4700 0.0152 9.079e-02 7.211e-03 7.180e-03 4.423e-01 -2.123 6.916e-02 -3.257e-02 5.467e-03 7.075e-05 -1.2140 0.0123 -1.205 0.2042 -0.2484 0.7843 29.25 29.74 3.504e-02 9.779e-01 0.4929 -1.3470 3.026e-01 5.188e-01 1.013e+09 0.4068 -2.9830 0.0029 2.908 -1.540 2.907 1.117e+09 0.0006 9.994e-01 0.0006 9.923 3.8610 0.0494 11.110 0.5361 -7.0430 0.6053 7.843e-01 2.268e-02 2.042e-01 5.188e-01 0.0110 4.429e+00 4.026e+02 2.192e-01 1.124e-01
396 Rv2245 Mtb_R24 2518115 2519365 1251 + artemis gene undefined undefined Rv2245 Rv2245 kasA 3-oxoacyl-[acyl-carrier protein] synthase 1 Involved in fatty acid biosynthesis (mycolic acids synthesis); involved in meromycolate extension. Catalyzes the condensation reaction of fatty acid synthesis by the addition to an acyl acceptor of two carbons from malonyl-ACP [catalytic activity: acyl-[acyl-carrier protein] malonyl-[acyl-carrier protein] = 3-oxoacyl-[acyl-carrier protein] [acyl-carrier protein] CO(2)]. -1.503 0.0064 -1.489 0.0137 -1.538 0.0335 8.907 7.481 1.076e-01 2.628e-01 -1.4260 4.366 1.706e-02 3.118e-01 256.80 0.4365 -3.444 0.0006 2.801 -1.486 2.791 322.92 0.0000 1.000e+00 0.0000 7.999 10.510 0.0012 7.598 3.944 -1.5540 0.0023 3.352e-02 6.553e-03 1.373e-02 3.118e-01 -1.490 1.291e-04 -8.665e-05 1.344e-03 7.432e-07 -1.1030 0.1919 -1.334 0.1481 -0.5823 0.7111 27.11 25.88 1.354e-01 9.775e+00 -1.2250 -0.3442 7.627e-01 8.364e-01 1.651e+08 0.6336 -1.7410 0.0817 4.263 -2.092 4.261 2.084e+08 0.0188 9.812e-01 0.0188 7.239 4.6360 0.0313 8.274 0.7263 -6.4040 0.4868 7.112e-01 3.544e-01 1.481e-01 8.364e-01 -0.1848 3.313e+00 -1.793e+01 1.999e-01 6.235e-02
412 Rv2352c Mtb_R24 2632923 2634098 1176 - artemis gene undefined undefined Rv2352c Rv2352c PPE38 PPE family protein Function unknown -4.263 0.0000 -4.242 0.0000 -4.812 0.0084 10.470 6.237 7.186e-01 7.897e+00 -4.2330 3.111 7.250e-02 3.997e-01 505.80 0.8171 -5.217 0.0000 18.842 -4.236 18.596 693.53 0.0000 1.000e+00 0.0000 8.977 24.980 0.0000 7.343 5.351 0.8000 0.0002 8.450e-03 8.018e-06 2.397e-05 3.996e-01 -4.252 0.000e+00 0.000e+00 7.672e-05 1.748e-08 -5.2050 0.0000 -5.245 0.0000 -11.9600 0.1299 25.03 22.11 5.773e-01 1.900e+00 -2.9170 3.2780 4.409e-02 3.843e-01 4.167e+07 0.5566 -9.3510 0.0000 49.117 -5.618 48.025 6.572e+07 0.0000 1.000e+00 0.0000 5.317 25.0300 0.0000 2.628 3.1710 -2.9400 0.0119 1.299e-01 2.043e-19 2.037e-05 3.843e-01 -4.3150 2.496e+00 -5.786e-01 3.960e-03 4.704e-05
422 Rv2431c Mtb_R24 2727967 2728266 300 - artemis gene undefined undefined Rv2431c Rv2431c PE25 PE family protein Function unknown -1.665 0.0182 -1.647 0.0168 -1.461 0.1025 10.890 9.582 2.257e-01 2.374e-01 -1.3080 4.386 1.183e-02 3.118e-01 1240.00 0.5447 -3.057 0.0022 3.100 -1.632 3.097 1488.41 0.0450 9.550e-01 0.0450 10.270 9.930 0.0016 9.859 2.729 -3.7650 0.0195 1.025e-01 1.864e-02 1.684e-02 3.118e-01 -1.562 2.670e-02 -1.709e-02 7.800e-03 1.035e-04 -1.2910 0.3636 -1.221 0.4688 -4.6610 0.2548 28.68 27.92 2.247e-01 1.645e+00 -0.7582 0.4894 6.637e-01 7.494e-01 5.253e+08 0.9980 -1.2930 0.1960 2.788 -1.479 2.788 6.650e+08 0.0000 1.000e+00 0.0000 8.984 1.8160 0.1778 8.507 2.1480 -4.4020 0.0613 2.548e-01 6.717e-01 4.687e-01 7.494e-01 -0.9732 2.338e-01 -2.402e-01 1.450e-01 5.342e-03
506 Rv2971 Mtb_R24 3326101 3326949 849 + artemis gene undefined undefined Rv2971 Rv2971 Rv2971 oxidoreductase Function unknown; probably involved in cellular metabolism. -1.198 0.0121 -1.190 0.0480 -1.211 0.0498 9.411 8.329 9.005e-02 1.887e-02 -1.0820 6.495 9.095e-03 3.118e-01 562.50 0.3726 -3.214 0.0013 2.248 -1.169 2.244 545.50 0.0063 9.937e-01 0.0063 9.137 7.213 0.0072 8.993 3.456 -2.4450 0.0053 4.975e-02 1.240e-02 4.797e-02 3.118e-01 -1.163 3.118e-03 -2.681e-03 4.626e-03 9.157e-06 -1.0310 0.0000 -1.156 0.0978 -0.1834 0.8755 27.84 27.89 4.201e-02 3.650e+00 0.0552 -0.9631 4.351e-01 5.292e-01 3.794e+08 0.2275 -4.5330 0.0000 3.072 -1.619 3.071 3.923e+08 0.0000 1.000e+00 0.0000 8.477 5.6610 0.0174 9.803 0.3454 -6.8440 0.7380 8.756e-01 7.278e-05 9.778e-02 5.292e-01 0.1338 4.492e+00 3.357e+01 2.518e-01 1.774e-01
523 Rv3044 Mtb_R24 3405136 3406215 1080 + artemis gene undefined undefined Rv3044 Rv3044 fecB Fe(III)-dicitrate-binding periplasmic lipoprotein May be involved in active transport of FeIII-decitrate across the membrane (import). -2.226 0.0006 -2.218 0.0015 -2.139 0.0275 6.844 4.854 5.535e-01 1.712e-01 -1.9900 4.395 1.999e-02 3.118e-01 94.15 0.5382 -4.136 0.0000 4.668 -2.223 4.570 100.15 0.0764 9.236e-01 0.0764 6.583 15.750 0.0001 6.198 4.128 -1.1460 0.0017 2.753e-02 6.320e-04 1.464e-03 3.118e-01 -2.222 0.000e+00 0.000e+00 5.895e-04 8.614e-07 -0.9919 0.0222 -1.081 0.5054 -1.0120 0.3765 24.77 24.90 1.974e-01 2.904e+00 0.1231 -1.1240 3.656e-01 5.188e-01 4.092e+07 0.3573 -2.7760 0.0055 2.845 -1.508 2.840 4.478e+07 0.0012 9.988e-01 0.0012 5.266 1.6170 0.2035 6.419 1.4830 -5.1200 0.1735 3.765e-01 4.093e-02 5.053e-01 5.188e-01 -0.2868 1.626e+00 -5.671e+00 1.275e-01 1.139e-02
545 Rv3208A Mtb_R24 3585677 3585949 273 - artemis gene undefined undefined Rv3208A Rv3208A TB9.4 conserved hypothetical protein Function unknown -1.208 0.0178 -1.211 0.0309 -0.991 0.0955 11.260 9.853 1.493e-01 3.422e-01 -1.4060 3.085 4.449e-02 3.352e-01 1712.00 0.3935 -3.069 0.0021 2.288 -1.194 2.287 1845.15 0.2042 7.958e-01 0.2042 10.740 8.347 0.0039 10.580 2.821 -3.6110 0.0166 9.547e-02 1.825e-02 3.086e-02 3.352e-01 -1.105 3.095e-02 -2.800e-02 7.523e-03 6.198e-05 -1.3170 0.0048 -1.330 0.1258 -0.7288 0.4425 28.64 28.92 3.066e-02 9.948e-01 0.2866 -1.2220 3.399e-01 5.188e-01 7.618e+08 0.3993 -3.2980 0.0010 3.184 -1.671 3.184 7.879e+08 0.0001 9.999e-01 0.0001 9.506 5.0280 0.0249 9.873 1.2850 -6.1950 0.2319 4.426e-01 8.944e-03 1.258e-01 5.188e-01 -0.3789 2.696e+00 -7.115e+00 8.594e-02 1.612e-02
567 Rv3354 Mtb_R24 3769111 3769500 390 + artemis gene undefined undefined Rv3354 Rv3354 Rv3354 conserved hypothetical protein Function unknown -2.585 0.0005 -2.560 0.0013 -2.217 0.0447 13.070 10.120 4.208e-01 8.844e-01 -2.9580 4.108 1.876e-02 3.118e-01 4468.00 0.6087 -4.247 0.0000 5.919 -2.565 5.916 5869.43 0.0208 9.792e-01 0.0208 12.110 16.140 0.0001 11.220 3.642 -2.1260 0.0038 4.471e-02 4.639e-04 1.343e-03 3.118e-01 -2.518 8.748e-03 -3.473e-03 1.308e-03 4.823e-06 -3.1960 0.0001 -3.147 0.0019 -3.6050 0.1390 29.67 28.12 1.380e-02 2.851e-02 -1.5550 13.8600 3.128e-04 6.627e-02 8.283e+08 0.7509 -4.2560 0.0000 10.737 -3.425 10.734 1.277e+09 0.0000 1.000e+00 0.0000 9.633 14.6500 0.0001 8.752 2.8510 -3.5700 0.0198 1.390e-01 2.451e-04 1.943e-03 6.627e-02 -2.1570 2.968e+00 -1.376e+00 6.633e-03 1.290e-04
623 Rv3763 Mtb_R24 4209047 4209526 480 + artemis gene undefined undefined Rv3763 Rv3763 lpqH 19 kda lipoprotein antigen precursor Shown to inhibit gamma interferon regulated HLA-DR protein and mRNA expression in human macrophages -3.418 0.0000 -3.403 0.0000 -3.289 0.0002 11.360 7.669 6.473e-02 2.251e-01 -3.6880 11.220 1.379e-03 2.469e-01 990.70 0.3345 -10.220 0.0000 10.537 -3.397 10.498 1358.02 0.0000 1.000e+00 0.0000 9.937 60.500 0.0000 8.837 10.670 7.0960 0.0000 2.179e-04 5.490e-22 4.870e-12 2.468e-01 -3.468 9.804e-03 -2.827e-03 1.226e-07 4.509e-14 -4.3500 0.0000 -4.364 0.0000 -4.7820 0.0231 28.51 25.82 2.204e-01 8.702e-01 -2.6890 3.9460 2.989e-02 3.237e-01 3.395e+08 0.4120 -10.5600 0.0000 25.755 -4.687 25.717 5.376e+08 0.0000 1.000e+00 0.0000 8.348 45.7000 0.0000 7.124 5.1710 0.0395 0.0007 2.311e-02 1.876e-24 1.099e-09 3.237e-01 -3.0220 5.013e+00 -1.659e+00 2.195e-04 1.445e-07
646 Rv3891c Mtb_R24 4374049 4374372 324 - artemis gene undefined undefined Rv3891c Rv3891c esxD Esat-6 like protein Function unknown -1.434 0.0029 -1.427 0.0137 -1.440 0.0319 8.014 6.693 9.292e-02 2.417e-01 -1.3210 4.368 1.772e-02 3.118e-01 141.90 0.3885 -3.692 0.0002 2.695 -1.430 2.678 177.53 0.0009 9.991e-01 0.0009 7.157 10.530 0.0012 6.813 3.999 -1.4200 0.0021 3.189e-02 2.956e-03 1.373e-02 3.118e-01 -1.410 1.220e-03 -8.651e-04 1.156e-03 8.514e-07 -0.8540 0.8953 -1.337 0.9029 -2.4620 0.8752 0.00 18.95 1.692e+02 1.782e+02 18.9500 -0.6870 5.299e-01 6.152e-01 3.016e+06 3.7250 -0.2293 0.8187 5.142 -2.362 4.928 3.421e+06 0.5210 4.790e-01 0.5210 1.405 0.1011 0.7506 -8.102 0.3470 -4.9560 0.7368 8.751e-01 1.000e+00 9.029e-01 6.152e-01 -0.6622 6.461e-01 -9.757e-01 7.687e-01 1.923e-03
---
title: "Attempt to compare OpenSWATH to EncyclopeDIA."
author: "atb abelew@gmail.com"
date: "`r Sys.Date()`"
output:
  rmdformats::readthedown:
    code_download: true
    code_folding: show
    df_print: paged
    fig_caption: true
    fig_height: 7
    fig_width: 7
    highlight: tango
    width: 300
    keep_md: false
    mode: selfcontained
    toc_float: true
  BiocStyle::html_document:
    code_download: true
    code_folding: show
    fig_caption: true
    fig_height: 7
    fig_width: 7
    highlight: tango
    keep_md: false
    mode: selfcontained
    toc_float: true
  html_document:
    code_download: true
    code_folding: show
    fig_caption: true
    fig_height: 7
    fig_width: 7
    highlight: tango
    keep_md: false
    mode: selfcontained
    number_sections: true
    self_contained: true
    theme: readable
    toc: true
    toc_float:
      collapsed: false
      smooth_scroll: false
---

<style type="text/css">
body, td {
  font-size: 16px;
}
code.r{
  font-size: 16px;
}
pre {
 font-size: 16px
}
</style>

```{r options, include=FALSE}
## Attach hpgltools, then load the development checkout from ~/hpgltools on
## top of it.  tt only holds the return value of load_all().
library("hpgltools")
tt <- devtools::load_all("~/hpgltools")
## Package-level knitr options (these affect the knitting process itself).
knitr::opts_knit$set(width=120,
                     progress=TRUE,
                     verbose=TRUE)
## Defaults applied to every chunk.  echo is a chunk option, so it is set
## here; knitr ignores it when it is passed to opts_knit.
knitr::opts_chunk$set(error=TRUE,
                      echo=TRUE,
                      dpi=96)
## Keep the previous values of the options changed here in old_options.
old_options <- options(digits=4,
                       tibble.width=Inf,
                       stringsAsFactors=FALSE,
                       knitr.duplicate.label="allow")
ggplot2::theme_set(ggplot2::theme_bw(base_size=10))
## rundate is today's date as YYYYMMDD; ver tags the output file names.
rundate <- format(Sys.Date(), format="%Y%m%d")
ver <- "20190310"
```

# Use EncyclopeDIA data including more samples.

I think I will just load an existing expressionset generated from openswath.

```{r load_osw}
## Load the saved rda file into a scratch environment rather than the global
## one, then replace that environment with the 'expt' object it contained.
osw_matrix <- new.env()
loaded <- load(file="protein_expt-v20181112.rda", envir=osw_matrix)
osw_matrix <- osw_matrix$expt

## Print the sample names of the OpenSWATH expression matrix.
osw_counts <- exprs(osw_matrix)
colnames(x=osw_counts)
```

```{r load_encyclopedia}
## Read the EncyclopeDIA sample sheet and key its rows as "s<sampleid>".
enc_metadata <- hpgltools:::read_metadata("sample_sheets/Mtb_dia_samples_encyclopedia_20190327.xlsx")
rownames(enc_metadata) <- paste0("s", enc_metadata[["sampleid"]])
## Read the protein-level quantification report; proteins become row names.
enc_matrix <- read.table("encyclopedia/most_samples_quant_report.elib.proteins.txt", header=TRUE)
rownames(enc_matrix) <- enc_matrix[["Protein"]]
## Drop the first three columns in one step, leaving the per-sample columns.
## NOTE(review): this assumes Protein is the first column -- confirm.
enc_matrix <- enc_matrix[, -c(1, 2, 3)]
colnames(enc_matrix)
## Make the column names match the metadata row names.  Only the leading "X"
## may be turned into "s": an unanchored pattern would also rewrite any
## capital X found later in a sample name.
colnames(enc_matrix) <- gsub(pattern="^X", replacement="s", x=colnames(enc_matrix))
colnames(enc_matrix) <- gsub(pattern="\\.mzML", replacement="", x=colnames(enc_matrix))

## Print both sets of names so they can be compared by eye.
colnames(enc_matrix)
rownames(enc_metadata)
## Missing values are set to 0 before building the expt.
na_idx <- is.na(enc_matrix)
enc_matrix[na_idx] <- 0

enc_expt <- create_expt(metadata=enc_metadata, count_dataframe=enc_matrix, gene_info=NULL)
```

OK, so that was 100% weird.  Next, let us set all of the entries which are currently 0 to NA.

I think that somewhere along the way some set of samples got mis-ordered?

## NA comparisons

```{r na_comp}
## Individual runs: filter, normalize, and plot a PCA.
enc_test <- normalize_expt(enc_expt, filter="pofa", p=0.99, A=1000,
                           convert="cpm", norm="quant", transform="log2")
test <- plot_pca(enc_test)
test[["plot"]]

## Presumably merges the runs which share a bioreplicate; then repeat the PCA
## (with a larger A) and plot the density of the merged, un-normalized data.
enc_combined <- concatenate_runs(enc_expt, column="bioreplicate")
combined_test <- normalize_expt(enc_combined, filter="pofa", p=0.99, A=10000,
                                convert="cpm", norm="quant", transform="log2")
test <- plot_pca(combined_test)
test[["plot"]]
test <- plot_density(enc_combined)
test[["plot"]]

## Split the merged data by collection type and plot a t-SNE of each subset
## after filtering only.
enc_whole <- subset_expt(enc_combined, subset="collectiontype=='whole'")
enc_whole_filt <- normalize_expt(enc_whole, filter="pofa", p=0.99, A=10000)
plot_tsne(enc_whole_filt)[["plot"]]
enc_filtrate <- subset_expt(enc_combined, subset="collectiontype=='filtrate'")
enc_filtrate_filt <- normalize_expt(enc_filtrate, filter="pofa", p=0.99, A=10000)
plot_tsne(enc_filtrate_filt)[["plot"]]

## The single contrast of interest.
## NOTE(review): the same keeper names filtrate conditions but is also used
## for the whole-cell tables below -- confirm that is intended.
keeper <- list("delta_wt" = c("delta_filtrate","wt_filtrate"))
## Pairwise differential expression and combined tables, once per subset.
enc_whole_de <- all_pairwise(enc_whole, parallel=FALSE, force=TRUE,
                             do_ebseq=TRUE, model_batch=FALSE)
enc_whole_table <- combine_de_tables(
  enc_whole_de, keepers=keeper,
  excel=glue::glue("excel/enc_whole_combined_de-v{ver}.xlsx"))
enc_filtrate_de <- all_pairwise(enc_filtrate, parallel=FALSE, force=TRUE,
                                do_ebseq=TRUE, model_batch=FALSE)
enc_filtrate_table <- combine_de_tables(
  enc_filtrate_de, keepers=keeper,
  excel=glue::glue("excel/enc_filtrate_combined_de-v{ver}.xlsx"))
```

# Perform OpenSWATH analyses

I am going to mostly copy/paste some of the material from 03_swath2stats_20190327.Rmd here.

```{r osw}
## Load the gene annotations from the gff file.
mtb_gff <- "reference/mycobacterium_tuberculosis_h37rv_2.gff.gz"
mtb_annotations <- sm(load_gff_annotations(mtb_gff, type="gene"))
## Remove every '.' from the annotation column names.
colnames(mtb_annotations) <- gsub(x=colnames(mtb_annotations),
                                  pattern="\\.", replacement="")
## Turn each '+' in the two free-text columns into a space.
for (text_column in c("description", "function")) {
  mtb_annotations[[text_column]] <- gsub(x=mtb_annotations[[text_column]],
                                         pattern="\\+", replacement=" ")
}
## Index the annotations by their ID column.
rownames(mtb_annotations) <- mtb_annotations[["ID"]]

## NOTE: this replaces the value of ver set in the options chunk; every path
## built from ver below therefore points at the 20190327 results.
ver <- "20190327"
## Read the tab-separated TRIC output for this version.
tric_data <- read.csv(
  paste0("results/tric/", ver, "/whole_8mz_tuberculist/comet_HCD.tsv"), sep="\t")
## Keep only the part of each ProteinName before its last underscore (the
## first group is greedy, so everything up to the final '_' is retained).
tric_data[["ProteinName"]] <- gsub(pattern="^(.*)_.*$", replacement="\\1",
                                   x=tric_data[["ProteinName"]])
## Read the matching sample sheet and drop rows whose names begin with "s..".
sample_annot <- extract_metadata(paste0("sample_sheets/Mtb_dia_samples_", ver, ".xlsx"))
kept <- ! grepl(x=rownames(sample_annot), pattern="^s\\.\\.")
sample_annot <- sample_annot[kept, ]
## Load SWATH2stats from a local checkout; the functions called from here on
## are presumably provided by it, so this must run before they are used.
devtools::load_all("~/scratch/git/SWATH2stats")
## Attach the sample annotations to the TRIC data.
s2s_exp <- sample_annotation(data=tric_data,
                             sample_annotation=sample_annot,
                             fullpeptidename_column="fullpeptidename")

## Correlation between samples on the unfiltered data, summing intensities
## per transition group for each condition/bioreplicate combination.
sample_cor <- plot_correlation_between_samples(
  s2s_exp, size=2,
  fun.aggregate=sum,
  comparison=transition_group_id ~ condition + bioreplicate,
  column.values="intensity")

## Decoy and FDR assessments of the unfiltered data.
## NOTE(review): FFT=0.7 is presumably the assumed fraction of false targets;
## confirm against the SWATH2stats documentation.
decoy_lists <- assess_decoy_rate(s2s_exp)
fdr_overall <- assess_fdr_overall(s2s_exp, output="Rconsole", plot=TRUE)
byrun_fdr <- assess_fdr_byrun(s2s_exp, FFT=0.7, plot=TRUE, output="Rconsole")
## Presumably the m-score cutoffs which give a 2% FDR at the assay and the
## protein level, respectively.
chosen_mscore <- mscore4assayfdr(s2s_exp, FFT=0.7, fdr_target=0.02)
prot_score <- mscore4protfdr(s2s_exp, FFT=0.7, fdr_target=0.02)
## Each filter below consumes the result of the previous one, so the order of
## these statements matters.
filtered_ms <- filter_mscore(s2s_exp, chosen_mscore)
##filtered_fq <- filter_mscore_freqobs(filtered_ms, 0.01, 0.8, rm.decoy=FALSE)
## NOTE(review): prot_score is the value returned by mscore4protfdr(), while
## the argument name overall_protein_fdr_target suggests an FDR fraction is
## expected here -- confirm this is the intended value.
filtered_ms_fdr <- filter_mscore_fdr(filtered_ms, FFT=0.7,
                                     overall_protein_fdr_target=prot_score,
                                     upper_overall_peptide_fdr_limit=0.05)
filtered_ms_fdr_pr <- filter_proteotypic_peptides(filtered_ms_fdr)
filtered_ms_fdr_pr_all <- filter_all_peptides(filtered_ms_fdr_pr)
## Peptide-count filters using n_peptides=10 (max) and n_peptides=3 (min).
filtered_ms_fdr_pr_all_str <- filter_on_max_peptides(data=filtered_ms_fdr_pr_all, n_peptides=10)
filtered_all_filters <- filter_on_min_peptides(data=filtered_ms_fdr_pr_all_str, n_peptides=3)

## Directory which receives the SWATH2stats matrices for this version.
matrix_prefix <- file.path("results", "swath2stats", ver)
## The path is nested, so create any missing parent directories as well;
## a plain dir.create() fails when results/swath2stats does not exist yet.
if (!dir.exists(matrix_prefix)) {
  dir.create(matrix_prefix, recursive=TRUE)
}
## Write the filtered protein matrix to csv and print its dimensions.
protein_matrix_filtered <- write_matrix_proteins(
  filtered_all_filters, write.csv=TRUE,
  filename=file.path(matrix_prefix, "protein_matrix_filtered.csv"))
dim(protein_matrix_filtered)
## Same for the filtered peptide matrix.
peptide_matrix_filtered <- write_matrix_peptides(
  filtered_all_filters, write.csv=TRUE,
  filename=file.path(matrix_prefix, "peptide_matrix_filtered.csv"))
dim(peptide_matrix_filtered)

## Repeat the between-sample correlation, this time on the filtered data.
rt_cor <- plot_correlation_between_samples(
  filtered_all_filters, size=2,
  fun.aggregate=sum,
  column.values="intensity")
## It is not clear to me how to interpret this plot.
variation <- plot_variation(filtered_all_filters, fun.aggregate=sum)
## Keep the column names of the filtered data for reference.
cols <- colnames(filtered_all_filters)
## Disaggregate the filtered data, then convert it for MSstats.
disaggregated <- disaggregate(filtered_all_filters, all.columns=TRUE)
msstats_input <- convert_MSstats(disaggregated)

## Read the filtered protein matrix back from the csv file.
prot_mtrx <- read.csv(
  file=file.path("results", "swath2stats", ver, "protein_matrix_filtered.csv"))
## Row names are the protein names without their leading "1/".
rownames(prot_mtrx) <- gsub(x=prot_mtrx[["proteinname"]],
                            pattern="^1\\/", replacement="")
## Drop the first column (presumably proteinname, now held in the row names).
prot_mtrx <- prot_mtrx[, -1]
## Important question: Did SWATH2stats reorder my data?
## Reduce each column name to "s" followed by its trailing "2018..." portion.
colnames(prot_mtrx) <- gsub(x=colnames(prot_mtrx),
                            pattern="^(.*)(2018.*)$", replacement="s\\2")

## Reorder the sample annotations to follow the matrix columns, so the
## metadata rows line up with the data whatever order the matrix came back in.
reordered <- colnames(prot_mtrx)
metadata <- sample_annot[reordered, ]
protein_expt <- sm(create_expt(metadata,
                               gene_info=mtb_annotations,
                               count_dataframe=prot_mtrx))

## Split the protein-level expt by collection type.
whole_expt <- subset_expt(protein_expt, subset = "collectiontype=='whole'")
cf_expt <- subset_expt(protein_expt, subset = "collectiontype=='filtrate'")

## Take the mean of the filtrate samples by the "figurename" factor, then
## normalize that (filter, cpm, quantile, log2) for the PCA plot.
cf_mean <- mean_by_bioreplicate(cf_expt, fact = "figurename")
cf_combined_norm <- normalize_expt(
  cf_mean,
  filter = TRUE,
  convert = "cpm",
  norm = "quant",
  transform = "log2")
plot_pca(cf_combined_norm)$plot

## The pairwise comparisons take cf_mean, not the normalized copy.
cf_de <- all_pairwise(cf_mean, parallel = FALSE, force = TRUE)
cf_table <- combine_de_tables(cf_de, keepers = keeper)
```

# Compare tables: OpenSWATH (OSW) vs. EncyclopeDIA (Enc)

```{r osw_enc}
## Take the first combined table from each pipeline.
osw_table <- cf_table[["data"]][[1]]
enc_table <- enc_filtrate_table[["data"]][[1]]
## Keep only the rows whose edgeR logFC lies within [-10, 10].  which() turns
## the mask into row numbers, so a row with an NA logFC is dropped instead of
## becoming an all-NA row, which is what a logical index containing NA yields.
enc_table <- enc_table[which(abs(enc_table[["edger_logfc"]]) <= 10), ]

## Join the two tables on their row names; shared columns get the suffixes
## .x (OSW) and .y (Enc).
merged_table <- merge(osw_table, enc_table, by = "row.names")
cor.test(merged_table[["edger_logfc.x"]], merged_table[["edger_logfc.y"]])
## NOTE(review): the correlation test above uses the edgeR logFC columns while
## this scatter plot uses the DESeq2 ones -- confirm that this is intended.
plot_linear_scatter(merged_table[, c("deseq_logfc.x", "deseq_logfc.y")])$scatter +
  ggplot2::ylim(-5, 5)

## Conservative set of increased proteins: logFC >= 1 in both tables and an
## OSW edgeR adjusted p-value <= 0.05.  which() keeps rows with NA in any of
## these columns out of the result.
favorite_idx <- which(
  merged_table[["edger_logfc.x"]] >= 1 &
    merged_table[["edger_logfc.y"]] >= 1 &
    merged_table[["edger_adjp.x"]] <= 0.05)
favorites <- merged_table[favorite_idx, ]
write.csv(x = favorites, file = "conservative_increased_delta_filtrate.csv")
knitr::kable(favorites)

## The same selection for decreased proteins: logFC <= -1 in both tables.
favorite_idx <- which(
  merged_table[["edger_logfc.x"]] <= -1 &
    merged_table[["edger_logfc.y"]] <= -1 &
    merged_table[["edger_adjp.x"]] <= 0.05)
favorites <- merged_table[favorite_idx, ]
write.csv(x = favorites, file = "conservative_decreased_delta_filtrate.csv")
knitr::kable(favorites)
```
