babette: Step by Step

Richèl J.C. Bilderbeek

2020-11-06

Introduction

This step-by-step demo shows how to run the babette pipeline in detail.

First, load babette:

library(babette)

In all cases, this is done for a short MCMC chain length of 10K:

mcmc <- create_test_mcmc()
sample_interval <- mcmc$tracelog$log_every

Create a ‘BEAST2’ input file

This step is commonly done using BEAUti. With babette, this can be done as follows:

beast2_input_file <- tempfile()
create_beast2_input_file(
  get_babette_path("anthus_aco.fas"),
  output_filename = beast2_input_file,
  mcmc = mcmc
)
testit::assert(file.exists(beast2_input_file))

Display (part of) the ‘BEAST2’ input file

print(head(readLines(beast2_input_file)))
#> [1] "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?><beast beautitemplate='Standard' beautistatus='' namespace=\"beast.core:beast.evolution.alignment:beast.evolution.tree.coalescent:beast.core.util:beast.evolution.nuc:beast.evolution.operators:beast.evolution.sitemodel:beast.evolution.substitutionmodel:beast.evolution.likelihood\" required=\"\" version=\"2.4\">"
#> [2] ""                                                                                                                                                                                                                                                                                                                                                                                   
#> [3] ""                                                                                                                                                                                                                                                                                                                                                                                   
#> [4] "    <data"                                                                                                                                                                                                                                                                                                                                                                          
#> [5] "id=\"anthus_aco\""                                                                                                                                                                                                                                                                                                                                                                  
#> [6] "name=\"alignment\">"

This file can both be loaded by BEAUti and be used by ‘BEAST2’.

The file can be checked if it is indeed a valid input file:

if (is_beast2_installed()) {
  testit::assert(is_beast2_input_file(beast2_input_file))
}

Run MCMC

This step is commonly done using ‘BEAST2’ from the command-line or using its GUI. With babette, this can be done as follows:

log_filename <- get_tracerer_path("beast2_example_output.log")
trees_filename <- get_tracerer_path("beast2_example_output.trees")
state_filename <- get_tracerer_path("beast2_example_output.xml.state")

if (is_beast2_installed()) {

  state_filename <- beastier::create_temp_state_filename()
  run_beast2(
    input_filename = beast2_input_file,
    output_state_filename = state_filename
  )
  testit::assert(file.exists(state_filename))
}

Display (part of) the ‘BEAST2’ output files

The .log file contains the model parameters and parameter estimates:

print(head(readLines(log_filename)))
#> [1] "#"                                                                                                                                          
#> [2] "#model:"                                                                                                                                    
#> [3] "#"                                                                                                                                          
#> [4] "#<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?><input id=\"posterior\" spec=\"beast.core.util.CompoundDistribution\">"        
#> [5] "#    <distribution id=\"prior\" spec=\"beast.core.util.CompoundDistribution\">"                                                             
#> [6] "#        <distribution id=\"BirthDeath.t:test-alignment_to_beast_posterior\" spec=\"beast.evolution.speciation.BirthDeathGernhard08Model\">"
print(tail(readLines(log_filename)))
#> [1] "5000\t-71.17163347186911\t-61.56657103682397\t-9.605062435045134\t-61.56657103682397\t0.7408031787176825\t-2.697307156062997\t1.784950391046604\t0.7085961206445538\t"     
#> [2] "6000\t-69.64175919436143\t-58.7371283874309\t-10.904630806930529\t-58.7371283874309\t1.0967406331819989\t-3.996875527948392\t1.118246617131637\t0.37026052508174184\t"     
#> [3] "7000\t-71.9254632299824\t-61.43600200867863\t-10.489461221303767\t-61.43600200867863\t1.008807793060718\t-3.581705942321631\t1.2610805655688475\t0.40085741953348314\t"    
#> [4] "8000\t-69.59440660430789\t-58.893813736024974\t-10.700592868282925\t-58.893813736024974\t0.8291478974191432\t-3.7928375893007873\t0.3909075824291657\t0.5845426497411835\t"
#> [5] "9000\t-69.69113214841606\t-62.40903887903621\t-7.2820932693798435\t-62.40903887903621\t0.4529636701249372\t-0.37433799039770677\t1.1123237661541558\t0.6983194230410139\t" 
#> [6] "10000\t-71.86883594126418\t-60.73520027244354\t-11.133635668820638\t-60.73520027244354\t0.7693617408342179\t-4.2258803898385\t1.5626755883603614\t0.7107459018616334\t"

The .trees file contains the alignment, taxa and posterior trees:

print(head(readLines(trees_filename)))
#> Warning in readLines(trees_filename): incomplete final line found
#> on '/home/richel/R/x86_64-pc-linux-gnu-library/3.6/tracerer/extdata/
#> beast2_example_output.trees'
#> [1] "#NEXUS"               ""                     "Begin taxa;"         
#> [4] "\tDimensions ntax=5;" "\t\tTaxlabels"        "\t\t\tt1 "
print(tail(readLines(trees_filename)))
#> Warning in readLines(trees_filename): incomplete final line found
#> on '/home/richel/R/x86_64-pc-linux-gnu-library/3.6/tracerer/extdata/
#> beast2_example_output.trees'
#> [1] "tree STATE_6000 = (((1:0.6693276063477953,(3:0.03981380432299871,4:0.03981380432299871):0.6295138020247966):0.08092720614183935,5:0.7502548124896347):0.3464858206923642,2:1.0967406331819989):0.0;"   
#> [2] "tree STATE_7000 = (((1:0.24494355012790703,5:0.24494355012790703):0.7367914630990596,(3:0.06452243075256824,4:0.06452243075256824):0.9172125824743984):0.027072779833751337,2:1.008807793060718):0.0;" 
#> [3] "tree STATE_8000 = (1:0.8291478974191432,((2:0.7608640351177596,5:0.7608640351177596):0.006217429452784806,(3:0.052826647942534374,4:0.052826647942534374):0.71425481662801):0.06206643284859881):0.0;" 
#> [4] "tree STATE_9000 = (((1:0.2090056271345508,2:0.2090056271345508):0.04985571367790104,5:0.2588613408124518):0.1941023293124854,(3:0.07366441485382572,4:0.07366441485382572):0.3792992552711115):0.0;"   
#> [5] "tree STATE_10000 = (1:0.7693617408342179,(2:0.6138287429830985,((3:0.11136214949831788,4:0.11136214949831788):0.22798021162858859,5:0.33934236112690647):0.27448638185619206):0.1555329978511194):0.0;"
#> [6] "End;"

The .xml.state file contains the final state of the MCMC run and the MCMC operator acceptances thus far:

print(head(readLines(state_filename)))
#> [1] "<itsabeastystatewerein version='2.0' sample='10000'>"                                                                                                                                                                                                       
#> [2] "<statenode id='Tree.t:test-alignment_to_beast_posterior'>(0:0.7693617408342179,(1:0.6138287429830985,((2:0.11136214949831788,3:0.11136214949831788)5:0.22798021162858859,4:0.33934236112690647)7:0.27448638185619206)6:0.1555329978511194)8:0.0</statenode>"
#> [3] "<statenode id='birthRate2.t:test-alignment_to_beast_posterior'>birthRate2.t:test-alignment_to_beast_posterior[1 1] (0.0,10000.0): 1.5626755883603614 </statenode>"                                                                                          
#> [4] "<statenode id='relativeDeathRate2.t:test-alignment_to_beast_posterior'>relativeDeathRate2.t:test-alignment_to_beast_posterior[1 1] (0.0,1.0): 0.7107459018616334 </statenode>"                                                                              
#> [5] "</itsabeastystatewerein>"                                                                                                                                                                                                                                   
#> [6] "<!--"
print(tail(readLines(state_filename)))
#> [1] "{\"id\":\"wide.t:test-alignment_to_beast_posterior\",\"p\":\"NaN\",\"accept\":50,\"reject\":349,\"acceptFC\":0,\"rejectFC\":0,\"rejectIv\":236,\"rejectOp\":236},"         
#> [2] "{\"id\":\"WilsonBalding.t:test-alignment_to_beast_posterior\",\"p\":\"NaN\",\"accept\":31,\"reject\":365,\"acceptFC\":0,\"rejectFC\":0,\"rejectIv\":230,\"rejectOp\":230},"
#> [3] "{\"id\":\"BirthRateScaler.t:test-alignment_to_beast_posterior\",\"p\":0.75,\"accept\":347,\"reject\":73,\"acceptFC\":0,\"rejectFC\":0,\"rejectIv\":0,\"rejectOp\":0},"     
#> [4] "{\"id\":\"DeathRateScaler.t:test-alignment_to_beast_posterior\",\"p\":0.75,\"accept\":329,\"reject\":48,\"acceptFC\":0,\"rejectFC\":0,\"rejectIv\":3,\"rejectOp\":3}"      
#> [5] "]}"                                                                                                                                                                        
#> [6] "-->"

Parse output

This step is commonly done using Tracer. With babette, this can be done as follows.

Parsing .log file to obtain the parameter estimates:

knitr::kable(head(parse_beast_log(log_filename)))
Sample posterior likelihood prior treeLikelihood TreeHeight BirthDeath birthRate2 relativeDeathRate2
0 -76.40152 -66.56126 -9.840263 -66.56126 0.6383792 -2.932507 1.0000000 0.5000000
1000 -68.68523 -60.11853 -8.566701 -60.11853 0.7003758 -1.658946 2.8041208 0.4952765
2000 -70.06839 -58.93807 -11.130319 -58.93807 1.4085182 -4.222564 0.7901276 0.3674313
3000 -69.03990 -58.90834 -10.131561 -58.90834 0.9220334 -3.223806 1.8637476 0.2496224
4000 -74.15268 -59.98232 -14.170365 -59.98232 1.8159958 -7.262610 1.7823070 0.3519155
5000 -71.17163 -61.56657 -9.605062 -61.56657 0.7408032 -2.697307 1.7849504 0.7085961

Parsing .trees file to obtain the posterior phylogenies:

plot_densitree(parse_beast_trees(trees_filename))

Parsing .xml.state file to obtain the MCMC operator acceptances:

knitr::kable(head(parse_beast_state_operators(state_filename)))
operator p accept reject acceptFC rejectFC rejectIv rejectOp
treeScaler.t 0.5 184 186 0 0 0 0
treeRootScaler.t 0.5 144 224 0 0 63 63
UniformOperator.t NaN 2219 1667 0 0 0 0
SubtreeSlide.t 1.0 409 1442 0 0 690 690
narrow.t NaN 962 972 0 0 0 0
wide.t NaN 50 349 0 0 236 236