diff --git a/tutorials/data/1.nex b/tutorials/data/1.nex new file mode 100644 index 00000000..2f21c214 --- /dev/null +++ b/tutorials/data/1.nex @@ -0,0 +1,24 @@ +#NEXUS +[Data written by write.nexus.data.R, Fri Aug 4 10:29:15 2023] +BEGIN DATA; + DIMENSIONS NTAX=15 NCHAR=65; + FORMAT DATATYPE=STANDARD MISSING=? GAP=- INTERLEAVE=NO symbols="0123456789"; + MATRIX + Leonhardtina_ORIG_Leonhardtina 42102222200001121210010200211002001101000110011100001101010100101 + missing_ORIG_Procerberus_and_Cimolestes 00121212210012021202002122021220101011101100011100101001010101101 + Brychotherium_ORIG_Brychotherium 31200012100222021011222012111222010100011100000110101001000001111 + Dissopsalis_ORIG_Dissopsalis 33210012000111002002222022121122101110000000001100101100010100111 + Anasinopa_ORIG_Anasinopa 33110110200021021111210100121012211100101110100100001000110100111 + Metasinopa_ORIG_Metasinopa 33120012221002021011210010121202211101111110000100101001000111011 + Masrasector_ORIG_Masrasector 30120012210202020011011010121222100101011010010100011000110110011 + Kyawdawia_ORIG_Kyawdawia 32120002000222021010220110121222110100011000011110111001010000011 + Paratritemnodon_ORIG_Paratritemnodon 32122012200211021100020110221222201110010101000100111001011101011 + Prodissopsalis_ORIG_Prodissopsalis 31120002222212021001222010121212011111001010101000001010010111011 + Eurotherium_ORIG_Eurotherium 31120022222200021011212010120212211101001010101000100100010110101 + Cynohyaenodon_ORIG_Cynohyaenodon 11120022122200021210200000001022211101000110101000101000000100110 + no_age_data_ORIG_Paracynohyaenodon 01110022111202021112010010121222211111000110100000101100010110111 + missing_ORIG_Proviverra_and_Lesmesodon 10120022110022021212211200111102211110001011101000001101100111001 + Allopterodon_ORIG_Allopterodon 00020022110222021011000200121022001101010111111101100100110101101 + ; +END; + diff --git a/tutorials/data/Agnolin_2007a_paleobiodb.nex b/tutorials/data/Agnolin_2007a_paleobiodb.nex new file mode 100644 index 00000000..a4ccea28 --- /dev/null +++ b/tutorials/data/Agnolin_2007a_paleobiodb.nex @@ -0,0 +1,22 @@ +#NEXUS +[File downloaded from graemetlloyd.com] +BEGIN DATA; +DIMENSIONS NTAX=12 NCHAR=51; + FORMAT DATATYPE = STANDARD SYMBOLS= " 0 1 2 3" MISSING=? GAP=- ; +MATRIX +Tinamidae_ORIG_Tinamidae 000000000000000000000000000000000000000000000000000 +Rallidae_ORIG_Rallidae 000000000000000000000000000000000000000000000000000 +no_age_data_ORIG_Paraphysornis 0????0??????0??0?00000010?0100000??010???0001000010 +Patagornis_ORIG_Tolmodus 000000000000?00000000001000100000??0000000001000010 +Phorusrhacos_ORIG_Phorusrhacos 0????0????????0??00???????0100000???0????0001000010 +Galliformes_ORIG_Galliformes 000000000000000011000012000010000000000000000000010 +Diatryma_ORIG_Diatryma 0100010010011010111011130001101010?0001111111101001 +Brontornis_ORIG_Brontornis ?????????????????110?1?11?1??????0???????111110?001 +Dromornithidae_ORIG_Dromornithidae 0100010?101111?0111111111?11??1??00??01111111101001 +no_age_data_ORIG_Anhimidae 011111111111111111111111111111111111111111111101010 +Anseranatidae_ORIG_Anseranatidae 111111111110111111111111111111111111111111111101010 +Anatoidea_ORIG_Anatoidea 111011011010111111111111111110001000001111111111110 +; +END; + + diff --git a/tutorials/data/Egi_etal_2005.nex b/tutorials/data/Egi_etal_2005.nex new file mode 100644 index 00000000..c47365a3 --- /dev/null +++ b/tutorials/data/Egi_etal_2005.nex @@ -0,0 +1,23 @@ +#NEXUS +[File downloaded from graemetlloyd.com] +BEGIN DATA; +DIMENSIONS NTAX=15 NCHAR=65; + FORMAT DATATYPE = STANDARD SYMBOLS= " 0 1 2 3 4" MISSING=? GAP=- ; +MATRIX +missing_ORIG_Procerberus_and_Cimolestes 0000000100000000001000?0010000000000001100000001000000?0000000000 +missing_ORIG_Proviverra_and_Lesmesodon 100????0?00110001001010??0001013111021??20?1100001021010000110000 +Allopterodon_ORIG_Allopterodon 10121110110110111111110?11001121000111101011000002121000020100000 +Leonhardtina_ORIG_Leonhardtina 11121210210011110001000?11000120000110212022120001120000120200001 +Eurotherium_ORIG_Eurotherium 201100101011001011111102110011000011100120111122010211001022000?0 +Prodissopsalis_ORIG_Prodissopsalis 3010001011110001111?1112??0?0010001110010111110201001100110200000 +Cynohyaenodon_ORIG_Cynohyaenodon 11111001011110211111111100011000000120102011010200110101110200000 +no_age_data_ORIG_Paracynohyaenodon 111110010111112111111101110?111000012010201111020112110110?200000 +Brychotherium_ORIG_Brychotherium 10110????11112?1111111021001200100012020201210221201120102?300111 +Metasinopa_ORIG_Metasinopa 21???????00112101110110??1111002000120202011111212111111100200010 +Dissopsalis_ORIG_Dissopsalis 420100121122011111202222101?1?02000022100111111212011101010300110 +Anasinopa_ORIG_Anasinopa 311??011112210101120110??0000001000020200121111212110111110201010 +Paratritemnodon_ORIG_Paratritemnodon 100101121101121011101122010?0?11??0021101102011212110101001000010 +Masrasector_ORIG_Masrasector 10100002?00112?01111001??1011010000020200111011112110001101101010 +Kyawdawia_ORIG_Kyawdawia 420102121110121011111110010???00?0?1201001120111121?0001101100010 +; +END; diff --git a/tutorials/divrate/div_rate_intro.html b/tutorials/divrate/div_rate_intro.html index d3735cad..0a8a8ad6 100644 --- a/tutorials/divrate/div_rate_intro.html +++ b/tutorials/divrate/div_rate_intro.html @@ -357,14 +357,14 @@

The birth-death br
  • Condamine F.L., Rolland J., Höhna S., Sperling F.A.H., Sanmartín I. 2018. Testing the role of the Red Queen and Court Jester as drivers of the macroevolution of Apollo butterflies. Systematic Biology. 67:940–964.
  • -
  • Höhna S. 2014. Likelihood Inference of Non-Constant Diversification Rates with Incomplete Taxon Sampling. PLoS One. 9:e84184. +
  • Höhna S. 2013. Fast simulation of reconstructed phylogenies under global time-dependent birth-death processes. Bioinformatics. 29:1367–1374. -10.1371/journal.pone.0084184 +10.1093/bioinformatics/btt153
  • -
  • Höhna S. 2013. Fast simulation of reconstructed phylogenies under global time-dependent birth-death processes. Bioinformatics. 29:1367–1374. +
  • Höhna S. 2014. Likelihood Inference of Non-Constant Diversification Rates with Incomplete Taxon Sampling. PLoS One. 9:e84184. -10.1093/bioinformatics/btt153 +10.1371/journal.pone.0084184
  • Höhna S. 2015. The time-dependent reconstructed evolutionary process with a key-role for mass-extinction events. Journal of Theoretical Biology. 380:321–331. diff --git a/tutorials/fbd_simple/index.html b/tutorials/fbd_simple/index.html index b97fe382..369efd0b 100644 --- a/tutorials/fbd_simple/index.html +++ b/tutorials/fbd_simple/index.html @@ -888,10 +888,10 @@

    Summary

    10.1093/molbev/mss075
  • -
  • Freyman W.A., Höhna S. 2019. Stochastic character mapping of state-dependent diversification reveals the tempo of evolutionary decline in self-compatible Onagraceae lineages. Systematic Biology. 68:505–519. +
  • Freyman W.A., Höhna S. 2018. Cladogenetic and anagenetic models of chromosome number evolution: a Bayesian model averaging approach. Systematic Biology. 67:1995–215.
  • -
  • Freyman W.A., Höhna S. 2018. Cladogenetic and anagenetic models of chromosome number evolution: a Bayesian model averaging approach. Systematic Biology. 67:1995–215. +
  • Freyman W.A., Höhna S. 2019. Stochastic character mapping of state-dependent diversification reveals the tempo of evolutionary decline in self-compatible Onagraceae lineages. Systematic Biology. 68:505–519.
  • Gavryushkina A., Heath T.A., Ksepka D.T., Stadler T., Welch D., Drummond A.J. 2017. Bayesian Total-Evidence Dating Reveals the Recent Crown Radiation of Penguins. Systematic Biology. 66:57–73. diff --git a/tutorials/figures/workflow.png b/tutorials/figures/workflow.png new file mode 100644 index 00000000..6afa16a0 Binary files /dev/null and b/tutorials/figures/workflow.png differ diff --git a/tutorials/pps_morpho/data/Agnolin_2007a_paleobiodb.nex b/tutorials/pps_morpho/data/Agnolin_2007a_paleobiodb.nex new file mode 100644 index 00000000..013f6917 --- /dev/null +++ b/tutorials/pps_morpho/data/Agnolin_2007a_paleobiodb.nex @@ -0,0 +1,20 @@ +#NEXUS +[File downloaded from graemetlloyd.com] +BEGIN DATA; +DIMENSIONS NTAX=12 NCHAR=51; + FORMAT DATATYPE = STANDARD SYMBOLS= " 0 1 2 3" MISSING=? GAP=- ; +MATRIX +Tinamidae_ORIG_Tinamidae 000000000000000000000000000000000000000000000000000 +Rallidae_ORIG_Rallidae 000000000000000000000000000000000000000000000000000 +no_age_data_ORIG_Paraphysornis 0????0??????0??0?00000010?0100000??010???0001000010 +Patagornis_ORIG_Tolmodus 000000000000?00000000001000100000??0000000001000010 +Phorusrhacos_ORIG_Phorusrhacos 0????0????????0??00???????0100000???0????0001000010 +Galliformes_ORIG_Galliformes 000000000000000011000012000010000000000000000000010 +Diatryma_ORIG_Diatryma 0100010010011010111011130001101010?0001111111101001 +Brontornis_ORIG_Brontornis ?????????????????110?1?11?1??????0???????111110?001 +Dromornithidae_ORIG_Dromornithidae 0100010?101111?0111111111?11??1??00??01111111101001 +no_age_data_ORIG_Anhimidae 011111111111111111111111111111111111111111111101010 +Anseranatidae_ORIG_Anseranatidae 111111111110111111111111111111111111111111111101010 +Anatoidea_ORIG_Anatoidea 111011011010111111111111111110001000001111111111110 +; +END; \ No newline at end of file diff --git a/tutorials/pps_morpho/data/Egi_etal_2005a_paleobiodb.nex b/tutorials/pps_morpho/data/Egi_etal_2005a_paleobiodb.nex new file mode 100644 index 00000000..c47365a3 --- /dev/null +++ b/tutorials/pps_morpho/data/Egi_etal_2005a_paleobiodb.nex @@ -0,0 +1,23 @@ +#NEXUS +[File downloaded from graemetlloyd.com] +BEGIN DATA; +DIMENSIONS NTAX=15 NCHAR=65; + FORMAT DATATYPE = STANDARD SYMBOLS= " 0 1 2 3 4" MISSING=? GAP=- ; +MATRIX +missing_ORIG_Procerberus_and_Cimolestes 0000000100000000001000?0010000000000001100000001000000?0000000000 +missing_ORIG_Proviverra_and_Lesmesodon 100????0?00110001001010??0001013111021??20?1100001021010000110000 +Allopterodon_ORIG_Allopterodon 10121110110110111111110?11001121000111101011000002121000020100000 +Leonhardtina_ORIG_Leonhardtina 11121210210011110001000?11000120000110212022120001120000120200001 +Eurotherium_ORIG_Eurotherium 201100101011001011111102110011000011100120111122010211001022000?0 +Prodissopsalis_ORIG_Prodissopsalis 3010001011110001111?1112??0?0010001110010111110201001100110200000 +Cynohyaenodon_ORIG_Cynohyaenodon 11111001011110211111111100011000000120102011010200110101110200000 +no_age_data_ORIG_Paracynohyaenodon 111110010111112111111101110?111000012010201111020112110110?200000 +Brychotherium_ORIG_Brychotherium 10110????11112?1111111021001200100012020201210221201120102?300111 +Metasinopa_ORIG_Metasinopa 21???????00112101110110??1111002000120202011111212111111100200010 +Dissopsalis_ORIG_Dissopsalis 420100121122011111202222101?1?02000022100111111212011101010300110 +Anasinopa_ORIG_Anasinopa 311??011112210101120110??0000001000020200121111212110111110201010 +Paratritemnodon_ORIG_Paratritemnodon 100101121101121011101122010?0?11??0021101102011212110101001000010 +Masrasector_ORIG_Masrasector 10100002?00112?01111001??1011010000020200111011112110001101101010 +Kyawdawia_ORIG_Kyawdawia 420102121110121011111110010???00?0?1201001120111121?0001101100010 +; +END; diff --git a/tutorials/pps_morpho/figures/Ma_results.png b/tutorials/pps_morpho/figures/Ma_results.png index f15f5bcd..875749d1 100644 Binary files a/tutorials/pps_morpho/figures/Ma_results.png and b/tutorials/pps_morpho/figures/Ma_results.png differ diff --git a/tutorials/pps_morpho/pps_data_morpho.html b/tutorials/pps_morpho/pps_data_morpho.html index c324a453..70827b26 100644 --- a/tutorials/pps_morpho/pps_data_morpho.html +++ b/tutorials/pps_morpho/pps_data_morpho.html @@ -55,7 +55,7 @@

    Assessing Phylogenetic Reliability Using RevBayes and $P^{3}$

    Model adequacy testing using posterior prediction (Morphological Models).

    Laura Mulvey

    -
    Last modified on January 19, 2024
    +
    Last modified on January 25, 2024
    @@ -95,8 +95,12 @@

    Data files and scripts

    +
  • Agnolin_2007a_paleobiodb.nex
  • +
  • Egi_etal_2005.nex
  • +
  • Egi_etal_2005a_paleobiodb.nex
  • + @@ -123,6 +127,20 @@

    Data files and scripts

    + + Other + + @@ -140,13 +158,13 @@

    Overview

    Introduction


    -

    Understanding morphological evolution is an extremely difficult task. Within -palaeobiology there are a small number of relatively simple evolutionary models to -describe this complex process. Assessing the fit of these models to individual data -sets is therefore crucial in order to have confidence in the inference results. -Posterior prediction is a Bayesian approach to assess the fit of a model to a given -data set (Bollback 2002; Brown 2014; Höhna et al. 2018). PPS works by -simulating data sets based on parameters sampled from the posterior distribution, +

    Understanding morphological evolution is a difficult task. Within palaeobiology +there are a small number of relatively simple evolutionary models to describe +this complex process. Assessing the fit of these models to individual data sets is +therefore crucial in order to have confidence in the inference results. Posterior +prediction is a Bayesian approach to assess the fit of a model to a given data set +(Bollback 2002; Brown 2014; Höhna et al. 2018). PPS works by simulating data sets +based on parameters sampled from the posterior distribution () . If the simulated data is found to be similar to the empirical data, according to the chosen test statistics, you can have confidence that the model is capturing the properties of the empirical data @@ -159,12 +177,12 @@

    Introduction

    the other as we are determining the absolute fit of the model to the data set.

    -
    A schematic presentation of Posterior predictive simulation for morphological +
    A schematic presentation of posterior predictive simulation for morphological substitution models. Step 1 involves a carrying out an inference under the model of interest. Step 2 a number of data sets are then simulated using parameters taken from the posterior distribution estimated in step 1. Step 3 uses tests statistics to assess -how similar the simulated data sets are to the empirical. The test statistic employed -here are consistency index and retention index. P-values or effect sizes can be used +how similar the simulated data sets are to the empirical. The test statistics employed +here are consistency index and retention index. P-values or effect sizes can be used to determine whether a model is adequate or not for a given data set.
    @@ -183,25 +201,61 @@

    Substitution Models

    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Model & ExtensionsAssumptions
    Mkall transition rates are equal (Lewis 2001)
    Gallows for variation in substitution rates among sites (Yang 1994)
    Vaccounts for ascertainment bias (Lewis 2001)
    Ppartitions the data based on the number of character states
    + +
    The assumptions of the morphological models used in this tutorial.
    +
    +

    Assessing Model Fit with Posterior Prediction


    -

    Here we will highlight the main parts of the analysis. The morpholgoical data used here is of Proviverrine hyaenodontids from (missing reference). The data set contains 15 taxa, 65 characters with 5 character states. To quickly run the entire PPS pipeline -you can run the pps_analysis_Mk.Rev script. To run this type the following command into -RevBayes

    +

    Here we will highlight the main parts of the analysis. The morpholgoical data used here is of a family of extinct birds from the Miocene (Agnolin 2007). The data set contains 12 taxa, +51 characters with a maximum 4 character states. To quickly run the entire PPS pipeline you can run +the pps_analysis_Mk.Rev script. To run this type the following command into RevBayes

    -
        source("scripts/pps_analysis_Mk.Rev")
    +
    source("scripts/pps_analysis_Mk.Rev")
     
    -

    The test statistics and P-values can then be calculated by running the Test-stats.r scripts in r studio.

    +

    The test statistics and P-values can then be calculated by running the Test-stats.r scripts in r studio.

    Empirical MCMC Analysis


    The initial step in PPS involves generating a posterior distribution from which we -will later sample from, in order to simulated new data sets. +will later sample parameter values from, in order to simulated new data sets. Here we will specify our dataset, evolutionary model, and run a normal MCMC analysis. -This code is in the pps_analysis_Mk.rev script.

    +This code is in the pps_analysis_Mk.Rev script.

    Set up the workspace


    @@ -209,34 +263,35 @@

    Set up the workspace

    First, let’s set up some workspace variables we’ll need, and define which substitution model we will use for the analysis, either Mk or MkVP+G.

    -
        analysis_name = "pps_morpho_example"
    -    model_name = "Mk"
    -    model_file_name = "scripts/pps_"+model_name+"_Model.Rev"
    +
    analysis_name = "pps_morpho_example"
    +model_name = "Mk"
    +model_file_name = "scripts/pps_"+model_name+"_Model.Rev"
     

    Next specify and read in the data file.

    -
        inFile = "data/Egi_etal_2005.nex"
    -    morpho <- readDiscreteCharacterData(inFile)
    +
    inFile = "data/Agnolin_2007a_paleobiodb.nex"
    +morpho <- readDiscreteCharacterData(inFile)
     
    -
       Successfully read one character matrix from file data/Egi_etal_2005.nex"
    +
       Successfully read one character matrix from file data/Agnolin_2007a_paleobiodb.nex"
     

    Specify the maximum number of states in the morphological alignment. For morphological data -sets this value will change depending on the data set. We need this to construct the +sets this value will change depending on the data set. We need this to construct a Q-matrix of the correct size.

    -
        num_states ="5"
    +
    num_states ="4"
     
    -

    Create vectors to store your moves and monitors for the analysis

    +

    Create vectors to store your moves and monitors for the analysis.

    moves    = VectorMoves()
     monitors = VectorMonitors()
     
    -

    For the rest of the tutorial you can choose to either use the source() function to run the scripts or copy and paste the each line into revbayes as they are explained.

    +

    For the rest of the tutorial you can choose to either use the source() function to run the scripts or +copy and paste the each line into revbayes as they are explained.

    Specifying the model


    @@ -252,7 +307,7 @@

    Specifying the model

    Opening the model file, we can see how we have set it up.

    The first part of the file specifies the tree model. -We put a prior on the branch lengths using an exponential distribution with a rate 0.2

    +We put a prior on the branch lengths using an exponential distribution with a rate 0.2.

    br_len_lambda ~ dnExp(0.2)
     moves.append(mvScale(br_len_lambda, weight=2))
    @@ -268,21 +323,21 @@ 

    Specifying the model

    tree_length := phylogeny.treeLength()
    -

    In this script we are specifying an Mk model, where K is the maximum number of +

    In this script we are specifying an Mk model, where k is the maximum number of observed states. As the Mk model is a generalization of the JC model this is done using the fnJC() function. This creates a Q-matrix of size num_states. The resulting Q-matrix will have equal transition probabilities between all states.

    Q := fnJC(int(num_states)) 
     
    -

    We then create PhyloCTMC object. This joins all the model parameters we created to model -the morphological data. The is then clamped to the sequence evolution model.

    +

    We then create a PhyloCTMC object. This joins all the model parameters we created to model +the morphological data. The is then clamped to the morphological evolution model.

    seq ~ dnPhyloCTMC(tree=phylogeny, Q=Q, type="Standard")
     seq.clamp(morpho)
     
    -

    Finally, we can create a workspace model variable using the model()function. We provide +

    Finally, we can create a workspace model variable using the model() function. We provide the model function a single node here using (phylogeny).

    mymodel = model(phylogeny)
    @@ -295,8 +350,8 @@ 

    Run the MCMC

    source("scripts/pps_MCMC.Rev")
     
    -

    In this scripts, we are specifying the number of iterations, the monitors and the burn. -For this analysis we add a different file then the normal set up, the .var file. This file +

    In this script, we are specifying the number of iterations, the monitors and the burn. +For this analysis we add a different file than the normal set up, the .var file. This file will be used in the next step to simulate the new data sets.

    monitors.append( mnModel(filename="output_" + model_name + "/" + analysis_name + "_posterior.log",printgen=10, separator = TAB)) 
    @@ -306,7 +361,7 @@ 

    Run the MCMC

    Now that we have our model, moves, and monitors set up we can define our MCMC. This is done -using mcmc() function.nrunsspecifiies that there are two independent mcmc runs during the analysis.

    +using the mcmc() function. nruns specifiies that there are two independent mcmc runs during the analysis.

    mymcmc = mcmc(mymodel, monitors, moves, nruns=2, combine="mixed")
     mymcmc.burnin(generations=200,tuningInterval=200)
    @@ -353,7 +408,7 @@ 

    MCMC output

    convergence at this stage.

    Before moving on to the next section it is important to clear the RevBayes environment of all the variables created -during the previous step. This can be done using the following clear()function.

    +during the previous step. This can be done using the following clear() function.

    clear()
     
    @@ -371,11 +426,11 @@

    Posterior Predi analysis_name = "pps_morpho_example" morpho <- readDiscreteCharacterData(inFile) model_name = "Mk" -num_states ="5" +num_states ="4" source( model_file_name )

    -

    The scripts pps_Simulation.Rev contains the code to simulate new data.

    +

    The script pps_Simulation.Rev contains the code to simulate new data.

    First we read in the trace file created during the previous MCMC.

    @@ -383,13 +438,13 @@

    Posterior Predi

    Now we use the posdteriorPredictiveSimulation() function to set up the simulations. We provide the function with the - model, and output directory and the trace file.

    + model, output directory, and the trace file.

    pps = posteriorPredictiveSimulation(mymodel, directory="output_" + model_name + "/" + analysis_name + "_post_sims", trace)
     
    -

    To start the simulation we use the pps.run()function. Here we can specify how many data sets we want to simulate by -setting the thinning. The .varfile` contains a number of phylogenetic trees. If thinning is set to the default (0) the +

    To start the simulation we use the pps.run() function. Here we can specify how many data sets we want to simulate by +setting the thinning. The .var file contains a number of phylogenetic trees. If thinning is set to the default (0) the pps.run() function will simulate data for each tree. Setting thinning to 2 will simulate along every second tree and so on. In this way setting thinning = 2 simulates data for half of the trees in the .var file. Simulating 500 data sets has been found to be sufficient for these types of analysis. If you increase the number of iterations in the MCMC @@ -401,17 +456,17 @@

    Posterior Predi

    Once you run this command a file should be created in the output_Mk directory called pps_morpho_example_post_sims. This is where all of the data simulated in revbayes will be stored. This simulation step should only take a few minutes.

    -

    Calculating the P-values and effect sizes for the Test Statistics

    +

    Calculating the P-values and effect sizes for the Test Statistics


    To determine if a model is adequate or not we need to compare the empirical data with the newly simulated data sets. If the simulated data sets are not significantly different from the empirical, we can conclude that the model is adequate for the data set. Here we use two test statistics, Consistency Index and Retention Index. -Consistency Index is a measure of homoplasy in the data and retention index measures the number of synapomorphies. These -test statistics are calculated in R using the Test-stats.r script.

    +Consistency Index is a measure of homoplasy in the data and retention index measures the degree to which potential +synapomorphy is exhibited on the tree. These test statistics are calculated in R using the Test-stats.r script.

    -

    This script calculates posterior p-values and effect sizes to determine the adequacy of the model. Here we calculate 4 -p-values, lower 1-tailed, Upper 1-tailed, 2-tailed, and the mid-point p-value. Values of less than 0.25 or greater the +

    This script calculates posterior P-values and effect sizes to determine the adequacy of the model. Here we calculate 4 +p-values, lower 1-tailed, Upper 1-tailed, 2-tailed, and the mid-point p-value. Values of less than 0.025 or greater the 0.975 indicate significance. The effect sizes can also be used to determine the adequacy of a model, though the p-values are considered more conservative. For the effect sizes, if the maximum and minimum value range through zero a model can be considered adequate.

    @@ -419,26 +474,37 @@

    Visualising the results


    -

    +

    Histrograms show the distribution of values calculated for each test statistic from the simulated data sets. The green line shows the empirical value while the blue shows the median simulated value. The box plots show the effect sizes calculated for the test statistics. The median value should be closer to zero, models are considered adequate if the min and max values are range through zero.
    -

    The Mk model would not be considered accurate for this data set. While it does well according to the retention index, +

    The Mk model would not be considered adequate for this data set. While it does well according to the retention index, the consistency index determined the simulated data to be significantly different to the empirical.

    Additional Exercise


    -

    Now that you know the Mk model is not appropriate for the data set try the MKVP+G model. Is this model better? Use the -following command to rerun the entire analysis using the other model.

    +
      +
    • Now that you know the Mk model is not appropriate for the data set try the MkVP+G model. This model is more complex than the Mk as it relaxes some of the assumption. Is this model adequate for the data set? Use the following command to rerun the +entire analysis using the other model.
    • +
    source("scripts/pps_analysis_MkVP+G")
     
    -
    1. Bollback J.P. 2002. Bayesian model adequacy and choice in phylogenetics. Molecular Biology and Evolution. 19:1171–1180. +
        +
      • Try testing the models with the other data set in this tutorial. This morpholgoical data used here is of Proviverrine hyaenodontids (an extinct family of hypercarnivorous mammals) from (Egi et al. 2005). The data set contains 15 taxa, 65 characters with a maximum of 5 character states. Are is the same model adequate for this data?
      • +
      + +
      1. Agnolin F. 2007. Brontornis burmeisteri Moreno & Mercerat, un Anseriformes (Aves) gigante del Mioceno Medio de Patagonia, Argentina. Revista del Museo Argentino de Ciencias Naturales nueva serie. 9:15–25. + +10.22179/revmacn.9.361 + +
      2. +
      3. Bollback J.P. 2002. Bayesian model adequacy and choice in phylogenetics. Molecular Biology and Evolution. 19:1171–1180. 10.1093/oxfordjournals.molbev.a004175 @@ -447,6 +513,11 @@

        Additional Exercise

        10.1093/sysbio/syu009 +
      4. +
      5. Egi N., Holroyd P.A., Tsubamoto T., Soe A.N., Takai M., Ciochon R.L. 2005. Proviverrine hyaenodontids (Creodonta: Mammalia) from the Eocene of Myanmar and a phylogenetic analysis of the proviverrines from the Para-Tethys area. Journal of Systematic Palaeontology. 3:337–358. + +10.1017/S1477201905001707 +
      6. Höhna S., Coghill L.M., Mount G.G., Thomson R.C., Brown J.M. 2018. P^3: Phylogenetic Posterior Prediction in RevBayes. Molecular Biology and Evolution. 35:1028–1034. @@ -457,6 +528,11 @@

        Additional Exercise

        10.1080/106351501753462876 +
      7. +
      8. Yang Z. 1994. Maximum Likelihood Phylogenetic Estimation from DNA Sequences with Variable Rates Over Sites: Approximate Methods. Journal of Molecular Evolution. 39:306–314. + +10.1007/BF00160154 +