From 5fffb550c3c11d5ec8694dbe4a977e1ae04b68c8 Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Mon, 20 Mar 2023 11:20:19 -0700 Subject: [PATCH] Improved import of mutations from tskit tree sequences. (#1113) * The time field of a mutation is now used as the origin time. This allows us to support both fwdpy11- and tskit- generated mutations. * Add tests of evolve/export/import/evolve work flows. * C++ back end now catches invalid mutation origin times, resulting in earlier failures when inputs are invalid. --- cpp/fwdpy11_types/DiploidPopulation.cc | 5 +- doc/misc/changelog.md | 9 ++++ fwdpy11/_types/diploid_population.py | 11 +++-- .../fwdpy11/types/DiploidPopulation.hpp | 33 ++++++++++++- tests/test_import_tskit_mutations.py | 17 ++++--- tests/test_recreate_pop_from_own_treeseq.py | 47 +++++++++++++++++-- 6 files changed, 102 insertions(+), 20 deletions(-) diff --git a/cpp/fwdpy11_types/DiploidPopulation.cc b/cpp/fwdpy11_types/DiploidPopulation.cc index 664160cd9..b84bc5ed1 100644 --- a/cpp/fwdpy11_types/DiploidPopulation.cc +++ b/cpp/fwdpy11_types/DiploidPopulation.cc @@ -111,8 +111,9 @@ init_DiploidPopulation(py::module& m) .def("_set_mutations", [](fwdpy11::DiploidPopulation& self, const std::vector& mutations, - const std::vector& mutation_nodes) { - self.set_mutations(mutations, mutation_nodes); + const std::vector& mutation_nodes, + const std::vector& origin_times) { + self.set_mutations(mutations, mutation_nodes, origin_times); }) .def(py::pickle( [](const fwdpy11::DiploidPopulation& pop) -> py::object { diff --git a/doc/misc/changelog.md b/doc/misc/changelog.md index 3e638832b..70a54fd97 100644 --- a/doc/misc/changelog.md +++ b/doc/misc/changelog.md @@ -3,6 +3,15 @@ Major changes are listed below. Each release likely contains fiddling with back-end code, updates to latest `fwdpp` version, etc. +## 0.19.9 + +Big fix + +* Use the mutation's time field rather than + the metadata "origin" time when creating + a DiploidPopulation from a tskit tree sequence. + PR {pr}`1113` + ## 0.19.8 Big fix/new feature (take your pick) diff --git a/fwdpy11/_types/diploid_population.py b/fwdpy11/_types/diploid_population.py index 4f2f48e13..c3fe9004e 100644 --- a/fwdpy11/_types/diploid_population.py +++ b/fwdpy11/_types/diploid_population.py @@ -113,10 +113,10 @@ def create_from_tskit(cls, ts: tskit.TreeSequence, if import_mutations is True: import fwdpy11.tskit_tools for mutation in ts.mutations(): - if mutation.metadata["origin"] < 0.0: - msg = "all origin fields in mutation metadata" - msg += "must be non-negative" - raise ValueError(msg) + # if mutation.metadata["origin"] < 0.0: + # msg = "all origin fields in mutation metadata" + # msg += "must be non-negative" + # raise ValueError(msg) if not mutation.time.is_integer(): raise ValueError("mutation times cannot contain decimals") mutation_nodes = [i.node for i in ts.mutations()] @@ -144,7 +144,8 @@ def create_from_tskit(cls, ts: tskit.TreeSequence, for i in decoded_md: if i is not None: mvec.append(i) - ll._set_mutations(mvec, mutation_nodes) + ll._set_mutations(mvec, mutation_nodes, + [int(i.time) for i in ts.mutations()]) return cls(0, 0.0, ll_pop=ll) @ classmethod diff --git a/fwdpy11/headers/fwdpy11/types/DiploidPopulation.hpp b/fwdpy11/headers/fwdpy11/types/DiploidPopulation.hpp index 0ec4ff64e..91c37e197 100644 --- a/fwdpy11/headers/fwdpy11/types/DiploidPopulation.hpp +++ b/fwdpy11/headers/fwdpy11/types/DiploidPopulation.hpp @@ -4,6 +4,8 @@ #include "Population.hpp" #include "Diploid.hpp" #include "fwdpy11/types/Mutation.hpp" +#include +#include #include #include #include @@ -223,7 +225,8 @@ namespace fwdpy11 */ void set_mutations(const std::vector &mutations, - const std::vector &mutation_nodes) + const std::vector &mutation_nodes, + const std::vector &origin_times) { if (this->is_simulating) { @@ -266,7 +269,7 @@ namespace fwdpy11 } this->mutations.emplace_back( Mutation(mutations[i].neutral, mutations[i].pos, mutations[i].s, - mutations[i].h, -mutations[i].g, mutations[i].esizes, + mutations[i].h, -origin_times[i], mutations[i].esizes, mutations[i].heffects, mutations[i].xtra)); this->mut_lookup.emplace(mutations[i].pos, i); this->tables->emplace_back_site(mutations[i].pos, std::int8_t{0}); @@ -319,6 +322,32 @@ namespace fwdpy11 fwdpp::ts::samples_iterator si( sv.current_tree(), m->node, fwdpp::ts::convert_sample_index_to_nodes(true)); + if (m->key >= this->mutations.size()) + { + throw std::runtime_error("invalid mutation key"); + } + auto parent_node = sv.current_tree().parents[m->node]; + auto node_time = this->tables->nodes[m->node].time; + + if (this->mutations[m->key].g > node_time) + { + std::ostringstream o; + o << "invalid mutation origin time"; + throw std::runtime_error(o.str()); + } + if (parent_node >= 0) + { + auto parent_time + = this->tables + ->nodes[sv.current_tree().parents[m->node]] + .time; + if (this->mutations[m->key].g <= parent_time) + { + std::ostringstream o; + o << "invalid mutation origin time"; + throw std::runtime_error(o.str()); + } + } auto s = si(); while (s != fwdpp::ts::NULL_INDEX) { diff --git a/tests/test_import_tskit_mutations.py b/tests/test_import_tskit_mutations.py index 2371789db..e5153c469 100644 --- a/tests/test_import_tskit_mutations.py +++ b/tests/test_import_tskit_mutations.py @@ -182,11 +182,14 @@ def bad_dominance(md): # The "origin time" in the metadata # must equal the mutation's time value - def bad_origin(md): - if md['origin'] > 0.0: - md['origin'] = -1*md['origin'] - else: - md['origin'] = -1 + # NOTE: disabled in 0.19.9 to allow + # creating of pops from fwdpy11-generated + # mutations + # def bad_origin(md): + # if md['origin'] > 0.0: + # md['origin'] = -1*md['origin'] + # else: + # md['origin'] = -1 # NOTE: disable in 0.19.8 to # fix #1109 but this should be @@ -203,8 +206,8 @@ def bad_origin(md): # def neutrality_mismatch1(md): # md['neutral'] = 1 - # NOTE: bring back bad_origin2 in 0.20.0 if possible - for callback in [bad_effect_size, bad_dominance, bad_origin]: + # NOTE: bring back bad_origin and bad_origin2 in 0.20.0 if possible + for callback in [bad_effect_size, bad_dominance]: ts = make_treeseq_with_one_row_containing_bad_metadata(seed2, callback) with pytest.raises(ValueError): _ = fwdpy11.DiploidPopulation.create_from_tskit( diff --git a/tests/test_recreate_pop_from_own_treeseq.py b/tests/test_recreate_pop_from_own_treeseq.py index 7575b5256..6bc0e73a6 100644 --- a/tests/test_recreate_pop_from_own_treeseq.py +++ b/tests/test_recreate_pop_from_own_treeseq.py @@ -23,10 +23,29 @@ import tskit -# Example from Aaron Ragsdale. -# Modified to run much faster -@pytest.fixture -def pop(): +# NOTE: this is redundant with +# internal checks in the C++ back end. +# However, this code makes use of +# different code paths on the C++ side. +def validate_mutation_table(pop): + # assert(all([i.time > 0 for i in pop.tables.nodes])) + ti = fwdpy11.TreeIterator(pop.tables, [i for i in range(2*pop.N)]) + + for t in ti: + for m in t.mutations(): + g = pop.mutations[m.key].g + nt = pop.tables.nodes[m.node].time + # Internal mutation times + # are meant to be "forwards in time", + # so a variant must always have an origin + # time <= that of its node time + assert g <= nt + if t.parent(m.node) >= 0: + pt = pop.tables.nodes[t.parent(m.node)].time + assert g > pt + + +def pop_rng_params(): # Set up parameters r = 1e-8 L = 1e8 @@ -60,6 +79,15 @@ def pop(): rng = fwdpy11.GSLrng(424242) + return pop, rng, params + + +# Example from Aaron Ragsdale. +# Modified to run much faster +@pytest.fixture +def pop(): + pop, rng, params = pop_rng_params() + fwdpy11.evolvets(rng, pop, params, 100, suppress_table_indexing=True) assert pop.generation == 3 @@ -77,6 +105,17 @@ def test_recreate_pop_from_own_treeseq(pop): ts, import_mutations=True) assert len(pop.tables.mutations) == nm assert pop.generation == 0 + validate_mutation_table(pop) + + +def test_multiple_export_recreate_evolve_steps(): + pop, rng, params = pop_rng_params() + for i in range(5): + fwdpy11.evolvets(rng, pop, params, 100, suppress_table_indexing=True) + assert pop.generation == 3 + ts = pop.dump_tables_to_tskit() + pop = fwdpy11.DiploidPopulation.create_from_tskit( + ts, import_mutations=True) def test_start_pop_from_treeseq_without_mutations(pop):