Skip to content

Commit

Permalink
Improved import of mutations from tskit tree sequences. (#1113)
Browse files Browse the repository at this point in the history
* The time field of a mutation is now used as the origin time.
  This allows us to support both fwdpy11- and tskit- generated
  mutations.
* Add tests of evolve/export/import/evolve work flows.
* C++ back end now catches invalid mutation origin times,
  resulting in earlier failures when inputs are invalid.
  • Loading branch information
molpopgen authored Mar 20, 2023
1 parent 3f31c4d commit 5fffb55
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 20 deletions.
5 changes: 3 additions & 2 deletions cpp/fwdpy11_types/DiploidPopulation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,9 @@ init_DiploidPopulation(py::module& m)
.def("_set_mutations",
[](fwdpy11::DiploidPopulation& self,
const std::vector<fwdpy11::Mutation>& mutations,
const std::vector<std::int32_t>& mutation_nodes) {
self.set_mutations(mutations, mutation_nodes);
const std::vector<std::int32_t>& mutation_nodes,
const std::vector<fwdpy11::mutation_origin_time>& origin_times) {
self.set_mutations(mutations, mutation_nodes, origin_times);
})
.def(py::pickle(
[](const fwdpy11::DiploidPopulation& pop) -> py::object {
Expand Down
9 changes: 9 additions & 0 deletions doc/misc/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
Major changes are listed below. Each release likely contains fiddling with back-end code,
updates to latest `fwdpp` version, etc.

## 0.19.9

Big fix

* Use the mutation's time field rather than
the metadata "origin" time when creating
a DiploidPopulation from a tskit tree sequence.
PR {pr}`1113`

## 0.19.8

Big fix/new feature (take your pick)
Expand Down
11 changes: 6 additions & 5 deletions fwdpy11/_types/diploid_population.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,10 @@ def create_from_tskit(cls, ts: tskit.TreeSequence,
if import_mutations is True:
import fwdpy11.tskit_tools
for mutation in ts.mutations():
if mutation.metadata["origin"] < 0.0:
msg = "all origin fields in mutation metadata"
msg += "must be non-negative"
raise ValueError(msg)
# if mutation.metadata["origin"] < 0.0:
# msg = "all origin fields in mutation metadata"
# msg += "must be non-negative"
# raise ValueError(msg)
if not mutation.time.is_integer():
raise ValueError("mutation times cannot contain decimals")
mutation_nodes = [i.node for i in ts.mutations()]
Expand Down Expand Up @@ -144,7 +144,8 @@ def create_from_tskit(cls, ts: tskit.TreeSequence,
for i in decoded_md:
if i is not None:
mvec.append(i)
ll._set_mutations(mvec, mutation_nodes)
ll._set_mutations(mvec, mutation_nodes,
[int(i.time) for i in ts.mutations()])
return cls(0, 0.0, ll_pop=ll)

@ classmethod
Expand Down
33 changes: 31 additions & 2 deletions fwdpy11/headers/fwdpy11/types/DiploidPopulation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include "Population.hpp"
#include "Diploid.hpp"
#include "fwdpy11/types/Mutation.hpp"
#include <sstream>
#include <iostream>
#include <stdexcept>
#include <limits>
#include <unordered_set>
Expand Down Expand Up @@ -223,7 +225,8 @@ namespace fwdpy11
*/
void
set_mutations(const std::vector<Mutation> &mutations,
const std::vector<std::int32_t> &mutation_nodes)
const std::vector<std::int32_t> &mutation_nodes,
const std::vector<fwdpy11::mutation_origin_time> &origin_times)
{
if (this->is_simulating)
{
Expand Down Expand Up @@ -266,7 +269,7 @@ namespace fwdpy11
}
this->mutations.emplace_back(
Mutation(mutations[i].neutral, mutations[i].pos, mutations[i].s,
mutations[i].h, -mutations[i].g, mutations[i].esizes,
mutations[i].h, -origin_times[i], mutations[i].esizes,
mutations[i].heffects, mutations[i].xtra));
this->mut_lookup.emplace(mutations[i].pos, i);
this->tables->emplace_back_site(mutations[i].pos, std::int8_t{0});
Expand Down Expand Up @@ -319,6 +322,32 @@ namespace fwdpy11
fwdpp::ts::samples_iterator si(
sv.current_tree(), m->node,
fwdpp::ts::convert_sample_index_to_nodes(true));
if (m->key >= this->mutations.size())
{
throw std::runtime_error("invalid mutation key");
}
auto parent_node = sv.current_tree().parents[m->node];
auto node_time = this->tables->nodes[m->node].time;

if (this->mutations[m->key].g > node_time)
{
std::ostringstream o;
o << "invalid mutation origin time";
throw std::runtime_error(o.str());
}
if (parent_node >= 0)
{
auto parent_time
= this->tables
->nodes[sv.current_tree().parents[m->node]]
.time;
if (this->mutations[m->key].g <= parent_time)
{
std::ostringstream o;
o << "invalid mutation origin time";
throw std::runtime_error(o.str());
}
}
auto s = si();
while (s != fwdpp::ts::NULL_INDEX)
{
Expand Down
17 changes: 10 additions & 7 deletions tests/test_import_tskit_mutations.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,11 +182,14 @@ def bad_dominance(md):
# The "origin time" in the metadata
# must equal the mutation's time value

def bad_origin(md):
if md['origin'] > 0.0:
md['origin'] = -1*md['origin']
else:
md['origin'] = -1
# NOTE: disabled in 0.19.9 to allow
# creating of pops from fwdpy11-generated
# mutations
# def bad_origin(md):
# if md['origin'] > 0.0:
# md['origin'] = -1*md['origin']
# else:
# md['origin'] = -1

# NOTE: disable in 0.19.8 to
# fix #1109 but this should be
Expand All @@ -203,8 +206,8 @@ def bad_origin(md):
# def neutrality_mismatch1(md):
# md['neutral'] = 1

# NOTE: bring back bad_origin2 in 0.20.0 if possible
for callback in [bad_effect_size, bad_dominance, bad_origin]:
# NOTE: bring back bad_origin and bad_origin2 in 0.20.0 if possible
for callback in [bad_effect_size, bad_dominance]:
ts = make_treeseq_with_one_row_containing_bad_metadata(seed2, callback)
with pytest.raises(ValueError):
_ = fwdpy11.DiploidPopulation.create_from_tskit(
Expand Down
47 changes: 43 additions & 4 deletions tests/test_recreate_pop_from_own_treeseq.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,29 @@
import tskit


# Example from Aaron Ragsdale.
# Modified to run much faster
@pytest.fixture
def pop():
# NOTE: this is redundant with
# internal checks in the C++ back end.
# However, this code makes use of
# different code paths on the C++ side.
def validate_mutation_table(pop):
# assert(all([i.time > 0 for i in pop.tables.nodes]))
ti = fwdpy11.TreeIterator(pop.tables, [i for i in range(2*pop.N)])

for t in ti:
for m in t.mutations():
g = pop.mutations[m.key].g
nt = pop.tables.nodes[m.node].time
# Internal mutation times
# are meant to be "forwards in time",
# so a variant must always have an origin
# time <= that of its node time
assert g <= nt
if t.parent(m.node) >= 0:
pt = pop.tables.nodes[t.parent(m.node)].time
assert g > pt


def pop_rng_params():
# Set up parameters
r = 1e-8
L = 1e8
Expand Down Expand Up @@ -60,6 +79,15 @@ def pop():

rng = fwdpy11.GSLrng(424242)

return pop, rng, params


# Example from Aaron Ragsdale.
# Modified to run much faster
@pytest.fixture
def pop():
pop, rng, params = pop_rng_params()

fwdpy11.evolvets(rng, pop, params, 100, suppress_table_indexing=True)

assert pop.generation == 3
Expand All @@ -77,6 +105,17 @@ def test_recreate_pop_from_own_treeseq(pop):
ts, import_mutations=True)
assert len(pop.tables.mutations) == nm
assert pop.generation == 0
validate_mutation_table(pop)


def test_multiple_export_recreate_evolve_steps():
pop, rng, params = pop_rng_params()
for i in range(5):
fwdpy11.evolvets(rng, pop, params, 100, suppress_table_indexing=True)
assert pop.generation == 3
ts = pop.dump_tables_to_tskit()
pop = fwdpy11.DiploidPopulation.create_from_tskit(
ts, import_mutations=True)


def test_start_pop_from_treeseq_without_mutations(pop):
Expand Down

0 comments on commit 5fffb55

Please sign in to comment.