From 57703f22dde63c4d1be66ff448f27d70d686da88 Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Mon, 5 Aug 2024 13:13:57 +0100 Subject: [PATCH 1/3] Malloc Q rather than from stack in multi-merger Fixes segfault reported in #2307 --- lib/msprime.c | 18 +++++++++++++++-- lib/tests/test_ancestry.c | 42 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/lib/msprime.c b/lib/msprime.c index 474f7ca7e..063a2d61d 100644 --- a/lib/msprime.c +++ b/lib/msprime.c @@ -6824,7 +6824,8 @@ msp_dirac_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t { int ret = 0; uint32_t j, n, num_participants, num_parental_copies; - avl_tree_t *ancestors, Q[4]; /* MSVC won't let us use num_pots here */ + avl_tree_t *ancestors; + avl_tree_t *Q = NULL; avl_node_t *x_node, *y_node; segment_t *x, *y; double nC2, p; @@ -6837,6 +6838,11 @@ msp_dirac_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t } else { num_parental_copies = 2 * self->ploidy; } + Q = tsk_malloc(num_parental_copies * sizeof(*Q)); + if (Q == NULL) { + ret = MSP_ERR_NO_MEMORY; + goto out; + } ancestors = &self->populations[pop_id].ancestors[label]; n = avl_count(ancestors); @@ -6889,6 +6895,7 @@ msp_dirac_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t } } out: + tsk_safe_free(Q); return ret; } @@ -7035,7 +7042,8 @@ msp_beta_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t l { int ret = 0; uint32_t j, n, num_participants, num_parental_copies; - avl_tree_t *ancestors, Q[4]; /* MSVC won't let us use num_pots here */ + avl_tree_t *ancestors; + avl_tree_t *Q = NULL; double alpha = self->model.params.beta_coalescent.alpha; double truncation_point = beta_compute_truncation(self); double beta_x, u, increment; @@ -7047,6 +7055,11 @@ msp_beta_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t l } else { num_parental_copies = 2 * self->ploidy; } + Q = tsk_malloc(num_parental_copies * sizeof(*Q)); + if (Q == NULL) { + ret = MSP_ERR_NO_MEMORY; + goto out; + } for (j = 0; j < num_parental_copies; j++) { avl_init_tree(&Q[j], cmp_segment_queue, NULL); @@ -7106,6 +7119,7 @@ msp_beta_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t l } out: + tsk_safe_free(Q); return ret; } diff --git a/lib/tests/test_ancestry.c b/lib/tests/test_ancestry.c index 9974758a1..26a5a7be4 100644 --- a/lib/tests/test_ancestry.c +++ b/lib/tests/test_ancestry.c @@ -1515,6 +1515,47 @@ test_multiple_mergers_simulation(void) gsl_rng_free(rng); } +static void +test_multiple_mergers_ploidy(void) +{ + int ret; + size_t j, ploidy; + uint32_t n = 10; + long seed = 10; + msp_t msp; + gsl_rng *rng = safe_rng_alloc(); + tsk_table_collection_t tables; + + for (j = 0; j < 2; j++) { + gsl_rng_set(rng, seed); + for (ploidy = 1; ploidy < 12; ploidy++) { + ret = build_sim(&msp, &tables, rng, 10, 1, NULL, n); + CU_ASSERT_EQUAL(ret, 0); + CU_ASSERT_EQUAL_FATAL(msp_set_recombination_rate(&msp, 0.1), 0); + if (j == 0) { + ret = msp_set_simulation_model_dirac(&msp, 0.9, 10); + } else { + ret = msp_set_simulation_model_beta(&msp, 1.8, 1); + } + CU_ASSERT_EQUAL(ret, 0); + + msp_set_ploidy(&msp, ploidy); + ret = msp_initialise(&msp); + + ret = msp_run(&msp, DBL_MAX, ULONG_MAX); + CU_ASSERT_EQUAL_FATAL(ret, 0); + CU_ASSERT_TRUE(msp_is_completed(&msp)); + CU_ASSERT_TRUE(msp.time > 0); + msp_verify(&msp, 0); + + ret = msp_free(&msp); + CU_ASSERT_EQUAL(ret, 0); + tsk_table_collection_free(&tables); + } + } + gsl_rng_free(rng); +} + static void test_multiple_mergers_growth_rate(void) { @@ -3875,6 +3916,7 @@ main(int argc, char **argv) { "test_gc_rates", test_gc_rates }, { "test_multiple_mergers_simulation", test_multiple_mergers_simulation }, + { "test_multiple_mergers_ploidy", test_multiple_mergers_ploidy }, { "test_multiple_mergers_growth_rate", test_multiple_mergers_growth_rate }, { "test_dirac_coalescent_bad_parameters", test_dirac_coalescent_bad_parameters }, { "test_beta_coalescent_bad_parameters", test_beta_coalescent_bad_parameters }, From 0dc740e0c7f2cd7cef2a68cc28fa2bc11dd25b61 Mon Sep 17 00:00:00 2001 From: Jere Koskela Date: Tue, 6 Aug 2024 19:14:29 +0100 Subject: [PATCH 2/3] Multiple merger polyploid scaling Fixup C code --- CHANGELOG.md | 6 ++ lib/msprime.c | 160 ++++++++++++++++----------------------- msprime/_msprimemodule.c | 4 +- tests/test_ancestry.py | 2 +- tests/test_lowlevel.py | 4 +- verification.py | 26 ++++--- 6 files changed, 92 insertions(+), 110 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ed5bd0ee3..b4030913f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## [1.3.3] - 2024-XX-XX + +**Bug fixes**: + +- Correct the Dirac coalescent time scaling with polyploidy and population growth. + ## [1.3.2] - 2024-07-08 - Add `record_provenance` argument to `sim_mutations` ({issue}`2272`, {pr}`2273`, {user}`peterlharp`) diff --git a/lib/msprime.c b/lib/msprime.c index 063a2d61d..51e01f3ef 100644 --- a/lib/msprime.c +++ b/lib/msprime.c @@ -6763,33 +6763,18 @@ msp_dirac_get_common_ancestor_waiting_time_from_rate( msp_t *self, population_t *pop, double lambda) { double ret = DBL_MAX; - double alpha = pop->growth_rate; + double alpha = 2.0 * pop->growth_rate; double t = self->time; double u, dt, z; + double pop_size = pop->initial_size; if (lambda > 0.0) { u = gsl_ran_exponential(self->rng, 1.0 / lambda); if (alpha == 0.0) { - if (self->ploidy == 1) { - ret = pop->initial_size * pop->initial_size * u; - } else { - /* For ploidy > 1 we assume N/2 two-parent families, so that the rate - * with which 2 lineages belong to a common family is (2/N)^2 */ - ret = pop->initial_size * pop->initial_size * u / 4.0; - } + ret = pop_size * pop_size * u; } else { dt = t - pop->start_time; - if (self->ploidy == 1) { - z = 1 - + alpha * pop->initial_size * pop->initial_size * exp(-alpha * dt) - * u; - } else { - /* For ploidy > 1 we assume N/2 two-parent families, so that the rate - * with which 2 lineages belong to a common family is (2/N)^2 */ - z = 1 - + alpha * pop->initial_size * pop->initial_size * exp(-alpha * dt) - * u / 4.0; - } + z = 1 + alpha * pop_size * pop_size * exp(-alpha * dt) * u; /* if z is <= 0 no coancestry can occur */ if (z > 0) { ret = log(z) / alpha; @@ -6808,13 +6793,7 @@ msp_dirac_get_common_ancestor_waiting_time( { population_t *pop = &self->populations[pop_id]; unsigned int n = (unsigned int) avl_count(&pop->ancestors[label]); - double c = self->model.params.dirac_coalescent.c; - double lambda = n * (n - 1.0) / 2.0; - if (self->ploidy == 1) { - lambda += c; - } else { - lambda += c / (2.0 * self->ploidy); - } + double lambda = gsl_sf_choose(n, 2) + self->model.params.dirac_coalescent.c; return msp_dirac_get_common_ancestor_waiting_time_from_rate(self, pop, lambda); } @@ -6831,67 +6810,60 @@ msp_dirac_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t double nC2, p; double psi = self->model.params.dirac_coalescent.psi; - /* We assume haploid reproduction is single-parent, while all other ploidies - * are two-parent */ - if (self->ploidy == 1) { - num_parental_copies = 1; - } else { - num_parental_copies = 2 * self->ploidy; - } - Q = tsk_malloc(num_parental_copies * sizeof(*Q)); - if (Q == NULL) { - ret = MSP_ERR_NO_MEMORY; - goto out; - } - ancestors = &self->populations[pop_id].ancestors[label]; n = avl_count(ancestors); nC2 = gsl_sf_choose(n, 2); - if (self->ploidy == 1) { - p = (nC2 / (nC2 + self->model.params.dirac_coalescent.c)); - } else { - p = (nC2 / (nC2 + self->model.params.dirac_coalescent.c / (2.0 * self->ploidy))); - } + p = nC2 / (nC2 + self->model.params.dirac_coalescent.c); + if (gsl_rng_uniform(self->rng) < p) { - /* When 2 * ploidy parental chromosomes are available, Mendelian segregation - * results in a merger only 1 / (2 * ploidy) of the time. */ - if (self->ploidy == 1 - || gsl_rng_uniform(self->rng) < 1.0 / (2.0 * self->ploidy)) { - /* Choose x and y */ - n = avl_count(ancestors); - j = (uint32_t) gsl_rng_uniform_int(self->rng, n); - x_node = avl_at(ancestors, j); - tsk_bug_assert(x_node != NULL); - x = (segment_t *) x_node->item; - avl_unlink_node(ancestors, x_node); - j = (uint32_t) gsl_rng_uniform_int(self->rng, n - 1); - y_node = avl_at(ancestors, j); - tsk_bug_assert(y_node != NULL); - y = (segment_t *) y_node->item; - avl_unlink_node(ancestors, y_node); - self->num_ca_events++; - msp_free_avl_node(self, x_node); - msp_free_avl_node(self, y_node); - ret = msp_merge_two_ancestors(self, pop_id, label, x, y, TSK_NULL, NULL); - } + /* Choose x and y */ + n = avl_count(ancestors); + j = (uint32_t) gsl_rng_uniform_int(self->rng, n); + x_node = avl_at(ancestors, j); + tsk_bug_assert(x_node != NULL); + x = (segment_t *) x_node->item; + avl_unlink_node(ancestors, x_node); + j = (uint32_t) gsl_rng_uniform_int(self->rng, n - 1); + y_node = avl_at(ancestors, j); + tsk_bug_assert(y_node != NULL); + y = (segment_t *) y_node->item; + avl_unlink_node(ancestors, y_node); + self->num_ca_events++; + msp_free_avl_node(self, x_node); + msp_free_avl_node(self, y_node); + ret = msp_merge_two_ancestors(self, pop_id, label, x, y, TSK_NULL, NULL); } else { - for (j = 0; j < num_parental_copies; j++) { - avl_init_tree(&Q[j], cmp_segment_queue, NULL); - } num_participants = gsl_ran_binomial(self->rng, psi, n); - ret = msp_multi_merger_common_ancestor_event( - self, ancestors, Q, num_participants, num_parental_copies); - if (ret < 0) { - goto out; - } - /* All the lineages that have been assigned to the particular pots can now be - * merged. - */ - for (j = 0; j < num_parental_copies; j++) { - ret = msp_merge_ancestors(self, &Q[j], pop_id, label, TSK_NULL, NULL); + if (num_participants > 1) { + /* We assume haploid reproduction is single-parent, while all other ploidies + * are two-parent */ + if (self->ploidy == 1) { + num_parental_copies = 1; + } else { + num_parental_copies = 2 * self->ploidy; + } + Q = tsk_malloc(num_parental_copies * sizeof(*Q)); + if (Q == NULL) { + ret = MSP_ERR_NO_MEMORY; + goto out; + } + for (j = 0; j < num_parental_copies; j++) { + avl_init_tree(&Q[j], cmp_segment_queue, NULL); + } + ret = msp_multi_merger_common_ancestor_event( + self, ancestors, Q, num_participants, num_parental_copies); if (ret < 0) { goto out; } + /* All the lineages that have been assigned to the particular pots can now be + * merged. + */ + for (j = 0; j < num_parental_copies; j++) { + ret = msp_merge_ancestors(self, &Q[j], pop_id, label, TSK_NULL, NULL); + if (ret < 0) { + goto out; + } + } } } out: @@ -7048,22 +7020,6 @@ msp_beta_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t l double truncation_point = beta_compute_truncation(self); double beta_x, u, increment; - /* We assume haploid reproduction is single-parent, while all other ploidies - * are two-parent */ - if (self->ploidy == 1) { - num_parental_copies = 1; - } else { - num_parental_copies = 2 * self->ploidy; - } - Q = tsk_malloc(num_parental_copies * sizeof(*Q)); - if (Q == NULL) { - ret = MSP_ERR_NO_MEMORY; - goto out; - } - - for (j = 0; j < num_parental_copies; j++) { - avl_init_tree(&Q[j], cmp_segment_queue, NULL); - } ancestors = &self->populations[pop_id].ancestors[label]; n = avl_count(ancestors); beta_x = ran_inc_beta(self->rng, 2.0 - alpha, alpha, truncation_point); @@ -7101,6 +7057,22 @@ msp_beta_common_ancestor_event(msp_t *self, population_id_t pop_id, label_id_t l num_participants = 2 + gsl_ran_binomial(self->rng, beta_x, n - 2); } while (gsl_rng_uniform(self->rng) > 1 / gsl_sf_choose(num_participants, 2)); + /* We assume haploid reproduction is single-parent, while all other ploidies + * are two-parent */ + if (self->ploidy == 1) { + num_parental_copies = 1; + } else { + num_parental_copies = 2 * self->ploidy; + } + Q = tsk_malloc(num_parental_copies * sizeof(*Q)); + if (Q == NULL) { + ret = MSP_ERR_NO_MEMORY; + goto out; + } + + for (j = 0; j < num_parental_copies; j++) { + avl_init_tree(&Q[j], cmp_segment_queue, NULL); + } ret = msp_multi_merger_common_ancestor_event( self, ancestors, Q, num_participants, num_parental_copies); if (ret < 0) { diff --git a/msprime/_msprimemodule.c b/msprime/_msprimemodule.c index 285fbbc0e..e9af9c429 100644 --- a/msprime/_msprimemodule.c +++ b/msprime/_msprimemodule.c @@ -1177,8 +1177,8 @@ Simulator_parse_simulation_model(Simulator *self, PyObject *py_model) goto out; } c = PyFloat_AsDouble(value); - if (psi <= 0 || psi >= 1.0) { - PyErr_SetString(PyExc_ValueError, "Must have 0 < psi < 1"); + if (psi <= 0 || psi > 1.0) { + PyErr_SetString(PyExc_ValueError, "Must have 0 < psi <= 1"); goto out; } if (c < 0) { diff --git a/tests/test_ancestry.py b/tests/test_ancestry.py index 82e20e1f9..a2f4113b3 100644 --- a/tests/test_ancestry.py +++ b/tests/test_ancestry.py @@ -206,7 +206,7 @@ def test_multimerger_dirac(self): recombination_rate=0.1, record_full_arg=True, sequence_length=10, - model=msprime.DiracCoalescent(psi=0.5, c=5), + model=msprime.DiracCoalescent(psi=0.5, c=100), ) self.verify(sim, multiple_mergers=True) diff --git a/tests/test_lowlevel.py b/tests/test_lowlevel.py index 9c483a61e..57d2d4d71 100644 --- a/tests/test_lowlevel.py +++ b/tests/test_lowlevel.py @@ -1327,7 +1327,7 @@ def test_dirac_simulation_model(self): make_sim(model=model) with pytest.raises(ValueError): make_sim(model=get_simulation_model("dirac")) - for bad_psi in [-1, 0, -1e-6, 1, 1e6]: + for bad_psi in [-1, 0, -1e-6, 1 + 1e-6, 1e6]: with pytest.raises(ValueError): make_sim( model=get_simulation_model("dirac", c=1, psi=bad_psi), @@ -1337,7 +1337,7 @@ def test_dirac_simulation_model(self): make_sim( model=get_simulation_model("dirac", psi=0.5, c=bad_c), ) - for psi in [0.99, 0.2, 1e-4]: + for psi in [1, 0.2, 1e-4]: for c in [5.0, 1e2, 1e-4]: model = get_simulation_model("dirac", psi=psi, c=c) sim = make_sim(model=model) diff --git a/verification.py b/verification.py index 7ef49d638..d60fadc16 100644 --- a/verification.py +++ b/verification.py @@ -3431,12 +3431,14 @@ def _run(self, pop_size, alpha, growth_rate, num_replicates=10000): logging.debug(f"running Beta growth for {pop_size} {alpha} {growth_rate}") b = growth_rate * (alpha - 1) model = (msprime.BetaCoalescent(alpha=alpha),) - ploidy = 2 - a = 1 / (2 * ploidy * self.compute_beta_timescale(pop_size, alpha, ploidy)) - name = f"N={pop_size}_alpha={alpha}_growth_rate={growth_rate}_ploidy={ploidy}" - self.compare_tmrca( - pop_size, growth_rate, model, num_replicates, a, b, ploidy, name - ) + for ploidy in range(2, 7): + a = 1 / (2 * ploidy * self.compute_beta_timescale(pop_size, alpha, ploidy)) + name = ( + f"N={pop_size}_alpha={alpha}_growth_rate={growth_rate}_ploidy={ploidy}" + ) + self.compare_tmrca( + pop_size, growth_rate, model, num_replicates, a, b, ploidy, name + ) ploidy = 1 a = 1 / self.compute_beta_timescale(pop_size, alpha, ploidy) name = f"N={pop_size}_alpha={alpha}_growth_rate={growth_rate}_ploidy={ploidy}" @@ -3474,12 +3476,14 @@ def test_100000_11_001(self): class DiracGrowth(XiGrowth): def _run(self, pop_size, c, psi, growth_rate, num_replicates=10000): logging.debug(f"running Dirac growth for {pop_size} {c} {psi} {growth_rate}") - b = growth_rate + b = 2 * growth_rate model = (msprime.DiracCoalescent(psi=psi, c=c),) - p = 2 - a = (1 + c * psi * psi / (2 * p)) / (pop_size * pop_size) - name = f"N={pop_size}_c={c}_psi={psi}_growth_rate={growth_rate}_ploidy={p}" - self.compare_tmrca(pop_size, growth_rate, model, num_replicates, a, b, p, name) + for p in range(2, 7): + a = (1 + c * psi * psi / (2 * p)) / (pop_size * pop_size) + name = f"N={pop_size}_c={c}_psi={psi}_growth_rate={growth_rate}_ploidy={p}" + self.compare_tmrca( + pop_size, growth_rate, model, num_replicates, a, b, p, name + ) p = 1 a = (1 + c * psi * psi) / (pop_size * pop_size) name = f"N={pop_size}_c={c}_psi={psi}_growth_rate={growth_rate}_ploidy={p}" From dcc3d2cde656ba236987e670cc05f8e431f14ac2 Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Wed, 7 Aug 2024 10:09:50 +0100 Subject: [PATCH 3/3] Update changelog for bugfix release --- CHANGELOG.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4030913f..8ec8e7033 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,17 @@ # Changelog -## [1.3.3] - 2024-XX-XX +## [1.3.3] - 2024-08-07 + +Bugfix release for issues with Dirac and Beta coalescent models. **Bug fixes**: -- Correct the Dirac coalescent time scaling with polyploidy and population growth. +- Fix segfault for in Dirac and Beta coalescent models for ploidy > 2 ({issue}`2307`, {pr}`{2308}`) + +- Correct the Dirac coalescent time scaling with polyploidy and population growth. ({pr}`{2310}, + {user}`JereKoskela`) + +- Allow psi = 1 in the Dirac coalescent ({pr}`{2310}, {user}`JereKoskela`). ## [1.3.2] - 2024-07-08