From b845b62b9116865075a89cf16c9ae2d46d4953aa Mon Sep 17 00:00:00 2001 From: Peter Ralph Date: Sun, 22 Dec 2024 20:58:37 -0800 Subject: [PATCH 1/4] finalize DFE development docs; closes #1085 (#1625) * finalize DFE development docs; closes #1085 * dfe qc issue template --- .../ISSUE_TEMPLATE/dfe-qc-issue-template.md | 17 ++++++++++ docs/development.rst | 33 +++++++++++-------- 2 files changed, 37 insertions(+), 13 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/dfe-qc-issue-template.md diff --git a/.github/ISSUE_TEMPLATE/dfe-qc-issue-template.md b/.github/ISSUE_TEMPLATE/dfe-qc-issue-template.md new file mode 100644 index 000000000..47e42e33e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/dfe-qc-issue-template.md @@ -0,0 +1,17 @@ +--- +name: DFE QC issue template +about: Quality control process for DFE addition +title: QC for {dfe_id} ({species}) +labels: DFE QC +assignees: '' + +--- + +**PR for new model:** {link to Pull Request} +**Original paper:** {ink to original paper} +**Parameter values:** {reference to where parameter values can be found} + +**Potential issues that might lead to differences between the model implementations:** +{description of any potential issues} + +**QC'er requests:** {tag potential developers to QC the model} diff --git a/docs/development.rst b/docs/development.rst index e8234a1a8..92fb82b58 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -1652,7 +1652,7 @@ Testing your DFE model and submitting a PR After you finished your implementation, and specified all the necessary citations, -we recommend that you run some basic local tests to see that +we recommend that you run some basic local checks to see that the model was successfully loaded to ``stdpopsim``. You may follow the process outlined for `Testing your demographic model and submitting a PR`_. @@ -1667,24 +1667,31 @@ of the development team before it is fully incorporated into ``stdpopsim``. This will likely require additional feedback from you, so, stay tuned for discussion during the review process. +To facilitate this, there is one more step: please open a +`new issue `__, +using the "DFE QC" template. +The template asks for the basic information that someone will need +to independently verify the implemented DFE. + --------------------- Reviewing a DFE model --------------------- -The review process for DFE models is currently being developed. -For now, we suggest that you -`open a new blank issue `__ -and specify the following information: +The process for reviewing a DFE is essentially the same +as for reviewing a demographic model (see `Overview of the stdpopsim review process`_). +Briefly, you will re-implement the DFE "blind", i.e., without looking at the DFE +implementation already added to the code. +Then, the unit tests check whether the implementations are equivalent. +To do this, you add your implementation to ``stdpopsim/qc/.py``, +followed by a call like + +.. code-block:: python -1. **PR for new model:** -2. **Original paper:** -3. **Parameter values:** -4. **Potential issues:** -5. **QC'er requests:** + _species.get_dfe(_MODEL_ID_).register_qc(_your_review_function()) -A reviewer will be assigned to check your implementation and approve it. -All discussion about the review can be conducted in the **QC issue** -mentioned above. +where ``_MODEL_ID_`` is the string specified as the ID for the original DFE, +and ``_your_review_function()`` is the function you've added to the QC file +that returns a DFE object. **************** Coding standards From dc30adbe9799058e922fa8f788d22282b49bd8ef Mon Sep 17 00:00:00 2001 From: Peter Ralph Date: Sun, 22 Dec 2024 20:59:16 -0800 Subject: [PATCH 2/4] check mutations are added at the correct time; closes #1496 (#1627) --- tests/test_slim_engine.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/test_slim_engine.py b/tests/test_slim_engine.py index d9a85f9ef..65940802d 100644 --- a/tests/test_slim_engine.py +++ b/tests/test_slim_engine.py @@ -321,6 +321,30 @@ def test_recap_and_rescale_on_external_slim_run(self, tmp_path): assert tables1.edges == tables2.edges assert tables1.mutations == tables2.mutations + def test_recap_start_time(self): + # check that the correct time to start adding mutations from + # is actually the value recorded in metadata (whether it is or + # differs by 1 or 2 depends on the stages in which the simulation + # is set up and written out; see pyslim:#308 + engine = stdpopsim.get_engine("slim") + species = stdpopsim.get_species("AnaPla") + contig = species.get_contig(length=1e3) + model = stdpopsim.PiecewiseConstantSize(100) + samples = {"pop_0": 10} + ts = engine.simulate( + demographic_model=model, + contig=contig, + samples=samples, + slim_burn_in=3, + _recap_and_rescale=False, + ) + root_times = set([ts.node(n).time for t in ts.trees() for n in t.roots]) + assert root_times == set( + [ + ts.metadata["SLiM"]["cycle"], + ] + ) + def test_assert_min_version(self): engine = stdpopsim.get_engine("slim") with mock.patch( From c56f8c0b5b03f7dfccdd4a4e388102bd7a7a0d8e Mon Sep 17 00:00:00 2001 From: Peter Ralph Date: Sun, 22 Dec 2024 21:09:04 -0800 Subject: [PATCH 3/4] add seeds to slim tests; closes #1201 (#1628) --- tests/test_slim_engine.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/tests/test_slim_engine.py b/tests/test_slim_engine.py index 65940802d..7db37f325 100644 --- a/tests/test_slim_engine.py +++ b/tests/test_slim_engine.py @@ -1173,6 +1173,7 @@ def test_chr1(self, Q): samples=samples, slim_burn_in=0.1, verbosity=3, + seed=888, ) self.verify_recombination_map(contig, ts) @@ -1195,6 +1196,7 @@ def test_off_by_one(self): samples=samples, slim_burn_in=0.1, verbosity=3, + seed=456, ) self.verify_recombination_map(contig, ts) assert list(ts.breakpoints()) == [0.0, midpoint, contig.length] @@ -1550,6 +1552,7 @@ def test_default_dfe(self): contig=contig, samples=self.samples, verbosity=3, # to get metadata output + seed=135, ) self.verify_genomic_elements(contig, ts) self.verify_mutation_rates(contig, ts) @@ -1569,6 +1572,7 @@ def test_multiple_dfes(self): contig=contig, samples=self.samples, verbosity=3, # to get metadata output + seed=246, ) self.verify_genomic_elements(contig, ts) self.verify_mutation_rates(contig, ts) @@ -1586,6 +1590,7 @@ def test_unused_dfe(self): contig=contig, samples=self.samples, verbosity=3, # to get metadata output + seed=357, ) self.verify_genomic_elements(contig, ts) self.verify_mutation_rates(contig, ts) @@ -1620,6 +1625,7 @@ def test_same_dfes(self): contig=contig, samples=self.samples, verbosity=3, # to get metadata output + seed=468, ) self.verify_genomic_elements(contig, ts) self.verify_mutation_rates(contig, ts) @@ -1648,6 +1654,7 @@ def test_slim_produces_mutations(self): slim_scaling_factor=10, slim_burn_in=0.1, verbosity=3, # to get metadata output + seed=579, _recap_and_rescale=False, ) assert ts.num_sites > 0 @@ -1664,6 +1671,7 @@ def test_no_neutral_mutations_are_simulated_by_slim(self): slim_scaling_factor=10, slim_burn_in=0.1, verbosity=3, + seed=147, _recap_and_rescale=False, ) assert ts.num_sites == 0 @@ -1682,6 +1690,7 @@ def test_no_neutral_mutations_are_simulated_by_slim(self): slim_scaling_factor=10, slim_burn_in=0.1, verbosity=3, + seed=258, _recap_and_rescale=False, ) assert ts.num_sites == 0 @@ -1700,6 +1709,7 @@ def test_neutral_dfe_slim_proportions(self): slim_scaling_factor=Q, slim_burn_in=0.1, verbosity=3, + seed=369, _recap_and_rescale=False, ) ge_types = self.slim_metadata_key0( @@ -1732,6 +1742,7 @@ def test_neutral_dfe_slim_proportions(self): slim_scaling_factor=Q, slim_burn_in=0.1, verbosity=3, + seed=470, _recap_and_rescale=False, ) ge_types = self.slim_metadata_key0( @@ -1781,6 +1792,7 @@ def test_chromosomal_segment(self): contig=contig, samples=self.samples, verbosity=3, # to get metadata output + seed=159, ) self.verify_genomic_elements(contig, ts) self.verify_mutation_rates(contig, ts) @@ -1861,6 +1873,7 @@ def test_dominance_coeff_list(self): contig=contig, samples=self.samples, verbosity=3, # to get metadata output + seed=260, ) assert len(ts.metadata["stdpopsim"]["DFEs"]) == len(contig.dfe_list) + 1 # slim mutation type IDs with dominance coeff lists: @@ -2244,6 +2257,7 @@ def test_drawn_mutation_not_lost(self): extended_events=extended_events, slim_scaling_factor=10, slim_burn_in=0.1, + seed=321, _recap_and_rescale=False, ) assert ts.num_mutations == 1 @@ -2276,6 +2290,7 @@ def test_drawn_mutation_is_lost(self): extended_events=extended_events, slim_scaling_factor=10, slim_burn_in=0.1, + seed=432, _recap_and_rescale=False, ) assert ts.num_mutations == 0 @@ -2502,6 +2517,7 @@ def test_referenced_single_site_is_nonneutral(self): contig=contig, samples=self.samples, extended_events=extended_events, + seed=543, ) referenced_dfe = ts.metadata["stdpopsim"]["DFEs"][1] assert referenced_dfe["id"] == "one" @@ -2718,6 +2734,7 @@ def test_sweep(self, tmp_path): slim_burn_in=1, logfile=logfile, logfile_interval=1, + seed=654, ) in_sweep, outside_sweep, _ = self._fitness_per_generation( logfile=logfile, @@ -2761,6 +2778,7 @@ def test_sweep_meets_min_freq_at_start(self, tmp_path): slim_burn_in=1, logfile=logfile, logfile_interval=1, + seed=654, ) in_sweep, outside_sweep, rejections = self._fitness_per_generation( logfile=logfile, @@ -2808,6 +2826,7 @@ def test_sweep_meets_min_freq_at_end(self, tmp_path): slim_burn_in=1, logfile=logfile, logfile_interval=1, + seed=765, ) in_sweep, outside_sweep, rejections = self._fitness_per_generation( logfile=logfile, @@ -2889,6 +2908,7 @@ def test_global_sweep(self, tmp_path): slim_burn_in=1, logfile=logfile, logfile_interval=1, + seed=876, ) p0_in_sweep, p0_outside_sweep, _ = self._fitness_per_generation( logfile=logfile, @@ -2941,6 +2961,7 @@ def test_local_sweep(self, tmp_path): slim_burn_in=1, logfile=logfile, logfile_interval=1, + seed=987, ) p0_in_sweep, p0_outside_sweep, _ = self._fitness_per_generation( logfile=logfile, @@ -2996,6 +3017,7 @@ def test_sweeps_at_multiple_sites(self, tmp_path): slim_burn_in=1, logfile=logfile, logfile_interval=1, + seed=531, ) for i, _ in enumerate(pop_ids): in_sweep, outside_sweep, _ = self._fitness_per_generation( @@ -3048,6 +3070,7 @@ def test_sweep_with_background_selection(self, tmp_path): slim_burn_in=1, logfile=logfile, logfile_interval=1, + seed=642, ) p0_in_sweep, p0_outside_sweep, _ = self._fitness_per_generation( logfile=logfile, @@ -3095,6 +3118,7 @@ def test_stacked(self): contig=contig, samples=self.samples, slim_burn_in=10, + seed=753, ) is_stacked = [len(m.metadata["mutation_list"]) > 1 for m in ts.mutations()] if any(is_stacked): @@ -3117,6 +3141,7 @@ def test_msprime(self): demographic_model=self.model, contig=contig, samples=self.samples, + seed=864, ) if ts.num_mutations > 0: break @@ -3133,6 +3158,7 @@ def test_errors(self): demographic_model=self.model, contig=contig, samples=self.samples, + seed=975, ) if ts.num_mutations > 0: break @@ -3280,7 +3306,7 @@ def test_slim_population_size_diploid(self, caplog): contig = stdpopsim.Contig.basic_contig(length=1000, ploidy=2) model = stdpopsim.PiecewiseConstantSize(N) with caplog.at_level(logging.DEBUG): - engine.simulate(model, contig, samples={"pop_0": 2}, verbosity=2) + engine.simulate(model, contig, samples={"pop_0": 2}, verbosity=2, seed=9) log_str = " ".join([rec.getMessage() for rec in caplog.records]) # match: "1: p = sim.addSubpop(0, );" extract_ne = re.compile(".+1: p = sim.addSubpop\\(0, ([0-9]+)\\).+") @@ -3318,7 +3344,7 @@ def test_individual_ploidy(self): engine = stdpopsim.get_engine("slim") for ploidy in [1, 2]: contig = stdpopsim.Contig.basic_contig(length=1000, ploidy=ploidy) - ts = engine.simulate(model, contig, samples={"pop_0": 2}) + ts = engine.simulate(model, contig, samples={"pop_0": 2}, seed=8) assert ts.num_individuals == 2 assert ts.num_samples == 2 * ploidy individual = ts.tables.nodes.individual @@ -3330,7 +3356,7 @@ def test_haploidize_individuals(self): model = stdpopsim.PiecewiseConstantSize(N) engine = stdpopsim.get_engine("slim") contig = stdpopsim.Contig.basic_contig(length=1000, ploidy=2) - ts = engine.simulate(model, contig, samples={"pop_0": 3}) + ts = engine.simulate(model, contig, samples={"pop_0": 3}, seed=7) ts_hap = stdpopsim.utils.haploidize_individuals(ts) assert ts_hap.num_individuals == ts.num_individuals * 2 for i, j in zip(ts.samples(), ts_hap.samples()): From d6e040677997a233a9bf6ddf9632956620e60da5 Mon Sep 17 00:00:00 2001 From: Gregor Gorjanc Date: Mon, 23 Dec 2024 05:11:02 +0000 Subject: [PATCH 4/4] Squash one test warning that was not intended/needed (#1626) --- tests/test_models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_models.py b/tests/test_models.py index e4a499e57..24fa5df3c 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -438,7 +438,9 @@ def test_recombination_rate_match(self): contig = species.get_contig(length=100) assert model.recombination_rate != contig.recombination_map.mean_rate contig = species.get_contig( - length=100, recombination_rate=model.recombination_rate + length=100, + mutation_rate=model.mutation_rate, + recombination_rate=model.recombination_rate, ) assert model.recombination_rate == contig.recombination_map.mean_rate