Skip to content

Commit

Permalink
Plumbing for next gen measures dummy data
Browse files Browse the repository at this point in the history
  • Loading branch information
rebkwok committed Oct 24, 2024
1 parent ec01d84 commit 3925a8c
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 2 deletions.
14 changes: 14 additions & 0 deletions ehrql/measures/measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,20 @@ def configure_dummy_data(self, *, population_size):
```
"""
self.dummy_data_config.population_size = population_size
self.dummy_data_config.next_gen = False

def configure_next_gen_dummy_data(self, *, population_size):
    """
    Switch dummy data generation over to the 'next generation' generator and
    set the number of patients it should produce.

    `population_size` must be passed by keyword. Calling this stores the
    requested size on the dummy data config and flags the config as next-gen.

    Note that this feature is currently experimental and is not fully
    documented yet.
    ```py
    measures.configure_next_gen_dummy_data(population_size=10000)
    ```
    """
    config = self.dummy_data_config
    config.next_gen = True
    config.population_size = population_size

def configure_disclosure_control(self, *, enabled=True):
"""
Expand Down
28 changes: 28 additions & 0 deletions tests/integration/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,34 @@ def test_generate_measures_dummy_data_generated(tmp_path, disclosure_control_ena
)


@pytest.mark.parametrize("disclosure_control_enabled", [False, True])
def test_generate_measures_next_gen_dummy_data_generated(
    tmp_path, disclosure_control_enabled
):
    """
    Check that `generate_measures` writes a measures CSV when the definition
    file opts in to next-gen dummy data, with disclosure control both
    disabled and enabled.
    """
    measure_definitions = tmp_path / "measures.py"
    measure_definitions.write_text(
        MEASURE_DEFINITIONS
        + "\nmeasures.configure_next_gen_dummy_data(population_size=10)"
        # Apply the parametrized value; previously it was ignored, so both
        # parametrized runs exercised exactly the same configuration.
        + "\nmeasures.configure_disclosure_control("
        + f"enabled={disclosure_control_enabled})"
    )
    output_file = tmp_path / "output.csv"

    generate_measures(
        measure_definitions,
        output_file,
        # Defaults
        dsn=None,
        backend_class=None,
        query_engine_class=None,
        dummy_tables_path=None,
        dummy_data_file=None,
        environ={},
        user_args=(),
    )
    # Only the header row is checked, as the generated values are not fixed.
    assert output_file.read_text().startswith(
        "measure,interval_start,interval_end,ratio,numerator,denominator,sex"
    )


@pytest.mark.parametrize("disclosure_control_enabled", [False, True])
def test_generate_measures_dummy_data_supplied(tmp_path, disclosure_control_enabled):
measure_definitions = tmp_path / "measures.py"
Expand Down
10 changes: 8 additions & 2 deletions tests/unit/measures/test_dummy_data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from datetime import date

import pytest

from ehrql import years
from ehrql.measures import INTERVAL, DummyMeasuresDataGenerator, Measures
from ehrql.tables import Constraint, EventFrame, PatientFrame, Series, table
Expand Down Expand Up @@ -87,7 +89,11 @@ def test_population_is_nonzero_when_no_groups():
assert generator.generator.population_size > 0


def test_configured_population_size():
@pytest.mark.parametrize(
"configure_dummy_data_method",
["configure_dummy_data", "configure_next_gen_dummy_data"],
)
def test_configured_population_size(configure_dummy_data_method):
measures = Measures()
measures.define_measure(
"had_event",
Expand All @@ -96,7 +102,7 @@ def test_configured_population_size():
intervals=years(1).starting_on("2020-01-01"),
)

measures.configure_dummy_data(population_size=10)
getattr(measures, configure_dummy_data_method)(population_size=10)

generator = DummyMeasuresDataGenerator(measures, measures.dummy_data_config)
assert generator.generator.population_size == 10

0 comments on commit 3925a8c

Please sign in to comment.