diff --git a/analysis/dataset_definition.py b/analysis/dataset_definition.py
new file mode 100644
index 0000000..6202e23
--- /dev/null
+++ b/analysis/dataset_definition.py
@@ -0,0 +1,19 @@
+# Note: If you are using ehrQL to define your study population you need to:
+# (1) uncomment the ehrQL action in the project.yaml file (lines 14-18),
+# (2) delete the cohort-extractor action from the project.yaml file (lines 8-12), and
+# (3) delete the study_definition.py file.
+
+from ehrql import Dataset
+from ehrql.tables.beta.tpp import patients, practice_registrations
+
+dataset = Dataset()
+# Single reference date shared by the registration check and the age calculation.
+index_date = "2020-03-31"
+# True when a practice registration record exists for the patient on index_date.
+has_registration = practice_registrations.for_patient_on(
+    index_date
+).exists_for_patient()
+# Age on index_date, assigned to the dataset as `age`.
+dataset.age = patients.age_on(index_date)
+# Population: patients with a registration on index_date who are older than 17.
+dataset.define_population(has_registration & (dataset.age > 17))
diff --git a/analysis/study_definition.py b/analysis/study_definition.py
index 3b111d1..c9bb2d9 100644
--- a/analysis/study_definition.py
+++ b/analysis/study_definition.py
@@ -1,3 +1,7 @@
+# Note: If you are using cohortextractor to define your study population you need to:
+# (1) delete the ehrQL action in the project.yaml file (lines 14-18) and
+# (2) delete the dataset_definition.py file.
+
 from cohortextractor import StudyDefinition, patients, codelist, codelist_from_csv  # NOQA
 
 
diff --git a/project.yaml b/project.yaml
index fbdb81b..a369a7d 100644
--- a/project.yaml
+++ b/project.yaml
@@ -10,3 +10,9 @@ actions:
     outputs:
       highly_sensitive:
         cohort: output/input.csv.gz
+
+  # generate_dataset:
+  #   run: ehrql:v0 generate-dataset analysis/dataset_definition.py --output output/dataset.csv.gz
+  #   outputs:
+  #     highly_sensitive:
+  #       cohort: output/dataset.csv.gz