From b0451806eff855cca76f03a53d8ed654461eadb2 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Mon, 14 Aug 2023 12:42:11 -0700 Subject: [PATCH] [CT-2526] Add ability to automatically create metrics from semantic model measures (#8310) * Update semantic model parsing test to check `create_metric = true` functionality * Add `create_metric` boolean property to unparsed measure objects * Begin creating metrics from measures with `create_metric = True` * Add test ensuring partial parsing handles metrics generated from measures * Ensure partial parsing appropriately deletes metrics generated from semantic models * Add changie doc for addition * Separate generated metrics from parsed metrics for partial parsing I was doing a demo earlier today of this branch (minus this commit) and noticed something odd. When I changes a semantic model, metrics that should have been technically uneffected would get dropped. Basically if I made a change to a semantic model which had metrics in the same file, and then ran parse, those metrics defined in the same file would get dropped. Then with no other changes, if I ran parse again they would come back. What was happening was that parsed metrics and generated metrics were getting tracked the same way on the file objects for partial parsing. In 0787a7c7b67b10a55b0d7727eeb744df831d503c we began dropping all metrics tracked in a file objects when changes to semantic models were detected. Since parsed metrics and generated metrics were being tracked together on the file object, the parsed metrics were getting dropped as well. In this commit we begin separating out the tracking of generated metrics and parsed metrics on the file object, and now only drop the generated metrics when semantic models have a detected change. * Assert in test that semantic model partial parsing doesn't clobber regular metrics --- .../unreleased/Features-20230803-151824.yaml | 6 ++ core/dbt/contracts/files.py | 2 + core/dbt/contracts/graph/manifest.py | 7 ++- core/dbt/contracts/graph/unparsed.py | 1 + core/dbt/parser/partial.py | 8 +++ core/dbt/parser/schema_yaml_readers.py | 22 +++++++- .../test_semantic_model_parsing.py | 55 +++++++++++++++++-- 7 files changed, 93 insertions(+), 8 deletions(-) create mode 100644 .changes/unreleased/Features-20230803-151824.yaml diff --git a/.changes/unreleased/Features-20230803-151824.yaml b/.changes/unreleased/Features-20230803-151824.yaml new file mode 100644 index 00000000000..11d3184816a --- /dev/null +++ b/.changes/unreleased/Features-20230803-151824.yaml @@ -0,0 +1,6 @@ +kind: Features +body: 'Allow specification of `create_metric: true` on measures' +time: 2023-08-03T15:18:24.351003-07:00 +custom: + Author: QMalcolm + Issue: "8125" diff --git a/core/dbt/contracts/files.py b/core/dbt/contracts/files.py index f54533c38c1..955757a02f7 100644 --- a/core/dbt/contracts/files.py +++ b/core/dbt/contracts/files.py @@ -225,6 +225,8 @@ class SchemaSourceFile(BaseSourceFile): sources: List[str] = field(default_factory=list) exposures: List[str] = field(default_factory=list) metrics: List[str] = field(default_factory=list) + # metrics generated from semantic_model measures + generated_metrics: List[str] = field(default_factory=list) groups: List[str] = field(default_factory=list) # node patches contain models, seeds, snapshots, analyses ndp: List[str] = field(default_factory=list) diff --git a/core/dbt/contracts/graph/manifest.py b/core/dbt/contracts/graph/manifest.py index fdb607c45d8..e3dfe976986 100644 --- a/core/dbt/contracts/graph/manifest.py +++ b/core/dbt/contracts/graph/manifest.py @@ -1331,10 +1331,13 @@ def add_exposure(self, source_file: SchemaSourceFile, exposure: Exposure): self.exposures[exposure.unique_id] = exposure source_file.exposures.append(exposure.unique_id) - def add_metric(self, source_file: SchemaSourceFile, metric: Metric): + def add_metric(self, source_file: SchemaSourceFile, metric: Metric, generated: bool = False): _check_duplicates(metric, self.metrics) self.metrics[metric.unique_id] = metric - source_file.metrics.append(metric.unique_id) + if not generated: + source_file.metrics.append(metric.unique_id) + else: + source_file.generated_metrics.append(metric.unique_id) def add_group(self, source_file: SchemaSourceFile, group: Group): _check_duplicates(group, self.groups) diff --git a/core/dbt/contracts/graph/unparsed.py b/core/dbt/contracts/graph/unparsed.py index 585ac9cc3e0..52d46b4b5f6 100644 --- a/core/dbt/contracts/graph/unparsed.py +++ b/core/dbt/contracts/graph/unparsed.py @@ -701,6 +701,7 @@ class UnparsedMeasure(dbtClassMixin): agg_params: Optional[MeasureAggregationParameters] = None non_additive_dimension: Optional[UnparsedNonAdditiveDimension] = None agg_time_dimension: Optional[str] = None + create_metric: bool = False @dataclass diff --git a/core/dbt/parser/partial.py b/core/dbt/parser/partial.py index edcae83574a..3c6bc46bd74 100644 --- a/core/dbt/parser/partial.py +++ b/core/dbt/parser/partial.py @@ -895,6 +895,14 @@ def delete_schema_semantic_model(self, schema_file, semantic_model_dict): elif unique_id in self.saved_manifest.disabled: self.delete_disabled(unique_id, schema_file.file_id) + metrics = schema_file.generated_metrics.copy() + for unique_id in metrics: + if unique_id in self.saved_manifest.metrics: + self.saved_manifest.metrics.pop(unique_id) + schema_file.generated_metrics.remove(unique_id) + elif unique_id in self.saved_manifest.disabled: + self.delete_disabled(unique_id, schema_file.file_id) + def get_schema_element(self, elem_list, elem_name): for element in elem_list: if "name" in element and element["name"] == elem_name: diff --git a/core/dbt/parser/schema_yaml_readers.py b/core/dbt/parser/schema_yaml_readers.py index 2f2a3eb18e6..927bfdb9a9f 100644 --- a/core/dbt/parser/schema_yaml_readers.py +++ b/core/dbt/parser/schema_yaml_readers.py @@ -303,7 +303,7 @@ def _get_metric_type_params(self, type_params: UnparsedMetricTypeParams) -> Metr # input_measures=?, ) - def parse_metric(self, unparsed: UnparsedMetric): + def parse_metric(self, unparsed: UnparsedMetric, generated: bool = False): package_name = self.project.project_name unique_id = f"{NodeType.Metric}.{package_name}.{unparsed.name}" path = self.yaml.path.relative_path @@ -358,7 +358,7 @@ def parse_metric(self, unparsed: UnparsedMetric): # if the metric is disabled we do not want it included in the manifest, only in the disabled dict if parsed.config.enabled: - self.manifest.add_metric(self.yaml.file, parsed) + self.manifest.add_metric(self.yaml.file, parsed, generated) else: self.manifest.add_disabled(self.yaml.file, parsed) @@ -509,6 +509,19 @@ def _get_measures(self, unparsed_measures: List[UnparsedMeasure]) -> List[Measur ) return measures + def _create_metric(self, measure: UnparsedMeasure, enabled: bool) -> None: + unparsed_metric = UnparsedMetric( + name=measure.name, + label=measure.name, + type="simple", + type_params=UnparsedMetricTypeParams(measure=measure.name, expr=measure.name), + description=measure.description or f"Metric created from measure {measure.name}", + config={"enabled": enabled}, + ) + + parser = MetricParser(self.schema_parser, yaml=self.yaml) + parser.parse_metric(unparsed=unparsed_metric, generated=True) + def parse_semantic_model(self, unparsed: UnparsedSemanticModel): package_name = self.project.project_name unique_id = f"{NodeType.SemanticModel}.{package_name}.{unparsed.name}" @@ -550,6 +563,11 @@ def parse_semantic_model(self, unparsed: UnparsedSemanticModel): # No ability to disable a semantic model at this time self.manifest.add_semantic_model(self.yaml.file, parsed) + # Create a metric for each measure with `create_metric = True` + for measure in unparsed.measures: + if measure.create_metric is True: + self._create_metric(measure=measure, enabled=parsed.config.enabled) + def parse(self): for data in self.get_key_dicts(): try: diff --git a/tests/functional/semantic_models/test_semantic_model_parsing.py b/tests/functional/semantic_models/test_semantic_model_parsing.py index 6b0fe643691..0266641d75c 100644 --- a/tests/functional/semantic_models/test_semantic_model_parsing.py +++ b/tests/functional/semantic_models/test_semantic_model_parsing.py @@ -27,6 +27,7 @@ expr: revenue agg: sum agg_time_dimension: ds + create_metric: true - name: sum_of_things expr: 2 agg: sum @@ -65,12 +66,11 @@ type: primary metrics: - - name: records_with_revenue - label: "Number of records with revenue" - description: Total number of records with revenue + - name: simple_metric + label: Simple Metric type: simple type_params: - measure: has_revenue + measure: sum_of_things """ schema_without_semantic_model_yml = """models: @@ -126,6 +126,10 @@ def test_semantic_model_parsing(self, project): == f'"dbt"."{project.test_schema}"."fct_revenue"' ) assert len(semantic_model.measures) == 5 + # manifest should have one metric (that was created from a measure) + assert len(manifest.metrics) == 2 + metric = manifest.metrics["metric.test.txn_revenue"] + assert metric.name == "txn_revenue" def test_semantic_model_error(self, project): # Next, modify the default schema.yml to remove the semantic model. @@ -187,3 +191,46 @@ def test_semantic_model_deleted_partial_parsing(self, project): # Finally, verify that the manifest reflects the deletion assert "semantic_model.test.revenue" not in result.result.semantic_models + + def test_semantic_model_flipping_create_metric_partial_parsing(self, project): + generated_metric = "metric.test.txn_revenue" + # First, use the default schema.yml to define our semantic model, and + # run the dbt parse command + write_file(schema_yml, project.project_root, "models", "schema.yml") + runner = dbtRunner() + result = runner.invoke(["parse"]) + assert result.success + + # Verify the metric created by `create_metric: true` exists + metric = result.result.metrics[generated_metric] + assert metric.name == "txn_revenue" + + # --- Next, modify the default schema.yml to have no `create_metric: true` --- + no_create_metric_schema_yml = schema_yml.replace( + "create_metric: true", "create_metric: false" + ) + write_file(no_create_metric_schema_yml, project.project_root, "models", "schema.yml") + + # Now, run the dbt parse command again. + result = runner.invoke(["parse"]) + assert result.success + + # Verify the metric originally created by `create_metric: true` was removed + assert result.result.metrics.get(generated_metric) is None + + # Verify that partial parsing didn't clobber the normal metric + assert result.result.metrics.get("metric.test.simple_metric") is not None + + # --- Now bring it back --- + create_metric_schema_yml = schema_yml.replace( + "create_metric: false", "create_metric: true" + ) + write_file(create_metric_schema_yml, project.project_root, "models", "schema.yml") + + # Now, run the dbt parse command again. + result = runner.invoke(["parse"]) + assert result.success + + # Verify the metric originally created by `create_metric: true` was removed + metric = result.result.metrics[generated_metric] + assert metric.name == "txn_revenue"