diff --git a/.changes/unreleased/Fixes-20240716-133703.yaml b/.changes/unreleased/Fixes-20240716-133703.yaml
new file mode 100644
index 00000000000..e7063d696bf
--- /dev/null
+++ b/.changes/unreleased/Fixes-20240716-133703.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Fix over-deletion of generated_metrics in partial parsing
+time: 2024-07-16T13:37:03.49651-04:00
+custom:
+  Author: gshank
+  Issue: "10450"
diff --git a/core/dbt/contracts/files.py b/core/dbt/contracts/files.py
index 2c78e97f977..793451c0b00 100644
--- a/core/dbt/contracts/files.py
+++ b/core/dbt/contracts/files.py
@@ -192,8 +192,13 @@ class SchemaSourceFile(BaseSourceFile):
     sources: List[str] = field(default_factory=list)
     exposures: List[str] = field(default_factory=list)
     metrics: List[str] = field(default_factory=list)
-    # metrics generated from semantic_model measures
+    # The following field will no longer be used. Leaving it
+    # here to avoid breaking existing projects. To be removed
+    # later if possible.
     generated_metrics: List[str] = field(default_factory=list)
+    # metrics generated from semantic_model measures. The key is
+    # the name of the semantic_model, so that we can find it later.
+    metrics_from_measures: Dict[str, Any] = field(default_factory=dict)
     groups: List[str] = field(default_factory=list)
     # node patches contain models, seeds, snapshots, analyses
     ndp: List[str] = field(default_factory=list)
@@ -259,6 +264,40 @@ def get_tests(self, yaml_key, name):
                 return self.data_tests[yaml_key][name]
         return []
 
+    def add_metrics_from_measures(self, semantic_model_name: str, metric_unique_id: str):
+        if self.generated_metrics:
+            # Probably not needed, but for safety's sake, convert the
+            # old generated_metrics to metrics_from_measures.
+            self.fix_metrics_from_measures()
+        if semantic_model_name not in self.metrics_from_measures:
+            self.metrics_from_measures[semantic_model_name] = []
+        self.metrics_from_measures[semantic_model_name].append(metric_unique_id)
+
+    def fix_metrics_from_measures(self):
+        # Temporary method to fix up existing projects with a partial parse file.
+        # This should only be called if a SchemaSourceFile in a msgpack-packed
+        # manifest has an existing "generated_metrics" list, to turn it
+        # into a "metrics_from_measures" dictionary, so that we can
+        # correctly partially parse.
+        # This code can be removed when "generated_metrics" is removed.
+        generated_metrics = self.generated_metrics
+        self.generated_metrics = []  # Should never be needed again
+        # For each metric_unique_id we loop through the semantic models
+        # looking for the name of the "measure" which generated the metric.
+        # When it's found, add it to "metrics_from_measures", keyed by the
+        # semantic_model name, with a list of metric unique_ids as the value.
+        for metric_unique_id in generated_metrics:
+            parts = metric_unique_id.split(".")
+            # get the metric_name
+            metric_name = parts[-1]
+            if "semantic_models" in self.dict_from_yaml:
+                for sem_model in self.dict_from_yaml["semantic_models"]:
+                    if "measures" in sem_model:
+                        for measure in sem_model["measures"]:
+                            if measure["name"] == metric_name:
+                                self.add_metrics_from_measures(sem_model["name"], metric_unique_id)
+                                break
+
     def get_key_and_name_for_test(self, test_unique_id):
         yaml_key = None
         block_name = None
diff --git a/core/dbt/contracts/graph/manifest.py b/core/dbt/contracts/graph/manifest.py
index 267abc6a23a..46b2521eb2c 100644
--- a/core/dbt/contracts/graph/manifest.py
+++ b/core/dbt/contracts/graph/manifest.py
@@ -1565,13 +1565,15 @@ def add_exposure(self, source_file: SchemaSourceFile, exposure: Exposure):
         self.exposures[exposure.unique_id] = exposure
         source_file.exposures.append(exposure.unique_id)
 
-    def add_metric(self, source_file: SchemaSourceFile, metric: Metric, generated: bool = False):
+    def add_metric(
+        self, source_file: SchemaSourceFile, metric: Metric, generated_from: Optional[str] = None
+    ):
         _check_duplicates(metric, self.metrics)
         self.metrics[metric.unique_id] = metric
-        if not generated:
+        if not generated_from:
             source_file.metrics.append(metric.unique_id)
         else:
-            source_file.generated_metrics.append(metric.unique_id)
+            source_file.add_metrics_from_measures(generated_from, metric.unique_id)
 
     def add_group(self, source_file: SchemaSourceFile, group: Group):
         _check_duplicates(group, self.groups)
diff --git a/core/dbt/parser/partial.py b/core/dbt/parser/partial.py
index c5060745299..db3400dbc58 100644
--- a/core/dbt/parser/partial.py
+++ b/core/dbt/parser/partial.py
@@ -968,13 +968,17 @@ def delete_schema_semantic_model(self, schema_file, semantic_model_dict):
             elif unique_id in self.saved_manifest.disabled:
                 self.delete_disabled(unique_id, schema_file.file_id)
 
-        metrics = schema_file.generated_metrics.copy()
-        for unique_id in metrics:
-            if unique_id in self.saved_manifest.metrics:
-                self.saved_manifest.metrics.pop(unique_id)
-                schema_file.generated_metrics.remove(unique_id)
-            elif unique_id in self.saved_manifest.disabled:
-                self.delete_disabled(unique_id, schema_file.file_id)
+        if schema_file.generated_metrics:
+            # If this partial parse file has an old "generated_metrics" list,
+            # call code to fix it up before processing.
+            schema_file.fix_metrics_from_measures()
+        if semantic_model_name in schema_file.metrics_from_measures:
+            for unique_id in schema_file.metrics_from_measures[semantic_model_name]:
+                if unique_id in self.saved_manifest.metrics:
+                    self.saved_manifest.metrics.pop(unique_id)
+                elif unique_id in self.saved_manifest.disabled:
+                    self.delete_disabled(unique_id, schema_file.file_id)
+            del schema_file.metrics_from_measures[semantic_model_name]
 
     def delete_schema_unit_test(self, schema_file, unit_test_dict):
         unit_test_name = unit_test_dict["name"]
diff --git a/core/dbt/parser/schema_yaml_readers.py b/core/dbt/parser/schema_yaml_readers.py
index 86ac10d7545..13c6dd3de0a 100644
--- a/core/dbt/parser/schema_yaml_readers.py
+++ b/core/dbt/parser/schema_yaml_readers.py
@@ -389,7 +389,7 @@ def _get_metric_type_params(self, unparsed_metric: UnparsedMetric) -> MetricType
             # input_measures=?,
         )
 
-    def parse_metric(self, unparsed: UnparsedMetric, generated: bool = False) -> None:
+    def parse_metric(self, unparsed: UnparsedMetric, generated_from: Optional[str] = None) -> None:
         package_name = self.project.project_name
         unique_id = f"{NodeType.Metric}.{package_name}.{unparsed.name}"
         path = self.yaml.path.relative_path
@@ -446,7 +446,7 @@ def parse_metric(self, unparsed: UnparsedMetric, generated: bool = False) -> Non
         # if the metric is disabled we do not want it included in the manifest, only in the disabled dict
         assert isinstance(self.yaml.file, SchemaSourceFile)
         if parsed.config.enabled:
-            self.manifest.add_metric(self.yaml.file, parsed, generated)
+            self.manifest.add_metric(self.yaml.file, parsed, generated_from)
         else:
            self.manifest.add_disabled(self.yaml.file, parsed)
 
@@ -601,7 +601,12 @@ def _get_measures(self, unparsed_measures: List[UnparsedMeasure]) -> List[Measur
            )
        return measures
 
-    def _create_metric(self, measure: UnparsedMeasure, enabled: bool) -> None:
+    def _create_metric(
+        self,
+        measure: UnparsedMeasure,
+        enabled: bool,
+        semantic_model_name: str,
+    ) -> None:
        unparsed_metric = UnparsedMetric(
            name=measure.name,
            label=measure.name,
@@ -612,7 +617,7 @@ def _create_metric(self, measure: UnparsedMeasure, enabled: bool) -> None:
        )
 
        parser = MetricParser(self.schema_parser, yaml=self.yaml)
-        parser.parse_metric(unparsed=unparsed_metric, generated=True)
+        parser.parse_metric(unparsed=unparsed_metric, generated_from=semantic_model_name)
 
    def _generate_semantic_model_config(
        self, target: UnparsedSemanticModel, fqn: List[str], package_name: str, rendered: bool
@@ -708,7 +713,9 @@ def parse_semantic_model(self, unparsed: UnparsedSemanticModel) -> None:
        # Create a metric for each measure with `create_metric = True`
        for measure in unparsed.measures:
            if measure.create_metric is True:
-                self._create_metric(measure=measure, enabled=parsed.config.enabled)
+                self._create_metric(
+                    measure=measure, enabled=parsed.config.enabled, semantic_model_name=parsed.name
+                )
 
    def parse(self) -> None:
        for data in self.get_key_dicts():
diff --git a/tests/functional/semantic_models/fixtures.py b/tests/functional/semantic_models/fixtures.py
index 8084af09c33..46b7fd86d06 100644
--- a/tests/functional/semantic_models/fixtures.py
+++ b/tests/functional/semantic_models/fixtures.py
@@ -319,3 +319,81 @@
 
 select * from final
 """
+
+multi_sm_schema_yml = """
+models:
+  - name: fct_revenue
+    description: This is the model fct_revenue.
+
+semantic_models:
+  - name: revenue
+    description: This is the first semantic model.
+    model: ref('fct_revenue')
+
+    defaults:
+      agg_time_dimension: ds
+
+    measures:
+      - name: txn_revenue
+        expr: revenue
+        agg: sum
+        agg_time_dimension: ds
+        create_metric: true
+      - name: sum_of_things
+        expr: 2
+        agg: sum
+        agg_time_dimension: ds
+
+    dimensions:
+      - name: ds
+        type: time
+        expr: created_at
+        type_params:
+          time_granularity: day
+
+    entities:
+      - name: user
+        type: foreign
+        expr: user_id
+      - name: id
+        type: primary
+
+  - name: alt_revenue
+    description: This is the second revenue semantic model.
+    model: ref('fct_revenue')
+
+    defaults:
+      agg_time_dimension: ads
+
+    measures:
+      - name: alt_txn_revenue
+        expr: revenue
+        agg: sum
+        agg_time_dimension: ads
+        create_metric: true
+      - name: alt_sum_of_things
+        expr: 2
+        agg: sum
+        agg_time_dimension: ads
+
+    dimensions:
+      - name: ads
+        type: time
+        expr: created_at
+        type_params:
+          time_granularity: day
+
+    entities:
+      - name: user
+        type: foreign
+        expr: user_id
+      - name: id
+        type: primary
+
+metrics:
+  - name: simple_metric
+    label: Simple Metric
+    type: simple
+    type_params:
+      measure: sum_of_things
+"""
diff --git a/tests/functional/semantic_models/test_semantic_model_parsing.py b/tests/functional/semantic_models/test_semantic_model_parsing.py
index 6382d1ed908..e89c41f9d8d 100644
--- a/tests/functional/semantic_models/test_semantic_model_parsing.py
+++ b/tests/functional/semantic_models/test_semantic_model_parsing.py
@@ -3,13 +3,14 @@
 import pytest
 
 from dbt.contracts.graph.manifest import Manifest
-from dbt.tests.util import write_file
+from dbt.tests.util import run_dbt, write_file
 from dbt_common.events.base_types import BaseEvent
 from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
 from tests.functional.assertions.test_runner import dbtTestRunner
 from tests.functional.semantic_models.fixtures import (
     fct_revenue_sql,
     metricflow_time_spine_sql,
+    multi_sm_schema_yml,
     schema_without_semantic_model_yml,
     schema_yml,
 )
@@ -146,3 +147,28 @@
         # Verify the metric originally created by `create_metric: true` was removed
         metric = result.result.metrics[generated_metric]
         assert metric.name == "txn_revenue"
+
+
+class TestSemanticModelPartialParsingGeneratedMetrics:
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "schema.yml": multi_sm_schema_yml,
+            "fct_revenue.sql": fct_revenue_sql,
+            "metricflow_time_spine.sql": metricflow_time_spine_sql,
+        }
+
+    def test_generated_metrics(self, project):
+        manifest = run_dbt(["parse"])
+        expected = {
+            "metric.test.simple_metric",
+            "metric.test.txn_revenue",
+            "metric.test.alt_txn_revenue",
+        }
+        assert set(manifest.metrics.keys()) == expected
+
+        # change description of 'revenue' semantic model
+        modified_schema_yml = multi_sm_schema_yml.replace("first", "FIRST")
+        write_file(modified_schema_yml, project.project_root, "models", "schema.yml")
+        manifest = run_dbt(["parse"])
+        assert set(manifest.metrics.keys()) == expected
diff --git a/tests/unit/contracts/files/test_schema_source_file.py b/tests/unit/contracts/files/test_schema_source_file.py
new file mode 100644
index 00000000000..6886c262bf6
--- /dev/null
+++ b/tests/unit/contracts/files/test_schema_source_file.py
@@ -0,0 +1,116 @@
+from dbt.contracts.files import SchemaSourceFile
+
+
+def test_fix_metrics_from_measure():
+    # This is a test for converting "generated_metrics" to "metrics_from_measures"
+    schema_source_file = {
+        "path": {
+            "searched_path": "models",
+            "relative_path": "schema.yml",
"modification_time": 1721228094.7544806, + "project_root": "/Users/a_user/sample_project", + }, + "checksum": { + "name": "sha256", + "checksum": "63130d480a44a481aa0adc0a8469dccbb72ea36cc09f06683a584a31339f362e", + }, + "project_name": "test", + "parse_file_type": "schema", + "dfy": { + "models": [{"name": "fct_revenue", "description": "This is the model fct_revenue."}], + "semantic_models": [ + { + "name": "revenue", + "description": "This is the FIRST semantic model.", + "model": "ref('fct_revenue')", + "defaults": {"agg_time_dimension": "ds"}, + "measures": [ + { + "name": "txn_revenue", + "expr": "revenue", + "agg": "sum", + "agg_time_dimension": "ds", + "create_metric": True, + }, + { + "name": "sum_of_things", + "expr": 2, + "agg": "sum", + "agg_time_dimension": "ds", + }, + ], + "dimensions": [ + { + "name": "ds", + "type": "time", + "expr": "created_at", + "type_params": {"time_granularity": "day"}, + } + ], + "entities": [ + {"name": "user", "type": "foreign", "expr": "user_id"}, + {"name": "id", "type": "primary"}, + ], + }, + { + "name": "alt_revenue", + "description": "This is the second revenue semantic model.", + "model": "ref('fct_revenue')", + "defaults": {"agg_time_dimension": "ads"}, + "measures": [ + { + "name": "alt_txn_revenue", + "expr": "revenue", + "agg": "sum", + "agg_time_dimension": "ads", + "create_metric": True, + }, + { + "name": "alt_sum_of_things", + "expr": 2, + "agg": "sum", + "agg_time_dimension": "ads", + }, + ], + "dimensions": [ + { + "name": "ads", + "type": "time", + "expr": "created_at", + "type_params": {"time_granularity": "day"}, + } + ], + "entities": [ + {"name": "user", "type": "foreign", "expr": "user_id"}, + {"name": "id", "type": "primary"}, + ], + }, + ], + "metrics": [ + { + "name": "simple_metric", + "label": "Simple Metric", + "type": "simple", + "type_params": {"measure": "sum_of_things"}, + } + ], + }, + "data_tests": {}, + "metrics": ["metric.test.simple_metric"], + "generated_metrics": ["metric.test.txn_revenue", "metric.test.alt_txn_revenue"], + "metrics_from_measures": {}, + "ndp": ["model.test.fct_revenue"], + "semantic_models": ["semantic_model.test.revenue", "semantic_model.test.alt_revenue"], + "mcp": {}, + "env_vars": {}, + } + + expected_metrics_from_measures = { + "revenue": ["metric.test.txn_revenue"], + "alt_revenue": ["metric.test.alt_txn_revenue"], + } + ssf = SchemaSourceFile.from_dict(schema_source_file) + assert ssf + ssf.fix_metrics_from_measures() + assert ssf.generated_metrics == [] + assert ssf.metrics_from_measures == expected_metrics_from_measures