Skip to content

Commit

Permalink
Fix multiple semantic models with generated metrics (#10451)
Browse files Browse the repository at this point in the history
  • Loading branch information
gshank authored Jul 17, 2024
1 parent bef2d20 commit 471b816
Show file tree
Hide file tree
Showing 8 changed files with 295 additions and 17 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20240716-133703.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Fix over deletion of generated_metrics in partial parsing
time: 2024-07-16T13:37:03.49651-04:00
custom:
Author: gshank
Issue: "10450"
41 changes: 40 additions & 1 deletion core/dbt/contracts/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,13 @@ class SchemaSourceFile(BaseSourceFile):
sources: List[str] = field(default_factory=list)
exposures: List[str] = field(default_factory=list)
metrics: List[str] = field(default_factory=list)
# metrics generated from semantic_model measures
# The following field will no longer be used. Leaving
# here to avoid breaking existing projects. To be removed
# later if possible.
generated_metrics: List[str] = field(default_factory=list)
# metrics generated from semantic_model measures. The key is
# the name of the semantic_model, so that we can find it later.
metrics_from_measures: Dict[str, Any] = field(default_factory=dict)
groups: List[str] = field(default_factory=list)
# node patches contain models, seeds, snapshots, analyses
ndp: List[str] = field(default_factory=list)
Expand Down Expand Up @@ -259,6 +264,40 @@ def get_tests(self, yaml_key, name):
return self.data_tests[yaml_key][name]
return []

def add_metrics_from_measures(self, semantic_model_name: str, metric_unique_id: str):
if self.generated_metrics:
# Probably not needed, but for safety sake, convert the
# old generated_metrics to metrics_from_measures.
self.fix_metrics_from_measures()
if semantic_model_name not in self.metrics_from_measures:
self.metrics_from_measures[semantic_model_name] = []
self.metrics_from_measures[semantic_model_name].append(metric_unique_id)

def fix_metrics_from_measures(self):
# Temporary method to fix up existing projects with a partial parse file.
# This should only be called if SchemaSourceFile in a msgpack
# pack manifest has an existing "generated_metrics" list, to turn it
# it into a "metrics_from_measures" dictionary, so that we can
# correctly partially parse.
# This code can be removed when "generated_metrics" is removed.
generated_metrics = self.generated_metrics
self.generated_metrics = [] # Should never be needed again
# For each metric_unique_id we loop through the semantic models
# looking for the name of the "measure" which generated the metric.
# When it's found, add it to "metrics_from_measures", with a key
# of the semantic_model name, and a list of metrics.
for metric_unique_id in generated_metrics:
parts = metric_unique_id.split(".")
# get the metric_name
metric_name = parts[-1]
if "semantic_models" in self.dict_from_yaml:
for sem_model in self.dict_from_yaml["semantic_models"]:
if "measures" in sem_model:
for measure in sem_model["measures"]:
if measure["name"] == metric_name:
self.add_metrics_from_measures(sem_model["name"], metric_unique_id)
break

def get_key_and_name_for_test(self, test_unique_id):
yaml_key = None
block_name = None
Expand Down
8 changes: 5 additions & 3 deletions core/dbt/contracts/graph/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1565,13 +1565,15 @@ def add_exposure(self, source_file: SchemaSourceFile, exposure: Exposure):
self.exposures[exposure.unique_id] = exposure
source_file.exposures.append(exposure.unique_id)

def add_metric(self, source_file: SchemaSourceFile, metric: Metric, generated: bool = False):
def add_metric(
self, source_file: SchemaSourceFile, metric: Metric, generated_from: Optional[str] = None
):
_check_duplicates(metric, self.metrics)
self.metrics[metric.unique_id] = metric
if not generated:
if not generated_from:
source_file.metrics.append(metric.unique_id)
else:
source_file.generated_metrics.append(metric.unique_id)
source_file.add_metrics_from_measures(generated_from, metric.unique_id)

def add_group(self, source_file: SchemaSourceFile, group: Group):
_check_duplicates(group, self.groups)
Expand Down
18 changes: 11 additions & 7 deletions core/dbt/parser/partial.py
Original file line number Diff line number Diff line change
Expand Up @@ -968,13 +968,17 @@ def delete_schema_semantic_model(self, schema_file, semantic_model_dict):
elif unique_id in self.saved_manifest.disabled:
self.delete_disabled(unique_id, schema_file.file_id)

metrics = schema_file.generated_metrics.copy()
for unique_id in metrics:
if unique_id in self.saved_manifest.metrics:
self.saved_manifest.metrics.pop(unique_id)
schema_file.generated_metrics.remove(unique_id)
elif unique_id in self.saved_manifest.disabled:
self.delete_disabled(unique_id, schema_file.file_id)
if schema_file.generated_metrics:
# If this partial parse file has an old "generated_metrics" list,
# call code to fix it up before processing.
schema_file.fix_metrics_from_measures()
if semantic_model_name in schema_file.metrics_from_measures:
for unique_id in schema_file.metrics_from_measures[semantic_model_name]:
if unique_id in self.saved_manifest.metrics:
self.saved_manifest.metrics.pop(unique_id)
elif unique_id in self.saved_manifest.disabled:
self.delete_disabled(unique_id, schema_file.file_id)
del schema_file.metrics_from_measures[semantic_model_name]

def delete_schema_unit_test(self, schema_file, unit_test_dict):
unit_test_name = unit_test_dict["name"]
Expand Down
17 changes: 12 additions & 5 deletions core/dbt/parser/schema_yaml_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ def _get_metric_type_params(self, unparsed_metric: UnparsedMetric) -> MetricType
# input_measures=?,
)

def parse_metric(self, unparsed: UnparsedMetric, generated: bool = False) -> None:
def parse_metric(self, unparsed: UnparsedMetric, generated_from: Optional[str] = None) -> None:
package_name = self.project.project_name
unique_id = f"{NodeType.Metric}.{package_name}.{unparsed.name}"
path = self.yaml.path.relative_path
Expand Down Expand Up @@ -449,7 +449,7 @@ def parse_metric(self, unparsed: UnparsedMetric, generated: bool = False) -> Non
# if the metric is disabled we do not want it included in the manifest, only in the disabled dict
assert isinstance(self.yaml.file, SchemaSourceFile)
if parsed.config.enabled:
self.manifest.add_metric(self.yaml.file, parsed, generated)
self.manifest.add_metric(self.yaml.file, parsed, generated_from)
else:
self.manifest.add_disabled(self.yaml.file, parsed)

Expand Down Expand Up @@ -604,7 +604,12 @@ def _get_measures(self, unparsed_measures: List[UnparsedMeasure]) -> List[Measur
)
return measures

def _create_metric(self, measure: UnparsedMeasure, enabled: bool) -> None:
def _create_metric(
self,
measure: UnparsedMeasure,
enabled: bool,
semantic_model_name: str,
) -> None:
unparsed_metric = UnparsedMetric(
name=measure.name,
label=measure.name,
Expand All @@ -615,7 +620,7 @@ def _create_metric(self, measure: UnparsedMeasure, enabled: bool) -> None:
)

parser = MetricParser(self.schema_parser, yaml=self.yaml)
parser.parse_metric(unparsed=unparsed_metric, generated=True)
parser.parse_metric(unparsed=unparsed_metric, generated_from=semantic_model_name)

def _generate_semantic_model_config(
self, target: UnparsedSemanticModel, fqn: List[str], package_name: str, rendered: bool
Expand Down Expand Up @@ -711,7 +716,9 @@ def parse_semantic_model(self, unparsed: UnparsedSemanticModel) -> None:
# Create a metric for each measure with `create_metric = True`
for measure in unparsed.measures:
if measure.create_metric is True:
self._create_metric(measure=measure, enabled=parsed.config.enabled)
self._create_metric(
measure=measure, enabled=parsed.config.enabled, semantic_model_name=parsed.name
)

def parse(self) -> None:
for data in self.get_key_dicts():
Expand Down
78 changes: 78 additions & 0 deletions tests/functional/semantic_models/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,3 +319,81 @@
select *
from final
"""

multi_sm_schema_yml = """
models:
- name: fct_revenue
description: This is the model fct_revenue.
semantic_models:
- name: revenue
description: This is the first semantic model.
model: ref('fct_revenue')
defaults:
agg_time_dimension: ds
measures:
- name: txn_revenue
expr: revenue
agg: sum
agg_time_dimension: ds
create_metric: true
- name: sum_of_things
expr: 2
agg: sum
agg_time_dimension: ds
dimensions:
- name: ds
type: time
expr: created_at
type_params:
time_granularity: day
entities:
- name: user
type: foreign
expr: user_id
- name: id
type: primary
- name: alt_revenue
description: This is the second revenue semantic model.
model: ref('fct_revenue')
defaults:
agg_time_dimension: ads
measures:
- name: alt_txn_revenue
expr: revenue
agg: sum
agg_time_dimension: ads
create_metric: true
- name: alt_sum_of_things
expr: 2
agg: sum
agg_time_dimension: ads
dimensions:
- name: ads
type: time
expr: created_at
type_params:
time_granularity: day
entities:
- name: user
type: foreign
expr: user_id
- name: id
type: primary
metrics:
- name: simple_metric
label: Simple Metric
type: simple
type_params:
measure: sum_of_things
"""
28 changes: 27 additions & 1 deletion tests/functional/semantic_models/test_semantic_model_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
import pytest

from dbt.contracts.graph.manifest import Manifest
from dbt.tests.util import write_file
from dbt.tests.util import run_dbt, write_file
from dbt_common.events.base_types import BaseEvent
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
from tests.functional.assertions.test_runner import dbtTestRunner
from tests.functional.semantic_models.fixtures import (
fct_revenue_sql,
metricflow_time_spine_sql,
multi_sm_schema_yml,
schema_without_semantic_model_yml,
schema_yml,
)
Expand Down Expand Up @@ -146,3 +147,28 @@ def test_semantic_model_flipping_create_metric_partial_parsing(self, project):
# Verify the metric originally created by `create_metric: true` was removed
metric = result.result.metrics[generated_metric]
assert metric.name == "txn_revenue"


class TestSemanticModelPartialParsingGeneratedMetrics:
@pytest.fixture(scope="class")
def models(self):
return {
"schema.yml": multi_sm_schema_yml,
"fct_revenue.sql": fct_revenue_sql,
"metricflow_time_spine.sql": metricflow_time_spine_sql,
}

def test_generated_metrics(self, project):
manifest = run_dbt(["parse"])
expected = {
"metric.test.simple_metric",
"metric.test.txn_revenue",
"metric.test.alt_txn_revenue",
}
assert set(manifest.metrics.keys()) == expected

# change description of 'revenue' semantic model
modified_schema_yml = multi_sm_schema_yml.replace("first", "FIRST")
write_file(modified_schema_yml, project.project_root, "models", "schema.yml")
manifest = run_dbt(["parse"])
assert set(manifest.metrics.keys()) == expected
Loading

0 comments on commit 471b816

Please sign in to comment.