Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix multiple semantic models with generated metrics #10451

Merged
merged 7 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20240716-133703.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Fix over deletion of generated_metrics in partial parsing
time: 2024-07-16T13:37:03.49651-04:00
custom:
Author: gshank
Issue: "10450"
41 changes: 40 additions & 1 deletion core/dbt/contracts/files.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to add some unittest to test this new logic added? I see the coverage is only 10%-ish.

Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,13 @@
sources: List[str] = field(default_factory=list)
exposures: List[str] = field(default_factory=list)
metrics: List[str] = field(default_factory=list)
# metrics generated from semantic_model measures
# The following field will no longer be used. Leaving
# here to avoid breaking existing projects. To be removed
# later if possible.
generated_metrics: List[str] = field(default_factory=list)
# metrics generated from semantic_model measures. The key is
# the name of the semantic_model, so that we can find it later.
metrics_from_measures: Dict[str, Any] = field(default_factory=dict)
groups: List[str] = field(default_factory=list)
# node patches contain models, seeds, snapshots, analyses
ndp: List[str] = field(default_factory=list)
Expand Down Expand Up @@ -259,6 +264,40 @@
return self.data_tests[yaml_key][name]
return []

def add_metrics_from_measures(self, semantic_model_name: str, metric_unique_id: str):
if self.generated_metrics:
# Probably not needed, but for safety sake, convert the
# old generated_metrics to metrics_from_measures.
self.fix_metrics_from_measures()

Check warning on line 271 in core/dbt/contracts/files.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/contracts/files.py#L271

Added line #L271 was not covered by tests
if semantic_model_name not in self.metrics_from_measures:
self.metrics_from_measures[semantic_model_name] = []
self.metrics_from_measures[semantic_model_name].append(metric_unique_id)

def fix_metrics_from_measures(self):
# Temporary method to fix up existing projects with a partial parse file.
# This should only be called if SchemaSourceFile in a msgpack
# pack manifest has an existing "generated_metrics" list, to turn it
# it into a "metrics_from_measures" dictionary, so that we can
# correctly partially parse.
# This code can be removed when "generated_metrics" is removed.
generated_metrics = self.generated_metrics
self.generated_metrics = [] # Should never be needed again
# For each metric_unique_id we loop through the semantic models
# looking for the name of the "measure" which generated the metric.
# When it's found, add it to "metrics_from_measures", with a key
# of the semantic_model name, and a list of metrics.
for metric_unique_id in generated_metrics:
parts = metric_unique_id.split(".")
# get the metric_name
metric_name = parts[-1]
if "semantic_models" in self.dict_from_yaml:
for sem_model in self.dict_from_yaml["semantic_models"]:
if "measures" in sem_model:
for measure in sem_model["measures"]:
if measure["name"] == metric_name:
self.add_metrics_from_measures(sem_model["name"], metric_unique_id)
break

def get_key_and_name_for_test(self, test_unique_id):
yaml_key = None
block_name = None
Expand Down
8 changes: 5 additions & 3 deletions core/dbt/contracts/graph/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1565,13 +1565,15 @@ def add_exposure(self, source_file: SchemaSourceFile, exposure: Exposure):
self.exposures[exposure.unique_id] = exposure
source_file.exposures.append(exposure.unique_id)

def add_metric(self, source_file: SchemaSourceFile, metric: Metric, generated: bool = False):
def add_metric(
self, source_file: SchemaSourceFile, metric: Metric, generated_from: Optional[str] = None
):
_check_duplicates(metric, self.metrics)
self.metrics[metric.unique_id] = metric
if not generated:
if not generated_from:
source_file.metrics.append(metric.unique_id)
else:
source_file.generated_metrics.append(metric.unique_id)
source_file.add_metrics_from_measures(generated_from, metric.unique_id)

def add_group(self, source_file: SchemaSourceFile, group: Group):
_check_duplicates(group, self.groups)
Expand Down
18 changes: 11 additions & 7 deletions core/dbt/parser/partial.py
Original file line number Diff line number Diff line change
Expand Up @@ -968,13 +968,17 @@
elif unique_id in self.saved_manifest.disabled:
self.delete_disabled(unique_id, schema_file.file_id)

metrics = schema_file.generated_metrics.copy()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So here we were deleting all generated metrics instead of just the one related to a semantic model

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct.

for unique_id in metrics:
if unique_id in self.saved_manifest.metrics:
self.saved_manifest.metrics.pop(unique_id)
schema_file.generated_metrics.remove(unique_id)
elif unique_id in self.saved_manifest.disabled:
self.delete_disabled(unique_id, schema_file.file_id)
if schema_file.generated_metrics:
# If this partial parse file has an old "generated_metrics" list,
# call code to fix it up before processing.
schema_file.fix_metrics_from_measures()

Check warning on line 974 in core/dbt/parser/partial.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/parser/partial.py#L974

Added line #L974 was not covered by tests
if semantic_model_name in schema_file.metrics_from_measures:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a bit of comment here?

for unique_id in schema_file.metrics_from_measures[semantic_model_name]:
if unique_id in self.saved_manifest.metrics:
self.saved_manifest.metrics.pop(unique_id)
elif unique_id in self.saved_manifest.disabled:
self.delete_disabled(unique_id, schema_file.file_id)

Check warning on line 980 in core/dbt/parser/partial.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/parser/partial.py#L979-L980

Added lines #L979 - L980 were not covered by tests
del schema_file.metrics_from_measures[semantic_model_name]

def delete_schema_unit_test(self, schema_file, unit_test_dict):
unit_test_name = unit_test_dict["name"]
Expand Down
17 changes: 12 additions & 5 deletions core/dbt/parser/schema_yaml_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ def _get_metric_type_params(self, unparsed_metric: UnparsedMetric) -> MetricType
# input_measures=?,
)

def parse_metric(self, unparsed: UnparsedMetric, generated: bool = False) -> None:
def parse_metric(self, unparsed: UnparsedMetric, generated_from: Optional[str] = None) -> None:
package_name = self.project.project_name
unique_id = f"{NodeType.Metric}.{package_name}.{unparsed.name}"
path = self.yaml.path.relative_path
Expand Down Expand Up @@ -446,7 +446,7 @@ def parse_metric(self, unparsed: UnparsedMetric, generated: bool = False) -> Non
# if the metric is disabled we do not want it included in the manifest, only in the disabled dict
assert isinstance(self.yaml.file, SchemaSourceFile)
if parsed.config.enabled:
self.manifest.add_metric(self.yaml.file, parsed, generated)
self.manifest.add_metric(self.yaml.file, parsed, generated_from)
else:
self.manifest.add_disabled(self.yaml.file, parsed)

Expand Down Expand Up @@ -601,7 +601,12 @@ def _get_measures(self, unparsed_measures: List[UnparsedMeasure]) -> List[Measur
)
return measures

def _create_metric(self, measure: UnparsedMeasure, enabled: bool) -> None:
def _create_metric(
self,
measure: UnparsedMeasure,
enabled: bool,
semantic_model_name: str,
) -> None:
unparsed_metric = UnparsedMetric(
name=measure.name,
label=measure.name,
Expand All @@ -612,7 +617,7 @@ def _create_metric(self, measure: UnparsedMeasure, enabled: bool) -> None:
)

parser = MetricParser(self.schema_parser, yaml=self.yaml)
parser.parse_metric(unparsed=unparsed_metric, generated=True)
parser.parse_metric(unparsed=unparsed_metric, generated_from=semantic_model_name)

def _generate_semantic_model_config(
self, target: UnparsedSemanticModel, fqn: List[str], package_name: str, rendered: bool
Expand Down Expand Up @@ -708,7 +713,9 @@ def parse_semantic_model(self, unparsed: UnparsedSemanticModel) -> None:
# Create a metric for each measure with `create_metric = True`
for measure in unparsed.measures:
if measure.create_metric is True:
self._create_metric(measure=measure, enabled=parsed.config.enabled)
self._create_metric(
measure=measure, enabled=parsed.config.enabled, semantic_model_name=parsed.name
)

def parse(self) -> None:
for data in self.get_key_dicts():
Expand Down
78 changes: 78 additions & 0 deletions tests/functional/semantic_models/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,3 +319,81 @@
select *
from final
"""

multi_sm_schema_yml = """
models:
- name: fct_revenue
description: This is the model fct_revenue.
semantic_models:
- name: revenue
description: This is the first semantic model.
model: ref('fct_revenue')
defaults:
agg_time_dimension: ds
measures:
- name: txn_revenue
expr: revenue
agg: sum
agg_time_dimension: ds
create_metric: true
- name: sum_of_things
expr: 2
agg: sum
agg_time_dimension: ds
dimensions:
- name: ds
type: time
expr: created_at
type_params:
time_granularity: day
entities:
- name: user
type: foreign
expr: user_id
- name: id
type: primary
- name: alt_revenue
description: This is the second revenue semantic model.
model: ref('fct_revenue')
defaults:
agg_time_dimension: ads
measures:
- name: alt_txn_revenue
expr: revenue
agg: sum
agg_time_dimension: ads
create_metric: true
- name: alt_sum_of_things
expr: 2
agg: sum
agg_time_dimension: ads
dimensions:
- name: ads
type: time
expr: created_at
type_params:
time_granularity: day
entities:
- name: user
type: foreign
expr: user_id
- name: id
type: primary
metrics:
- name: simple_metric
label: Simple Metric
type: simple
type_params:
measure: sum_of_things
"""
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
import pytest

from dbt.contracts.graph.manifest import Manifest
from dbt.tests.util import write_file
from dbt.tests.util import run_dbt, write_file
from dbt_common.events.base_types import BaseEvent
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
from tests.functional.assertions.test_runner import dbtTestRunner
from tests.functional.semantic_models.fixtures import (
fct_revenue_sql,
metricflow_time_spine_sql,
multi_sm_schema_yml,
schema_without_semantic_model_yml,
schema_yml,
)
Expand Down Expand Up @@ -146,3 +147,28 @@ def test_semantic_model_flipping_create_metric_partial_parsing(self, project):
# Verify the metric originally created by `create_metric: true` was removed
metric = result.result.metrics[generated_metric]
assert metric.name == "txn_revenue"


class TestSemanticModelPartialParsingGeneratedMetrics:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice re-pro case!!!

@pytest.fixture(scope="class")
def models(self):
return {
"schema.yml": multi_sm_schema_yml,
"fct_revenue.sql": fct_revenue_sql,
"metricflow_time_spine.sql": metricflow_time_spine_sql,
}

def test_generated_metrics(self, project):
manifest = run_dbt(["parse"])
expected = {
"metric.test.simple_metric",
"metric.test.txn_revenue",
"metric.test.alt_txn_revenue",
}
assert set(manifest.metrics.keys()) == expected

# change description of 'revenue' semantic model
modified_schema_yml = multi_sm_schema_yml.replace("first", "FIRST")
write_file(modified_schema_yml, project.project_root, "models", "schema.yml")
manifest = run_dbt(["parse"])
assert set(manifest.metrics.keys()) == expected
Loading
Loading