Skip to content

Commit

Permalink
Clean up FilterLinkableInstancesWithLeadingLink logic and simplify co…
Browse files Browse the repository at this point in the history
…mments
  • Loading branch information
courtneyholcomb committed Nov 2, 2024
1 parent 7ae25ac commit 7dc4e60
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 32 deletions.
42 changes: 17 additions & 25 deletions metricflow/plan_conversion/dataflow_to_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,25 +496,17 @@ def visit_join_on_entities_node(self, node: JoinOnEntitiesNode) -> SqlDataSet:
sql_join_descs.append(sql_join_desc)

if join_on_entity:
# Remove the linkable instances with the join_on_entity as the leading link as the next step adds the
# link. This is to avoid cases where there is a primary entity and a dimension in the data set, and we
# create an instance in the next step that has the same entity link.
# e.g. a data set has the dimension "listing__country_latest" and "listing" is a primary entity in the
# data set. The next step would create an instance like "listing__listing__country_latest" without this
# filter.
# TODO: test if this transformation is necessary and remove it if not. This adds a lot of clutter to the function.
right_instance_set_filtered = FilterLinkableInstancesWithLeadingLink(join_on_entity).transform(
right_data_set.instance_set
)

# After the right data set is joined, we need to change the links to indicate that they a join was used to
# satisfy them. For example, if the right dataset contains the "country" dimension, and "user_id" is the
# join_on_entity, then the joined data set should have the "user__country" dimension.
# Remove any instances that already have the join_on_entity as the leading link. This will prevent a duplicate
# entity link when we add it in the next step.
right_instance_set_filtered = FilterLinkableInstancesWithLeadingLink(
join_on_entity.reference
).transform(right_data_set.instance_set)

# After the right data set is joined, update the entity links to indicate that joining on the entity was
# required to reach the spec. If the "country" dimension was joined and "user_id" is the join_on_entity,
# then the joined data set should have the "user__country" dimension.
new_instances: Tuple[MdoInstance, ...] = ()
for original_instance in right_instance_set_filtered.linkable_instances:
# Is this necessary? Does it even work? i.e. diff types here
if original_instance.spec == join_on_entity:
continue
new_instance = original_instance.with_entity_prefix(
join_on_entity.reference, column_association_resolver=self._column_association_resolver
)
Expand Down Expand Up @@ -1167,9 +1159,9 @@ def visit_metric_time_dimension_transform_node(self, node: MetricTimeDimensionTr
spec=metric_time_dimension_spec,
)
)
output_column_to_input_column[metric_time_dimension_column_association.column_name] = (
matching_time_dimension_instance.associated_column.column_name
)
output_column_to_input_column[
metric_time_dimension_column_association.column_name
] = matching_time_dimension_instance.associated_column.column_name

output_instance_set = InstanceSet(
measure_instances=tuple(output_measure_instances),
Expand Down Expand Up @@ -1406,11 +1398,11 @@ def visit_join_to_time_spine_node(self, node: JoinToTimeSpineNode) -> SqlDataSet
f"indicates it may have been configured incorrectly. Expected: {agg_time_dim_for_join_with_base_grain};"
f" Got: {[instance.spec for instance in time_spine_dataset.instance_set.time_dimension_instances]}"
)
time_spine_column_select_expr: Union[SqlColumnReferenceExpression, SqlDateTruncExpression] = (
SqlColumnReferenceExpression.create(
SqlColumnReference(
table_alias=time_spine_alias, column_name=original_time_spine_dim_instance.spec.qualified_name
)
time_spine_column_select_expr: Union[
SqlColumnReferenceExpression, SqlDateTruncExpression
] = SqlColumnReferenceExpression.create(
SqlColumnReference(
table_alias=time_spine_alias, column_name=original_time_spine_dim_instance.spec.qualified_name
)
)

Expand Down
12 changes: 5 additions & 7 deletions metricflow/plan_conversion/instance_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from itertools import chain
from typing import Dict, List, Optional, Sequence, Tuple

from dbt_semantic_interfaces.references import MetricReference, SemanticModelReference
from dbt_semantic_interfaces.references import EntityReference, MetricReference, SemanticModelReference
from dbt_semantic_interfaces.type_enums.aggregation_type import AggregationType
from dbt_semantic_interfaces.type_enums.date_part import DatePart
from dbt_semantic_interfaces.type_enums.time_granularity import TimeGranularity
Expand All @@ -29,7 +29,6 @@
from metricflow_semantics.model.semantics.metric_lookup import MetricLookup
from metricflow_semantics.model.semantics.semantic_model_lookup import SemanticModelLookup
from metricflow_semantics.specs.column_assoc import ColumnAssociationResolver
from metricflow_semantics.specs.entity_spec import LinklessEntitySpec
from metricflow_semantics.specs.instance_spec import InstanceSpec, LinkableInstanceSpec
from metricflow_semantics.specs.measure_spec import MeasureSpec, MetricInputMeasureSpec
from metricflow_semantics.specs.spec_set import InstanceSpecSet
Expand Down Expand Up @@ -390,15 +389,14 @@ class FilterLinkableInstancesWithLeadingLink(InstanceSetTransform[InstanceSet]):
e.g. Remove "listing__country" if the specified link is "listing".
"""

def __init__(self, entity_link: LinklessEntitySpec) -> None:
def __init__(self, entity_link: EntityReference) -> None:
"""Remove elements with this link as the first element in "entity_links"."""
self._entity_link = entity_link

def _should_pass(self, linkable_spec: LinkableInstanceSpec) -> bool:
return (
len(linkable_spec.entity_links) == 0
or LinklessEntitySpec.from_reference(linkable_spec.entity_links[0]) != self._entity_link
)
if len(linkable_spec.entity_links) == 0:
return not linkable_spec.reference == self._entity_link
return linkable_spec.entity_links[0] != self._entity_link

def transform(self, instance_set: InstanceSet) -> InstanceSet: # noqa: D102
# Normal to not filter anything if the instance set has no instances with links.
Expand Down

0 comments on commit 7dc4e60

Please sign in to comment.