From 438007b73d2282aa04cfcf327963e701209b9c6a Mon Sep 17 00:00:00 2001 From: tlento Date: Fri, 1 Mar 2024 17:50:37 -0800 Subject: [PATCH] Remove unsupported legacy MetricFlow Client API In the original version of MetricFlow, we supported a Python client for programmatically issuing queries. This was a thin wrapper over our internal APIs for running queries against SQL engines. Since the acquisition of Transform by dbt Labs, the focus of MetricFlow has shifted heavily (and appropriately) toward being a support library for building and rendering metric queries against target query dialects (currently all SQL). This legacy public API has not been maintained at all - it uses legacy string-typed inputs, it does not support saved queries, and it allows for only a highly limited set of operations. Furthermore, it is in a package directory path that indicates we have a public API. Since we do not, in fact, have a supported public API just yet, it behooves us to remove this in order to unblock some efforts to simplify our internals and move us closer to the point where we can build out a publicly supported API, whenever that time may be. 
--- metricflow/__init__.py | 1 - metricflow/api/__init__.py | 0 metricflow/api/metricflow_client.py | 224 ------------------ metricflow/test/api/__init__.py | 0 metricflow/test/api/conftest.py | 20 -- metricflow/test/api/test_metricflow_client.py | 131 ---------- 6 files changed, 376 deletions(-) delete mode 100644 metricflow/api/__init__.py delete mode 100644 metricflow/api/metricflow_client.py delete mode 100644 metricflow/test/api/__init__.py delete mode 100644 metricflow/test/api/conftest.py delete mode 100644 metricflow/test/api/test_metricflow_client.py diff --git a/metricflow/__init__.py b/metricflow/__init__.py index 8b0f2ca071..e69de29bb2 100644 --- a/metricflow/__init__.py +++ b/metricflow/__init__.py @@ -1 +0,0 @@ -from metricflow.api.metricflow_client import MetricFlowClient # noqa: D diff --git a/metricflow/api/__init__.py b/metricflow/api/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/metricflow/api/metricflow_client.py b/metricflow/api/metricflow_client.py deleted file mode 100644 index 51bbe14145..0000000000 --- a/metricflow/api/metricflow_client.py +++ /dev/null @@ -1,224 +0,0 @@ -from __future__ import annotations - -import datetime as dt -import logging -from typing import Dict, List, Optional - -from dateutil.parser import parse -from dbt_semantic_interfaces.protocols.semantic_manifest import SemanticManifest -from dbt_semantic_interfaces.validations.semantic_manifest_validator import SemanticManifestValidator -from dbt_semantic_interfaces.validations.validator_helpers import SemanticManifestValidationResults - -from metricflow.engine.metricflow_engine import ( - MetricFlowEngine, - MetricFlowExplainResult, - MetricFlowQueryRequest, - MetricFlowQueryResult, -) -from metricflow.engine.models import Dimension, Metric -from metricflow.model.semantic_manifest_lookup import SemanticManifestLookup -from metricflow.protocols.sql_client import SqlClient -from metricflow.sql.optimizer.optimization_levels import 
SqlQueryOptimizationLevel - -logger = logging.getLogger(__name__) - - -class MetricFlowClient: - """MetricFlow Python client for running basic queries and other standard commands.""" - - def __init__( - self, - sql_client: SqlClient, - semantic_manifest: SemanticManifest, - ): - """Initializer for MetricFlowClient. - - Args: - sql_client: Client that is connected to your data warehouse. - semantic_manifest: Model containing all the information about your metric configs. - """ - self.sql_client = sql_client - self.semantic_manifest = semantic_manifest - self.semantic_manifest_lookup = SemanticManifestLookup(self.semantic_manifest) - self.engine = MetricFlowEngine( - semantic_manifest_lookup=self.semantic_manifest_lookup, - sql_client=self.sql_client, - ) - - def _create_mf_request( - self, - metrics: List[str], - dimensions: List[str] = [], - limit: Optional[int] = None, - start_time: Optional[str] = None, - end_time: Optional[str] = None, - where: Optional[str] = None, - order: Optional[List[str]] = None, - as_table: Optional[str] = None, - sql_optimization_level: int = 4, - ) -> MetricFlowQueryRequest: - """Build MetricFlowQueryRequest given common query parameters.""" - parsed_optimization_level = SqlQueryOptimizationLevel(f"O{sql_optimization_level}") - parsed_start_time = _convert_to_datetime(start_time) - parsed_end_time = _convert_to_datetime(end_time) - return MetricFlowQueryRequest.create_with_random_request_id( - metric_names=metrics, - group_by_names=dimensions, - limit=limit, - time_constraint_start=parsed_start_time, - time_constraint_end=parsed_end_time, - where_constraint=where, - order_by_names=order, - output_table=as_table, - sql_optimization_level=parsed_optimization_level, - ) - - def query( - self, - metrics: List[str], - dimensions: List[str] = [], - limit: Optional[int] = None, - start_time: Optional[str] = None, - end_time: Optional[str] = None, - where: Optional[str] = None, - order: Optional[List[str]] = None, - as_table: Optional[str] = 
None, - sql_optimization_level: int = 4, - ) -> MetricFlowQueryResult: - """Makes a query for a metric. - - Args: - metrics: Names of the metrics to query. - dimensions: Names of the dimensions and entities to query. - limit: Limit the result to this many rows. - start_time: Get data for the start of this time range. - end_time: Get data for the end of this time range. - where: A SQL string using group by names that can be used like a where clause on the output data. - order: metric and group by names to order by. A "-" can be used to specify reverse order e.g. "-ds" - as_table: If specified, output the result data to this table instead of a result dataframe. - sql_optimization_level: The level of optimization for the generated SQL. Pass integer from 0-4. - - Returns: - MetricFlowQueryResult that contains the result and context of the query. - """ - mf_request = self._create_mf_request( - metrics=metrics, - dimensions=dimensions, - limit=limit, - start_time=start_time, - end_time=end_time, - where=where, - order=order, - as_table=as_table, - sql_optimization_level=sql_optimization_level, - ) - return self.engine.query(mf_request=mf_request) - - def explain( - self, - metrics: List[str], - dimensions: List[str] = [], - limit: Optional[int] = None, - start_time: Optional[str] = None, - end_time: Optional[str] = None, - where: Optional[str] = None, - order: Optional[List[str]] = None, - as_table: Optional[str] = None, - sql_optimization_level: int = 4, - ) -> MetricFlowExplainResult: - """Returns the plan for resolving a query. - - Args: - metrics: Names of the metrics to query. - dimensions: Names of the dimensions and entities to query. - limit: Limit the result to this many rows. - start_time: Get data for the start of this time range. - end_time: Get data for the end of this time range. - where: A SQL string using group by names that can be used like a where clause on the output data. - order: metric and group by names to order by. 
A "-" can be used to specify reverse order e.g. "-ds" - as_table: If specified, output the result data to this table instead of a result dataframe. - sql_optimization_level: The level of optimization for the generated SQL. Pass integer from 0-4. - - Returns: - MetricFlowExplainResult that contains the context of the query. - """ - mf_request = self._create_mf_request( - metrics=metrics, - dimensions=dimensions, - limit=limit, - start_time=start_time, - end_time=end_time, - where=where, - order=order, - as_table=as_table, - sql_optimization_level=sql_optimization_level, - ) - return self.engine.explain(mf_request=mf_request) - - def list_metrics(self) -> Dict[str, Metric]: - """Retrieves a list of metric names. - - Returns: - A dictionary with metric names as the key and the corresponding Metric object as the value. - """ - return {m.name: m for m in self.engine.list_metrics()} - - def list_dimensions(self, metric_names: List[str]) -> List[Dimension]: - """Retrieves a list of all common dimensions for metric_names. - - "simple" dimensions are the ones that people expect from a UI perspective. For example, if "ds" is a time - dimension at a day granularity, this would not list "ds__week". - - Args: - metric_names: Names of metrics to get common dimensions from. - - Returns: - A list of Dimension objects containing metadata. - """ - return self.engine.simple_dimensions_for_metrics(metric_names=metric_names) - - def get_dimension_values( - self, - metric_names: List[str], - dimension_name: str, - start_time: Optional[str] = None, - end_time: Optional[str] = None, - ) -> List[str]: - """Retrieves a list of dimension values given a [metric_name, dimension_name]. - - Args: - metric_names: Names of metrics that contain the group_by. - dimension_name: Name of group_by to get values from. - start_time: Get data for the start of this time range. - end_time: Get data for the end of this time range. - - Returns: - A list of dimension values as string. 
- """ - parsed_start_time = _convert_to_datetime(start_time) - parsed_end_time = _convert_to_datetime(end_time) - return self.engine.get_dimension_values( - metric_names=metric_names, - get_group_by_values=dimension_name, - time_constraint_start=parsed_start_time, - time_constraint_end=parsed_end_time, - ) - - def validate_configs(self) -> SemanticManifestValidationResults: - """Validate a model according to configured rules. - - Returns: - Tuple of validation issues with the model provided. - """ - return SemanticManifestValidator[SemanticManifest]().validate_semantic_manifest(self.semantic_manifest) - - -def _convert_to_datetime(datetime_str: Optional[str]) -> Optional[dt.datetime]: - """Callback to convert string to datetime given as an iso8601 timestamp.""" - if datetime_str is None: - return None - - try: - return parse(datetime_str) - except Exception: - raise ValueError(f"'{datetime_str}' is not a valid iso8601 timestamp") diff --git a/metricflow/test/api/__init__.py b/metricflow/test/api/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/metricflow/test/api/conftest.py b/metricflow/test/api/conftest.py deleted file mode 100644 index 1916eaee94..0000000000 --- a/metricflow/test/api/conftest.py +++ /dev/null @@ -1,20 +0,0 @@ -from __future__ import annotations - -import pytest - -from metricflow.api.metricflow_client import MetricFlowClient -from metricflow.model.semantic_manifest_lookup import SemanticManifestLookup -from metricflow.protocols.sql_client import SqlClient - - -@pytest.fixture -def mf_client( - create_source_tables: bool, - sql_client: SqlClient, - simple_semantic_manifest_lookup: SemanticManifestLookup, -) -> MetricFlowClient: - """Fixture for MetricFlowClient.""" - return MetricFlowClient( - sql_client=sql_client, - semantic_manifest=simple_semantic_manifest_lookup.semantic_manifest, - ) diff --git a/metricflow/test/api/test_metricflow_client.py b/metricflow/test/api/test_metricflow_client.py deleted file mode 
100644 index a06d268a4e..0000000000 --- a/metricflow/test/api/test_metricflow_client.py +++ /dev/null @@ -1,131 +0,0 @@ -from __future__ import annotations - -from dbt_semantic_interfaces.validations.validator_helpers import SemanticManifestValidationResults - -from metricflow.api.metricflow_client import MetricFlowClient -from metricflow.engine.models import Dimension, Metric -from metricflow.random_id import random_id -from metricflow.sql.sql_table import SqlTable -from metricflow.test.fixtures.setup_fixtures import MetricFlowTestConfiguration - - -def test_query(mf_client: MetricFlowClient, mf_test_configuration: MetricFlowTestConfiguration) -> None: # noqa: D - result = mf_client.query( - ["bookings"], - ["metric_time"], - limit=2, - start_time="2019-01-01", - end_time="2024-01-01", - ) - assert result.query_spec - assert result.dataflow_plan - assert result.sql - assert result.result_df is not None - assert len(result.result_df) == 2 - assert result.result_table is None - - output_table = SqlTable(schema_name=mf_test_configuration.mf_system_schema, table_name=f"test_table_{random_id()}") - result = mf_client.query( - ["bookings"], - ["metric_time"], - limit=2, - start_time="2019-01-01", - end_time="2024-01-01", - as_table=output_table.sql, - ) - assert result.query_spec - assert result.dataflow_plan - assert result.sql - assert result.result_df is None - assert result.result_table == output_table - - -def test_explain(mf_client: MetricFlowClient, mf_test_configuration: MetricFlowTestConfiguration) -> None: # noqa: D - result = mf_client.explain( - ["bookings"], - ["metric_time"], - limit=2, - start_time="2019-01-01", - end_time="2024-01-01", - ) - assert result.query_spec - assert result.dataflow_plan - assert result.execution_plan - assert result.output_table is None - - output_table = SqlTable(schema_name=mf_test_configuration.mf_system_schema, table_name=f"test_table_{random_id()}") - result = mf_client.explain( - ["bookings"], - ["metric_time"], - limit=2, 
- start_time="2019-01-01", - end_time="2024-01-01", - as_table=output_table.sql, - ) - assert result.query_spec - assert result.dataflow_plan - assert result.execution_plan - assert result.output_table == output_table - - -def test_list_metrics(mf_client: MetricFlowClient) -> None: # noqa: D - metrics = mf_client.list_metrics() - assert metrics - - metric_name, metric_obj = next(iter(metrics.items())) - - assert metric_name == metric_obj.name - assert isinstance(metric_obj, Metric) - - assert metric_obj.dimensions - assert isinstance(metric_obj.dimensions[0], Dimension) - - -def test_list_dimensions(mf_client: MetricFlowClient) -> None: # noqa: D - dimensions = mf_client.list_dimensions(["bookings"]) - - assert dimensions - assert isinstance(dimensions[0], Dimension) - - dimensions = mf_client.list_dimensions(["bookings", "revenue"]) - - assert len(dimensions) == 2 - assert tuple(dim.name for dim in dimensions) == ("metric_time", "metric_time") - - -def test_get_measures_for_metrics(mf_client: MetricFlowClient) -> None: # noqa: D - measures = mf_client.engine.get_measures_for_metrics(["bookings"]) - assert len(measures) == 1 - measure = measures[0] - assert measure.name == "bookings" - assert measure.agg_time_dimension == "ds" - - # Multiple metrics - measures = mf_client.engine.get_measures_for_metrics(["bookings", "revenue"]) - assert len(measures) == 2 - assert {measure.name for measure in measures} == {"bookings", "txn_revenue"} - assert {measure.agg_time_dimension for measure in measures} == {"ds"} - - # Derived metric with multiple metric inputs - measures = mf_client.engine.get_measures_for_metrics(["views_times_booking_value"]) - assert len(measures) == 2 - assert {measure.name for measure in measures} == {"views", "booking_value"} - assert {measure.agg_time_dimension for measure in measures} == {"ds"} - - # Ratio metric with multiple measure inputs - measures = mf_client.engine.get_measures_for_metrics(["bookings_per_booker"]) - assert len(measures) == 2 - 
assert {measure.name for measure in measures} == {"bookings", "bookers"} - assert {measure.agg_time_dimension for measure in measures} == {"ds"} - - -def test_get_dimension_values(mf_client: MetricFlowClient) -> None: # noqa: D - dim_vals = mf_client.get_dimension_values( - ["bookings"], "metric_time", start_time="2020-01-01", end_time="2024-01-01" - ) - assert dim_vals - - -def test_validate_configs(mf_client: MetricFlowClient) -> None: # noqa: D - issues = mf_client.validate_configs() - assert isinstance(issues, SemanticManifestValidationResults)