From c0ef72886828044c40eb9db8a140e7e8afecb2d1 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Mon, 4 Dec 2023 13:21:42 -0500 Subject: [PATCH] fix(ingest/powerbi): Allow old parser to parse [db].[schema].[table] table references (#9360) --- .../ingestion/source/powerbi/config.py | 1 + .../source/powerbi/m_query/resolver.py | 28 +++++--- .../tests/unit/test_powerbi_parser.py | 65 +++++++++++++++++++ 3 files changed, 84 insertions(+), 10 deletions(-) create mode 100644 metadata-ingestion/tests/unit/test_powerbi_parser.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index b8cc34c234ffa..f71afac737ca6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -314,6 +314,7 @@ class PowerBiDashboardSourceConfig( description="Configure how is ownership ingested", ) modified_since: Optional[str] = pydantic.Field( + default=None, description="Get only recently modified workspaces based on modified_since datetime '2023-02-10T00:00:00.0000000Z', excludePersonalWorkspaces and excludeInActiveWorkspaces limit to last 30 days", ) extract_dashboards: bool = pydantic.Field( diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py index e200ff41f71c2..930841f1f0df2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py @@ -617,16 +617,25 @@ def create_urn_using_old_parser( tables: List[str] = native_sql_parser.get_tables(query) - for table in tables: - schema_and_table: List[str] = table.split(".") - if len(schema_and_table) == 1: - # schema name is not present. set default schema - schema_and_table.insert(0, MSSqlDataPlatformTableCreator.DEFAULT_SCHEMA) - - qualified_table_name = ( - f"{db_name}.{schema_and_table[0]}.{schema_and_table[1]}" - ) + for parsed_table in tables: + # components: List[str] = [v.strip("[]") for v in parsed_table.split(".")] + components = [v.strip("[]") for v in parsed_table.split(".")] + if len(components) == 3: + database, schema, table = components + elif len(components) == 2: + schema, table = components + database = db_name + elif len(components) == 1: + (table,) = components + database = db_name + schema = MSSqlDataPlatformTableCreator.DEFAULT_SCHEMA + else: + logger.warning( + f"Unsupported table format found {parsed_table} in query {query}" + ) + continue + qualified_table_name = f"{database}.{schema}.{table}" urn = urn_creator( config=self.config, platform_instance_resolver=self.platform_instance_resolver, @@ -634,7 +643,6 @@ def create_urn_using_old_parser( server=server, qualified_table_name=qualified_table_name, ) - dataplatform_tables.append( DataPlatformTable( data_platform_pair=self.get_platform_pair(), diff --git a/metadata-ingestion/tests/unit/test_powerbi_parser.py b/metadata-ingestion/tests/unit/test_powerbi_parser.py new file mode 100644 index 0000000000000..e53e8d7aee16f --- /dev/null +++ b/metadata-ingestion/tests/unit/test_powerbi_parser.py @@ -0,0 +1,65 @@ +import pytest + +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.source.powerbi.config import PowerBiDashboardSourceConfig +from datahub.ingestion.source.powerbi.dataplatform_instance_resolver import ( + ResolvePlatformInstanceFromDatasetTypeMapping, +) +from datahub.ingestion.source.powerbi.m_query.resolver import ( + MSSqlDataPlatformTableCreator, +) + + +@pytest.fixture +def creator(): + config = PowerBiDashboardSourceConfig( + tenant_id="test-tenant-id", + client_id="test-client-id", + client_secret="test-client-secret", + ) + return MSSqlDataPlatformTableCreator( + ctx=PipelineContext(run_id="test-run-id"), + config=config, + platform_instance_resolver=ResolvePlatformInstanceFromDatasetTypeMapping( + config + ), + ) + + +def test_parse_three_part_table_reference(creator): + v = creator.create_urn_using_old_parser( + "SELECT * FROM [dwhdbt].[dbo2].[my_table] where oper_day_date > getdate() - 5", + db_name="default_db", + server="server", + ) + assert len(v) == 1 + assert ( + v[0].urn + == "urn:li:dataset:(urn:li:dataPlatform:mssql,dwhdbt.dbo2.my_table,PROD)" + ) + + +def test_parse_two_part_table_reference(creator): + v = creator.create_urn_using_old_parser( + "SELECT * FROM my_schema.my_table", + db_name="default_db", + server="server", + ) + assert len(v) == 1 + assert ( + v[0].urn + == "urn:li:dataset:(urn:li:dataPlatform:mssql,default_db.my_schema.my_table,PROD)" + ) + + +def test_parse_one_part_table_reference(creator): + v = creator.create_urn_using_old_parser( + "SELECT * FROM my_table", + db_name="default_db", + server="server", + ) + assert len(v) == 1 + assert ( + v[0].urn + == "urn:li:dataset:(urn:li:dataPlatform:mssql,default_db.dbo.my_table,PROD)" + )