From 5c06f7a245356759470391f770ffb62b4b738042 Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Tue, 2 Apr 2024 22:13:05 +0200 Subject: [PATCH] fix(ingest/bigquery): Supporting lineage extraction in case the select query result's target table is set on job (#10191) Co-authored-by: Harshal Sheth --- .../datahub/ingestion/source/bigquery_v2/lineage.py | 11 ++++++++++- .../tests/unit/test_bigquery_lineage.py | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py index 764f4f2449924..0d205679a8bf3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py @@ -752,8 +752,17 @@ def _create_lineage_map( # Try the sql parser first. if self.config.lineage_use_sql_parser: + if e.statementType == "SELECT": + # We wrap select statements in a `CREATE TABLE ... AS` statement to make them parseable as an insert statement. + # This is a workaround for the sql parser to support the case where the user runs a query and inserts the result into a table. 
+ query = f"""create table `{destination_table.table_identifier.get_table_name()}` AS + ( + {e.query} + )""" + else: + query = e.query raw_lineage = sqlglot_lineage( - e.query, + query, schema_resolver=sql_parser_schema_resolver, default_db=e.project_id, ) diff --git a/metadata-ingestion/tests/unit/test_bigquery_lineage.py b/metadata-ingestion/tests/unit/test_bigquery_lineage.py index 1edac3fde0a6c..5d8c040b4123b 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_lineage.py +++ b/metadata-ingestion/tests/unit/test_bigquery_lineage.py @@ -28,7 +28,7 @@ def lineage_entries() -> List[QueryEvent]: SELECT first.a, second.b FROM `my_project.my_dataset.my_source_table1` first LEFT JOIN `my_project.my_dataset.my_source_table2` second ON first.id = second.id """, - statementType="SELECT", + statementType="INSERT", project_id="proj_12344", end_time=None, referencedTables=[