Skip to content

Commit

Permalink
fix(ingest/bigquery): Supporting lineage extraction in case the selec…
Browse files Browse the repository at this point in the history
…t query result's target table is set on job (datahub-project#10191)

Co-authored-by: Harshal Sheth <[email protected]>
  • Loading branch information
treff7es and hsheth2 authored Apr 2, 2024
1 parent 2873736 commit 5c06f7a
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -752,8 +752,17 @@ def _create_lineage_map(

# Try the sql parser first.
if self.config.lineage_use_sql_parser:
if e.statementType == "SELECT":
# We wrap SELECT statements in a `CREATE TABLE ... AS (...)` statement so the SQL parser treats them as writing to the destination table.
# This is a workaround for the SQL parser to support the case where the user runs a query and inserts the result into a table.
query = f"""create table `{destination_table.table_identifier.get_table_name()}` AS
(
{e.query}
)"""
else:
query = e.query
raw_lineage = sqlglot_lineage(
e.query,
query,
schema_resolver=sql_parser_schema_resolver,
default_db=e.project_id,
)
Expand Down
2 changes: 1 addition & 1 deletion metadata-ingestion/tests/unit/test_bigquery_lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def lineage_entries() -> List[QueryEvent]:
SELECT first.a, second.b FROM `my_project.my_dataset.my_source_table1` first
LEFT JOIN `my_project.my_dataset.my_source_table2` second ON first.id = second.id
""",
statementType="SELECT",
statementType="INSERT",
project_id="proj_12344",
end_time=None,
referencedTables=[
Expand Down

0 comments on commit 5c06f7a

Please sign in to comment.