From b7958c4a9105357b75925a5fc71262fe8445d443 Mon Sep 17 00:00:00 2001 From: skrydal Date: Tue, 22 Oct 2024 21:54:30 +0200 Subject: [PATCH] fix(ingest/bigquery): Fix tags urn/name ingestion for BigQuery (#11691) --- .../source/bigquery_v2/bigquery_schema_gen.py | 2 +- .../bigquery_v2/bigquery_mcp_golden.json | 94 +++++++++++++++++++ .../integration/bigquery_v2/test_bigquery.py | 6 +- 3 files changed, 100 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py index 1235f638f68ff7..f53642d1fead24 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py @@ -310,7 +310,7 @@ def gen_dataset_containers( logger.warning( f"Failed to generate platform resource for label {k}:{v}: {e}" ) - tags_joined.append(tag_urn.urn()) + tags_joined.append(tag_urn.name) database_container_key = self.gen_project_id_key(database=project_id) diff --git a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_golden.json b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_golden.json index 640ee1bf436b03..5e091596cc0f72 100644 --- a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_golden.json +++ b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_golden.json @@ -112,6 +112,26 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:priority:medium:test" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:8df46c5e3ded05a3122b0015822c0ef0", @@ -257,6 +277,64 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:7fbbf79fb726422dc2434222a8e30630", + "changeType": "UPSERT", + "aspectName": "platformResourceInfo", + "aspect": { + "json": { + "resourceType": "BigQueryLabelInfo", + "primaryKey": "priority/medium:test", + "secondaryKeys": [ + "urn:li:tag:priority:medium:test" + ], + "value": { + "blob": "{\"datahub_urn\": \"urn:li:tag:priority:medium:test\", \"managed_by_datahub\": false, \"key\": \"priority\", \"value\": \"medium:test\"}", + "contentType": "JSON", + "schemaType": "JSON", + "schemaRef": "BigQueryLabelInfo" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-2j2qqv", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:7fbbf79fb726422dc2434222a8e30630", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bigquery" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-2j2qqv", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "platformResource", + "entityUrn": "urn:li:platformResource:7fbbf79fb726422dc2434222a8e30630", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00-2j2qqv", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "platformResource", "entityUrn": "urn:li:platformResource:99b34051bd90d28d922b0e107277a916", @@ -1241,6 +1319,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:priority:medium:test", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "priority:medium:test" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:purchase", diff --git a/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py b/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py index 39cefcb42f360b..1f146886361617 100644 --- a/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py +++ b/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py @@ -70,6 +70,7 @@ def recipe(mcp_output_path: str, source_config_override: dict = {}) -> dict: "include_table_lineage": True, "include_data_platform_instance": True, "capture_table_label_as_tag": True, + "capture_dataset_label_as_tag": True, "classification": ClassificationConfig( enabled=True, classifiers=[ @@ -141,7 +142,10 @@ def side_effect(*args: Any) -> Optional[PlatformResource]: get_platform_resource.side_effect = side_effect get_datasets_for_project_id.return_value = [ - BigqueryDataset(name=dataset_name, location="US") + # BigqueryDataset(name=dataset_name, location="US") + BigqueryDataset( + name=dataset_name, location="US", labels={"priority": "medium:test"} + ) ] table_list_item = TableListItem(