From ba9bfa0613f031080c14e29dcabdf3ffb088276f Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 25 Sep 2024 13:09:05 -0700 Subject: [PATCH 1/5] feat(ingest/dbt): speed up dbt CLL with node_name_patterns (#11450) --- .../ingestion/source/dbt/dbt_common.py | 58 +++++++++++++++++-- 1 file changed, 53 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index acbfd65f67d302..477ce183f6fcb2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -5,7 +5,7 @@ from dataclasses import dataclass, field from datetime import datetime from enum import auto -from typing import Any, Dict, Iterable, List, Optional, Tuple +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple import more_itertools import pydantic @@ -46,6 +46,7 @@ from datahub.ingestion.api.source import MetadataWorkUnitProcessor from datahub.ingestion.api.source_helpers import auto_workunit from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.graph.client import DataHubGraph from datahub.ingestion.source.dbt.dbt_tests import ( DBTTest, DBTTestResult, @@ -1024,12 +1025,15 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: all_nodes_map, ) + def _is_allowed_node(self, key: str) -> bool: + return self.config.node_name_pattern.allowed(key) + def _filter_nodes(self, all_nodes: List[DBTNode]) -> List[DBTNode]: nodes = [] for node in all_nodes: key = node.dbt_name - if not self.config.node_name_pattern.allowed(key): + if not self._is_allowed_node(key): self.report.nodes_filtered.append(key) continue @@ -1041,6 +1045,36 @@ def _filter_nodes(self, all_nodes: List[DBTNode]) -> List[DBTNode]: def _to_schema_info(schema_fields: List[SchemaField]) -> SchemaInfo: return {column.fieldPath: column.nativeDataType for column in schema_fields} + def 
_determine_cll_required_nodes( + self, all_nodes_map: Dict[str, DBTNode] + ) -> Tuple[Set[str], Set[str]]: + # Based on the filter patterns, we only need to do schema inference and CLL + # for a subset of nodes. + # If a node depends on an ephemeral model, the ephemeral model should also be in the CLL list. + # Invariant: If it's in the CLL list, it will also be in the schema list. + # Invariant: The upstream of any node in the CLL list will be in the schema list. + schema_nodes: Set[str] = set() + cll_nodes: Set[str] = set() + + def add_node_to_cll_list(dbt_name: str) -> None: + if dbt_name in cll_nodes: + return + for upstream in all_nodes_map[dbt_name].upstream_nodes: + schema_nodes.add(upstream) + + upstream_node = all_nodes_map[upstream] + if upstream_node.is_ephemeral_model(): + add_node_to_cll_list(upstream) + + cll_nodes.add(dbt_name) + schema_nodes.add(dbt_name) + + for dbt_name in all_nodes_map.keys(): + if self._is_allowed_node(dbt_name): + add_node_to_cll_list(dbt_name) + + return schema_nodes, cll_nodes + def _infer_schemas_and_update_cll( # noqa: C901 self, all_nodes_map: Dict[str, DBTNode] ) -> None: @@ -1067,7 +1101,7 @@ def _infer_schemas_and_update_cll( # noqa: C901 ) return - graph = self.ctx.graph + graph: Optional[DataHubGraph] = self.ctx.graph schema_resolver = SchemaResolver( platform=self.config.target_platform, @@ -1079,7 +1113,7 @@ def _infer_schemas_and_update_cll( # noqa: C901 # Iterate over the dbt nodes in topological order. # This ensures that we process upstream nodes before downstream nodes. 
- node_order = topological_sort( + all_node_order = topological_sort( list(all_nodes_map.keys()), edges=list( (upstream, node.dbt_name) @@ -1088,7 +1122,17 @@ def _infer_schemas_and_update_cll( # noqa: C901 if upstream in all_nodes_map ), ) - for dbt_name in node_order: + schema_required_nodes, cll_required_nodes = self._determine_cll_required_nodes( + all_nodes_map + ) + + for dbt_name in all_node_order: + if dbt_name not in schema_required_nodes: + logger.debug( + f"Skipping {dbt_name} because it is filtered out by patterns" + ) + continue + node = all_nodes_map[dbt_name] logger.debug(f"Processing CLL/schemas for {node.dbt_name}") @@ -1163,6 +1207,10 @@ def _infer_schemas_and_update_cll( # noqa: C901 # For sources, we generate CLL as a 1:1 mapping. # We don't support CLL for tests (assertions) or seeds. pass + elif node.dbt_name not in cll_required_nodes: + logger.debug( + f"Not generating CLL for {node.dbt_name} because we don't need it." + ) elif node.compiled_code: # Add CTE stops based on the upstreams list. 
cte_mapping = { From d754650875df80c43c44f33cc49a749218cb5b23 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 25 Sep 2024 13:10:11 -0700 Subject: [PATCH 2/5] feat(ingest/dbt): produce multiple assertions for multi-table dbt tests (#11451) --- .../ingestion/source/dbt/dbt_common.py | 104 +++++---- .../dbt/dbt_test_events_golden.json | 210 ++++++++++++------ ...test_prefer_sql_parser_lineage_golden.json | 78 +++++-- ...bt_test_test_model_performance_golden.json | 142 ++++++++---- .../tests/integration/dbt/test_dbt.py | 4 +- 5 files changed, 364 insertions(+), 174 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 477ce183f6fcb2..c15f1deb43a3af 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -234,6 +234,10 @@ def can_emit_node_type(self, node_type: str) -> bool: return allowed == EmitDirective.YES + @property + def can_emit_test_definitions(self) -> bool: + return self.test_definitions == EmitDirective.YES + @property def can_emit_test_results(self) -> bool: return self.test_results == EmitDirective.YES @@ -736,8 +740,8 @@ def get_upstreams_for_test( all_nodes_map: Dict[str, DBTNode], platform_instance: Optional[str], environment: str, -) -> List[str]: - upstream_urns = [] +) -> Dict[str, str]: + upstreams = {} for upstream in test_node.upstream_nodes: if upstream not in all_nodes_map: @@ -748,15 +752,13 @@ def get_upstreams_for_test( upstream_manifest_node = all_nodes_map[upstream] - upstream_urns.append( - upstream_manifest_node.get_urn( - target_platform=DBT_PLATFORM, - data_platform_instance=platform_instance, - env=environment, - ) + upstreams[upstream] = upstream_manifest_node.get_urn( + target_platform=DBT_PLATFORM, + data_platform_instance=platform_instance, + env=environment, ) - return upstream_urns + return upstreams def 
make_mapping_upstream_lineage( @@ -893,40 +895,11 @@ def __init__(self, config: DBTCommonConfig, ctx: PipelineContext, platform: str) def create_test_entity_mcps( self, test_nodes: List[DBTNode], - custom_props: Dict[str, str], + extra_custom_props: Dict[str, str], all_nodes_map: Dict[str, DBTNode], ) -> Iterable[MetadataWorkUnit]: for node in sorted(test_nodes, key=lambda n: n.dbt_name): - assertion_urn = mce_builder.make_assertion_urn( - mce_builder.datahub_guid( - { - k: v - for k, v in { - "platform": DBT_PLATFORM, - "name": node.dbt_name, - "instance": self.config.platform_instance, - **( - # Ideally we'd include the env unconditionally. However, we started out - # not including env in the guid, so we need to maintain backwards compatibility - # with existing PROD assertions. - {"env": self.config.env} - if self.config.env != mce_builder.DEFAULT_ENV - and self.config.include_env_in_assertion_guid - else {} - ), - }.items() - if v is not None - } - ) - ) - - if self.config.entities_enabled.can_emit_node_type("test"): - yield MetadataChangeProposalWrapper( - entityUrn=assertion_urn, - aspect=self._make_data_platform_instance_aspect(), - ).as_workunit() - - upstream_urns = get_upstreams_for_test( + upstreams = get_upstreams_for_test( test_node=node, all_nodes_map=all_nodes_map, platform_instance=self.config.platform_instance, @@ -934,12 +907,51 @@ def create_test_entity_mcps( ) # In case a dbt test depends on multiple tables, we create separate assertions for each. - # TODO: This logic doesn't actually work properly, since we're reusing the same assertion_urn - # across multiple upstream tables, so we're actually only creating one assertion and the last - # upstream_urn gets used. Luckily, most dbt tests are associated with a single table, so this - # doesn't cause major issues in practice. 
- for upstream_urn in sorted(upstream_urns): - if self.config.entities_enabled.can_emit_node_type("test"): + for upstream_node_name, upstream_urn in upstreams.items(): + guid_upstream_part = {} + if len(upstreams) > 1: + # If we depend on multiple upstreams, we need to generate a unique guid for each assertion. + # If there was only one upstream, we want to maintain the original assertion for backwards compatibility. + guid_upstream_part = { + "on_dbt_upstream": upstream_node_name, + } + + assertion_urn = mce_builder.make_assertion_urn( + mce_builder.datahub_guid( + { + k: v + for k, v in { + "platform": DBT_PLATFORM, + "name": node.dbt_name, + "instance": self.config.platform_instance, + **( + # Ideally we'd include the env unconditionally. However, we started out + # not including env in the guid, so we need to maintain backwards compatibility + # with existing PROD assertions. + {"env": self.config.env} + if self.config.env != mce_builder.DEFAULT_ENV + and self.config.include_env_in_assertion_guid + else {} + ), + **guid_upstream_part, + }.items() + if v is not None + } + ) + ) + + custom_props = { + "dbt_unique_id": node.dbt_name, + "dbt_test_upstream_unique_id": upstream_node_name, + **extra_custom_props, + } + + if self.config.entities_enabled.can_emit_test_definitions: + yield MetadataChangeProposalWrapper( + entityUrn=assertion_urn, + aspect=self._make_data_platform_instance_aspect(), + ).as_workunit() + yield make_assertion_from_test( custom_props, node, diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json index 91095966eddd12..56e745d4f9acae 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json @@ -1886,6 +1886,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": 
"test.jaffle_shop.accepted_values_orders_status__placed__shipped__completed__return_pending__returned.be6b5b5ec3", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -1939,8 +1941,8 @@ }, "assertionUrn": "urn:li:assertion:b052a324c05327985f3b579a19ad7579", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -1974,6 +1976,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.accepted_values_stg_orders_status__placed__shipped__completed__return_pending__returned.080fb20aad", + "dbt_test_upstream_unique_id": "model.jaffle_shop.stg_orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2027,8 +2031,8 @@ }, "assertionUrn": "urn:li:assertion:da743330013b7e3e3707ac6e526ab408", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2062,6 +2066,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.accepted_values_stg_payments_payment_method__credit_card__coupon__bank_transfer__gift_card.3c3820f278", + "dbt_test_upstream_unique_id": "model.jaffle_shop.stg_payments", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2115,8 +2121,8 @@ }, "assertionUrn": "urn:li:assertion:2887b9c826e0be6296a37833bdc380bd", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2150,6 +2156,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.assert_total_payment_amount_is_positive", + 
"dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2191,8 +2199,8 @@ }, "assertionUrn": "urn:li:assertion:591d8dc8939e0cf9bf0fd03264ad1a0e", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2226,6 +2234,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.dbt_expectations_expect_column_values_to_be_between_customers_customer_id__2000000__0__customer_id_is_not_null__False.e67667298f", + "dbt_test_upstream_unique_id": "model.jaffle_shop.customers", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2293,6 +2303,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.dbt_expectations_expect_column_values_to_be_in_set_customers_customer_id__customer_id_is_not_null__0__1__2.81450cfcd8", + "dbt_test_upstream_unique_id": "model.jaffle_shop.customers", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2349,8 +2361,8 @@ }, "assertionUrn": "urn:li:assertion:bf7fd2b46d2c32ee9bb036acd1559782", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2384,6 +2396,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.dbt_expectations_expect_column_values_to_not_be_in_set_orders_credit_card_amount__credit_card_amount_is_not_null__0.888b06036c", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2435,8 +2449,8 @@ }, "assertionUrn": 
"urn:li:assertion:1c217b7587a0cad47a07a09bfe154055", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2470,6 +2484,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_customers_customer_id.5c9bf9911d", + "dbt_test_upstream_unique_id": "model.jaffle_shop.customers", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2516,8 +2532,8 @@ }, "assertionUrn": "urn:li:assertion:44519aa345bf3ea896179f9f352ae946", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2551,6 +2567,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_orders_amount.106140f9fd", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2597,8 +2615,8 @@ }, "assertionUrn": "urn:li:assertion:bbd78a070092f54313153abec49f6f31", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2632,6 +2650,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_orders_bank_transfer_amount.7743500c49", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2678,8 +2698,8 @@ }, "assertionUrn": "urn:li:assertion:52d06197762e3608d94609e96f03a0a7", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2713,6 +2733,8 @@ "aspect": { "json": { 
"customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_orders_coupon_amount.ab90c90625", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2759,8 +2781,8 @@ }, "assertionUrn": "urn:li:assertion:ca065a99637630468f688717590beeab", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2794,6 +2816,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_orders_credit_card_amount.d3ca593b59", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2840,8 +2864,8 @@ }, "assertionUrn": "urn:li:assertion:7a305acc5fc049dc9bbd141b814461d0", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2875,6 +2899,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_orders_customer_id.c5f02694af", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2921,8 +2947,8 @@ }, "assertionUrn": "urn:li:assertion:11087a3d7ae178df22c42922ac8ef8ad", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2956,6 +2982,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_orders_gift_card_amount.413a0d2d7a", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": 
"1.1.0", "manifest_adapter": "bigquery", @@ -3002,8 +3030,8 @@ }, "assertionUrn": "urn:li:assertion:b301bb47cc4ebce4e78a194b3de11f25", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3037,6 +3065,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_orders_order_id.cf6c17daed", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3083,8 +3113,8 @@ }, "assertionUrn": "urn:li:assertion:2e9117138dcc9facda66f1efd55a8cd7", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3118,6 +3148,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_stg_customers_customer_id.e2cfb1f9aa", + "dbt_test_upstream_unique_id": "model.jaffle_shop.stg_customers", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3164,8 +3196,8 @@ }, "assertionUrn": "urn:li:assertion:25ebf4faa9b1654ef54c46d975ca0a81", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3199,6 +3231,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_stg_orders_order_id.81cfe2fe64", + "dbt_test_upstream_unique_id": "model.jaffle_shop.stg_orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3245,8 +3279,8 @@ }, "assertionUrn": "urn:li:assertion:b03abcc447aac70bbebb22a8a9d7dbbe", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": 
"FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3280,6 +3314,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_stg_payments_payment_id.c19cc50075", + "dbt_test_upstream_unique_id": "model.jaffle_shop.stg_payments", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3326,8 +3362,8 @@ }, "assertionUrn": "urn:li:assertion:c1eebc71f36690e4523adca30314e927", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3339,7 +3375,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", + "entityUrn": "urn:li:assertion:3191c2851901165afc07c3bd7f5f590a", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -3355,12 +3391,14 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", + "entityUrn": "urn:li:assertion:3191c2851901165afc07c3bd7f5f590a", "changeType": "UPSERT", "aspectName": "assertionInfo", "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.relationships_orders_customer_id__customer_id__ref_customers_.c6ec7f58f2", + "dbt_test_upstream_unique_id": "model.jaffle_shop.customers", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3401,7 +3439,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", + "entityUrn": "urn:li:assertion:3191c2851901165afc07c3bd7f5f590a", "changeType": "UPSERT", "aspectName": "assertionRunEvent", "aspect": { @@ -3414,10 +3452,10 @@ "type": "SUCCESS", "nativeResults": {} }, - "assertionUrn": "urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", + "assertionUrn": "urn:li:assertion:3191c2851901165afc07c3bd7f5f590a", "partitionSpec": { - 
"type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3429,12 +3467,30 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", + "entityUrn": "urn:li:assertion:0331fa8a77015ca1a48d3a2fc90948fb", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:0331fa8a77015ca1a48d3a2fc90948fb", "changeType": "UPSERT", "aspectName": "assertionInfo", "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.relationships_orders_customer_id__customer_id__ref_customers_.c6ec7f58f2", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3475,7 +3531,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", + "entityUrn": "urn:li:assertion:0331fa8a77015ca1a48d3a2fc90948fb", "changeType": "UPSERT", "aspectName": "assertionRunEvent", "aspect": { @@ -3488,10 +3544,10 @@ "type": "SUCCESS", "nativeResults": {} }, - "assertionUrn": "urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", + "assertionUrn": "urn:li:assertion:0331fa8a77015ca1a48d3a2fc90948fb", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3525,6 +3581,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.unique_customers_customer_id.c5af1ff4b1", + "dbt_test_upstream_unique_id": "model.jaffle_shop.customers", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", 
"manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3577,8 +3635,8 @@ }, "assertionUrn": "urn:li:assertion:c51ca9c4b5a1f964bef748f0b8968e71", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3612,6 +3670,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.unique_orders_order_id.fed79b3a6e", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3664,8 +3724,8 @@ }, "assertionUrn": "urn:li:assertion:caa9b8060e214cecab88a92dc39c2e60", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3699,6 +3759,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.unique_stg_customers_customer_id.c7614daada", + "dbt_test_upstream_unique_id": "model.jaffle_shop.stg_customers", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3751,8 +3813,8 @@ }, "assertionUrn": "urn:li:assertion:54bac90e6785bdefd8685ebf8814c429", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3786,6 +3848,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.unique_stg_orders_order_id.e3b841c71a", + "dbt_test_upstream_unique_id": "model.jaffle_shop.stg_orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3838,8 +3902,8 @@ }, "assertionUrn": "urn:li:assertion:815963e1332b46a203504ba46ebfab24", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": 
"FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3873,6 +3937,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.unique_stg_payments_payment_id.3744510712", + "dbt_test_upstream_unique_id": "model.jaffle_shop.stg_payments", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3925,8 +3991,8 @@ }, "assertionUrn": "urn:li:assertion:fac27f352406b941125292413afa8096", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3936,6 +4002,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:0331fa8a77015ca1a48d3a2fc90948fb", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "assertion", "entityUrn": "urn:li:assertion:11087a3d7ae178df22c42922ac8ef8ad", @@ -4034,7 +4116,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:44519aa345bf3ea896179f9f352ae946", + "entityUrn": "urn:li:assertion:3191c2851901165afc07c3bd7f5f590a", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4050,7 +4132,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:52d06197762e3608d94609e96f03a0a7", + "entityUrn": "urn:li:assertion:44519aa345bf3ea896179f9f352ae946", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4066,7 +4148,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:54bac90e6785bdefd8685ebf8814c429", + "entityUrn": "urn:li:assertion:52d06197762e3608d94609e96f03a0a7", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4082,7 +4164,7 @@ }, { "entityType": "assertion", - "entityUrn": 
"urn:li:assertion:591d8dc8939e0cf9bf0fd03264ad1a0e", + "entityUrn": "urn:li:assertion:54bac90e6785bdefd8685ebf8814c429", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4098,7 +4180,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:7a305acc5fc049dc9bbd141b814461d0", + "entityUrn": "urn:li:assertion:591d8dc8939e0cf9bf0fd03264ad1a0e", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4114,7 +4196,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:815963e1332b46a203504ba46ebfab24", + "entityUrn": "urn:li:assertion:7a305acc5fc049dc9bbd141b814461d0", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4130,7 +4212,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:b03abcc447aac70bbebb22a8a9d7dbbe", + "entityUrn": "urn:li:assertion:815963e1332b46a203504ba46ebfab24", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4146,7 +4228,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:b052a324c05327985f3b579a19ad7579", + "entityUrn": "urn:li:assertion:b03abcc447aac70bbebb22a8a9d7dbbe", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4162,7 +4244,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", + "entityUrn": "urn:li:assertion:b052a324c05327985f3b579a19ad7579", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json index d2c71659706818..42a416473ae243 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json @@ -4498,6 +4498,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.assert_source_actor_last_update_is_recent", 
+ "dbt_test_upstream_unique_id": "source.sample_dbt.pagila.actor", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -4574,6 +4576,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.is_email_monthly_billing_with_cust_email.57a935ce99", + "dbt_test_upstream_unique_id": "model.sample_dbt.monthly_billing_with_cust", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -4656,6 +4660,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.not_null_monthly_billing_with_cust_billing_month.19ce54289b", + "dbt_test_upstream_unique_id": "model.sample_dbt.monthly_billing_with_cust", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -4737,6 +4743,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.not_null_monthly_billing_with_cust_email.d405c2cc13", + "dbt_test_upstream_unique_id": "model.sample_dbt.monthly_billing_with_cust", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -4796,7 +4804,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -4812,12 +4820,14 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "changeType": "UPSERT", "aspectName": "assertionInfo", "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.relationships_monthly_billing_with_cust_customer_id__customer_id__ref_customer_details_.653e08a90b", 
+ "dbt_test_upstream_unique_id": "model.sample_dbt.customer_details", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -4826,10 +4836,10 @@ }, "type": "DATASET", "datasetAssertion": { - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", "scope": "DATASET_COLUMN", "fields": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),customer_id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),customer_id)" ], "aggregation": "IDENTITY", "operator": "_NATIVE_", @@ -4858,20 +4868,20 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "changeType": "UPSERT", "aspectName": "assertionRunEvent", "aspect": { "json": { "timestampMillis": 1663355198239, "runId": "just-some-random-id", - "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} }, - "assertionUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "assertionUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -4886,12 +4896,30 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + 
"systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", "changeType": "UPSERT", "aspectName": "assertionInfo", "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.relationships_monthly_billing_with_cust_customer_id__customer_id__ref_customer_details_.653e08a90b", + "dbt_test_upstream_unique_id": "model.sample_dbt.monthly_billing_with_cust", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -4900,10 +4928,10 @@ }, "type": "DATASET", "datasetAssertion": { - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", "scope": "DATASET_COLUMN", "fields": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),customer_id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),customer_id)" ], "aggregation": "IDENTITY", "operator": "_NATIVE_", @@ -4932,20 +4960,20 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", "changeType": "UPSERT", "aspectName": "assertionRunEvent", "aspect": { "json": { "timestampMillis": 1663355198239, "runId": "just-some-random-id", - "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", + "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} }, - "assertionUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", 
+ "assertionUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -4982,6 +5010,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.source_not_null_pagila_actor_actor_id.ad63829d3e", + "dbt_test_upstream_unique_id": "source.sample_dbt.pagila.actor", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -5063,6 +5093,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.source_unique_pagila_actor_actor_id.76aff1935a", + "dbt_test_upstream_unique_id": "source.sample_dbt.pagila.actor", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -5128,7 +5160,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:10f2a119dedcaab43afc47ff13d9cb5b", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -5144,7 +5176,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:10f2a119dedcaab43afc47ff13d9cb5b", + "entityUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -5206,6 +5238,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "assertion", "entityUrn": "urn:li:assertion:f6a1fde3ab4919abcc04bdee93144958", diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json 
b/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json index bec42f460e0b55..c281ea3eed0fa0 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json @@ -638,8 +638,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -659,8 +659,8 @@ "json": { "timestampMillis": 1663355198242, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1097,8 +1097,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -1118,8 +1118,8 @@ "json": { "timestampMillis": 1663355198242, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1420,8 +1420,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -1441,8 +1441,8 @@ "json": { "timestampMillis": 1663355198242, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1944,8 +1944,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -1965,8 +1965,8 @@ "json": 
{ "timestampMillis": 1663355198242, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -5270,6 +5270,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.assert_source_actor_last_update_is_recent", + "dbt_test_upstream_unique_id": "source.sample_dbt.pagila.actor", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -5311,8 +5313,8 @@ }, "assertionUrn": "urn:li:assertion:ba2c6ba830d407d539452f4cf46c92a6", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -5346,6 +5348,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.is_email_monthly_billing_with_cust_email.57a935ce99", + "dbt_test_upstream_unique_id": "model.sample_dbt.monthly_billing_with_cust", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -5393,8 +5397,8 @@ }, "assertionUrn": "urn:li:assertion:10f2a119dedcaab43afc47ff13d9cb5b", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -5428,6 +5432,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.not_null_monthly_billing_with_cust_billing_month.19ce54289b", + "dbt_test_upstream_unique_id": "model.sample_dbt.monthly_billing_with_cust", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -5474,8 +5480,8 @@ }, "assertionUrn": "urn:li:assertion:c456eccf6440c6e3388c584689a74d91", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + 
"type": "FULL_TABLE" } } }, @@ -5509,6 +5515,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.not_null_monthly_billing_with_cust_email.d405c2cc13", + "dbt_test_upstream_unique_id": "model.sample_dbt.monthly_billing_with_cust", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -5555,8 +5563,8 @@ }, "assertionUrn": "urn:li:assertion:f812b73477d81e6af283d918cb59e7bf", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -5568,7 +5576,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -5584,12 +5592,14 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "changeType": "UPSERT", "aspectName": "assertionInfo", "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.relationships_monthly_billing_with_cust_customer_id__customer_id__ref_customer_details_.653e08a90b", + "dbt_test_upstream_unique_id": "model.sample_dbt.customer_details", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -5598,10 +5608,10 @@ }, "type": "DATASET", "datasetAssertion": { - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", "scope": "DATASET_COLUMN", "fields": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),customer_id)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),customer_id)" ], "aggregation": "IDENTITY", "operator": "_NATIVE_", @@ -5630,23 +5640,23 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "changeType": "UPSERT", "aspectName": "assertionRunEvent", "aspect": { "json": { "timestampMillis": 1663355198239, "runId": "just-some-random-id", - "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} }, - "assertionUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "assertionUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -5658,12 +5668,30 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-model-performance", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", "changeType": "UPSERT", "aspectName": "assertionInfo", "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.relationships_monthly_billing_with_cust_customer_id__customer_id__ref_customer_details_.653e08a90b", + "dbt_test_upstream_unique_id": "model.sample_dbt.monthly_billing_with_cust", "manifest_schema": 
"https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -5672,10 +5700,10 @@ }, "type": "DATASET", "datasetAssertion": { - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", "scope": "DATASET_COLUMN", "fields": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),customer_id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),customer_id)" ], "aggregation": "IDENTITY", "operator": "_NATIVE_", @@ -5704,23 +5732,23 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", "changeType": "UPSERT", "aspectName": "assertionRunEvent", "aspect": { "json": { "timestampMillis": 1663355198239, "runId": "just-some-random-id", - "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", + "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} }, - "assertionUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "assertionUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -5754,6 +5782,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.source_not_null_pagila_actor_actor_id.ad63829d3e", + "dbt_test_upstream_unique_id": "source.sample_dbt.pagila.actor", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ 
-5800,8 +5830,8 @@ }, "assertionUrn": "urn:li:assertion:f6a1fde3ab4919abcc04bdee93144958", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -5835,6 +5865,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.source_unique_pagila_actor_actor_id.76aff1935a", + "dbt_test_upstream_unique_id": "source.sample_dbt.pagila.actor", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -5887,8 +5919,8 @@ }, "assertionUrn": "urn:li:assertion:60ce4aad7ff6dbff7004da0f2258c9df", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -5900,7 +5932,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:10f2a119dedcaab43afc47ff13d9cb5b", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -5916,7 +5948,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:10f2a119dedcaab43afc47ff13d9cb5b", + "entityUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -5978,6 +6010,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-model-performance", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "assertion", "entityUrn": "urn:li:assertion:f6a1fde3ab4919abcc04bdee93144958", diff --git a/metadata-ingestion/tests/integration/dbt/test_dbt.py b/metadata-ingestion/tests/integration/dbt/test_dbt.py index d213cffa78045e..d60bb425c1ff59 100644 --- 
a/metadata-ingestion/tests/integration/dbt/test_dbt.py +++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py @@ -465,7 +465,7 @@ def test_dbt_tests_only_assertions( ) > 20 ) - number_of_valid_assertions_in_test_results = 23 + number_of_valid_assertions_in_test_results = 24 assert ( mce_helpers.assert_entity_urn_like( entity_type="assertion", regex_pattern="urn:li:assertion:", file=output_file @@ -542,7 +542,7 @@ def test_dbt_only_test_definitions_and_results( ) > 20 ) - number_of_assertions = 24 + number_of_assertions = 25 assert ( mce_helpers.assert_entity_urn_like( entity_type="assertion", regex_pattern="urn:li:assertion:", file=output_file From bfc9ae90367df3a04b4a3f63c4ae50fa0262ba8b Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 25 Sep 2024 15:28:47 -0700 Subject: [PATCH 3/5] feat(ingest): add `git` to ingestion-base image (#11477) --- docker/datahub-ingestion-base/Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker/datahub-ingestion-base/Dockerfile b/docker/datahub-ingestion-base/Dockerfile index 08cf2efdcb6a19..6e885b8349a8b5 100644 --- a/docker/datahub-ingestion-base/Dockerfile +++ b/docker/datahub-ingestion-base/Dockerfile @@ -8,8 +8,6 @@ ARG DEBIAN_REPO_URL=https://deb.debian.org/debian ARG UBUNTU_REPO_URL=http://ports.ubuntu.com/ubuntu-ports ARG PIP_MIRROR_URL=https://pypi.python.org/simple -FROM powerman/dockerize:0.19 AS dockerize-binary - FROM ubuntu:22.04 AS base ARG GITHUB_REPO_URL @@ -44,6 +42,7 @@ RUN apt-get update && apt-get upgrade -y \ krb5-config \ libkrb5-dev \ librdkafka-dev \ + git \ wget \ curl \ zip \ From f8535c00c5180652a5ac4f44e189f890af4690ba Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 25 Sep 2024 15:39:11 -0700 Subject: [PATCH 4/5] fix(ingest): include platform instance in looker usage urns (#11469) --- .../ingestion/source/looker/looker_source.py | 70 ++++++------ .../ingestion/source/looker/looker_usage.py | 100 ++++++++---------- 2 files changed, 74 insertions(+), 96 
deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index 4cb70cf079c5f4..e593e132dafd7e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -13,7 +13,6 @@ Set, Tuple, Union, - cast, ) from looker_sdk.error import SDKError @@ -164,28 +163,6 @@ def __init__(self, config: LookerDashboardSourceConfig, ctx: PipelineContext): # The list values are used purely for debugging purposes. self.reachable_explores: Dict[Tuple[str, str], List[str]] = {} - # Keep stat generators to generate entity stat aspect later - stat_generator_config: looker_usage.StatGeneratorConfig = ( - looker_usage.StatGeneratorConfig( - looker_api_wrapper=self.looker_api, - looker_user_registry=self.user_registry, - interval=self.source_config.extract_usage_history_for_interval, - strip_user_ids_from_email=self.source_config.strip_user_ids_from_email, - platform_name=self.source_config.platform_name, - max_threads=self.source_config.max_threads, - ) - ) - - self.dashboard_stat_generator = looker_usage.create_stat_entity_generator( - looker_usage.SupportedStatEntity.DASHBOARD, - config=stat_generator_config, - ) - - self.chart_stat_generator = looker_usage.create_stat_entity_generator( - looker_usage.SupportedStatEntity.CHART, - config=stat_generator_config, - ) - # To keep track of folders (containers) which have already been ingested # Required, as we do not ingest all folders but only those that have dashboards/looks self.processed_folders: List[str] = [] @@ -648,9 +625,7 @@ def _create_platform_instance_aspect( ) def _make_chart_urn(self, element_id: str) -> str: - platform_instance: Optional[str] = None - if self.source_config.include_platform_instance_in_urns: platform_instance = self.source_config.platform_instance @@ -872,18 +847,21 @@ def 
_make_dashboard_metadata_events( return proposals - def make_dashboard_urn(self, looker_dashboard: LookerDashboard) -> str: + def _make_dashboard_urn(self, looker_dashboard_name_part: str) -> str: + # Note that `looker_dashboard_name_part` will likely be `dashboard.1234`. platform_instance: Optional[str] = None - if self.source_config.include_platform_instance_in_urns: platform_instance = self.source_config.platform_instance return builder.make_dashboard_urn( - name=looker_dashboard.get_urn_dashboard_id(), + name=looker_dashboard_name_part, platform=self.source_config.platform_name, platform_instance=platform_instance, ) + def make_dashboard_urn(self, looker_dashboard: LookerDashboard) -> str: + return self._make_dashboard_urn(looker_dashboard.get_urn_dashboard_id()) + def _make_explore_metadata_events( self, ) -> Iterable[ @@ -1397,7 +1375,6 @@ def _get_folder_and_ancestors_workunits( def extract_usage_stat( self, looker_dashboards: List[looker_usage.LookerDashboardForUsage] ) -> List[MetadataChangeProposalWrapper]: - mcps: List[MetadataChangeProposalWrapper] = [] looks: List[looker_usage.LookerChartForUsage] = [] # filter out look from all dashboard for dashboard in looker_dashboards: @@ -1408,16 +1385,33 @@ def extract_usage_stat( # dedup looks looks = list({str(look.id): look for look in looks}.values()) - usage_stat_generators = [ - self.dashboard_stat_generator( - cast(List[looker_usage.ModelForUsage], looker_dashboards), self.reporter - ), - self.chart_stat_generator( - cast(List[looker_usage.ModelForUsage], looks), self.reporter - ), - ] + # Keep stat generators to generate entity stat aspect later + stat_generator_config: looker_usage.StatGeneratorConfig = ( + looker_usage.StatGeneratorConfig( + looker_api_wrapper=self.looker_api, + looker_user_registry=self.user_registry, + interval=self.source_config.extract_usage_history_for_interval, + strip_user_ids_from_email=self.source_config.strip_user_ids_from_email, + max_threads=self.source_config.max_threads, + 
) + ) - for usage_stat_generator in usage_stat_generators: + dashboard_usage_generator = looker_usage.create_dashboard_stat_generator( + stat_generator_config, + self.reporter, + self._make_dashboard_urn, + looker_dashboards, + ) + + chart_usage_generator = looker_usage.create_chart_stat_generator( + stat_generator_config, + self.reporter, + self._make_chart_urn, + looks, + ) + + mcps: List[MetadataChangeProposalWrapper] = [] + for usage_stat_generator in [dashboard_usage_generator, chart_usage_generator]: for mcp in usage_stat_generator.generate_usage_stat_mcps(): mcps.append(mcp) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py index 93af0effa9f1f4..6a623e1e97b5dc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py @@ -10,12 +10,10 @@ import logging from abc import ABC, abstractmethod from dataclasses import dataclass -from enum import Enum -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, cast +from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, cast from looker_sdk.sdk.api40.models import Dashboard, LookWithQuery -import datahub.emitter.mce_builder as builder from datahub.emitter.mce_builder import Aspect, AspectAbstract from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.source.looker import looker_common @@ -97,7 +95,6 @@ class StatGeneratorConfig: looker_user_registry: LookerUserRegistry strip_user_ids_from_email: bool interval: str - platform_name: str max_threads: int = 1 @@ -166,7 +163,7 @@ class BaseStatGenerator(ABC): def __init__( self, config: StatGeneratorConfig, - looker_models: List[ModelForUsage], + looker_models: Sequence[ModelForUsage], report: LookerDashboardSourceReport, ): self.config = config @@ -411,14 +408,16 @@ class 
DashboardStatGenerator(BaseStatGenerator): def __init__( self, config: StatGeneratorConfig, - looker_dashboards: List[LookerDashboardForUsage], + looker_dashboards: Sequence[LookerDashboardForUsage], report: LookerDashboardSourceReport, + urn_builder: Callable[[str], str], ): super().__init__( config, - looker_models=cast(List[ModelForUsage], looker_dashboards), + looker_models=looker_dashboards, report=report, ) + self.urn_builder = urn_builder self.report = report self.report.report_dashboards_scanned_for_usage(len(looker_dashboards)) @@ -457,10 +456,7 @@ def _get_urn(self, model: ModelForUsage) -> str: assert isinstance(model, LookerDashboardForUsage) assert model.id is not None - return builder.make_dashboard_urn( - self.config.platform_name, - looker_common.get_urn_looker_dashboard_id(model.id), - ) + return self.urn_builder(looker_common.get_urn_looker_dashboard_id(model.id)) def to_entity_absolute_stat_aspect( self, looker_object: ModelForUsage @@ -528,14 +524,16 @@ class LookStatGenerator(BaseStatGenerator): def __init__( self, config: StatGeneratorConfig, - looker_looks: List[LookerChartForUsage], + looker_looks: Sequence[LookerChartForUsage], report: LookerDashboardSourceReport, + urn_builder: Callable[[str], str], ): super().__init__( config, - looker_models=cast(List[ModelForUsage], looker_looks), + looker_models=looker_looks, report=report, ) + self.urn_builder = urn_builder self.report = report report.report_charts_scanned_for_usage(len(looker_looks)) @@ -570,10 +568,7 @@ def _get_urn(self, model: ModelForUsage) -> str: assert isinstance(model, LookerChartForUsage) assert model.id is not None - return builder.make_chart_urn( - self.config.platform_name, - looker_common.get_urn_looker_element_id(str(model.id)), - ) + return self.urn_builder(looker_common.get_urn_looker_element_id(str(model.id))) def to_entity_absolute_stat_aspect( self, looker_object: ModelForUsage @@ -629,45 +624,34 @@ def append_user_stat( ) -class SupportedStatEntity(Enum): - 
DASHBOARD = "dashboard" - CHART = "chart" - - -# type_ is because of type is builtin identifier -def create_stat_entity_generator( - type_: SupportedStatEntity, config: StatGeneratorConfig -) -> Callable[[List[ModelForUsage], LookerDashboardSourceReport], BaseStatGenerator]: - # Wrapper function to defer creation of actual entities - # config is generally available at the startup, however entities may get created later during processing - def create_dashboard_stat_generator( - looker_dashboards: List[LookerDashboardForUsage], - report: LookerDashboardSourceReport, - ) -> BaseStatGenerator: - logger.debug( - "Number of dashboard received for stat processing = {}".format( - len(looker_dashboards) - ) - ) - return DashboardStatGenerator( - config=config, looker_dashboards=looker_dashboards, report=report +def create_dashboard_stat_generator( + config: StatGeneratorConfig, + report: LookerDashboardSourceReport, + urn_builder: Callable[[str], str], + looker_dashboards: Sequence[LookerDashboardForUsage], +) -> DashboardStatGenerator: + logger.debug( + "Number of dashboard received for stat processing = {}".format( + len(looker_dashboards) ) - - def create_chart_stat_generator( - looker_looks: List[LookerChartForUsage], report: LookerDashboardSourceReport - ) -> BaseStatGenerator: - logger.debug( - "Number of looks received for stat processing = {}".format( - len(looker_looks) - ) - ) - return LookStatGenerator( - config=config, looker_looks=looker_looks, report=report - ) - - stat_entities_generator = { - SupportedStatEntity.DASHBOARD: create_dashboard_stat_generator, - SupportedStatEntity.CHART: create_chart_stat_generator, - } - - return stat_entities_generator[type_] # type: ignore + ) + return DashboardStatGenerator( + config=config, + looker_dashboards=looker_dashboards, + report=report, + urn_builder=urn_builder, + ) + + +def create_chart_stat_generator( + config: StatGeneratorConfig, + report: LookerDashboardSourceReport, + urn_builder: Callable[[str], str], + 
looker_looks: Sequence[LookerChartForUsage], +) -> LookStatGenerator: + logger.debug( + "Number of looks received for stat processing = {}".format(len(looker_looks)) + ) + return LookStatGenerator( + config=config, looker_looks=looker_looks, report=report, urn_builder=urn_builder + ) From 68fb97f7f17743ebd115f8be3a637762fd8dfe22 Mon Sep 17 00:00:00 2001 From: sagar-salvi-apptware <159135491+sagar-salvi-apptware@users.noreply.github.com> Date: Thu, 26 Sep 2024 05:24:13 +0530 Subject: [PATCH 5/5] fix(ingest/openapi): update recipe for DataHub OpenAPI with url_complement and bearer token (#10980) --- .../docs/sources/openapi/openapi_recipe.yml | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/docs/sources/openapi/openapi_recipe.yml b/metadata-ingestion/docs/sources/openapi/openapi_recipe.yml index 07c08a814a7b1c..68cf869c0106fb 100644 --- a/metadata-ingestion/docs/sources/openapi/openapi_recipe.yml +++ b/metadata-ingestion/docs/sources/openapi/openapi_recipe.yml @@ -4,11 +4,21 @@ source: name: test_endpoint # this name will appear in DatHub url: https://test_endpoint.com/ swagger_file: classicapi/doc/swagger.json # where to search for the OpenApi definitions - get_token: # optional, if you need to get an authentication token beforehand + + # option 1: bearer token + bearer_token: "" + + # option 2: dynamically generated tokens, username/password is mandatory + get_token: request_type: get - url: api/authentication/login?username={username}&password={password} - username: your_username # optional - password: your_password # optional + url_complement: api/authentication/login?username={username}&password={password} + username: your_username + password: your_password + + # option 3: using basic auth + username: your_username + password: your_password + forced_examples: # optionals /accounts/groupname/{name}: ['test'] /accounts/username/{name}: ['test']