From 1955c05d78a7b791e622eaf736e8f8d53150a4e5 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Wed, 31 Jul 2024 13:29:00 +0100 Subject: [PATCH 01/10] feat(cli): Add run-id option to put sub-command (#11023) Adds an option to assign run-id to a given put command execution. This is useful when transformers do not exist for a given ingestion payload, we can follow up with custom metadata and assign it to an ingestion pipeline. --- .../src/datahub/cli/cli_utils.py | 11 +++++-- metadata-ingestion/src/datahub/cli/put_cli.py | 31 ++++++++++++++----- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py index 06861065ca6f25..21841b173c23d3 100644 --- a/metadata-ingestion/src/datahub/cli/cli_utils.py +++ b/metadata-ingestion/src/datahub/cli/cli_utils.py @@ -13,7 +13,7 @@ from datahub.emitter.aspect import ASPECT_MAP, TIMESERIES_ASPECT_MAP from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.request_helper import make_curl_command -from datahub.emitter.serialization_helper import post_json_transform +from datahub.emitter.serialization_helper import post_json_transform, pre_json_transform from datahub.metadata.com.linkedin.pegasus2avro.mxe import ( MetadataChangeEvent, MetadataChangeProposal, @@ -153,10 +153,11 @@ def post_entity( aspect_value: Dict, cached_session_host: Optional[Tuple[Session, str]] = None, is_async: Optional[str] = "false", + system_metadata: Union[None, SystemMetadataClass] = None, ) -> int: endpoint: str = "/aspects/?action=ingestProposal" - proposal = { + proposal: Dict[str, Any] = { "proposal": { "entityType": entity_type, "entityUrn": urn, @@ -169,6 +170,12 @@ def post_entity( }, "async": is_async, } + + if system_metadata is not None: + proposal["proposal"]["systemMetadata"] = json.dumps( + pre_json_transform(system_metadata.to_obj()) + ) + payload = json.dumps(proposal) url = gms_host + endpoint curl_command = 
make_curl_command(session, "POST", url, payload) diff --git a/metadata-ingestion/src/datahub/cli/put_cli.py b/metadata-ingestion/src/datahub/cli/put_cli.py index 40af54c7c7e2e4..989b1a6d02fd01 100644 --- a/metadata-ingestion/src/datahub/cli/put_cli.py +++ b/metadata-ingestion/src/datahub/cli/put_cli.py @@ -1,12 +1,12 @@ import logging -from typing import Optional +from typing import Optional, Union import click from click_default_group import DefaultGroup from datahub.cli.cli_utils import post_entity from datahub.configuration.config_loader import load_config_file -from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.emitter.mcp import MetadataChangeProposalWrapper, SystemMetadataClass from datahub.ingestion.graph.client import get_default_graph from datahub.metadata.schema_classes import ( DataPlatformInfoClass as DataPlatformInfo, @@ -36,9 +36,15 @@ def put() -> None: @click.option("--urn", required=True, type=str) @click.option("-a", "--aspect", required=True, type=str) @click.option("-d", "--aspect-data", required=True, type=str) +@click.option( + "--run-id", + type=str, + required=False, + help="Run ID into which we should log the aspect.", +) @upgrade.check_upgrade @telemetry.with_telemetry() -def aspect(urn: str, aspect: str, aspect_data: str) -> None: +def aspect(urn: str, aspect: str, aspect_data: str, run_id: Optional[str]) -> None: """Update a single aspect of an entity""" entity_type = guess_entity_type(urn) @@ -48,6 +54,10 @@ def aspect(urn: str, aspect: str, aspect_data: str) -> None: client = get_default_graph() + system_metadata: Union[None, SystemMetadataClass] = None + if run_id: + system_metadata = SystemMetadataClass(runId=run_id) + # TODO: Replace with client.emit, requires figuring out the correct subsclass of _Aspect to create from the data status = post_entity( client._session, @@ -56,6 +66,7 @@ def aspect(urn: str, aspect: str, aspect_data: str) -> None: aspect_name=aspect, entity_type=entity_type, 
aspect_value=aspect_obj, + system_metadata=system_metadata, ) click.secho(f"Update succeeded with status {status}", fg="green") @@ -82,8 +93,11 @@ def aspect(urn: str, aspect: str, aspect_data: str) -> None: help="Logo URL that must be reachable from the DataHub UI.", required=True, ) +@click.option( + "--run-id", type=str, help="Run ID into which we should log the platform." +) def platform( - ctx: click.Context, name: str, display_name: Optional[str], logo: str + ctx: click.Context, name: str, display_name: Optional[str], logo: str, run_id: str ) -> None: """ Create or update a dataplatform entity in DataHub @@ -104,11 +118,12 @@ def platform( logoUrl=logo, ) datahub_graph = get_default_graph() - datahub_graph.emit( - MetadataChangeProposalWrapper( - entityUrn=str(platform_urn), aspect=data_platform_info - ) + mcp = MetadataChangeProposalWrapper( + entityUrn=str(platform_urn), + aspect=data_platform_info, + systemMetadata=SystemMetadataClass(runId=run_id), ) + datahub_graph.emit(mcp) click.echo( f"✅ Successfully wrote data platform metadata for {platform_urn} to DataHub ({datahub_graph})" ) From e75118942b55cdbbb1c56c0c2343a0fdd3175780 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 31 Jul 2024 09:07:38 -0700 Subject: [PATCH 02/10] fix(ingest): improve sql error reporting calls (#11025) --- .../ingestion/source/sql/sql_common.py | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 1624203c4096b9..b3a5f134c61d64 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -713,18 +713,17 @@ def loop_tables( # noqa: C901 data_reader, ) except Exception as e: - self.warn( - logger, - f"{schema}.{table}", - f"Ingestion error: {e}", + self.report.warning( + "Error processing table", + 
context=f"{schema}.{table}", + exc=e, ) - logger.debug( - f"Error processing table {schema}.{table}: Error was: {e} Traceback:", - exc_info=e, - ) - except Exception as e: - self.error(logger, f"{schema}", f"Tables error: {e}") + self.report.failure( + "Error processing tables", + context=schema, + exc=e, + ) def add_information_for_schema(self, inspector: Inspector, schema: str) -> None: pass @@ -1047,13 +1046,17 @@ def loop_views( sql_config=sql_config, ) except Exception as e: - self.warn( - logger, - f"{schema}.{view}", - f"Ingestion error: {e} {traceback.format_exc()}", + self.report.warning( + "Error processing view", + context=f"{schema}.{view}", + exc=e, ) except Exception as e: - self.error(logger, f"{schema}", f"Views error: {e}") + self.report.failure( + "Error processing views", + context=schema, + exc=e, + ) def _process_view( self, From 233330480cae622b4c3ff815d52ab64c09ecdc16 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 31 Jul 2024 09:08:54 -0700 Subject: [PATCH 03/10] fix(airflow): fix CI setup (#11031) --- .github/workflows/airflow-plugin.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml index ab5b3eb48da7f3..d4f0a1369da253 100644 --- a/.github/workflows/airflow-plugin.yml +++ b/.github/workflows/airflow-plugin.yml @@ -52,7 +52,7 @@ jobs: extra_pip_requirements: 'apache-airflow~=2.8.1 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.8.1/constraints-3.10.txt' extra_pip_extras: plugin-v2 - python-version: "3.11" - extra_pip_requirements: 'apache-airflow~=2.9.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.9.3/constraints-3.10.txt' + extra_pip_requirements: 'apache-airflow~=2.9.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.9.3/constraints-3.11.txt' extra_pip_extras: plugin-v2 fail-fast: false steps: From 4b9844da1b405495c8ebf8982d1247277838b029 Mon Sep 17 00:00:00 2001 From: Harshal Sheth 
Date: Wed, 31 Jul 2024 09:23:02 -0700 Subject: [PATCH 04/10] feat(ingest/dbt): add experimental `prefer_sql_parser_lineage` flag (#11039) --- .../ingestion/source/dbt/dbt_common.py | 125 +- .../dbt_enabled_with_schemas_mces_golden.json | 38 +- .../dbt_test_column_meta_mapping_golden.json | 62 +- ...test_prefer_sql_parser_lineage_golden.json | 3126 +++++++++++++++++ ...bt_test_test_model_performance_golden.json | 62 +- ...th_complex_owner_patterns_mces_golden.json | 38 +- ...th_data_platform_instance_mces_golden.json | 38 +- ...h_non_incremental_lineage_mces_golden.json | 38 +- ..._target_platform_instance_mces_golden.json | 38 +- .../tests/integration/dbt/test_dbt.py | 14 + .../tests/unit/test_dbt_source.py | 26 + 11 files changed, 3421 insertions(+), 184 deletions(-) create mode 100644 metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 9972e9e46369d9..ead86acc299ca0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -366,6 +366,13 @@ class DBTCommonConfig( description="When enabled, includes the compiled code in the emitted metadata.", ) + prefer_sql_parser_lineage: bool = Field( + default=False, + description="Normally we use dbt's metadata to generate table lineage. When enabled, we prefer results from the SQL parser when generating lineage instead. " + "This can be useful when dbt models reference tables directly, instead of using the ref() macro. 
" + "This requires that `skip_sources_in_lineage` is enabled.", + ) + @validator("target_platform") def validate_target_platform_value(cls, target_platform: str) -> str: if target_platform.lower() == DBT_PLATFORM: @@ -447,6 +454,16 @@ def validate_skip_sources_in_lineage( return skip_sources_in_lineage + @validator("prefer_sql_parser_lineage") + def validate_prefer_sql_parser_lineage( + cls, prefer_sql_parser_lineage: bool, values: Dict + ) -> bool: + if prefer_sql_parser_lineage and not values.get("skip_sources_in_lineage"): + raise ValueError( + "`prefer_sql_parser_lineage` requires that `skip_sources_in_lineage` is enabled." + ) + return prefer_sql_parser_lineage + @dataclass class DBTColumn: @@ -516,6 +533,9 @@ class DBTNode: columns: List[DBTColumn] = field(default_factory=list) upstream_nodes: List[str] = field(default_factory=list) # list of upstream dbt_name upstream_cll: List[DBTColumnLineageInfo] = field(default_factory=list) + raw_sql_parsing_result: Optional[ + SqlParsingResult + ] = None # only set for nodes that don't depend on ephemeral models cll_debug_info: Optional[SqlParsingDebugInfo] = None meta: Dict[str, Any] = field(default_factory=dict) @@ -1130,6 +1150,7 @@ def _infer_schemas_and_update_cll( # noqa: C901 # Run sql parser to infer the schema + generate column lineage. sql_result = None + depends_on_ephemeral_models = False if node.node_type in {"source", "test", "seed"}: # For sources, we generate CLL as a 1:1 mapping. # We don't support CLL for tests (assertions) or seeds. @@ -1148,6 +1169,8 @@ def _infer_schemas_and_update_cll( # noqa: C901 upstream_node.name, schema_resolver.platform ) } + if cte_mapping: + depends_on_ephemeral_models = True sql_result = self._parse_cll(node, cte_mapping, schema_resolver) else: @@ -1155,8 +1178,12 @@ def _infer_schemas_and_update_cll( # noqa: C901 # Save the column lineage. if self.config.include_column_lineage and sql_result: - # We only save the debug info here. 
We'll report errors based on it later, after - # applying the configured node filters. + # We save the raw info here. We use this for supporting `prefer_sql_parser_lineage`. + if not depends_on_ephemeral_models: + node.raw_sql_parsing_result = sql_result + + # We use this for error reporting. However, we only want to report errors + # after node filters are applied. node.cll_debug_info = sql_result.debug_info if sql_result.column_lineage: @@ -1171,6 +1198,7 @@ def _infer_schemas_and_update_cll( # noqa: C901 for column_lineage_info in sql_result.column_lineage for upstream_column in column_lineage_info.upstreams # Only include the CLL if the table in in the upstream list. + # TODO: Add some telemetry around this - how frequently does it filter stuff out? if target_platform_urn_to_dbt_name.get(upstream_column.table) in node.upstream_nodes ] @@ -1813,33 +1841,76 @@ def _translate_dbt_name_to_upstream_urn(dbt_name: str) -> str: if node.cll_debug_info and node.cll_debug_info.error: self.report.report_warning( - node.dbt_name, - f"Error parsing SQL to generate column lineage: {node.cll_debug_info.error}", + "Error parsing SQL to generate column lineage", + context=node.dbt_name, + exc=node.cll_debug_info.error, ) - cll = [ - FineGrainedLineage( - upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, - downstreamType=FineGrainedLineageDownstreamType.FIELD_SET, - upstreams=[ - mce_builder.make_schema_field_urn( - _translate_dbt_name_to_upstream_urn( - upstream_column.upstream_dbt_name - ), - upstream_column.upstream_col, + + cll = None + if self.config.prefer_sql_parser_lineage and node.raw_sql_parsing_result: + sql_parsing_result = node.raw_sql_parsing_result + if sql_parsing_result and not sql_parsing_result.debug_info.table_error: + # If we have some table lineage from SQL parsing, use that. 
+ upstream_urns = sql_parsing_result.in_tables + + cll = [] + for column_lineage in sql_parsing_result.column_lineage or []: + cll.append( + FineGrainedLineage( + upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, + downstreamType=FineGrainedLineageDownstreamType.FIELD, + upstreams=[ + mce_builder.make_schema_field_urn( + upstream.table, upstream.column + ) + for upstream in column_lineage.upstreams + ], + downstreams=[ + mce_builder.make_schema_field_urn( + node_urn, column_lineage.downstream.column + ) + ], + confidenceScore=sql_parsing_result.debug_info.confidence, + ) ) - for upstream_column in upstreams - ], - downstreams=[ - mce_builder.make_schema_field_urn(node_urn, downstream) - ], - confidenceScore=( - node.cll_debug_info.confidence if node.cll_debug_info else None - ), - ) - for downstream, upstreams in itertools.groupby( - node.upstream_cll, lambda x: x.downstream_col - ) - ] + + else: + if self.config.prefer_sql_parser_lineage: + if node.upstream_cll: + self.report.report_warning( + "SQL parser lineage is not available for this node, falling back to dbt-based column lineage.", + context=node.dbt_name, + ) + else: + # SQL parsing failed entirely, which is already reported above. 
+ pass + + cll = [ + FineGrainedLineage( + upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, + downstreamType=FineGrainedLineageDownstreamType.FIELD, + upstreams=[ + mce_builder.make_schema_field_urn( + _translate_dbt_name_to_upstream_urn( + upstream_column.upstream_dbt_name + ), + upstream_column.upstream_col, + ) + for upstream_column in upstreams + ], + downstreams=[ + mce_builder.make_schema_field_urn(node_urn, downstream) + ], + confidenceScore=( + node.cll_debug_info.confidence + if node.cll_debug_info + else None + ), + ) + for downstream, upstreams in itertools.groupby( + node.upstream_cll, lambda x: x.downstream_col + ) + ] if not upstream_urns: return None diff --git a/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json index aacb7093e5b4ea..dc8c400b291574 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_enabled_with_schemas_mces_golden.json @@ -271,7 +271,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),customer_id)" ], @@ -283,7 +283,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),full_name)" ], @@ -294,7 +294,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)" ], - 
"downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),email)" ], @@ -305,7 +305,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),address)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),address)" ], @@ -316,7 +316,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),city)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),city)" ], @@ -327,7 +327,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),postal_code)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),postal_code)" ], @@ -338,7 +338,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),phone)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),phone)" ], @@ -549,7 +549,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),billing_month)" ], @@ -560,7 +560,7 @@ "upstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),customer_id)" ], @@ -571,7 +571,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),amount)" ], @@ -828,7 +828,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),amount)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" ], @@ -844,7 +844,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),customer_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" ], @@ -860,7 +860,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_date)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_date)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" ], @@ -876,7 +876,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_id)" ], @@ -892,7 +892,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),rental_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),rental_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),rental_id)" ], @@ -908,7 +908,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),staff_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),staff_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),staff_id)" ], @@ -1076,7 +1076,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" ], @@ -1087,7 +1087,7 @@ "upstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" ], @@ -1098,7 +1098,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" ], diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json index 5a35b4763af06f..60f5bf4fbca9a1 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_column_meta_mapping_golden.json @@ -224,7 +224,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),customer_id)" ], @@ -236,7 +236,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),full_name)" ], @@ -248,7 +248,7 @@ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),first_name)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),last_name)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),initial_full_name)" ], @@ -259,7 +259,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),email)" ], @@ -270,7 +270,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),address)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),address)" ], @@ -281,7 +281,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),city)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),city)" ], @@ -292,7 +292,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),postal_code)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),postal_code)" ], @@ -303,7 +303,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),phone)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),phone)" ], @@ -491,7 +491,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD),billing_month)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),billing_month)" ], @@ -502,7 +502,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),customer_id)" ], @@ -513,7 +513,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),amount)" ], @@ -524,7 +524,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),email)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),email)" ], @@ -773,7 +773,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),amount)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),amount)" ], @@ -789,7 +789,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),customer_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),customer_id)" ], @@ -805,7 +805,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_date)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_date)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),payment_date)" ], @@ -821,7 +821,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),payment_id)" ], @@ -837,7 +837,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),rental_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),rental_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),rental_id)" ], @@ -853,7 +853,7 @@ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),staff_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),staff_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),staff_id)" ], @@ -1049,7 +1049,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),payment_date)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD),billing_month)" ], @@ -1060,7 +1060,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD),customer_id)" ], @@ -1071,7 +1071,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD),amount)" ], @@ -1371,7 +1371,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),active)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),active)" ], @@ -1382,7 +1382,7 @@ "upstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),activebool)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),activebool)" ], @@ -1393,7 +1393,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),address_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),address_id)" ], @@ -1404,7 +1404,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),create_date)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),create_date)" ], @@ -1415,7 +1415,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),customer_id)" ], @@ -1426,7 +1426,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),email)" ], @@ -1437,7 +1437,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),first_name)" ], @@ -1448,7 +1448,7 
@@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),last_name)" ], @@ -1459,7 +1459,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_update)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),last_update)" ], @@ -1470,7 +1470,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),store_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),store_id)" ], diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json new file mode 100644 index 00000000000000..81754fd6cbcaca --- /dev/null +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json @@ -0,0 +1,3126 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Model" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": 
"urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "model", + "materialization": "ephemeral", + "dbt_file_path": "models/transform/customer_details.sql", + "language": "sql", + "dbt_unique_id": "model.sample_dbt.customer_details", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "customer_details", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "model.sample_dbt.customer_details", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "full_name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": 
"initial_full_name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NullType": {} + } + }, + "nativeDataType": "", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "address", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "city", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "postal_code", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "phone", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "TEXT", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.address,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.city,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + 
"time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),customer_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),first_name)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),last_name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),full_name)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),first_name)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),last_name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),initial_full_name)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),email)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),email)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.address,PROD),address)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),address)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.city,PROD),city)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),city)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.address,PROD),postal_code)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),postal_code)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.address,PROD),phone)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),phone)" + ], + "confidenceScore": 0.9 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.ViewProperties": { + "materialized": false, + "viewLogic": "{{ config(\n materialized = \"ephemeral\",\n) }}\n\nSELECT\n c.customer_id,\n c.first_name || ' ' || c.last_name as \"full_name\",\n (\n select cs.first_name || ' ' || cs.last_name\n from {{ ref('customer_snapshot') }} cs where cs.customer_id = c.customer_id\n order by dbt_valid_from desc\n limit 1\n ) as \"initial_full_name\",\n c.email,\n a.address,\n m.city,\n a.postal_code,\n a.phone\nFROM\n {{ source('pagila', 'customer')}} c\n left outer join {{ source('pagila', 'address')}} a on c.address_id = a.address_id\n 
left outer join {{ source('pagila', 'city') }} m on a.city_id = m.city_id", + "formattedViewLogic": "SELECT\n c.customer_id,\n c.first_name || ' ' || c.last_name AS \"full_name\",\n (\n SELECT\n cs.first_name || ' ' || cs.last_name\n FROM \"pagila\".\"public\".\"customer_snapshot\" AS cs\n WHERE\n cs.customer_id = c.customer_id\n ORDER BY\n dbt_valid_from DESC\n LIMIT 1\n ) AS \"initial_full_name\",\n c.email,\n a.address,\n m.city,\n a.postal_code,\n a.phone\nFROM \"pagila\".\"public\".\"customer\" AS c\nLEFT OUTER JOIN \"pagila\".\"public\".\"address\" AS a\n ON c.address_id = a.address_id\nLEFT OUTER JOIN \"pagila\".\"public\".\"city\" AS m\n ON a.city_id = m.city_id", + "viewLanguage": "SQL" + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Model" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + 
"customProperties": { + "node_type": "model", + "materialization": "table", + "dbt_file_path": "models/billing/monthly_billing_with_cust.sql", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "model.sample_dbt.monthly_billing_with_cust", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "an-aliased-view-for-monthly-billing", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "model.sample_dbt.monthly_billing_with_cust", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "billing_month", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + 
"recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD),BillingMonth)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),billing_month)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD),customer_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD),amount)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),amount)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),email)" + ], + 
"downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),email)" + ], + "confidenceScore": 0.9 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.ViewProperties": { + "materialized": true, + "viewLogic": "{{ config(\n materialized = \"table\",\n alias='an-aliased-view-for-monthly-billing'\n) }}\n\nSELECT \n pbc.\"BillingMonth\" as billing_month,\n pbc.customer_id,\n pbc.amount,\n cust.email\nFROM\n {{ ref('payments_by_customer_by_month')}} pbc\n left outer join {{ ref('customer_details')}} cust on pbc.customer_id = cust.customer_id\nORDER BY\n pbc.\"BillingMonth\"", + "formattedViewLogic": "WITH __dbt__cte__customer_details AS (\n SELECT\n c.customer_id,\n c.first_name || ' ' || c.last_name AS \"full_name\",\n (\n SELECT\n cs.first_name || ' ' || cs.last_name\n FROM \"pagila\".\"public\".\"customer_snapshot\" AS cs\n WHERE\n cs.customer_id = c.customer_id\n ORDER BY\n dbt_valid_from DESC\n LIMIT 1\n ) AS \"initial_full_name\",\n c.email,\n a.address,\n m.city,\n a.postal_code,\n a.phone\n FROM \"pagila\".\"public\".\"customer\" AS c\n LEFT OUTER JOIN \"pagila\".\"public\".\"address\" AS a\n ON c.address_id = a.address_id\n LEFT OUTER JOIN \"pagila\".\"public\".\"city\" AS m\n ON a.city_id = m.city_id\n)\nSELECT\n pbc.\"BillingMonth\" AS billing_month,\n pbc.customer_id,\n pbc.amount,\n cust.email\nFROM \"pagila\".\"public\".\"payments_by_customer_by_month\" AS pbc\nLEFT OUTER JOIN __dbt__cte__customer_details AS cust\n ON pbc.customer_id = cust.customer_id\nORDER BY\n pbc.\"BillingMonth\"", + "viewLanguage": "SQL" + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:a42a5b1bee156e45972e12d4156fb7a2", + "changeType": "UPSERT", + "aspectName": 
"dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "dbt_name": "model.sample_dbt.monthly_billing_with_cust", + "dbt_urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)" + }, + "name": "just-some-random-id_urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "type": "BATCH_SCHEDULED", + "created": { + "time": 1663355198240, + "actor": "urn:li:corpuser:datahub" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:a42a5b1bee156e45972e12d4156fb7a2", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "upstreamInstances": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:a42a5b1bee156e45972e12d4156fb7a2", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:a42a5b1bee156e45972e12d4156fb7a2", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + 
"urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:a42a5b1bee156e45972e12d4156fb7a2", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1663355198240, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:a42a5b1bee156e45972e12d4156fb7a2", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1663355198242, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "success" + }, + "durationMillis": 2 + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Model" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { 
+ "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "model", + "materialization": "view", + "dbt_file_path": "models/base/payments_base.sql", + "catalog_type": "VIEW", + "language": "sql", + "dbt_unique_id": "model.sample_dbt.payments_base", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "an_aliased_view_for_payments", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "model.sample_dbt.payments_base", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric(5,2)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": 
false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "payment_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "rental_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "staff_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_01,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_03,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_04,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_05,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_06,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_01,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_03,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_04,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_05,PROD),amount)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_06,PROD),amount)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),amount)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_01,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_03,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_04,PROD),customer_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_05,PROD),customer_id)", + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_06,PROD),customer_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_01,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_03,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_04,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_05,PROD),payment_date)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_06,PROD),payment_date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),payment_date)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_01,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_03,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_04,PROD),payment_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_05,PROD),payment_id)", + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_06,PROD),payment_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),payment_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_01,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_03,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_04,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_05,PROD),rental_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_06,PROD),rental_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),rental_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_01,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_03,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_04,PROD),staff_id)", + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_05,PROD),staff_id)", + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_06,PROD),staff_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),staff_id)" + ], + "confidenceScore": 0.9 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.ViewProperties": { + "materialized": false, + "viewLogic": "{{ config(\n materialized=\"view\",\n alias='an_aliased_view_for_payments'\n) }}\n\nwith payments as (\n\n select \n *\n from \n {{ source('pagila', 'payment_p2020_01')}}\n UNION ALL\n select \n *\n from \n {{ source('pagila', 'payment_p2020_02')}}\n UNION ALL\n select \n *\n from \n {{ source('pagila', 'payment_p2020_02')}}\n UNION ALL\n select \n *\n from \n {{ source('pagila', 'payment_p2020_03')}}\n UNION ALL\n select \n *\n from \n {{ source('pagila', 'payment_p2020_04')}}\n UNION ALL\n select \n *\n from \n {{ source('pagila', 'payment_p2020_05')}}\n UNION ALL\n select \n *\n from \n {{ source('pagila', 'payment_p2020_06')}}\n)\n\nselect *\nfrom payments", + "formattedViewLogic": "WITH payments AS (\n SELECT\n *\n FROM \"pagila\".\"public\".\"payment_p2020_01\"\n UNION ALL\n SELECT\n *\n FROM \"pagila\".\"public\".\"payment_p2020_02\"\n UNION ALL\n SELECT\n *\n FROM \"pagila\".\"public\".\"payment_p2020_02\"\n UNION ALL\n SELECT\n *\n FROM \"pagila\".\"public\".\"payment_p2020_03\"\n UNION ALL\n SELECT\n *\n FROM \"pagila\".\"public\".\"payment_p2020_04\"\n UNION ALL\n SELECT\n *\n FROM \"pagila\".\"public\".\"payment_p2020_05\"\n UNION ALL\n SELECT\n *\n FROM \"pagila\".\"public\".\"payment_p2020_06\"\n)\nSELECT\n *\nFROM payments", + "viewLanguage": "SQL" + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": 
"urn:li:dataProcessInstance:fc6268f0be68fd04c310705b65efd6fe", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "dbt_name": "model.sample_dbt.payments_base", + "dbt_urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD)" + }, + "name": "just-some-random-id_urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD)", + "type": "BATCH_SCHEDULED", + "created": { + "time": 1663355198240, + "actor": "urn:li:corpuser:datahub" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fc6268f0be68fd04c310705b65efd6fe", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD)", + "upstreamInstances": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fc6268f0be68fd04c310705b65efd6fe", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_01,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_02,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_03,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_04,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_05,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payment_p2020_06,PROD)" + ] + 
} + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fc6268f0be68fd04c310705b65efd6fe", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fc6268f0be68fd04c310705b65efd6fe", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1663355198240, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fc6268f0be68fd04c310705b65efd6fe", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1663355198242, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "success" + }, + "durationMillis": 2 + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + 
"Model" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "model", + "materialization": "table", + "dbt_file_path": "models/transform/payments_by_customer_by_month.sql", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "model.sample_dbt.payments_by_customer_by_month", + "dbt_package_name": "sample_dbt", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "payments_by_customer_by_month", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "model.sample_dbt.payments_by_customer_by_month", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + 
"tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "BillingMonth", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "amount", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "numeric", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "description": "description for customer_id from dbt", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:dbt:tag_from_dbt" + } + ] + }, + "isPartOfKey": false + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),payment_date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD),BillingMonth)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),customer_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD),customer_id)" + ], + "confidenceScore": 0.9 
+ }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),amount)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD),amount)" + ], + "confidenceScore": 0.9 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.dataset.ViewProperties": { + "materialized": true, + "viewLogic": "{{ config(\n materialized = \"table\",\n) }}\n\nSELECT\n date_trunc('month', payment_date) as \"BillingMonth\",\n customer_id,\n sum(amount) as \"amount\"\nFROM\n {{ ref('payments_base')}}\nGROUP BY\n \"BillingMonth\",\n customer_id", + "formattedViewLogic": "SELECT\n DATE_TRUNC('MONTH', payment_date) AS \"BillingMonth\",\n customer_id,\n SUM(amount) AS \"amount\"\nFROM \"pagila\".\"public\".\"an_aliased_view_for_payments\"\nGROUP BY\n \"BillingMonth\",\n customer_id", + "viewLanguage": "SQL" + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:57aa623f096cf3a28af70fe94b713907", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "dbt_name": "model.sample_dbt.payments_by_customer_by_month", + "dbt_urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD)" + }, + "name": "just-some-random-id_urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD)", + "type": "BATCH_SCHEDULED", + "created": { + "time": 1663355198240, + "actor": "urn:li:corpuser:datahub" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": 
"dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:57aa623f096cf3a28af70fe94b713907", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD)", + "upstreamInstances": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:57aa623f096cf3a28af70fe94b713907", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:57aa623f096cf3a28af70fe94b713907", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:57aa623f096cf3a28af70fe94b713907", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1663355198240, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": 
"dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:57aa623f096cf3a28af70fe94b713907", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1663355198242, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "success" + }, + "durationMillis": 2 + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Snapshot" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "snapshot", + "materialization": "snapshot", + "dbt_file_path": "snapshots/customer_snapshot.sql", + "catalog_type": "BASE TABLE", + "language": "sql", + "dbt_unique_id": "snapshot.sample_dbt.customer_snapshot", + "dbt_package_name": "sample_dbt", + "manifest_schema": 
"https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "name": "customer_snapshot", + "description": "", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "snapshot.sample_dbt.customer_snapshot", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "active", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "activebool", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.BooleanType": {} + } + }, + "nativeDataType": "boolean", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "address_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "create_date", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "date", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "dbt_scd_id", + "nullable": false, + "type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "dbt_updated_at", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "dbt_valid_from", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "dbt_valid_to", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "email", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "first_name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_name", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "text", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_update", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp with time zone", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "store_id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false + } + ] + } + }, + { + 
"com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),active)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),active)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),activebool)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),activebool)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),address_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),address_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),create_date)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),create_date)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),customer_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),customer_id)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),email)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),email)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),first_name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),first_name)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),last_name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),last_name)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),last_update)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),last_update)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD),store_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),store_id)" + ], + "confidenceScore": 0.9 + } + ] + } + }, + { + 
"com.linkedin.pegasus2avro.dataset.ViewProperties": { + "materialized": true, + "viewLogic": "\n\n{{\n config(\n target_database='pagila',\n target_schema='public',\n unique_key='customer_id',\n\n strategy='timestamp',\n updated_at='last_update',\n )\n}}\n\nselect * from {{ source('pagila', 'customer') }}\n\n", + "formattedViewLogic": "SELECT\n *\nFROM \"pagila\".\"public\".\"customer\"", + "viewLanguage": "SQL" + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:36225e795a4597b2376996774a803b0d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": { + "dbt_name": "snapshot.sample_dbt.customer_snapshot", + "dbt_urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD)" + }, + "name": "just-some-random-id_urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD)", + "type": "BATCH_SCHEDULED", + "created": { + "time": 1663355198240, + "actor": "urn:li:corpuser:datahub" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:36225e795a4597b2376996774a803b0d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD)", + "upstreamInstances": [] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:36225e795a4597b2376996774a803b0d", + "changeType": "UPSERT", 
+ "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer,PROD)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:36225e795a4597b2376996774a803b0d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:36225e795a4597b2376996774a803b0d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1663355198240, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:36225e795a4597b2376996774a803b0d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1663355198242, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "success" + }, + "durationMillis": 2 + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "changeType": "PATCH", + "aspectName": "upstreamLineage", + "aspect": { + "json": [ + { + "op": "add", + "path": "/upstreams/urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "type": "COPY" + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an-aliased-view-for-monthly-billing,PROD),amount)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),amount)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an-aliased-view-for-monthly-billing,PROD),billing_month)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),billing_month)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an-aliased-view-for-monthly-billing,PROD),customer_id)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),customer_id)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an-aliased-view-for-monthly-billing,PROD),email)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),email)", + 
"value": { + "confidenceScore": 1.0 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD)", + "changeType": "PATCH", + "aspectName": "upstreamLineage", + "aspect": { + "json": [ + { + "op": "add", + "path": "/upstreams/urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD)", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD)", + "type": "COPY" + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),amount)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),amount)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),customer_id)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),customer_id)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),payment_date)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),payment_date)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": 
"/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),payment_id)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),payment_id)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),rental_id)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),rental_id)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),staff_id)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),staff_id)", + "value": { + "confidenceScore": 1.0 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD)", + "changeType": "PATCH", + "aspectName": "upstreamLineage", + "aspect": { + "json": [ + { + "op": "add", + "path": "/upstreams/urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD)", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD)", + "type": "COPY" + } + }, + { + "op": "add", + "path": 
"/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD),BillingMonth)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD),BillingMonth)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD),amount)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD),amount)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD),customer_id)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD),customer_id)", + "value": { + "confidenceScore": 1.0 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD)", + "changeType": "PATCH", + "aspectName": "upstreamLineage", + "aspect": { + "json": [ + { + "op": "add", + "path": "/upstreams/urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD)", + "value": { + "auditStamp": { + "time": 1643871600000, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD)", + "type": "COPY" + } + }, + { + "op": "add", + "path": 
"/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),active)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),active)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),activebool)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),activebool)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),address_id)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),address_id)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),create_date)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),create_date)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),customer_id)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),customer_id)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),dbt_scd_id)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),dbt_scd_id)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + 
"path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),dbt_updated_at)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),dbt_updated_at)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),dbt_valid_from)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),dbt_valid_from)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),dbt_valid_to)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),dbt_valid_to)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),email)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),email)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),first_name)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),first_name)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),last_name)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),last_name)", + "value": { + "confidenceScore": 1.0 + } + }, + 
{ + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),last_update)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),last_update)", + "value": { + "confidenceScore": 1.0 + } + }, + { + "op": "add", + "path": "/fineGrainedLineages/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),store_id)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),store_id)", + "value": { + "confidenceScore": 1.0 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:ba2c6ba830d407d539452f4cf46c92a6", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:ba2c6ba830d407d539452f4cf46c92a6", + "changeType": "UPSERT", + "aspectName": "assertionInfo", + "aspect": { + "json": { + "customProperties": { + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "type": "DATASET", + "datasetAssertion": { + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD)", + "scope": "DATASET_ROWS", + "aggregation": "_NATIVE_", + "operator": "_NATIVE_", + "nativeType": "assert_source_actor_last_update_is_recent", + "nativeParameters": {}, + "logic": "select\n *\nfrom 
\"pagila\".\"public\".\"actor\"\nwhere last_update < (now() - interval '100 years')" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:ba2c6ba830d407d539452f4cf46c92a6", + "changeType": "UPSERT", + "aspectName": "assertionRunEvent", + "aspect": { + "json": { + "timestampMillis": 1663355198239, + "runId": "just-some-random-id", + "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD)", + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:ba2c6ba830d407d539452f4cf46c92a6", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:10f2a119dedcaab43afc47ff13d9cb5b", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:10f2a119dedcaab43afc47ff13d9cb5b", + "changeType": "UPSERT", + "aspectName": "assertionInfo", + "aspect": { + "json": { + "customProperties": { + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "type": "DATASET", + "datasetAssertion": { + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "scope": 
"DATASET_COLUMN", + "fields": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),email)" + ], + "aggregation": "_NATIVE_", + "operator": "_NATIVE_", + "nativeType": "is_email_monthly_billing_with_cust_email", + "nativeParameters": { + "column_name": "email", + "model": "{{ get_where_subquery(ref('monthly_billing_with_cust')) }}" + }, + "logic": "\n\n select *\n from \"pagila\".\"public\".\"an-aliased-view-for-monthly-billing\"\n where email not like '%@%'\n\n" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:10f2a119dedcaab43afc47ff13d9cb5b", + "changeType": "UPSERT", + "aspectName": "assertionRunEvent", + "aspect": { + "json": { + "timestampMillis": 1663355198239, + "runId": "just-some-random-id", + "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:10f2a119dedcaab43afc47ff13d9cb5b", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:c456eccf6440c6e3388c584689a74d91", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:c456eccf6440c6e3388c584689a74d91", + "changeType": "UPSERT", + "aspectName": 
"assertionInfo", + "aspect": { + "json": { + "customProperties": { + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "type": "DATASET", + "datasetAssertion": { + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "scope": "DATASET_COLUMN", + "fields": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),billing_month)" + ], + "aggregation": "IDENTITY", + "operator": "NOT_NULL", + "nativeType": "not_null_monthly_billing_with_cust_billing_month", + "nativeParameters": { + "column_name": "billing_month", + "model": "{{ get_where_subquery(ref('monthly_billing_with_cust')) }}" + } + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:c456eccf6440c6e3388c584689a74d91", + "changeType": "UPSERT", + "aspectName": "assertionRunEvent", + "aspect": { + "json": { + "timestampMillis": 1663355198239, + "runId": "just-some-random-id", + "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:c456eccf6440c6e3388c584689a74d91", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:f812b73477d81e6af283d918cb59e7bf", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + 
"aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:f812b73477d81e6af283d918cb59e7bf", + "changeType": "UPSERT", + "aspectName": "assertionInfo", + "aspect": { + "json": { + "customProperties": { + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "type": "DATASET", + "datasetAssertion": { + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "scope": "DATASET_COLUMN", + "fields": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),email)" + ], + "aggregation": "IDENTITY", + "operator": "NOT_NULL", + "nativeType": "not_null_monthly_billing_with_cust_email", + "nativeParameters": { + "column_name": "email", + "model": "{{ get_where_subquery(ref('monthly_billing_with_cust')) }}" + } + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:f812b73477d81e6af283d918cb59e7bf", + "changeType": "UPSERT", + "aspectName": "assertionRunEvent", + "aspect": { + "json": { + "timestampMillis": 1663355198239, + "runId": "just-some-random-id", + "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:f812b73477d81e6af283d918cb59e7bf", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": 
"FULL_TABLE_SNAPSHOT" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "changeType": "UPSERT", + "aspectName": "assertionInfo", + "aspect": { + "json": { + "customProperties": { + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "type": "DATASET", + "datasetAssertion": { + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "scope": "DATASET_COLUMN", + "fields": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),customer_id)" + ], + "aggregation": "IDENTITY", + "operator": "_NATIVE_", + "parameters": { + "value": { + "value": "null", + "type": "SET" + } + }, + "nativeType": "relationships_monthly_billing_with_cust_customer_id__customer_id__ref_customer_details_", + "nativeParameters": { + "column_name": "customer_id", + "field": "customer_id", + "model": "{{ get_where_subquery(ref('monthly_billing_with_cust')) }}", + "to": "ref('customer_details')" + }, + "logic": "monthly_billing_with_cust.customer_id referential integrity to customer_details.customer_id" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", 
+ "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "changeType": "UPSERT", + "aspectName": "assertionRunEvent", + "aspect": { + "json": { + "timestampMillis": 1663355198239, + "runId": "just-some-random-id", + "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "changeType": "UPSERT", + "aspectName": "assertionInfo", + "aspect": { + "json": { + "customProperties": { + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "type": "DATASET", + "datasetAssertion": { + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", + "scope": "DATASET_COLUMN", + "fields": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),customer_id)" + ], + "aggregation": "IDENTITY", + "operator": "_NATIVE_", + "parameters": { + "value": { + "value": "null", + "type": "SET" + } + }, + "nativeType": "relationships_monthly_billing_with_cust_customer_id__customer_id__ref_customer_details_", + "nativeParameters": { + "column_name": "customer_id", + "field": "customer_id", + "model": "{{ get_where_subquery(ref('monthly_billing_with_cust')) }}", + "to": "ref('customer_details')" + }, + 
"logic": "monthly_billing_with_cust.customer_id referential integrity to customer_details.customer_id" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "changeType": "UPSERT", + "aspectName": "assertionRunEvent", + "aspect": { + "json": { + "timestampMillis": 1663355198239, + "runId": "just-some-random-id", + "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:f6a1fde3ab4919abcc04bdee93144958", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:f6a1fde3ab4919abcc04bdee93144958", + "changeType": "UPSERT", + "aspectName": "assertionInfo", + "aspect": { + "json": { + "customProperties": { + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "type": "DATASET", + "datasetAssertion": { + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD)", + "scope": 
"DATASET_COLUMN", + "fields": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD),actor_id)" + ], + "aggregation": "IDENTITY", + "operator": "NOT_NULL", + "nativeType": "source_not_null_pagila_actor_actor_id", + "nativeParameters": { + "column_name": "actor_id", + "model": "{{ get_where_subquery(source('pagila', 'actor')) }}" + } + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:f6a1fde3ab4919abcc04bdee93144958", + "changeType": "UPSERT", + "aspectName": "assertionRunEvent", + "aspect": { + "json": { + "timestampMillis": 1663355198239, + "runId": "just-some-random-id", + "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD)", + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:f6a1fde3ab4919abcc04bdee93144958", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:60ce4aad7ff6dbff7004da0f2258c9df", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:60ce4aad7ff6dbff7004da0f2258c9df", + "changeType": "UPSERT", + "aspectName": "assertionInfo", + "aspect": { + "json": { + "customProperties": { + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", + "manifest_version": "1.7.3", + "manifest_adapter": "postgres", + 
"catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.7.3" + }, + "type": "DATASET", + "datasetAssertion": { + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD)", + "scope": "DATASET_COLUMN", + "fields": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD),actor_id)" + ], + "aggregation": "UNIQUE_PROPOTION", + "operator": "EQUAL_TO", + "parameters": { + "value": { + "value": "1.0", + "type": "NUMBER" + } + }, + "nativeType": "source_unique_pagila_actor_actor_id", + "nativeParameters": { + "column_name": "actor_id", + "model": "{{ get_where_subquery(source('pagila', 'actor')) }}" + } + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:60ce4aad7ff6dbff7004da0f2258c9df", + "changeType": "UPSERT", + "aspectName": "assertionRunEvent", + "aspect": { + "json": { + "timestampMillis": 1663355198239, + "runId": "just-some-random-id", + "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.actor,PROD)", + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResults": {} + }, + "assertionUrn": "urn:li:assertion:60ce4aad7ff6dbff7004da0f2258c9df", + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": 
"urn:li:assertion:10f2a119dedcaab43afc47ff13d9cb5b", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:60ce4aad7ff6dbff7004da0f2258c9df", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:ba2c6ba830d407d539452f4cf46c92a6", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:c456eccf6440c6e3388c584689a74d91", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:f6a1fde3ab4919abcc04bdee93144958", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:f812b73477d81e6af283d918cb59e7bf", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": 
"dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:36225e795a4597b2376996774a803b0d", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:57aa623f096cf3a28af70fe94b713907", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:a42a5b1bee156e45972e12d4156fb7a2", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:fc6268f0be68fd04c310705b65efd6fe", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:dbt:tag_from_dbt", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "dbt:tag_from_dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git 
a/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json index a9b7df7c2b2a1e..bec42f460e0b55 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json @@ -224,7 +224,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),customer_id)" ], @@ -236,7 +236,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),full_name)" ], @@ -248,7 +248,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),first_name)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.customer_snapshot,PROD),last_name)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),initial_full_name)" ], @@ -259,7 +259,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),email)" ], @@ -270,7 +270,7 @@ "upstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),address)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),address)" ], @@ -281,7 +281,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),city)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),city)" ], @@ -292,7 +292,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),postal_code)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),postal_code)" ], @@ -303,7 +303,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),phone)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),phone)" ], @@ -491,7 +491,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD),BillingMonth)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),billing_month)" ], @@ -502,7 +502,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),customer_id)" ], @@ -513,7 +513,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.payments_by_customer_by_month,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),amount)" ], @@ -524,7 +524,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),email)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),email)" ], @@ -899,7 +899,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),amount)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),amount)" ], @@ -915,7 +915,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),customer_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),customer_id)" ], @@ -931,7 +931,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_date)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_date)" ], 
- "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),payment_date)" ], @@ -947,7 +947,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),payment_id)" ], @@ -963,7 +963,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),rental_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),rental_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),rental_id)" ], @@ -979,7 +979,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),staff_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),staff_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an_aliased_view_for_payments,PROD),staff_id)" ], @@ -1285,7 +1285,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),payment_date)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD),BillingMonth)" ], @@ -1296,7 +1296,7 @@ "upstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD),customer_id)" ], @@ -1307,7 +1307,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.an_aliased_view_for_payments,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payments_by_customer_by_month,PROD),amount)" ], @@ -1732,7 +1732,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),active)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),active)" ], @@ -1743,7 +1743,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),activebool)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),activebool)" ], @@ -1754,7 +1754,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),address_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),address_id)" ], @@ -1765,7 +1765,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),create_date)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),create_date)" ], @@ -1776,7 +1776,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),customer_id)" ], @@ -1787,7 +1787,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),email)" ], @@ -1798,7 +1798,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),first_name)" ], @@ -1809,7 +1809,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),last_name)" ], @@ -1820,7 +1820,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_update)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),last_update)" ], @@ -1831,7 +1831,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),store_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", 
"downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_snapshot,PROD),store_id)" ], diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json index 4863a1e4f73983..495fa32569f569 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_complex_owner_patterns_mces_golden.json @@ -235,7 +235,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),customer_id)" ], @@ -247,7 +247,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),full_name)" ], @@ -258,7 +258,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),email)" ], @@ -269,7 +269,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),address)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),address)" ], @@ -280,7 +280,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),city)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),city)" ], @@ -291,7 +291,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),postal_code)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),postal_code)" ], @@ -302,7 +302,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),phone)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),phone)" ], @@ -495,7 +495,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),billing_month)" ], @@ -506,7 +506,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),customer_id)" ], @@ -517,7 +517,7 @@ "upstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),amount)" ], @@ -774,7 +774,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),amount)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" ], @@ -790,7 +790,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),customer_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" ], @@ -806,7 +806,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_date)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_date)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" ], @@ -822,7 +822,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_id)" ], - "downstreamType": "FIELD_SET", + 
"downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_id)" ], @@ -838,7 +838,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),rental_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),rental_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),rental_id)" ], @@ -854,7 +854,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),staff_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),staff_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),staff_id)" ], @@ -1022,7 +1022,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" ], @@ -1033,7 +1033,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" ], @@ -1044,7 +1044,7 @@ "upstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" ], diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json index 7c61e9fcd0e3a1..20b7cf4a1c26ca 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_data_platform_instance_mces_golden.json @@ -237,7 +237,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.customer,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),customer_id)" ], @@ -249,7 +249,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.customer,PROD),first_name)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.customer,PROD),last_name)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),full_name)" ], @@ -260,7 +260,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.customer,PROD),email)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),email)" ], @@ 
-271,7 +271,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.address,PROD),address)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),address)" ], @@ -282,7 +282,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.city,PROD),city)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),city)" ], @@ -293,7 +293,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.address,PROD),postal_code)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),postal_code)" ], @@ -304,7 +304,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.address,PROD),phone)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.customer_details,PROD),phone)" ], @@ -498,7 +498,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),billing_month)" ], @@ -509,7 +509,7 @@ "upstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),customer_id)" ], @@ -520,7 +520,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),amount)" ], @@ -778,7 +778,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_05,PROD),amount)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" ], @@ -794,7 +794,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_05,PROD),customer_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" ], @@ -810,7 +810,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_05,PROD),payment_date)", 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD),payment_date)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" ], @@ -826,7 +826,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_05,PROD),payment_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD),payment_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_id)" ], @@ -842,7 +842,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_05,PROD),rental_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD),rental_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),rental_id)" ], @@ -858,7 +858,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_05,PROD),staff_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.public.payment_p2020_06,PROD),staff_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),staff_id)" ], @@ -1027,7 +1027,7 @@ "upstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" ], @@ -1038,7 +1038,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" ], @@ -1049,7 +1049,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,dbt-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" ], diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json index a2a8437d551ebf..80ca85a5e6c61b 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_non_incremental_lineage_mces_golden.json @@ -236,7 +236,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),customer_id)" ], @@ -248,7 +248,7 @@ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),full_name)" ], @@ -259,7 +259,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),email)" ], @@ -270,7 +270,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),address)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),address)" ], @@ -281,7 +281,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),city)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),city)" ], @@ -292,7 +292,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),postal_code)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),postal_code)" ], @@ -303,7 +303,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),phone)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),phone)" ], @@ -496,7 +496,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),billing_month)" ], @@ -507,7 +507,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),customer_id)" ], @@ -518,7 +518,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),amount)" ], @@ -775,7 +775,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),amount)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" ], @@ -791,7 +791,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),customer_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),customer_id)" ], - 
"downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" ], @@ -807,7 +807,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_date)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_date)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" ], @@ -823,7 +823,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_id)" ], @@ -839,7 +839,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),rental_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),rental_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),rental_id)" ], @@ -855,7 +855,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),staff_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),staff_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),staff_id)" ], @@ -1023,7 +1023,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" ], @@ -1034,7 +1034,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" ], @@ -1045,7 +1045,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" ], diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json index c37f3847117f68..1e6e4d8ba94a2e 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_with_target_platform_instance_mces_golden.json @@ -236,7 +236,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),customer_id)" ], @@ -248,7 +248,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),first_name)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),last_name)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),full_name)" ], @@ -259,7 +259,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer,PROD),email)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),email)" ], @@ -270,7 +270,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),address)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),address)" ], @@ -281,7 +281,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.city,PROD),city)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),city)" ], @@ -292,7 +292,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),postal_code)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),postal_code)" ], @@ -303,7 +303,7 @@ "upstreams": [ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.address,PROD),phone)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.customer_details,PROD),phone)" ], @@ -496,7 +496,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),billing_month)" ], @@ -507,7 +507,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),customer_id)" ], @@ -518,7 +518,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-monthly-billing,PROD),amount)" ], @@ -775,7 +775,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),amount)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" ], @@ -791,7 +791,7 @@ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),customer_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" ], @@ -807,7 +807,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_date)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_date)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" ], @@ -823,7 +823,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),payment_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),payment_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_id)" ], @@ -839,7 +839,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),rental_id)", "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),rental_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),rental_id)" ], @@ -855,7 +855,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_05,PROD),staff_id)", 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.payment_p2020_06,PROD),staff_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.an-aliased-view-for-payments,PROD),staff_id)" ], @@ -1023,7 +1023,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),payment_date)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),billing_month)" ], @@ -1034,7 +1034,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),customer_id)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),customer_id)" ], @@ -1045,7 +1045,7 @@ "upstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,ps-instance-1.pagila.dbt_postgres.an-aliased-view-for-payments,PROD),amount)" ], - "downstreamType": "FIELD_SET", + "downstreamType": "FIELD", "downstreams": [ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.dbt_postgres.payments_by_customer_by_month,PROD),amount)" ], diff --git a/metadata-ingestion/tests/integration/dbt/test_dbt.py b/metadata-ingestion/tests/integration/dbt/test_dbt.py index 941315fcfa9d5e..a46da9707679c7 100644 --- a/metadata-ingestion/tests/integration/dbt/test_dbt.py +++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py @@ -216,6 +216,20 @@ def set_paths( run_results_files=["sample_dbt_run_results_2.json"], source_config_modifiers={}, ), + DbtTestConfig( + "dbt-prefer-sql-parser-lineage", + 
"dbt_test_prefer_sql_parser_lineage.json", + "dbt_test_prefer_sql_parser_lineage_golden.json", + catalog_file="sample_dbt_catalog_2.json", + manifest_file="sample_dbt_manifest_2.json", + sources_file="sample_dbt_sources_2.json", + run_results_files=["sample_dbt_run_results_2.json"], + source_config_modifiers={ + "prefer_sql_parser_lineage": True, + "skip_sources_in_lineage": True, + "entities_enabled": {"sources": "NO"}, + }, + ), ], ids=lambda dbt_test_config: dbt_test_config.run_id, ) diff --git a/metadata-ingestion/tests/unit/test_dbt_source.py b/metadata-ingestion/tests/unit/test_dbt_source.py index 99387ab4e6ae48..48a6fd0f650685 100644 --- a/metadata-ingestion/tests/unit/test_dbt_source.py +++ b/metadata-ingestion/tests/unit/test_dbt_source.py @@ -226,6 +226,32 @@ def test_dbt_config_skip_sources_in_lineage(): assert config.skip_sources_in_lineage is True +def test_dbt_config_prefer_sql_parser_lineage(): + with pytest.raises( + ValidationError, + match="prefer_sql_parser_lineage.*requires.*skip_sources_in_lineage", + ): + config_dict = { + "manifest_path": "dummy_path", + "catalog_path": "dummy_path", + "target_platform": "dummy_platform", + "prefer_sql_parser_lineage": True, + } + config = DBTCoreConfig.parse_obj(config_dict) + + config_dict = { + "manifest_path": "dummy_path", + "catalog_path": "dummy_path", + "target_platform": "dummy_platform", + "skip_sources_in_lineage": True, + "entities_enabled": {"sources": "NO"}, + "prefer_sql_parser_lineage": True, + } + config = DBTCoreConfig.parse_obj(config_dict) + assert config.skip_sources_in_lineage is True + assert config.prefer_sql_parser_lineage is True + + def test_dbt_s3_config(): # test missing aws config config_dict: dict = { From 451220978588de62a04391221223bd1e6bf1d14d Mon Sep 17 00:00:00 2001 From: sid-acryl <155424659+sid-acryl@users.noreply.github.com> Date: Wed, 31 Jul 2024 22:06:25 +0530 Subject: [PATCH 05/10] fix(ingestion/lookml): enable stack-trace in lookml logs (#10971) --- 
.../source/looker/looker_file_loader.py | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py index bc069bd1e59ac4..fd670c23ad9cb0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_file_loader.py @@ -60,7 +60,12 @@ def _load_viewfile( with open(path) as file: raw_file_content = file.read() except Exception as e: - self.reporter.failure("Failed to read lkml file", path, exc=e) + self.reporter.failure( + title="LKML File Loading Error", + message="A lookml file is not present on local storage or GitHub", + context=f"file path: {path}", + exc=e, + ) self.viewfile_cache[path] = None return None try: @@ -86,8 +91,19 @@ def _load_viewfile( self.viewfile_cache[path] = looker_viewfile return looker_viewfile except Exception as e: - self.reporter.failure("Failed to parse lkml file", path, exc=e) + self.reporter.failure( + title="LKML File Parsing Error", + message="The input file is not lookml file", + context=f"file path: {path}", + exc=e, + ) + + logger.debug(f"Raw file content for path {path}") + + logger.debug(raw_file_content) + self.viewfile_cache[path] = None + return None def load_viewfile( From 7fc6556da664e8c406d6de55e83e1bf3053ae479 Mon Sep 17 00:00:00 2001 From: Raj Tekal Date: Wed, 31 Jul 2024 12:47:58 -0400 Subject: [PATCH 06/10] (chore): Linting fix (#11015) --- .../java/com/linkedin/metadata/service/RollbackService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/service/RollbackService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/RollbackService.java index 01af399c9b1155..403665120c6868 100644 --- 
a/metadata-service/services/src/main/java/com/linkedin/metadata/service/RollbackService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/service/RollbackService.java @@ -173,7 +173,7 @@ public RollbackResponse rollbackIngestion( // Rollback timeseries aspects DeleteAspectValuesResult timeseriesRollbackResult = timeseriesAspectService.rollbackTimeseriesAspects(opContext, runId); - rowsDeletedFromEntityDeletion += timeseriesRollbackResult.getNumDocsDeleted(); + rowsDeletedFromEntityDeletion += timeseriesRollbackResult.getNumDocsDeleted().intValue(); log.info("finished deleting {} rows", deletedRows.size()); int aspectsReverted = deletedRows.size() + rowsDeletedFromEntityDeletion; From 347ac1aeb74c8aa6a4f2f609aaf7351fa0c2db5a Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Wed, 31 Jul 2024 22:19:39 +0530 Subject: [PATCH 07/10] chore(ci): update deprecated github actions (#10977) --- .github/workflows/airflow-plugin.yml | 4 ++-- .github/workflows/build-and-test.yml | 4 ++-- .github/workflows/dagster-plugin.yml | 4 ++-- .github/workflows/docker-unified.yml | 6 +++--- .github/workflows/metadata-ingestion.yml | 4 ++-- .github/workflows/metadata-io.yml | 4 ++-- .github/workflows/spark-smoke-test.yml | 4 ++-- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml index d4f0a1369da253..114256ad825e56 100644 --- a/.github/workflows/airflow-plugin.yml +++ b/.github/workflows/airflow-plugin.yml @@ -74,7 +74,7 @@ jobs: - name: pip freeze show list installed if: always() run: source metadata-ingestion-modules/airflow-plugin/venv/bin/activate && pip freeze - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: ${{ always() && matrix.python-version == '3.10' && matrix.extra_pip_requirements == 'apache-airflow>=2.7.0' }} with: name: Test Results (Airflow Plugin ${{ matrix.python-version}}) @@ -98,7 +98,7 @@ jobs: runs-on: ubuntu-latest steps: - name: 
Upload - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: Event File path: ${{ github.event_path }} diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index c93267947b65a8..d2116fc2fca788 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -99,7 +99,7 @@ jobs: if: ${{ matrix.command == 'except_metadata_ingestion' && needs.setup.outputs.backend_change == 'true' }} run: | ./gradlew -PjavaClassVersionDefault=8 :metadata-integration:java:spark-lineage:compileJava - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: always() with: name: Test Results (build) @@ -128,7 +128,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Upload - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: Event File path: ${{ github.event_path }} diff --git a/.github/workflows/dagster-plugin.yml b/.github/workflows/dagster-plugin.yml index 48f1b24196c9e0..381a01aca82c34 100644 --- a/.github/workflows/dagster-plugin.yml +++ b/.github/workflows/dagster-plugin.yml @@ -56,7 +56,7 @@ jobs: - name: pip freeze show list installed if: always() run: source metadata-ingestion-modules/dagster-plugin/venv/bin/activate && pip freeze - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: ${{ always() && matrix.python-version == '3.10' && matrix.extraPythonRequirement == 'dagster>=1.3.3' }} with: name: Test Results (dagster Plugin ${{ matrix.python-version}}) @@ -79,7 +79,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Upload - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: Event File path: ${{ github.event_path }} diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 9487e71e8da3d1..216f51e8ce970d 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -1024,18 +1024,18 @@ jobs: docker logs 
datahub-datahub-frontend-react-1 >& frontend-${{ matrix.test_strategy }}.log || true docker logs datahub-upgrade-1 >& upgrade-${{ matrix.test_strategy }}.log || true - name: Upload logs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: failure() with: name: docker logs path: "*.log" - name: Upload screenshots - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: failure() with: name: cypress-snapshots-${{ matrix.test_strategy }} path: smoke-test/tests/cypress/cypress/screenshots/ - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: always() with: name: Test Results (smoke tests) ${{ matrix.test_strategy }} diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index 51b97552eb150a..ef84afd9c37793 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -83,7 +83,7 @@ jobs: df -hl docker image ls docker system df - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: Test Results (metadata ingestion ${{ matrix.python-version }}) path: | @@ -106,7 +106,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Upload - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: Event File path: ${{ github.event_path }} diff --git a/.github/workflows/metadata-io.yml b/.github/workflows/metadata-io.yml index 6797c7ad67c0b6..4b1e878ea25261 100644 --- a/.github/workflows/metadata-io.yml +++ b/.github/workflows/metadata-io.yml @@ -62,7 +62,7 @@ jobs: - name: Gradle build (and test) run: | ./gradlew :metadata-io:test - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: always() with: name: Test Results (metadata-io) @@ -78,7 +78,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Upload - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: Event File path: ${{ github.event_path }} diff --git 
a/.github/workflows/spark-smoke-test.yml b/.github/workflows/spark-smoke-test.yml index 8ffc8420ba9413..46f6e95454477f 100644 --- a/.github/workflows/spark-smoke-test.yml +++ b/.github/workflows/spark-smoke-test.yml @@ -69,14 +69,14 @@ jobs: docker logs elasticsearch >& elasticsearch-${{ matrix.test_strategy }}.log || true docker logs datahub-frontend-react >& frontend-${{ matrix.test_strategy }}.log || true - name: Upload logs - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: failure() with: name: docker logs path: | "**/build/container-logs/*.log" "*.log" - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 if: always() with: name: Test Results (smoke tests) From 52c50f029a60b85b4fdb1bf15e83ecd6897e99d4 Mon Sep 17 00:00:00 2001 From: Steffen Grohsschmiedt Date: Wed, 31 Jul 2024 18:58:56 +0200 Subject: [PATCH 08/10] Fix ALB configuration example (#10981) --- docs/deploy/aws.md | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/docs/deploy/aws.md b/docs/deploy/aws.md index d1003077e24861..67dd9a734e67f5 100644 --- a/docs/deploy/aws.md +++ b/docs/deploy/aws.md @@ -76,7 +76,7 @@ First, if you did not use eksctl to setup the kubernetes cluster, make sure to g Download the IAM policy document for allowing the controller to make calls to AWS APIs on your behalf. ``` -curl -o iam_policy.json https://raw.githubusercontent.com/kubernetes-sigs/aws-load-balancer-controller/v2.2.0/docs/install/iam_policy.json +curl -o iam_policy.json https://raw.githubusercontent.com/kubernetes-sigs/aws-load-balancer-controller/main/docs/install/iam_policy.json ``` Create an IAM policy based on the policy document by running the following. 
@@ -148,13 +148,9 @@ datahub-frontend: alb.ingress.kubernetes.io/certificate-arn: <> alb.ingress.kubernetes.io/inbound-cidrs: 0.0.0.0/0 alb.ingress.kubernetes.io/listen-ports: '[{"HTTP": 80}, {"HTTPS":443}]' - alb.ingress.kubernetes.io/actions.ssl-redirect: '{"Type": "redirect", "RedirectConfig": { "Protocol": "HTTPS", "Port": "443", "StatusCode": "HTTP_301"}}' + alb.ingress.kubernetes.io/ssl-redirect: '443' hosts: - host: <> - redirectPaths: - - path: /* - name: ssl-redirect - port: use-annotation paths: - /* ``` From edc8fd409d18b45574ab4cc8ab49540a2709e563 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Wed, 31 Jul 2024 12:39:07 -0500 Subject: [PATCH 09/10] chore(ingestion-base): bump base image packages (#11053) --- .../base-requirements.txt | 179 +++++++++--------- docker/datahub-ingestion-base/build.gradle | 2 +- docker/datahub-ingestion/build.gradle | 2 +- docker/docker-compose-with-cassandra.yml | 6 +- docker/docker-compose-without-neo4j.yml | 6 +- docker/docker-compose.yml | 6 +- .../profiles/docker-compose.prerequisites.yml | 2 +- 7 files changed, 106 insertions(+), 97 deletions(-) diff --git a/docker/datahub-ingestion-base/base-requirements.txt b/docker/datahub-ingestion-base/base-requirements.txt index 2f2b64723f0edd..fa07b4184a6bc0 100644 --- a/docker/datahub-ingestion-base/base-requirements.txt +++ b/docker/datahub-ingestion-base/base-requirements.txt @@ -1,19 +1,20 @@ # Generated requirements file. Run ./regenerate-base-requirements.sh to regenerate. 
-acryl-datahub-classify==0.0.10 +acryl-datahub-classify==0.0.11 acryl-PyHive==0.6.16 -acryl-sqlglot==24.0.1.dev7 +acryl-sqlglot==25.3.1.dev3 aenum==3.1.15 -aiohttp==3.9.5 +aiohappyeyeballs==2.3.2 +aiohttp==3.10.0 aiosignal==1.3.1 -alembic==1.13.1 +alembic==1.13.2 altair==4.2.0 anyio==4.4.0 -apache-airflow==2.9.2 +apache-airflow==2.9.3 apache-airflow-providers-common-io==1.3.2 -apache-airflow-providers-common-sql==1.14.0 -apache-airflow-providers-fab==1.1.1 -apache-airflow-providers-ftp==3.9.1 -apache-airflow-providers-http==4.11.1 +apache-airflow-providers-common-sql==1.14.2 +apache-airflow-providers-fab==1.2.2 +apache-airflow-providers-ftp==3.10.0 +apache-airflow-providers-http==4.12.0 apache-airflow-providers-imap==3.6.1 apache-airflow-providers-smtp==1.7.1 apache-airflow-providers-sqlite==3.8.1 @@ -26,25 +27,30 @@ asgiref==3.8.1 asn1crypto==1.5.1 asttokens==2.4.1 async-timeout==4.0.3 -asynch==0.2.3 +asynch==0.2.4 attrs==23.2.0 avro==1.11.3 avro-gen3==0.7.13 +azure-common==1.1.28 +azure-core==1.29.4 +azure-identity==1.14.1 +azure-storage-blob==12.21.0 +azure-storage-file-datalake==12.16.0 Babel==2.15.0 backoff==2.2.1 beautifulsoup4==4.12.3 bleach==6.1.0 blinker==1.8.2 blis==0.7.11 -boto3==1.34.129 -botocore==1.34.129 +boto3==1.34.151 +botocore==1.34.151 bracex==2.4 cached-property==1.5.2 cachelib==0.9.0 -cachetools==5.3.3 +cachetools==5.4.0 catalogue==2.0.10 cattrs==23.2.3 -certifi==2024.6.2 +certifi==2024.7.4 cffi==1.16.0 chardet==5.2.0 charset-normalizer==3.3.2 @@ -55,25 +61,26 @@ click-spinner==0.1.10 clickclick==20.10.2 clickhouse-driver==0.2.8 clickhouse-sqlalchemy==0.2.4 +cloudpathlib==0.18.1 cloudpickle==3.0.0 colorama==0.4.6 colorlog==4.8.0 comm==0.2.2 confection==0.1.5 ConfigUpdater==3.2 -confluent-kafka==2.4.0 -connexion==2.14.1 +confluent-kafka==2.5.0 +connexion==2.14.2 cron-descriptor==1.4.3 -croniter==2.0.5 +croniter==3.0.3 cryptography==42.0.8 cx_Oracle==8.3.0 cymem==2.0.8 databricks-dbapi==0.6.0 -databricks-sdk==0.28.0 +databricks-sdk==0.29.0 
databricks-sql-connector==2.9.6 dataflows-tabulator==1.54.3 db-dtypes==1.2.0 -debugpy==1.8.1 +debugpy==1.8.2 decorator==5.1.1 defusedxml==0.7.1 deltalake==0.17.4 @@ -84,42 +91,42 @@ docker==7.1.0 docutils==0.21.2 ecdsa==0.19.0 elasticsearch==7.13.4 -email_validator==2.1.2 +email_validator==2.2.0 entrypoints==0.4 et-xmlfile==1.1.0 -exceptiongroup==1.2.1 +exceptiongroup==1.2.2 executing==2.0.1 expandvars==0.12.0 -fastavro==1.9.4 +fastavro==1.9.5 fastjsonschema==2.20.0 -filelock==3.15.1 +filelock==3.15.4 Flask==2.2.5 flatdict==4.0.1 frozenlist==1.4.1 fsspec==2023.12.2 future==1.0.0 -GeoAlchemy2==0.15.1 +GeoAlchemy2==0.15.2 gitdb==4.0.11 GitPython==3.1.43 -google-api-core==2.19.0 -google-auth==2.30.0 -google-cloud-appengine-logging==1.4.3 +google-api-core==2.19.1 +google-auth==2.32.0 +google-cloud-appengine-logging==1.4.5 google-cloud-audit-log==0.2.5 -google-cloud-bigquery==3.24.0 +google-cloud-bigquery==3.25.0 google-cloud-core==2.4.1 -google-cloud-datacatalog==3.19.0 +google-cloud-datacatalog==3.20.0 google-cloud-datacatalog-lineage==0.2.2 google-cloud-logging==3.5.0 google-crc32c==1.5.0 -google-re2==1.1.20240601 +google-re2==1.1.20240702 google-resumable-media==2.7.1 -googleapis-common-protos==1.63.1 +googleapis-common-protos==1.63.2 gql==3.5.0 graphql-core==3.2.3 great-expectations==0.15.50 greenlet==3.0.3 -grpc-google-iam-v1==0.13.0 -grpcio==1.64.1 +grpc-google-iam-v1==0.13.1 +grpcio==1.65.2 grpcio-status==1.62.2 grpcio-tools==1.62.2 gssapi==1.8.3 @@ -130,7 +137,7 @@ httpx==0.27.0 humanfriendly==10.0 idna==3.7 ijson==3.3.0 -importlib_metadata==7.1.0 +importlib_metadata==7.2.1 importlib_resources==6.4.0 inflection==0.5.1 ipaddress==1.0.23 @@ -148,7 +155,7 @@ jsonlines==4.0.0 jsonpatch==1.33 jsonpointer==3.0.0 jsonref==1.1.0 -jsonschema==4.22.0 +jsonschema==4.23.0 jsonschema-specifications==2023.12.1 jupyter-server==1.16.0 jupyter_client==7.4.9 @@ -159,16 +166,16 @@ langcodes==3.4.0 language_data==1.2.0 lark==1.1.4 lazy-object-proxy==1.10.0 -leb128==1.0.7 
-limits==3.12.0 +leb128==1.0.8 +limits==3.13.0 linear-tsv==1.1.0 linkify-it-py==2.0.3 -lkml==1.3.4 +lkml==1.3.5 lockfile==0.12.2 looker-sdk==23.0.0 lxml==5.2.2 lz4==4.3.3 -makefun==1.15.2 +makefun==1.15.4 Mako==1.3.5 marisa-trie==1.2.0 markdown-it-py==3.0.0 @@ -182,11 +189,12 @@ mdurl==0.1.2 methodtools==0.4.7 mistune==3.0.2 mixpanel==4.10.1 -mlflow-skinny==2.14.0 +mlflow-skinny==2.15.0 mmhash3==3.0.1 more-itertools==10.3.0 moto==4.2.14 msal==1.22.0 +msal-extensions==1.1.0 multidict==6.0.5 murmurhash==1.0.10 mypy-extensions==1.0.0 @@ -201,47 +209,46 @@ notebook_shim==0.2.4 numpy==1.26.4 oauthlib==3.2.2 okta==1.7.0 -openlineage-airflow==1.12.0 -openlineage-integration-common==1.12.0 -openlineage-python==1.12.0 -openlineage_sql==1.12.0 -openpyxl==3.1.4 -opentelemetry-api==1.25.0 -opentelemetry-exporter-otlp==1.25.0 -opentelemetry-exporter-otlp-proto-common==1.25.0 -opentelemetry-exporter-otlp-proto-grpc==1.25.0 -opentelemetry-exporter-otlp-proto-http==1.25.0 -opentelemetry-proto==1.25.0 -opentelemetry-sdk==1.25.0 -opentelemetry-semantic-conventions==0.46b0 +openlineage-airflow==1.18.0 +openlineage-integration-common==1.18.0 +openlineage-python==1.18.0 +openlineage_sql==1.18.0 +openpyxl==3.1.5 +opentelemetry-api==1.26.0 +opentelemetry-exporter-otlp==1.26.0 +opentelemetry-exporter-otlp-proto-common==1.26.0 +opentelemetry-exporter-otlp-proto-grpc==1.26.0 +opentelemetry-exporter-otlp-proto-http==1.26.0 +opentelemetry-proto==1.26.0 +opentelemetry-sdk==1.26.0 +opentelemetry-semantic-conventions==0.47b0 ordered-set==4.1.0 packaging==24.1 pandas==2.1.4 pandocfilters==1.5.1 parse==1.20.2 parso==0.8.4 -pathlib_abc==0.1.1 pathspec==0.12.1 -pathy==0.11.0 pendulum==3.0.0 pexpect==4.9.0 phonenumbers==8.13.0 platformdirs==4.2.2 pluggy==1.5.0 +portalocker==2.10.1 preshed==3.0.9 prison==0.2.1 progressbar2==4.4.2 prometheus_client==0.20.0 prompt_toolkit==3.0.47 -proto-plus==1.23.0 -protobuf==4.25.3 +proto-plus==1.24.0 +protobuf==4.25.4 psutil==6.0.0 psycopg2-binary==2.9.9 
ptyprocess==0.7.0 -pure-eval==0.2.2 pure-sasl==0.6.2 +pure_eval==0.2.3 py-partiql-parser==0.5.0 -pyarrow==16.1.0 +pyarrow==17.0.0 pyarrow-hotfix==0.6 pyasn1==0.6.0 pyasn1_modules==0.4.0 @@ -249,16 +256,16 @@ pyathena==2.25.2 pycountry==24.6.1 pycparser==2.22 pycryptodome==3.20.0 -pydantic==1.10.16 -pydash==8.0.1 +pydantic==1.10.17 +pydash==8.0.3 pydruid==0.6.9 Pygments==2.18.0 pyiceberg==0.4.0 -pymongo==4.7.3 +pymongo==4.8.0 PyMySQL==1.1.1 -pyOpenSSL==24.1.0 +pyOpenSSL==24.2.1 pyparsing==3.0.9 -pyspnego==0.11.0 +pyspnego==0.11.1 python-daemon==3.0.1 python-dateutil==2.9.0.post0 python-jose==3.3.0 @@ -273,9 +280,9 @@ pytz==2024.1 PyYAML==6.0.1 pyzmq==26.0.3 redash-toolbelt==0.1.9 -redshift-connector==2.1.1 +redshift-connector==2.1.2 referencing==0.35.1 -regex==2024.5.15 +regex==2024.7.24 requests==2.32.3 requests-file==2.1.0 requests-gssapi==1.3.0 @@ -286,31 +293,32 @@ rfc3339-validator==0.1.4 rfc3986==2.0.0 rich==13.7.1 rich-argparse==1.5.2 -rpds-py==0.18.1 +rpds-py==0.19.1 rsa==4.9 rstr==3.2.2 ruamel.yaml==0.17.17 -s3transfer==0.10.1 +s3transfer==0.10.2 schwifty==2024.6.1 -scipy==1.13.1 +scipy==1.14.0 scramp==1.4.5 Send2Trash==1.8.3 -sentry-sdk==2.5.1 +sentry-sdk==2.12.0 setproctitle==1.3.3 +shellingham==1.5.4 simple-salesforce==1.12.6 six==1.16.0 slack-sdk==3.18.1 -smart-open==6.4.0 +smart-open==7.0.4 smmap==5.0.1 sniffio==1.3.1 -snowflake-connector-python==3.11.0 -snowflake-sqlalchemy==1.5.3 +snowflake-connector-python==3.12.0 +snowflake-sqlalchemy==1.6.1 sortedcontainers==2.4.0 soupsieve==2.5 -spacy==3.5.0 +spacy==3.7.5 spacy-legacy==3.0.12 spacy-loggers==1.0.5 -sql-metadata==2.2.2 +sql_metadata==2.12.0 SQLAlchemy==1.4.44 sqlalchemy-bigquery==1.11.0 sqlalchemy-cockroachdb==1.4.4 @@ -318,7 +326,7 @@ SQLAlchemy-JSONField==1.0.2 sqlalchemy-pytds==0.3.5 sqlalchemy-redshift==0.8.14 SQLAlchemy-Utils==0.41.2 -sqlglotrs==0.2.5 +sqlglotrs==0.2.7 sqllineage==1.3.8 sqlparse==0.4.4 srsly==2.4.8 @@ -327,25 +335,25 @@ strictyaml==1.7.3 tableauserverclient==0.25 
tableschema==1.20.11 tabulate==0.9.0 -tenacity==8.4.1 -teradatasql==20.0.0.12 +tenacity==9.0.0 +teradatasql==20.0.0.14 teradatasqlalchemy==20.0.0.1 termcolor==2.4.0 terminado==0.18.1 text-unidecode==1.3 -thinc==8.1.12 +thinc==8.2.5 thrift==0.16.0 thrift-sasl==0.4.3 -time-machine==2.14.1 +time-machine==2.14.2 tinycss2==1.3.0 toml==0.10.2 -tomlkit==0.12.5 +tomlkit==0.13.0 toolz==0.12.1 tornado==6.4.1 tqdm==4.66.4 traitlets==5.2.1.post0 -trino==0.328.0 -typer==0.7.0 +trino==0.329.0 +typer==0.12.3 typing-inspect==0.9.0 typing_extensions==4.12.2 tzdata==2024.1 @@ -355,15 +363,16 @@ ujson==5.10.0 unicodecsv==0.14.1 universal_pathlib==0.2.2 urllib3==1.26.19 -vertica-python==1.3.8 +vertica-python==1.4.0 vertica-sqlalchemy-dialect==0.0.8.2 vininfo==1.8.0 wasabi==1.1.3 wcmatch==8.5.2 wcwidth==0.2.13 +weasel==0.4.1 webencodings==0.5.1 websocket-client==1.8.0 -Werkzeug==2.3.8 +Werkzeug==2.2.3 widgetsnbextension==4.0.11 wirerope==0.4.7 wrapt==1.16.0 diff --git a/docker/datahub-ingestion-base/build.gradle b/docker/datahub-ingestion-base/build.gradle index faa0589cfbfbbf..5652fedcd93b3b 100644 --- a/docker/datahub-ingestion-base/build.gradle +++ b/docker/datahub-ingestion-base/build.gradle @@ -12,7 +12,7 @@ ext { docker_target = project.getProperties().getOrDefault("dockerTarget", "slim") docker_version = "${version}${docker_target == 'slim' ? '-slim' : ''}" - revision = 3 // increment to trigger rebuild + revision = 4 // increment to trigger rebuild } docker { diff --git a/docker/datahub-ingestion/build.gradle b/docker/datahub-ingestion/build.gradle index b9ab546674a031..6757be7cd6f221 100644 --- a/docker/datahub-ingestion/build.gradle +++ b/docker/datahub-ingestion/build.gradle @@ -12,7 +12,7 @@ ext { docker_target = project.getProperties().getOrDefault("dockerTarget", "slim") docker_version = "${version}${docker_target == 'slim' ? 
'-slim' : ''}" - revision = 3 // increment to trigger rebuild + revision = 4 // increment to trigger rebuild } dependencies { diff --git a/docker/docker-compose-with-cassandra.yml b/docker/docker-compose-with-cassandra.yml index d722b07b9a7af4..de766f76cb626e 100644 --- a/docker/docker-compose-with-cassandra.yml +++ b/docker/docker-compose-with-cassandra.yml @@ -144,7 +144,7 @@ services: - neo4jdata:/data schema-registry: hostname: schema-registry - image: confluentinc/cp-schema-registry:7.4.0 + image: ${DATAHUB_CONFLUENT_SCHEMA_REGISTRY_IMAGE:-confluentinc/cp-schema-registry}:${DATAHUB_CONFLUENT_VERSION:-7.4.0} ports: - ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081 env_file: schema-registry/env/docker.env @@ -159,7 +159,7 @@ services: condition: service_healthy broker: hostname: broker - image: confluentinc/cp-kafka:7.4.0 + image: ${DATAHUB_CONFLUENT_KAFKA_IMAGE:-confluentinc/cp-kafka}:${DATAHUB_CONFLUENT_VERSION:-7.4.0} ports: - 29092:29092 - 9092:9092 @@ -177,7 +177,7 @@ services: - broker:/var/lib/kafka/data/ zookeeper: hostname: zookeeper - image: confluentinc/cp-zookeeper:7.4.0 + image: ${DATAHUB_CONFLUENT_ZOOKEEPER_IMAGE:-confluentinc/cp-zookeeper}:${DATAHUB_CONFLUENT_VERSION:-7.4.0} ports: - 2181:2181 env_file: zookeeper/env/docker.env diff --git a/docker/docker-compose-without-neo4j.yml b/docker/docker-compose-without-neo4j.yml index eae36fb849fd5c..748a2cc9e04167 100644 --- a/docker/docker-compose-without-neo4j.yml +++ b/docker/docker-compose-without-neo4j.yml @@ -123,7 +123,7 @@ services: - esdata:/usr/share/elasticsearch/data schema-registry: hostname: schema-registry - image: confluentinc/cp-schema-registry:7.4.0 + image: ${DATAHUB_CONFLUENT_SCHEMA_REGISTRY_IMAGE:-confluentinc/cp-schema-registry}:${DATAHUB_CONFLUENT_VERSION:-7.4.0} ports: - ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081 env_file: schema-registry/env/docker.env @@ -138,7 +138,7 @@ services: condition: service_healthy broker: hostname: broker - image: confluentinc/cp-kafka:7.4.0 + 
image: ${DATAHUB_CONFLUENT_KAFKA_IMAGE:-confluentinc/cp-kafka}:${DATAHUB_CONFLUENT_VERSION:-7.4.0} ports: - ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092 env_file: broker/env/docker.env @@ -155,7 +155,7 @@ services: - broker:/var/lib/kafka/data/ zookeeper: hostname: zookeeper - image: confluentinc/cp-zookeeper:7.4.0 + image: ${DATAHUB_CONFLUENT_ZOOKEEPER_IMAGE:-confluentinc/cp-zookeeper}:${DATAHUB_CONFLUENT_VERSION:-7.4.0} ports: - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181 env_file: zookeeper/env/docker.env diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 96f37496859a46..ae55861580becd 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -143,7 +143,7 @@ services: - neo4jdata:/data schema-registry: hostname: schema-registry - image: confluentinc/cp-schema-registry:7.4.0 + image: ${DATAHUB_CONFLUENT_SCHEMA_REGISTRY_IMAGE:-confluentinc/cp-schema-registry}:${DATAHUB_CONFLUENT_VERSION:-7.4.0} ports: - ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081 env_file: schema-registry/env/docker.env @@ -158,7 +158,7 @@ services: condition: service_healthy broker: hostname: broker - image: confluentinc/cp-kafka:7.4.0 + image: ${DATAHUB_CONFLUENT_KAFKA_IMAGE:-confluentinc/cp-kafka}:${DATAHUB_CONFLUENT_VERSION:-7.4.0} ports: - ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092 env_file: broker/env/docker.env @@ -175,7 +175,7 @@ services: - broker:/var/lib/kafka/data/ zookeeper: hostname: zookeeper - image: confluentinc/cp-zookeeper:7.4.0 + image: ${DATAHUB_CONFLUENT_ZOOKEEPER_IMAGE:-confluentinc/cp-zookeeper}:${DATAHUB_CONFLUENT_VERSION:-7.4.0} ports: - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181 env_file: zookeeper/env/docker.env diff --git a/docker/profiles/docker-compose.prerequisites.yml b/docker/profiles/docker-compose.prerequisites.yml index 08ebc8b65d8c9c..7cd9c9039539cc 100644 --- a/docker/profiles/docker-compose.prerequisites.yml +++ b/docker/profiles/docker-compose.prerequisites.yml @@ -210,7 +210,7 @@ services: - neo4jdata:/data 
kafka-broker: hostname: broker - image: confluentinc/cp-kafka:7.4.0 + image: ${DATAHUB_CONFLUENT_KAFKA_IMAGE:-confluentinc/cp-kafka}:${DATAHUB_CONFLUENT_VERSION:-7.4.0} command: - /bin/bash - -c From e14dc9159c67e65c02d317f736fe7a01eb50ffc4 Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Wed, 31 Jul 2024 18:45:29 +0100 Subject: [PATCH 10/10] feat(cli): Trim report of dataHubExecutionRequestResult to max GMS size (#11051) --- docs/how/updating-datahub.md | 3 ++- .../reporting/datahub_ingestion_run_summary_provider.py | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 2821b63e7d305a..08ababcb5cfce9 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -81,7 +81,8 @@ profiling: - #10498 - Tableau ingestion can now be configured to ingest multiple sites at once and add the sites as containers. The feature is currently only available for Tableau Server. - #10466 - Extends configuration in `~/.datahubenv` to match `DatahubClientConfig` object definition. See full configuration in https://datahubproject.io/docs/python-sdk/clients/. The CLI should now respect the updated configurations specified in `~/.datahubenv` across its functions and utilities. This means that for systems where ssl certification is disabled, setting `disable_ssl_verification: true` in `~./datahubenv` will apply to all CLI calls. - #11002 - We will not auto-generate a `~/.datahubenv` file. You must either run `datahub init` to create that file, or set environment variables so that the config is loaded. - +- #11023 - Added a new parameter to datahub's `put` cli command: `--run-id`. This parameter is useful for associating a given write with an ingestion process. One use case is mimicking a transformer when no transformer exists for the aspect being written.
+- #11051 - Ingestion reports will now trim the summary text to a maximum of 800k characters to avoid generating a `dataHubExecutionRequestResult` that is too large for GMS to handle. ## 0.13.3 ### Breaking Changes diff --git a/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py b/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py index 2245e27ecedabf..a175870cd9fbea 100644 --- a/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +++ b/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py @@ -31,6 +31,7 @@ from datahub.utilities.logging_manager import get_log_buffer from datahub.utilities.urns.urn import Urn + logger = logging.getLogger(__name__) @@ -43,6 +44,7 @@ class DatahubIngestionRunSummaryProvider(PipelineRunListener): _EXECUTOR_ID: str = "__datahub_cli_" _EXECUTION_REQUEST_SOURCE_TYPE: str = "CLI_INGESTION_SOURCE" _INGESTION_TASK_NAME: str = "CLI Ingestion" + _MAX_SUMMARY_SIZE: int = 800000 @staticmethod def get_cur_time_in_ms() -> int: @@ -209,7 +211,9 @@ def on_completion( status=status, startTimeMs=self.start_time_ms, durationMs=self.get_cur_time_in_ms() - self.start_time_ms, - report=summary, + # Truncate summary such that the generated MCP will not exceed GMS's payload limit. + # Keep the overall size of dataHubExecutionRequestResult under 1MB by retaining only the last 800,000 chars of summary + report=summary[-self._MAX_SUMMARY_SIZE:], structuredReport=structured_report, )