From 8479d2f6abdebb2df4f7de20256be554d1e0477e Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Mon, 26 Aug 2024 14:58:33 +0530 Subject: [PATCH 1/9] fix(analytics): index description so analytics are correct (#11224) --- .../ReindexDomainDescriptionConfig.java | 29 +++++++++++ .../ReindexDomainDescription.java | 49 +++++++++++++++++++ .../ReindexDomainDescriptionStep.java | 42 ++++++++++++++++ .../com/linkedin/domain/DomainProperties.pdl | 4 ++ .../src/main/resources/application.yaml | 5 ++ 5 files changed, 129 insertions(+) create mode 100644 datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDomainDescriptionConfig.java create mode 100644 datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescription.java create mode 100644 datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescriptionStep.java diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDomainDescriptionConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDomainDescriptionConfig.java new file mode 100644 index 00000000000000..3cdab0dc4d4bc6 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDomainDescriptionConfig.java @@ -0,0 +1,29 @@ +package com.linkedin.datahub.upgrade.config; + +import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; +import com.linkedin.datahub.upgrade.system.domaindescription.ReindexDomainDescription; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Conditional; +import org.springframework.context.annotation.Configuration; + +@Configuration +@Conditional(SystemUpdateCondition.NonBlockingSystemUpdateCondition.class) +public class ReindexDomainDescriptionConfig { + + @Bean + public NonBlockingSystemUpgrade reindexDomainDescription( + final OperationContext opContext, + final EntityService entityService, + final AspectDao aspectDao, + @Value("${systemUpdate.domainDescription.enabled}") final boolean enabled, + @Value("${systemUpdate.domainDescription.batchSize}") final Integer batchSize, + @Value("${systemUpdate.domainDescription.delayMs}") final Integer delayMs, + @Value("${systemUpdate.domainDescription.limit}") final Integer limit) { + return new ReindexDomainDescription( + opContext, entityService, aspectDao, enabled, batchSize, delayMs, limit); + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescription.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescription.java new file mode 100644 index 00000000000000..85af912e24f68a --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescription.java @@ -0,0 +1,49 @@ +package com.linkedin.datahub.upgrade.system.domaindescription; + +import com.google.common.collect.ImmutableList; +import com.linkedin.datahub.upgrade.UpgradeStep; +import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; + +/** + * A job that reindexes all domain aspects as part of reindexing descriptions This is required to + * fix the analytics for domains + */ +@Slf4j +public class ReindexDomainDescription implements NonBlockingSystemUpgrade { + + private final List _steps; + + public ReindexDomainDescription( + @Nonnull OperationContext opContext, + EntityService entityService, + AspectDao aspectDao, + boolean enabled, + Integer batchSize, + Integer batchDelayMs, + Integer limit) { + if (enabled) { + _steps = + ImmutableList.of( + new ReindexDomainDescriptionStep( + opContext, entityService, aspectDao, batchSize, batchDelayMs, limit)); + } else { + _steps = ImmutableList.of(); + } + } + + @Override + public String id() { + return this.getClass().getName(); + } + + @Override + public List steps() { + return _steps; + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescriptionStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescriptionStep.java new file mode 100644 index 00000000000000..1fa8bc92af078f --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescriptionStep.java @@ -0,0 +1,42 @@ +package com.linkedin.datahub.upgrade.system.domaindescription; + +import static com.linkedin.metadata.Constants.*; + +import com.linkedin.datahub.upgrade.system.AbstractMCLStep; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; +import org.jetbrains.annotations.Nullable; + +@Slf4j +public class ReindexDomainDescriptionStep extends AbstractMCLStep { + + public ReindexDomainDescriptionStep( + OperationContext opContext, + EntityService entityService, + AspectDao aspectDao, + Integer batchSize, + Integer batchDelayMs, + Integer limit) { + super(opContext, entityService, aspectDao, batchSize, batchDelayMs, limit); + } + + @Override + public String id() { + return "domain-description-v1"; + } + + @Nonnull + @Override + protected String getAspectName() { + return DOMAIN_PROPERTIES_ASPECT_NAME; + } + + @Nullable + @Override + protected String getUrnLike() { + return "urn:li:" + DOMAIN_ENTITY_NAME + ":%"; + } +} diff --git a/metadata-models/src/main/pegasus/com/linkedin/domain/DomainProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/domain/DomainProperties.pdl index eb307b726855db..2d93d9399bf722 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/domain/DomainProperties.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/domain/DomainProperties.pdl @@ -26,6 +26,10 @@ record DomainProperties includes CustomProperties { /** * Description of the Domain */ + @Searchable = { + "fieldType": "TEXT", + "hasValuesFieldName": "hasDescription" + } description: optional string /** diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index 5b3673ddca52c6..3f1c6dd1a3d7d8 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -335,6 +335,11 @@ systemUpdate: batchSize: ${BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_BATCH_SIZE:1000} delayMs: ${BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_DELAY_MS:30000} limit: ${BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_LIMIT:0} + domainDescription: + enabled: ${BOOTSTRAP_SYSTEM_UPDATE_DOMAIN_DESCRIPTION_ENABLED:true} + batchSize: ${BOOTSTRAP_SYSTEM_UPDATE_DOMAIN_DESCRIPTION_BATCH_SIZE:1000} + delayMs: ${BOOTSTRAP_SYSTEM_UPDATE_DOMAIN_DESCRIPTION_DELAY_MS:30000} + limit: ${BOOTSTRAP_SYSTEM_UPDATE_DOMAIN_DESCRIPTION_CLL_LIMIT:0} browsePathsV2: enabled: ${BOOTSTRAP_SYSTEM_UPDATE_BROWSE_PATHS_V2_ENABLED:true} batchSize: ${BOOTSTRAP_SYSTEM_UPDATE_BROWSE_PATHS_V2_BATCH_SIZE:5000} From 94e7706e3b949b4ca55b847c9cf0f42d085fff09 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Mon, 26 Aug 2024 09:44:08 -0500 Subject: [PATCH 2/9] chore(bump): bump azure-identity (#11235) Co-authored-by: Harshal Sheth --- metadata-ingestion/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 0b2b31adee10a0..e945cb38367df6 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -238,7 +238,7 @@ # Instead, we put the fix in our PyHive fork, so no thrift pin is needed. } -microsoft_common = {"msal==1.22.0"} +microsoft_common = {"msal>=1.22.0"} iceberg_common = { # Iceberg Python SDK @@ -271,7 +271,7 @@ abs_base = { "azure-core==1.29.4", - "azure-identity>=1.14.0", + "azure-identity>=1.17.1", "azure-storage-blob>=12.19.0", "azure-storage-file-datalake>=12.14.0", "more-itertools>=8.12.0", From 3db9d97dd9d031e5609315b112be69d9600fe07c Mon Sep 17 00:00:00 2001 From: Pedro Silva Date: Mon, 26 Aug 2024 17:23:40 +0100 Subject: [PATCH 3/9] feat(docs): Update docs on managing user subscriptions (#11243) --- docs/authorization/roles.md | 1 + docs/managed-datahub/subscription-and-notification.md | 3 +++ 2 files changed, 4 insertions(+) diff --git a/docs/authorization/roles.md b/docs/authorization/roles.md index a1719438d2941b..3e0666a7759c86 100644 --- a/docs/authorization/roles.md +++ b/docs/authorization/roles.md @@ -179,6 +179,7 @@ These privileges are only relevant to DataHub Cloud. | Manage Documentation Proposals | :heavy_check_mark: | :heavy_check_mark: | :x: | The ability to manage a proposal update an asset's documentation | | Manage Group Notification Settings | :heavy_check_mark: | :heavy_check_mark: | :x: | The ability to manage notification settings for a group. | | Manage Group Subscriptions | :heavy_check_mark: | :heavy_check_mark: | :x: | The ability to manage subscriptions for a group. | +| Manage User Subscriptions | :heavy_check_mark: | :x: | :x: | The ability to manage subscriptions for another user. | | Manage Data Contract Proposals | :heavy_check_mark: | :heavy_check_mark: | :x: | The ability to manage a proposal for a Data Contract | | Share Entity | :heavy_check_mark: | :heavy_check_mark: | :x: | The ability to share an entity with another DataHub Cloud instance. | diff --git a/docs/managed-datahub/subscription-and-notification.md b/docs/managed-datahub/subscription-and-notification.md index 5c2eaa17e5f2c9..c3c31d5fed7e61 100644 --- a/docs/managed-datahub/subscription-and-notification.md +++ b/docs/managed-datahub/subscription-and-notification.md @@ -27,6 +27,9 @@ If you want to create and manage group-level Subscriptions for your team, you wi - Manage Group Notification Settings - Manage Group Subscriptions +And to manage other user's subscriptions: +- Manage User Subscriptions + ## Using DataHub’s Subscriptions and Notifications Feature The first step is identifying the assets you want to subscribe to. From e2e6d105d3de217050cf9800fc3cc12682b5939c Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Mon, 26 Aug 2024 22:26:02 +0530 Subject: [PATCH 4/9] fix(cli/delete): change filter to include env (#11246) --- metadata-ingestion/src/datahub/ingestion/graph/filters.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/graph/filters.py b/metadata-ingestion/src/datahub/ingestion/graph/filters.py index 8974f159171d1e..edb45fa5c2dbc1 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/filters.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/filters.py @@ -110,6 +110,10 @@ def _get_env_filters(env: str) -> List[SearchFilterRule]: "field": "customProperties", "value": f"instance={env}", }, + { + "field": "env", + "value": env, + } # Note that not all entity types have an env (e.g. dashboards / charts). # If the env filter is specified, these will be excluded. ] From ca4c436f66d072491216f03303cab967e5f146c8 Mon Sep 17 00:00:00 2001 From: Davi Arnaut Date: Mon, 26 Aug 2024 09:57:59 -0700 Subject: [PATCH 5/9] Rephrase scope of automated scanning (#11248) --- docs/SECURITY_STANCE.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/SECURITY_STANCE.md b/docs/SECURITY_STANCE.md index 716bbaa06d8acf..a48244d741b70b 100644 --- a/docs/SECURITY_STANCE.md +++ b/docs/SECURITY_STANCE.md @@ -8,10 +8,11 @@ aspects of our approach to handling security vulnerabilities and keeping our com ## Our Track Record -We have a proactive approach to security. To date we've successfully resolved over 2,000 security related issues -flagged by automated scanners and reported by community members, demonstrating our commitment to maintaining a secure -platform. This is a testament to the collaborative efforts of our community in identifying and helping us address -potential vulnerabilities. It truly takes a village. +We have a proactive approach to security. To date we've successfully resolved many security related issues reported by +community members or flagged by automated scanners (which includes upstream dependencies and what known risks the +dependencies contain), demonstrating our commitment to maintaining a secure platform. This is a testament to the +collaborative efforts of our community in identifying and helping us address potential vulnerabilities. It truly takes +a village. ## Reporting Security Issues From 99824b4972e0d6a2a6a5167ad5a21eb4692fa5ec Mon Sep 17 00:00:00 2001 From: Ellie O'Neil <110510035+eboneil@users.noreply.github.com> Date: Mon, 26 Aug 2024 11:18:53 -0700 Subject: [PATCH 6/9] docs(urn): Update urn docs with restrictions (#11213) Co-authored-by: John Joyce --- docs/what/urn.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/what/urn.md b/docs/what/urn.md index fbead110afe0ac..122b93038d9dec 100644 --- a/docs/what/urn.md +++ b/docs/what/urn.md @@ -32,3 +32,13 @@ urn:li:corpuser:jdoe urn:li:dataset:(urn:li:dataPlatform:kafka,PageViewEvent,PROD) urn:li:dataset:(urn:li:dataPlatform:hdfs,PageViewEvent,EI) ``` + +## Restrictions + +There are a few restrictions when creating an urn: + +1. Commas are reserved character in URN fields: `,` +2. Parentheses are reserved characters in URN fields: `( , )` +3. Colons are reserved characters in URN fields: `:` + +Please do not use these characters when creating or generating urns. From ce99bc4f22c2bbe4ecc5505946d1f2028f50cfbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20L=C3=BCdin?= <13187726+Masterchen09@users.noreply.github.com> Date: Mon, 26 Aug 2024 20:29:15 +0200 Subject: [PATCH 7/9] feat(ingest): add ingestion source for SAP Analytics Cloud (#10958) Co-authored-by: Harshal Sheth --- .../source/builder/RecipeForm/constants.ts | 39 +- .../ingest/source/builder/RecipeForm/sac.ts | 161 ++++ .../source/builder/SelectTemplateStep.tsx | 12 + .../app/ingest/source/builder/constants.ts | 4 + .../app/ingest/source/builder/sources.json | 8 + .../src/app/ingest/source/conf/sac/sac.ts | 26 + .../src/app/ingest/source/conf/sources.tsx | 2 + .../src/app/shared/getLogoFromPlatform.tsx | 23 - datahub-web-react/src/images/saclogo.svg | 19 + .../docs/sources/sac/sac_pre.md | 45 + .../docs/sources/sac/sac_recipe.yml | 40 + metadata-ingestion/setup.py | 9 + .../ingestion/source/common/subtypes.py | 7 + .../datahub/ingestion/source/sac/__init__.py | 0 .../src/datahub/ingestion/source/sac/sac.py | 775 ++++++++++++++++++ .../ingestion/source/sac/sac_common.py | 45 + .../src/datahub/utilities/logging_manager.py | 1 + .../tests/integration/sac/__init__.py | 0 .../tests/integration/sac/metadata.xml | 404 +++++++++ .../integration/sac/sac_mces_golden.json | 663 +++++++++++++++ .../tests/integration/sac/test_sac.py | 316 +++++++ .../main/resources/boot/data_platforms.json | 10 + .../cypress/e2e/mutations/ingestion_source.js | 2 +- .../cypress/e2e/mutations/managing_secrets.js | 4 +- 24 files changed, 2587 insertions(+), 28 deletions(-) create mode 100644 datahub-web-react/src/app/ingest/source/builder/RecipeForm/sac.ts create mode 100644 datahub-web-react/src/app/ingest/source/conf/sac/sac.ts delete mode 100644 datahub-web-react/src/app/shared/getLogoFromPlatform.tsx create mode 100644 datahub-web-react/src/images/saclogo.svg create mode 100644 metadata-ingestion/docs/sources/sac/sac_pre.md create mode 100644 metadata-ingestion/docs/sources/sac/sac_recipe.yml create mode 100644 metadata-ingestion/src/datahub/ingestion/source/sac/__init__.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/sac/sac.py create mode 100644 metadata-ingestion/src/datahub/ingestion/source/sac/sac_common.py create mode 100644 metadata-ingestion/tests/integration/sac/__init__.py create mode 100644 metadata-ingestion/tests/integration/sac/metadata.xml create mode 100644 metadata-ingestion/tests/integration/sac/sac_mces_golden.json create mode 100644 metadata-ingestion/tests/integration/sac/test_sac.py diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts index 6a5e6c9de2b96b..b6d076cf6da026 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts @@ -83,7 +83,7 @@ import { PROJECT_NAME, } from './lookml'; import { PRESTO, PRESTO_HOST_PORT, PRESTO_DATABASE, PRESTO_USERNAME, PRESTO_PASSWORD } from './presto'; -import { AZURE, BIGQUERY_BETA, CSV, DBT_CLOUD, MYSQL, OKTA, POWER_BI, UNITY_CATALOG, VERTICA } from '../constants'; +import { AZURE, BIGQUERY_BETA, CSV, DBT_CLOUD, MYSQL, OKTA, POWER_BI, SAC, UNITY_CATALOG, VERTICA } from '../constants'; import { BIGQUERY_BETA_PROJECT_ID, DATASET_ALLOW, DATASET_DENY, PROJECT_ALLOW, PROJECT_DENY } from './bigqueryBeta'; import { MYSQL_HOST_PORT, MYSQL_PASSWORD, MYSQL_USERNAME } from './mysql'; import { MSSQL, MSSQL_DATABASE, MSSQL_HOST_PORT, MSSQL_PASSWORD, MSSQL_USERNAME } from './mssql'; @@ -171,6 +171,20 @@ import { USER_ALLOW, USER_DENY, } from './azure'; +import { + SAC_TENANT_URL, + SAC_TOKEN_URL, + SAC_CLIENT_ID, + SAC_CLIENT_SECRET, + INGEST_STORIES, + INGEST_APPLICATIONS, + RESOURCE_ID_ALLOW, + RESOURCE_ID_DENY, + RESOURCE_NAME_ALLOW, + RESOURCE_NAME_DENY, + FOLDER_ALLOW, + FOLDER_DENY, +} from './sac'; export enum RecipeSections { Connection = 0, @@ -519,8 +533,29 @@ export const RECIPE_FIELDS: RecipeFields = { filterFields: [GROUP_ALLOW, GROUP_DENY, USER_ALLOW, USER_DENY], advancedFields: [AZURE_INGEST_USERS, AZURE_INGEST_GROUPS, STATEFUL_INGESTION_ENABLED, SKIP_USERS_WITHOUT_GROUP], }, + [SAC]: { + fields: [SAC_TENANT_URL, SAC_TOKEN_URL, SAC_CLIENT_ID, SAC_CLIENT_SECRET], + filterFields: [ + INGEST_STORIES, + INGEST_APPLICATIONS, + RESOURCE_ID_ALLOW, + RESOURCE_ID_DENY, + RESOURCE_NAME_ALLOW, + RESOURCE_NAME_DENY, + FOLDER_ALLOW, + FOLDER_DENY, + ], + advancedFields: [STATEFUL_INGESTION_ENABLED], + }, }; export const CONNECTORS_WITH_FORM = new Set(Object.keys(RECIPE_FIELDS)); -export const CONNECTORS_WITH_TEST_CONNECTION = new Set([SNOWFLAKE, LOOKER, BIGQUERY_BETA, BIGQUERY, UNITY_CATALOG]); +export const CONNECTORS_WITH_TEST_CONNECTION = new Set([ + SNOWFLAKE, + LOOKER, + BIGQUERY_BETA, + BIGQUERY, + UNITY_CATALOG, + SAC, +]); diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/sac.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/sac.ts new file mode 100644 index 00000000000000..3f5c6d0b09a33e --- /dev/null +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/sac.ts @@ -0,0 +1,161 @@ +import { RecipeField, FieldType, setListValuesOnRecipe } from './common'; + +export const SAC_TENANT_URL: RecipeField = { + name: 'tenant_url', + label: 'Tenant URL', + tooltip: 'The URL of the SAP Analytics Cloud tenant.', + type: FieldType.TEXT, + fieldPath: 'source.config.tenant_url', + placeholder: 'https://company.eu10.sapanalytics.cloud', + required: true, + rules: null, +}; + +export const SAC_TOKEN_URL: RecipeField = { + name: 'token_url', + label: 'Token URL', + tooltip: 'The OAuth 2.0 Token Service URL.', + type: FieldType.TEXT, + fieldPath: 'source.config.token_url', + placeholder: 'https://company.eu10.hana.ondemand.com/oauth/token', + required: true, + rules: null, +}; + +export const SAC_CLIENT_ID: RecipeField = { + name: 'client_id', + label: 'Client ID', + tooltip: 'Client ID.', + type: FieldType.SECRET, + fieldPath: 'source.config.client_id', + placeholder: 'client_id', + required: true, + rules: null, +}; + +export const SAC_CLIENT_SECRET: RecipeField = { + name: 'client_secret', + label: 'Client Secret', + tooltip: 'Client Secret.', + type: FieldType.SECRET, + fieldPath: 'source.config.client_secret', + placeholder: 'client_secret', + required: true, + rules: null, +}; + +export const INGEST_STORIES: RecipeField = { + name: 'ingest_stories', + label: 'Ingest Stories', + tooltip: 'Whether stories should be ingested into DataHub.', + type: FieldType.BOOLEAN, + fieldPath: 'source.config.ingest_stories', + rules: null, + section: 'Stories and Applications', +}; + +export const INGEST_APPLICATIONS: RecipeField = { + name: 'ingest_applications', + label: 'Ingest Applications', + tooltip: 'Whether applications should be ingested into DataHub.', + type: FieldType.BOOLEAN, + fieldPath: 'source.config.ingest_applications', + rules: null, + section: 'Stories and Applications', +}; + +const resourceIdAllowFieldPath = 'source.config.resource_id_pattern.allow'; +export const RESOURCE_ID_ALLOW: RecipeField = { + name: 'resource_id_pattern.allow', + label: 'Resource Id Allow Patterns', + tooltip: + 'Only include specific Stories and Applications by providing the id of the ressource, or a Regular Expression (REGEX). If not provided, all Stories and Applications will be included.', + type: FieldType.LIST, + buttonLabel: 'Add pattern', + fieldPath: resourceIdAllowFieldPath, + rules: null, + section: 'Stories and Applications', + placeholder: 'LXTH4JCE36EOYLU41PIINLYPU9XRYM26', + setValueOnRecipeOverride: (recipe: any, values: string[]) => + setListValuesOnRecipe(recipe, values, resourceIdAllowFieldPath), +}; + +const resourceIdDenyFieldPath = 'source.config.resource_id_pattern.deny'; +export const RESOURCE_ID_DENY: RecipeField = { + name: 'resource_id_pattern.deny', + label: 'Resource Id Deny Patterns', + tooltip: + 'Exclude specific Stories and Applications by providing the id of the resource, or a Regular Expression (REGEX). If not provided, all Stories and Applications will be included. Deny patterns always take precendence over Allow patterns.', + type: FieldType.LIST, + buttonLabel: 'Add pattern', + fieldPath: resourceIdDenyFieldPath, + rules: null, + section: 'Stories and Applications', + placeholder: 'LXTH4JCE36EOYLU41PIINLYPU9XRYM26', + setValueOnRecipeOverride: (recipe: any, values: string[]) => + setListValuesOnRecipe(recipe, values, resourceIdDenyFieldPath), +}; + +const resourceNameAllowFieldPath = 'source.config.resource_id_pattern.allow'; +export const RESOURCE_NAME_ALLOW: RecipeField = { + name: 'resource_name_pattern.allow', + label: 'Resource Name Allow Patterns', + tooltip: + 'Only include specific Stories and Applications by providing the name of the ressource, or a Regular Expression (REGEX). If not provided, all Stories and Applications will be included.', + type: FieldType.LIST, + buttonLabel: 'Add pattern', + fieldPath: resourceNameAllowFieldPath, + rules: null, + section: 'Stories and Applications', + placeholder: 'Name of the story', + setValueOnRecipeOverride: (recipe: any, values: string[]) => + setListValuesOnRecipe(recipe, values, resourceNameAllowFieldPath), +}; + +const resourceNameDenyFieldPath = 'source.config.resource_name_pattern.deny'; +export const RESOURCE_NAME_DENY: RecipeField = { + name: 'resource_name_pattern.deny', + label: 'Resource Name Deny Patterns', + tooltip: + 'Exclude specific Stories and Applications by providing the name of the resource, or a Regular Expression (REGEX). If not provided, all Stories and Applications will be included. Deny patterns always take precendence over Allow patterns.', + type: FieldType.LIST, + buttonLabel: 'Add pattern', + fieldPath: resourceNameDenyFieldPath, + rules: null, + section: 'Stories and Applications', + placeholder: 'Name of the story', + setValueOnRecipeOverride: (recipe: any, values: string[]) => + setListValuesOnRecipe(recipe, values, resourceNameDenyFieldPath), +}; + +const folderAllowFieldPath = 'source.config.resource_id_pattern.allow'; +export const FOLDER_ALLOW: RecipeField = { + name: 'folder_pattern.allow', + label: 'Folder Allow Patterns', + tooltip: + 'Only include specific Stories and Applications by providing the folder containing the resources, or a Regular Expression (REGEX). If not provided, all Stories and Applications will be included.', + type: FieldType.LIST, + buttonLabel: 'Add pattern', + fieldPath: folderAllowFieldPath, + rules: null, + section: 'Stories and Applications', + placeholder: 'Folder of the story', + setValueOnRecipeOverride: (recipe: any, values: string[]) => + setListValuesOnRecipe(recipe, values, folderAllowFieldPath), +}; + +const folderDenyFieldPath = 'source.config.folder_pattern.deny'; +export const FOLDER_DENY: RecipeField = { + name: 'folder_pattern.deny', + label: 'Folder Deny Patterns', + tooltip: + 'Exclude specific Stories and Applications by providing the folder containing the resources, or a Regular Expression (REGEX). If not provided, all Stories and Applications will be included. Deny patterns always take precendence over Allow patterns.', + type: FieldType.LIST, + buttonLabel: 'Add pattern', + fieldPath: folderDenyFieldPath, + rules: null, + section: 'Stories and Applications', + placeholder: 'Folder of the story', + setValueOnRecipeOverride: (recipe: any, values: string[]) => + setListValuesOnRecipe(recipe, values, folderDenyFieldPath), +}; diff --git a/datahub-web-react/src/app/ingest/source/builder/SelectTemplateStep.tsx b/datahub-web-react/src/app/ingest/source/builder/SelectTemplateStep.tsx index 3998915e07a2ce..e014cdcc8e2240 100644 --- a/datahub-web-react/src/app/ingest/source/builder/SelectTemplateStep.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/SelectTemplateStep.tsx @@ -104,6 +104,18 @@ export const SelectTemplateStep = ({ state, updateState, goTo, cancel, ingestion source.name.toLocaleLowerCase().includes(searchFilter.toLocaleLowerCase()), ); + filteredSources.sort((a, b) => { + if (a.name === 'custom') { + return 1; + } + + if (b.name === 'custom') { + return -1; + } + + return a.displayName.localeCompare(b.displayName); + }); + return (
diff --git a/datahub-web-react/src/app/ingest/source/builder/constants.ts b/datahub-web-react/src/app/ingest/source/builder/constants.ts index d90faa91b85a26..b67ca388c10546 100644 --- a/datahub-web-react/src/app/ingest/source/builder/constants.ts +++ b/datahub-web-react/src/app/ingest/source/builder/constants.ts @@ -34,6 +34,7 @@ import fivetranLogo from '../../../../images/fivetranlogo.png'; import csvLogo from '../../../../images/csv-logo.png'; import qlikLogo from '../../../../images/qliklogo.png'; import sigmaLogo from '../../../../images/sigmalogo.png'; +import sacLogo from '../../../../images/saclogo.svg'; export const ATHENA = 'athena'; export const ATHENA_URN = `urn:li:dataPlatform:${ATHENA}`; @@ -122,6 +123,8 @@ export const QLIK_SENSE = 'qlik-sense'; export const QLIK_SENSE_URN = `urn:li:dataPlatform:${QLIK_SENSE}`; export const SIGMA = 'sigma'; export const SIGMA_URN = `urn:li:dataPlatform:${SIGMA}`; +export const SAC = 'sac'; +export const SAC_URN = `urn:li:dataPlatform:${SAC}`; export const PLATFORM_URN_TO_LOGO = { [ATHENA_URN]: athenaLogo, @@ -161,6 +164,7 @@ export const PLATFORM_URN_TO_LOGO = { [CSV_URN]: csvLogo, [QLIK_SENSE_URN]: qlikLogo, [SIGMA_URN]: sigmaLogo, + [SAC_URN]: sacLogo, }; export const SOURCE_TO_PLATFORM_URN = { diff --git a/datahub-web-react/src/app/ingest/source/builder/sources.json b/datahub-web-react/src/app/ingest/source/builder/sources.json index c35a7a033a8ab3..bb1c1a10ea6e5f 100644 --- a/datahub-web-react/src/app/ingest/source/builder/sources.json +++ b/datahub-web-react/src/app/ingest/source/builder/sources.json @@ -287,6 +287,14 @@ "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/csv'", "recipe": "source: \n type: csv-enricher \n config: \n # URL of your csv file to ingest \n filename: \n array_delimiter: '|' \n delimiter: ',' \n write_semantics: PATCH" }, + { + "urn": "urn:li:dataPlatform:sac", + "name": "sac", + "displayName": "SAP Analytics Cloud", + "description": "Import Stories, Applications and Models from SAP Analytics Cloud.", + "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/sac/", + "recipe": "source:\n type: sac\n config:\n tenant_url: # Your SAP Analytics Cloud tenant URL, e.g. https://company.eu10.sapanalytics.cloud or https://company.eu10.hcs.cloud.sap\n token_url: # The Token URL of your SAP Analytics Cloud tenant, e.g. https://company.eu10.hana.ondemand.com/oauth/token.\n\n # Add secret in Secrets Tab with relevant names for each variable\n client_id: \"${SAC_CLIENT_ID}\" # Your SAP Analytics Cloud client id\n client_secret: \"${SAC_CLIENT_SECRET}\" # Your SAP Analytics Cloud client secret" + }, { "urn": "urn:li:dataPlatform:custom", "name": "custom", diff --git a/datahub-web-react/src/app/ingest/source/conf/sac/sac.ts b/datahub-web-react/src/app/ingest/source/conf/sac/sac.ts new file mode 100644 index 00000000000000..e8a3b1f67866af --- /dev/null +++ b/datahub-web-react/src/app/ingest/source/conf/sac/sac.ts @@ -0,0 +1,26 @@ +import { SourceConfig } from '../types'; +import sacLogo from '../../../../../images/saclogo.svg'; + +const placeholderRecipe = `\ +source: + type: sac + config: + tenant_url: # Your SAP Analytics Cloud tenant URL, e.g. https://company.eu10.sapanalytics.cloud or https://company.eu10.hcs.cloud.sap + token_url: # The Token URL of your SAP Analytics Cloud tenant, e.g. https://company.eu10.hana.ondemand.com/oauth/token. + + # Add secret in Secrets Tab with relevant names for each variable + client_id: "\${SAC_CLIENT_ID}" # Your SAP Analytics Cloud client id + client_secret: "\${SAC_CLIENT_SECRET}" # Your SAP Analytics Cloud client secret +`; + +export const SAC = 'sac'; + +const sacConfig: SourceConfig = { + type: SAC, + placeholderRecipe, + displayName: 'SAP Analytics Cloud', + docsUrl: 'https://datahubproject.io/docs/generated/ingestion/sources/sac/', + logoUrl: sacLogo, +}; + +export default sacConfig; diff --git a/datahub-web-react/src/app/ingest/source/conf/sources.tsx b/datahub-web-react/src/app/ingest/source/conf/sources.tsx index 4dbeeb5c975e9d..66644cd14ddd5e 100644 --- a/datahub-web-react/src/app/ingest/source/conf/sources.tsx +++ b/datahub-web-react/src/app/ingest/source/conf/sources.tsx @@ -17,6 +17,7 @@ import hiveConfig from './hive/hive'; import oracleConfig from './oracle/oracle'; import tableauConfig from './tableau/tableau'; import csvConfig from './csv/csv'; +import sacConfig from './sac/sac'; const baseUrl = window.location.origin; @@ -48,6 +49,7 @@ export const SOURCE_TEMPLATE_CONFIGS: Array = [ oracleConfig, hiveConfig, csvConfig, + sacConfig, { type: 'custom', placeholderRecipe: DEFAULT_PLACEHOLDER_RECIPE, diff --git a/datahub-web-react/src/app/shared/getLogoFromPlatform.tsx b/datahub-web-react/src/app/shared/getLogoFromPlatform.tsx deleted file mode 100644 index f32ab8b7dbca91..00000000000000 --- a/datahub-web-react/src/app/shared/getLogoFromPlatform.tsx +++ /dev/null @@ -1,23 +0,0 @@ -import lookerLogo from '../../images/lookerlogo.png'; -import supersetLogo from '../../images/supersetlogo.png'; -import airflowLogo from '../../images/airflowlogo.png'; -import redashLogo from '../../images/redashlogo.png'; - -/** - * TODO: This is a temporary solution, until the backend can push logos for all data platform types. - */ -export function getLogoFromPlatform(platform: string) { - if (platform.toLowerCase() === 'looker') { - return lookerLogo; - } - if (platform.toLowerCase() === 'superset') { - return supersetLogo; - } - if (platform.toLowerCase() === 'airflow') { - return airflowLogo; - } - if (platform.toLowerCase() === 'redash') { - return redashLogo; - } - return undefined; -} diff --git a/datahub-web-react/src/images/saclogo.svg b/datahub-web-react/src/images/saclogo.svg new file mode 100644 index 00000000000000..91bacb51f92a49 --- /dev/null +++ b/datahub-web-react/src/images/saclogo.svg @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + diff --git a/metadata-ingestion/docs/sources/sac/sac_pre.md b/metadata-ingestion/docs/sources/sac/sac_pre.md new file mode 100644 index 00000000000000..c62cd81fa27534 --- /dev/null +++ b/metadata-ingestion/docs/sources/sac/sac_pre.md @@ -0,0 +1,45 @@ +## Configuration Notes + +1. Refer to [Manage OAuth Clients](https://help.sap.com/docs/SAP_ANALYTICS_CLOUD/00f68c2e08b941f081002fd3691d86a7/4f43b54398fc4acaa5efa32badfe3df6.html) to create an OAuth client in SAP Analytics Cloud. The OAuth client is required to have the following properties: + + - Purpose: API Access + - Access: + - Data Import Service + - Authorization Grant: Client Credentials + +2. Maintain connection mappings (optional): + +To map individual connections in SAP Analytics Cloud to platforms, platform instances and environments, the `connection_mapping` configuration can be used within the recipe: + +```yaml +connection_mapping: + MY_BW_CONNECTION: + platform: bw + platform_instance: PROD_BW + env: PROD + MY_HANA_CONNECTION: + platform: hana + platform_instance: PROD_HANA + env: PROD +``` + +The key in the connection mapping dictionary represents the name of the connection created in SAP Analytics Cloud. + +## Concept mapping + +| SAP Analytics Cloud | DataHub | +|-----------------------|---------------------| +| `Story` | `Dashboard` | +| `Application` | `Dashboard` | +| `Live Data Model` | `Dataset` | +| `Import Data Model` | `Dataset` | +| `Model` | `Dataset` | + +## Limitations + +- Only models which are used in a Story or an Application will be ingested because there is no dedicated API to retrieve models (only for Stories and Applications). +- Browse Paths for models cannot be created because the folder where the models are saved is not returned by the API. +- Schema metadata is only ingested for Import Data Models because there is no possibility to get the schema metadata of the other model types. +- Lineages for Import Data Models cannot be ingested because the API is not providing any information about it. +- Currently, only SAP BW and SAP HANA are supported for ingesting the upstream lineages of Live Data Models - a warning is logged for all other connection types, please feel free to open an [issue on GitHub](https://github.com/datahub-project/datahub/issues/new/choose) with the warning message to have this fixed. +- For some models (e.g., builtin models) it cannot be detected whether the models are Live Data or Import Data Models. Therefore, these models will be ingested only with the `Story` subtype. diff --git a/metadata-ingestion/docs/sources/sac/sac_recipe.yml b/metadata-ingestion/docs/sources/sac/sac_recipe.yml new file mode 100644 index 00000000000000..e2067d815e41dd --- /dev/null +++ b/metadata-ingestion/docs/sources/sac/sac_recipe.yml @@ -0,0 +1,40 @@ +source: + type: sac + config: + stateful_ingestion: + enabled: true + + tenant_url: # Your SAP Analytics Cloud tenant URL, e.g. https://company.eu10.sapanalytics.cloud or https://company.eu10.hcs.cloud.sap + token_url: # The Token URL of your SAP Analytics Cloud tenant, e.g. https://company.eu10.hana.ondemand.com/oauth/token. + + # Add secret in Secrets Tab with relevant names for each variable + client_id: "${SAC_CLIENT_ID}" # Your SAP Analytics Cloud client id + client_secret: "${SAC_CLIENT_SECRET}" # Your SAP Analytics Cloud client secret + + # ingest stories + ingest_stories: true + + # ingest applications + ingest_applications: true + + resource_id_pattern: + allow: + - .* + + resource_name_pattern: + allow: + - .* + + folder_pattern: + allow: + - .* + + connection_mapping: + MY_BW_CONNECTION: + platform: bw + platform_instance: PROD_BW + env: PROD + MY_HANA_CONNECTION: + platform: hana + platform_instance: PROD_HANA + env: PROD diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index e945cb38367df6..301e571c96accc 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -314,6 +314,12 @@ mysql = sql_common | {"pymysql>=1.0.2"} +sac = { + "requests", + "pyodata>=1.11.1", + "Authlib", +} + # Note: for all of these, framework_common will be added. plugins: Dict[str, Set[str]] = { # Sink plugins. @@ -480,6 +486,7 @@ "fivetran": snowflake_common | bigquery_common | sqlglot_lib, "qlik-sense": sqlglot_lib | {"requests", "websocket-client"}, "sigma": sqlglot_lib | {"requests"}, + "sac": sac, } # This is mainly used to exclude plugins from the Docker image. @@ -620,6 +627,7 @@ "kafka-connect", "qlik-sense", "sigma", + "sac", ] if plugin for dependency in plugins[plugin] @@ -735,6 +743,7 @@ "fivetran = datahub.ingestion.source.fivetran.fivetran:FivetranSource", "qlik-sense = datahub.ingestion.source.qlik_sense.qlik_sense:QlikSenseSource", "sigma = datahub.ingestion.source.sigma.sigma:SigmaSource", + "sac = datahub.ingestion.source.sac.sac:SACSource", ], "datahub.ingestion.transformer.plugins": [ "pattern_cleanup_ownership = datahub.ingestion.transformer.pattern_cleanup_ownership:PatternCleanUpOwnership", diff --git a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py index 0d9fc8225532c9..4d335779fe49b0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py @@ -18,6 +18,9 @@ class DatasetSubTypes(str, Enum): QLIK_DATASET = "Qlik Dataset" BIGQUERY_TABLE_SNAPSHOT = "Bigquery Table Snapshot" SIGMA_DATASET = "Sigma Dataset" + SAC_MODEL = "Model" + SAC_IMPORT_DATA_MODEL = "Import Data Model" + SAC_LIVE_DATA_MODEL = "Live Data Model" # TODO: Create separate entity... NOTEBOOK = "Notebook" @@ -71,3 +74,7 @@ class BIAssetSubTypes(str, Enum): MODE_REPORT = "Report" MODE_QUERY = "Query" MODE_CHART = "Chart" + + # SAP Analytics Cloud + SAC_STORY = "Story" + SAC_APPLICATION = "Application" diff --git a/metadata-ingestion/src/datahub/ingestion/source/sac/__init__.py b/metadata-ingestion/src/datahub/ingestion/source/sac/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-ingestion/src/datahub/ingestion/source/sac/sac.py b/metadata-ingestion/src/datahub/ingestion/source/sac/sac.py new file mode 100644 index 00000000000000..88cb1f821ff0d6 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/sac/sac.py @@ -0,0 +1,775 @@ +import json +import logging +from dataclasses import dataclass +from functools import partial +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple + +import pyodata +import pyodata.v2.model +import pyodata.v2.service +from authlib.integrations.requests_client import OAuth2Session +from pydantic import Field, SecretStr, validator +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +from datahub.configuration.common import AllowDenyPattern +from datahub.configuration.source_common import ( + DEFAULT_ENV, + DatasetSourceConfigMixin, + EnvConfigMixin, +) +from datahub.emitter.mce_builder import ( + dataset_urn_to_key, + make_dashboard_urn, + make_data_platform_urn, + make_dataplatform_instance_urn, + make_dataset_urn_with_platform_instance, + make_user_urn, +) +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.api.decorators import ( + SupportStatus, + capability, + config_class, + platform_name, + support_status, +) +from datahub.ingestion.api.incremental_lineage_helper import ( + IncrementalLineageConfigMixin, + auto_incremental_lineage, +) +from datahub.ingestion.api.source import ( + CapabilityReport, + MetadataWorkUnitProcessor, + SourceCapability, + TestableSource, + TestConnectionReport, +) +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.common.subtypes import BIAssetSubTypes, DatasetSubTypes +from datahub.ingestion.source.sac.sac_common import ( + ImportDataModelColumn, + Resource, + ResourceModel, +) +from datahub.ingestion.source.state.stale_entity_removal_handler import ( + StaleEntityRemovalHandler, + StaleEntityRemovalSourceReport, + StatefulStaleMetadataRemovalConfig, +) +from datahub.ingestion.source.state.stateful_ingestion_base import ( + StatefulIngestionConfigBase, + StatefulIngestionSourceBase, +) +from datahub.metadata.schema_classes import ( + AuditStampClass, + BrowsePathEntryClass, + BrowsePathsClass, + BrowsePathsV2Class, + ChangeAuditStampsClass, + DashboardInfoClass, + DataPlatformInstanceClass, + DatasetLineageTypeClass, + DatasetPropertiesClass, + DateTypeClass, + NullTypeClass, + NumberTypeClass, + SchemaFieldClass, + SchemaFieldDataTypeClass, + SchemalessClass, + SchemaMetadataClass, + StatusClass, + StringTypeClass, + SubTypesClass, + UpstreamClass, + UpstreamLineageClass, +) +from datahub.utilities import config_clean + +logger = logging.getLogger(__name__) + + +class ConnectionMappingConfig(EnvConfigMixin): + platform: Optional[str] = Field( + default=None, description="The platform that this connection mapping belongs to" + ) + + platform_instance: Optional[str] = Field( + default=None, + description="The instance of the platform that this connection mapping belongs to", + ) + + env: str = Field( + default=DEFAULT_ENV, + description="The environment that this connection mapping belongs to", + ) + + +class SACSourceConfig( + StatefulIngestionConfigBase, DatasetSourceConfigMixin, IncrementalLineageConfigMixin +): + stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field( + default=None, + description="Stateful ingestion related configs", + ) + + tenant_url: str = Field(description="URL of the SAP Analytics Cloud tenant") + token_url: str = Field( + description="URL of the OAuth token endpoint of the SAP Analytics Cloud tenant" + ) + client_id: str = Field(description="Client ID for the OAuth authentication") + client_secret: SecretStr = Field( + description="Client secret for the OAuth authentication" + ) + + ingest_stories: bool = Field( + default=True, + description="Controls whether Stories should be ingested", + ) + + ingest_applications: bool = Field( + default=True, + description="Controls whether Analytic Applications should be ingested", + ) + + ingest_import_data_model_schema_metadata: bool = Field( + default=True, + description="Controls whether schema metadata of Import Data Models should be ingested (ingesting schema metadata of Import Data Models significantly increases overall ingestion time)", + ) + + resource_id_pattern: AllowDenyPattern = Field( + AllowDenyPattern.allow_all(), + description="Patterns for selecting resource ids that are to be included", + ) + + resource_name_pattern: AllowDenyPattern = Field( + AllowDenyPattern.allow_all(), + description="Patterns for selecting resource names that are to be included", + ) + + folder_pattern: AllowDenyPattern = Field( + AllowDenyPattern.allow_all(), + description="Patterns for selecting folders that are to be included", + ) + + connection_mapping: Dict[str, ConnectionMappingConfig] = Field( + default={}, description="Custom mappings for connections" + ) + + query_name_template: Optional[str] = Field( + default="QUERY/{name}", + description="Template for generating dataset urns of consumed queries, the placeholder {query} can be used within the template for inserting the name of the query", + ) + + @validator("tenant_url", "token_url") + def remove_trailing_slash(cls, v): + return config_clean.remove_trailing_slashes(v) + + +@dataclass +class SACSourceReport(StaleEntityRemovalSourceReport): + pass + + +@platform_name("SAP Analytics Cloud", id="sac") +@config_class(SACSourceConfig) +@support_status(SupportStatus.TESTING) +@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") +@capability(SourceCapability.DESCRIPTIONS, "Enabled by default") +@capability( + SourceCapability.LINEAGE_COARSE, + "Enabled by default (only for Live Data Models)", +) +@capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion") +@capability( + SourceCapability.SCHEMA_METADATA, + "Enabled by default (only for Import Data Models)", +) +class SACSource(StatefulIngestionSourceBase, TestableSource): + config: SACSourceConfig + report: SACSourceReport + platform = "sac" + + session: OAuth2Session + client: pyodata.Client + + ingested_dataset_entities: Set[str] = set() + ingested_upstream_dataset_keys: Set[str] = set() + + def __init__(self, config: SACSourceConfig, ctx: PipelineContext): + super().__init__(config, ctx) + self.config = config + self.report = SACSourceReport() + + self.session, self.client = SACSource.get_sac_connection(self.config) + + def close(self) -> None: + self.session.close() + super().close() + + @classmethod + def create(cls, config_dict: dict, ctx: PipelineContext) -> "SACSource": + config = SACSourceConfig.parse_obj(config_dict) + return cls(config, ctx) + + @staticmethod + def test_connection(config_dict: dict) -> TestConnectionReport: + test_report = TestConnectionReport() + + try: + config = SACSourceConfig.parse_obj(config_dict) + + # when creating the pyodata.Client, the metadata is automatically parsed and validated + session, _ = SACSource.get_sac_connection(config) + + # test the Data Import Service separately here, because it requires specific properties when configuring the OAuth client + response = session.get(url=f"{config.tenant_url}/api/v1/dataimport/models") + response.raise_for_status() + + session.close() + + test_report.basic_connectivity = CapabilityReport(capable=True) + except Exception as e: + test_report.basic_connectivity = CapabilityReport( + capable=False, failure_reason=f"{e}" + ) + + return test_report + + def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: + return [ + *super().get_workunit_processors(), + partial( + auto_incremental_lineage, + self.config.incremental_lineage, + ), + StaleEntityRemovalHandler.create( + self, self.config, self.ctx + ).workunit_processor, + ] + + def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: + if self.config.ingest_stories or self.config.ingest_applications: + resources = self.get_resources() + + for resource in resources: + datasets = [] + + for resource_model in resource.resource_models: + dataset_urn = make_dataset_urn_with_platform_instance( + platform=self.platform, + name=f"{resource_model.namespace}:{resource_model.model_id}", + platform_instance=self.config.platform_instance, + env=self.config.env, + ) + + if dataset_urn not in datasets: + datasets.append(dataset_urn) + + if dataset_urn in self.ingested_dataset_entities: + continue + + self.ingested_dataset_entities.add(dataset_urn) + + yield from self.get_model_workunits(dataset_urn, resource_model) + + yield from self.get_resource_workunits(resource, datasets) + + def get_report(self) -> SACSourceReport: + return self.report + + def get_resource_workunits( + self, resource: Resource, datasets: List[str] + ) -> Iterable[MetadataWorkUnit]: + dashboard_urn = make_dashboard_urn( + platform=self.platform, + name=resource.resource_id, + platform_instance=self.config.platform_instance, + ) + + if resource.ancestor_path: + mcp = MetadataChangeProposalWrapper( + entityUrn=dashboard_urn, + aspect=BrowsePathsClass( + paths=[ + f"/{self.platform}/{resource.ancestor_path}", + ], + ), + ) + + yield mcp.as_workunit() + + mcp = MetadataChangeProposalWrapper( + entityUrn=dashboard_urn, + aspect=BrowsePathsV2Class( + path=[ + BrowsePathEntryClass(id=folder_name) + for folder_name in resource.ancestor_path.split("/") + ], + ), + ) + + yield mcp.as_workunit() + + if self.config.platform_instance is not None: + mcp = MetadataChangeProposalWrapper( + entityUrn=dashboard_urn, + aspect=DataPlatformInstanceClass( + platform=make_data_platform_urn(self.platform), + instance=make_dataplatform_instance_urn( + self.platform, self.config.platform_instance + ), + ), + ) + + yield mcp.as_workunit() + + mcp = MetadataChangeProposalWrapper( + entityUrn=dashboard_urn, + aspect=DashboardInfoClass( + title=resource.name, + description=resource.description, + lastModified=ChangeAuditStampsClass( + created=AuditStampClass( + time=round(resource.created_time.timestamp() * 1000), + actor=make_user_urn(resource.created_by) + if resource.created_by + else "urn:li:corpuser:unknown", + ), + lastModified=AuditStampClass( + time=round(resource.modified_time.timestamp() * 1000), + actor=make_user_urn(resource.modified_by) + if resource.modified_by + else "urn:li:corpuser:unknown", + ), + ), + customProperties={ + "resourceType": resource.resource_type, + "resourceSubtype": resource.resource_subtype, + "storyId": resource.story_id, + "isMobile": str(resource.is_mobile), + }, + datasets=sorted(datasets) if datasets else None, + externalUrl=f"{self.config.tenant_url}{resource.open_url}", + ), + ) + + yield mcp.as_workunit() + + type_name: Optional[str] = None + if resource.resource_subtype == "": + type_name = BIAssetSubTypes.SAC_STORY + elif resource.resource_subtype == "APPLICATION": + type_name = BIAssetSubTypes.SAC_APPLICATION + + if type_name: + mcp = MetadataChangeProposalWrapper( + entityUrn=dashboard_urn, + aspect=SubTypesClass( + typeNames=[type_name], + ), + ) + + yield mcp.as_workunit() + + def get_model_workunits( + self, dataset_urn: str, model: ResourceModel + ) -> Iterable[MetadataWorkUnit]: + mcp = MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=DatasetPropertiesClass( + name=model.name, + description=model.description, + customProperties={ + "namespace": model.namespace, + "modelId": model.model_id, + "isImport": "true" if model.is_import else "false", + }, + externalUrl=f"{self.config.tenant_url}/sap/fpa/ui/tenants/3c44c#view_id=model;model_id={model.namespace}:{model.model_id}", + ), + ) + + yield mcp.as_workunit() + + if model.is_import and self.config.ingest_import_data_model_schema_metadata: + primary_fields: List[str] = [] + schema_fields: List[SchemaFieldClass] = [] + + columns = self.get_import_data_model_columns(model_id=model.model_id) + for column in columns: + + schema_field = SchemaFieldClass( + fieldPath=column.name, + type=self.get_schema_field_data_type(column), + nativeDataType=self.get_schema_field_native_data_type(column), + description=column.description, + isPartOfKey=column.is_key, + ) + + schema_fields.append(schema_field) + + if column.is_key: + primary_fields.append(column.name) + + mcp = MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=SchemaMetadataClass( + schemaName=model.model_id, + platform=make_data_platform_urn(self.platform), + version=0, + hash="", + platformSchema=SchemalessClass(), + fields=schema_fields, + primaryKeys=primary_fields, + ), + ) + + yield mcp.as_workunit() + + if model.system_type in ("BW", "HANA") and model.external_id is not None: + upstream_dataset_name: Optional[str] = None + + if model.system_type == "BW" and model.external_id.startswith( + "query:" + ): # query:[][][query] + query = model.external_id[11:-1] + upstream_dataset_name = self.get_query_name(query) + elif model.system_type == "HANA" and model.external_id.startswith( + "view:" + ): # view:[schema][schema.namespace][view] + schema, namespace_with_schema, view = model.external_id.split("][", 2) + schema = schema[6:] + namespace: Optional[str] = None + if len(schema) < len(namespace_with_schema): + namespace = namespace_with_schema[len(f"{schema}.") :] + view = view[:-1] + upstream_dataset_name = self.get_view_name(schema, namespace, view) + + if upstream_dataset_name is not None: + if model.connection_id in self.config.connection_mapping: + connection = self.config.connection_mapping[model.connection_id] + platform = ( + connection.platform + if connection.platform + else model.system_type.lower() + ) + platform_instance = connection.platform_instance + env = connection.env + else: + platform = model.system_type.lower() + platform_instance = model.connection_id + env = DEFAULT_ENV + + logger.info( + f"No connection mapping found for connection with id {model.connection_id}, connection id will be used as platform instance" + ) + + upstream_dataset_urn = make_dataset_urn_with_platform_instance( + platform=platform, + name=upstream_dataset_name, + platform_instance=platform_instance, + env=env, + ) + + if upstream_dataset_urn not in self.ingested_upstream_dataset_keys: + mcp = MetadataChangeProposalWrapper( + entityUrn=upstream_dataset_urn, + aspect=dataset_urn_to_key(upstream_dataset_urn), + ) + + yield mcp.as_workunit(is_primary_source=False) + + self.ingested_upstream_dataset_keys.add(upstream_dataset_urn) + + mcp = MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=UpstreamLineageClass( + upstreams=[ + UpstreamClass( + dataset=upstream_dataset_urn, + type=DatasetLineageTypeClass.COPY, + ), + ], + ), + ) + + yield mcp.as_workunit() + else: + self.report.report_warning( + "unknown-upstream-dataset", + f"Unknown upstream dataset for model with id {model.namespace}:{model.model_id} and external id {model.external_id}", + ) + elif model.system_type is not None: + self.report.report_warning( + "unknown-system-type", + f"Unknown system type {model.system_type} for model with id {model.namespace}:{model.model_id} and external id {model.external_id}", + ) + + mcp = MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=StatusClass( + removed=False, + ), + ) + + yield mcp.as_workunit() + + if model.external_id and model.connection_id and model.system_type: + type_name = DatasetSubTypes.SAC_LIVE_DATA_MODEL + elif model.is_import: + type_name = DatasetSubTypes.SAC_IMPORT_DATA_MODEL + else: + type_name = DatasetSubTypes.SAC_MODEL + + mcp = MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=SubTypesClass( + typeNames=[type_name], + ), + ) + + yield mcp.as_workunit() + + mcp = MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=DataPlatformInstanceClass( + platform=make_data_platform_urn(self.platform), + instance=self.config.platform_instance, + ), + ) + + yield mcp.as_workunit() + + @staticmethod + def get_sac_connection( + config: SACSourceConfig, + ) -> Tuple[OAuth2Session, pyodata.Client]: + session = OAuth2Session( + client_id=config.client_id, + client_secret=config.client_secret.get_secret_value(), + token_endpoint=config.token_url, + token_endpoint_auth_method="client_secret_post", + grant_type="client_credentials", + ) + + retries = 3 + backoff_factor = 10 + status_forcelist = (500,) + + retry = Retry( + total=retries, + read=retries, + connect=retries, + backoff_factor=backoff_factor, + status_forcelist=status_forcelist, + ) + + adapter = HTTPAdapter(max_retries=retry) + session.mount("http://", adapter) + session.mount("https://", adapter) + + session.register_compliance_hook( + "protected_request", _add_sap_sac_custom_auth_header + ) + session.fetch_token() + + client = pyodata.Client( + url=f"{config.tenant_url}/api/v1", + connection=session, + config=pyodata.v2.model.Config(retain_null=True), + ) + + return session, client + + def get_resources(self) -> Iterable[Resource]: + import_data_model_ids = self.get_import_data_model_ids() + + filter = "isTemplate eq 0 and isSample eq 0 and isPublic eq 1" + if self.config.ingest_stories and self.config.ingest_applications: + filter += " and ((resourceType eq 'STORY' and resourceSubtype eq '') or (resourceType eq 'STORY' and resourceSubtype eq 'APPLICATION'))" + elif self.config.ingest_stories and not self.config.ingest_applications: + filter += " and resourceType eq 'STORY' and resourceSubtype eq ''" + elif not self.config.ingest_stories and self.config.ingest_applications: + filter += ( + " and resourceType eq 'STORY' and resourceSubtype eq 'APPLICATION'" + ) + + select = "resourceId,resourceType,resourceSubtype,storyId,name,description,createdTime,createdBy,modifiedBy,modifiedTime,openURL,ancestorPath,isMobile" + + entities: pyodata.v2.service.ListWithTotalCount = ( + self.client.entity_sets.Resources.get_entities() + .custom("$format", "json") + .filter(filter) + .select(select) + .execute() + ) + entity: pyodata.v2.service.EntityProxy + for entity in entities: + resource_id: str = entity.resourceId + name: str = entity.name.strip() + + if not self.config.resource_id_pattern.allowed( + resource_id + ) or not self.config.resource_name_pattern.allowed(name): + continue + + ancestor_path: Optional[str] = None + + try: + ancestors = json.loads(entity.ancestorPath) + ancestor_path = "/".join( + ancestor.replace("/", "%2F") for ancestor in ancestors + ) + except json.JSONDecodeError: + pass + + if ancestor_path and not self.config.folder_pattern.allowed(ancestor_path): + continue + + resource_models: Set[ResourceModel] = set() + + select = "modelId,name,description,externalId,connectionId,systemType" + + nav_entities: pyodata.v2.service.EntitySetProxy = ( + entity.nav("resourceModels") + .get_entities() + .custom("$format", "json") + .select(select) + .execute() + ) + nav_entity: pyodata.v2.service.EntityProxy + for nav_entity in nav_entities: + # the model id can have a different structure, commonly all model ids have a namespace (the part before the colon) and the model id itself + # t.4.sap.fpa.services.userFriendlyPerfLog:ACTIVITY_LOG is a builtin model without a possiblity to get more metadata about the model + # t.4.YV67EM4QBRU035A7TVKERZ786N:YV67EM4QBRU035A7TVKERZ786N is a model id where the model id itself also appears as part of the namespace + # t.4:C76tt2j402o1e69wnvrwfcl79c is a model id without the model id itself as part of the namespace + model_id: str = nav_entity.modelId + namespace, _, model_id = model_id.partition(":") + + resource_models.add( + ResourceModel( + namespace=namespace, + model_id=model_id, + name=nav_entity.name.strip(), + description=nav_entity.description.strip(), + system_type=nav_entity.systemType, # BW or HANA + connection_id=nav_entity.connectionId, + external_id=nav_entity.externalId, # query:[][][query] or view:[schema][schema.namespace][view] + is_import=model_id in import_data_model_ids, + ) + ) + + created_by: Optional[str] = entity.createdBy + if created_by in ("SYSTEM", "$DELETED_USER$"): + created_by = None + + modified_by: Optional[str] = entity.modifiedBy + if modified_by in ("SYSTEM", "$DELETED_USER$"): + modified_by = None + + yield Resource( + resource_id=resource_id, + resource_type=entity.resourceType, + resource_subtype=entity.resourceSubtype, + story_id=entity.storyId, + name=name, + description=entity.description.strip(), + created_time=entity.createdTime, + created_by=created_by, + modified_time=entity.modifiedTime, + modified_by=modified_by, + open_url=entity.openURL, + ancestor_path=ancestor_path, + is_mobile=entity.isMobile, + resource_models=frozenset(resource_models), + ) + + def get_import_data_model_ids(self) -> Set[str]: + response = self.session.get( + url=f"{self.config.tenant_url}/api/v1/dataimport/models" + ) + response.raise_for_status() + + import_data_model_ids = set( + model["modelID"] for model in response.json()["models"] + ) + return import_data_model_ids + + def get_import_data_model_columns( + self, model_id: str + ) -> List[ImportDataModelColumn]: + response = self.session.get( + url=f"{self.config.tenant_url}/api/v1/dataimport/models/{model_id}/metadata" + ) + response.raise_for_status() + + model_metadata = response.json() + + columns: List[ImportDataModelColumn] = [] + for column in model_metadata["factData"]["columns"]: + columns.append( + ImportDataModelColumn( + name=column["columnName"].strip(), + description=column["descriptionName"].strip(), + property_type=column["propertyType"], + data_type=column["columnDataType"], + max_length=column.get("maxLength"), + precision=column.get("precision"), + scale=column.get("scale"), + is_key=column["isKey"], + ) + ) + + return columns + + def get_query_name(self, query: str) -> str: + if not self.config.query_name_template: + return query + + query_name = self.config.query_name_template + query_name = query_name.replace("{name}", query) + + return query_name + + def get_view_name(self, schema: str, namespace: Optional[str], view: str) -> str: + if namespace: + return f"{schema}.{namespace}::{view}" + + return f"{schema}.{view}" + + def get_schema_field_data_type( + self, column: ImportDataModelColumn + ) -> SchemaFieldDataTypeClass: + if column.property_type == "DATE": + return SchemaFieldDataTypeClass(type=DateTypeClass()) + else: + if column.data_type == "string": + return SchemaFieldDataTypeClass(type=StringTypeClass()) + elif column.data_type in ("decimal", "int32"): + return SchemaFieldDataTypeClass(type=NumberTypeClass()) + else: + self.report.report_warning( + "unknown-data-type", + f"Unknown data type {column.data_type} found", + ) + + return SchemaFieldDataTypeClass(type=NullTypeClass()) + + def get_schema_field_native_data_type(self, column: ImportDataModelColumn) -> str: + native_data_type = column.data_type + if column.data_type == "decimal": + native_data_type = f"{column.data_type}({column.precision}, {column.scale})" + elif column.data_type == "int32": + native_data_type = f"{column.data_type}({column.precision})" + elif column.max_length is not None: + native_data_type = f"{column.data_type}({column.max_length})" + + return native_data_type + + +def _add_sap_sac_custom_auth_header( + url: str, headers: Dict[str, str], body: Any +) -> Tuple[str, Dict[str, str], Any]: + headers["x-sap-sac-custom-auth"] = "true" + return url, headers, body diff --git a/metadata-ingestion/src/datahub/ingestion/source/sac/sac_common.py b/metadata-ingestion/src/datahub/ingestion/source/sac/sac_common.py new file mode 100644 index 00000000000000..457fda1e061814 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/sac/sac_common.py @@ -0,0 +1,45 @@ +from dataclasses import dataclass +from datetime import datetime +from typing import FrozenSet, Optional + + +@dataclass(frozen=True) +class ResourceModel: + namespace: str + model_id: str + name: str + description: str + system_type: Optional[str] + connection_id: Optional[str] + external_id: Optional[str] + is_import: bool + + +@dataclass(frozen=True) +class Resource: + resource_id: str + resource_type: str + resource_subtype: str + story_id: str + name: str + description: str + created_time: datetime + created_by: Optional[str] + modified_time: datetime + modified_by: Optional[str] + open_url: str + ancestor_path: Optional[str] + is_mobile: bool + resource_models: FrozenSet[ResourceModel] + + +@dataclass(frozen=True) +class ImportDataModelColumn: + name: str + description: str + property_type: str + data_type: str + max_length: Optional[int] + precision: Optional[int] + scale: Optional[int] + is_key: bool diff --git a/metadata-ingestion/src/datahub/utilities/logging_manager.py b/metadata-ingestion/src/datahub/utilities/logging_manager.py index 1eb763394094ac..926b8782fbf119 100644 --- a/metadata-ingestion/src/datahub/utilities/logging_manager.py +++ b/metadata-ingestion/src/datahub/utilities/logging_manager.py @@ -283,3 +283,4 @@ def configure_logging(debug: bool, log_file: Optional[str] = None) -> Iterator[N logging.getLogger("snowflake").setLevel(level=logging.WARNING) # logging.getLogger("botocore").setLevel(logging.INFO) # logging.getLogger("google").setLevel(logging.INFO) +logging.getLogger("pyodata").setLevel(logging.WARNING) diff --git a/metadata-ingestion/tests/integration/sac/__init__.py b/metadata-ingestion/tests/integration/sac/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-ingestion/tests/integration/sac/metadata.xml b/metadata-ingestion/tests/integration/sac/metadata.xml new file mode 100644 index 00000000000000..203f85dd286c9b --- /dev/null +++ b/metadata-ingestion/tests/integration/sac/metadata.xml @@ -0,0 +1,404 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sac/sac_mces_golden.json b/metadata-ingestion/tests/integration/sac/sac_mces_golden.json new file mode 100644 index 00000000000000..c88ded368711fb --- /dev/null +++ b/metadata-ingestion/tests/integration/sac/sac_mces_golden.json @@ -0,0 +1,663 @@ +[ +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sac,LXTH4JCE36EOYLU41PIINLYPU9XRYM26)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/sac/Public/Folder 1/Folder 2" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sac,LXTH4JCE36EOYLU41PIINLYPU9XRYM26)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Public" + }, + { + "id": "Folder 1" + }, + { + "id": "Folder 2" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.ANL8Q577BA2F73KU3VELDXGWZK:ANL8Q577BA2F73KU3VELDXGWZK,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "namespace": "t.4.ANL8Q577BA2F73KU3VELDXGWZK", + "modelId": "ANL8Q577BA2F73KU3VELDXGWZK", + "isImport": "false" + }, + "externalUrl": "http://tenant/sap/fpa/ui/tenants/3c44c#view_id=model;model_id=t.4.ANL8Q577BA2F73KU3VELDXGWZK:ANL8Q577BA2F73KU3VELDXGWZK", + "name": "Name of the first model (BW)", + "description": "Description of the first model which has a connection to a BW query", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hana,HANA.SCHEMA.CE.SCHEMA::VIEW,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:hana", + "name": "HANA.SCHEMA.CE.SCHEMA::VIEW", + "origin": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.ANL8Q577BA2F73KU3VELDXGWZK:ANL8Q577BA2F73KU3VELDXGWZK,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bw,BW.QUERY/QUERY_TECHNICAL_NAME,PROD)", + "type": "COPY" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.ANL8Q577BA2F73KU3VELDXGWZK:ANL8Q577BA2F73KU3VELDXGWZK,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.ANL8Q577BA2F73KU3VELDXGWZK:ANL8Q577BA2F73KU3VELDXGWZK,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Live Data Model" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.ANL8Q577BA2F73KU3VELDXGWZK:ANL8Q577BA2F73KU3VELDXGWZK,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sac" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.K73U3VELDXGWZKANL8Q577BA2F:K73U3VELDXGWZKANL8Q577BA2F,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "namespace": "t.4.K73U3VELDXGWZKANL8Q577BA2F", + "modelId": "K73U3VELDXGWZKANL8Q577BA2F", + "isImport": "false" + }, + "externalUrl": "http://tenant/sap/fpa/ui/tenants/3c44c#view_id=model;model_id=t.4.K73U3VELDXGWZKANL8Q577BA2F:K73U3VELDXGWZKANL8Q577BA2F", + "name": "Name of the second model (HANA)", + "description": "Description of the second model which has a connection to a HANA view", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.DXGWZKANLK73U3VEL8Q577BA2F:DXGWZKANLK73U3VEL8Q577BA2F,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "DXGWZKANLK73U3VEL8Q577BA2F", + "platform": "urn:li:dataPlatform:sac", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.Schemaless": {} + }, + "fields": [ + { + "fieldPath": "Account", + "nullable": false, + "description": "Account", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string(256)", + "recursive": false, + "isPartOfKey": true + }, + { + "fieldPath": "FIELD1", + "nullable": false, + "description": "FIELD1", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string(256)", + "recursive": false, + "isPartOfKey": true + }, + { + "fieldPath": "FIELD2", + "nullable": false, + "description": "FIELD2", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string(256)", + "recursive": false, + "isPartOfKey": true + }, + { + "fieldPath": "FIELD3", + "nullable": false, + "description": "FIELD3", + "type": { + "type": { + "com.linkedin.schema.DateType": {} + } + }, + "nativeDataType": "string(256)", + "recursive": false, + "isPartOfKey": true + }, + { + "fieldPath": "Version", + "nullable": false, + "description": "Version", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string(300)", + "recursive": false, + "isPartOfKey": true + }, + { + "fieldPath": "SignedData", + "nullable": false, + "description": "SignedData", + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "decimal(31, 7)", + "recursive": false, + "isPartOfKey": false + } + ], + "primaryKeys": [ + "Account", + "FIELD1", + "FIELD2", + "FIELD3", + "Version" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.K73U3VELDXGWZKANL8Q577BA2F:K73U3VELDXGWZKANL8Q577BA2F,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:hana,HANA.SCHEMA.CE.SCHEMA::VIEW,PROD)", + "type": "COPY" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.K73U3VELDXGWZKANL8Q577BA2F:K73U3VELDXGWZKANL8Q577BA2F,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.K73U3VELDXGWZKANL8Q577BA2F:K73U3VELDXGWZKANL8Q577BA2F,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Live Data Model" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.K73U3VELDXGWZKANL8Q577BA2F:K73U3VELDXGWZKANL8Q577BA2F,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sac" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.DXGWZKANLK73U3VEL8Q577BA2F:DXGWZKANLK73U3VEL8Q577BA2F,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "namespace": "t.4.DXGWZKANLK73U3VEL8Q577BA2F", + "modelId": "DXGWZKANLK73U3VEL8Q577BA2F", + "isImport": "true" + }, + "externalUrl": "http://tenant/sap/fpa/ui/tenants/3c44c#view_id=model;model_id=t.4.DXGWZKANLK73U3VEL8Q577BA2F:DXGWZKANLK73U3VEL8Q577BA2F", + "name": "Name of the third model (Import)", + "description": "Description of the third model which was imported", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.DXGWZKANLK73U3VEL8Q577BA2F:DXGWZKANLK73U3VEL8Q577BA2F,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bw,BW.QUERY/QUERY_TECHNICAL_NAME,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetKey", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:bw", + "name": "BW.QUERY/QUERY_TECHNICAL_NAME", + "origin": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.DXGWZKANLK73U3VEL8Q577BA2F:DXGWZKANLK73U3VEL8Q577BA2F,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Import Data Model" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.DXGWZKANLK73U3VEL8Q577BA2F:DXGWZKANLK73U3VEL8Q577BA2F,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:sac" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sac,LXTH4JCE36EOYLU41PIINLYPU9XRYM26)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "json": { + "customProperties": { + "resourceType": "STORY", + "resourceSubtype": "", + "storyId": "STORY:t.4:LXTH4JCE36EOYLU41PIINLYPU9XRYM26", + "isMobile": "0" + }, + "externalUrl": "http://tenant/sap/fpa/ui/tenants/3c44c/bo/story/LXTH4JCE36EOYLU41PIINLYPU9XRYM26", + "title": "Name of the story", + "description": "Description of the story", + "charts": [], + "datasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.ANL8Q577BA2F73KU3VELDXGWZK:ANL8Q577BA2F73KU3VELDXGWZK,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.K73U3VELDXGWZKANL8Q577BA2F:K73U3VELDXGWZKANL8Q577BA2F,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.DXGWZKANLK73U3VEL8Q577BA2F:DXGWZKANLK73U3VEL8Q577BA2F,PROD)" + ], + "lastModified": { + "created": { + "time": 1667544309783, + "actor": "urn:li:corpuser:JOHN_DOE" + }, + "lastModified": { + "time": 1673067981272, + "actor": "urn:li:corpuser:JOHN_DOE" + } + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sac,LXTH4JCE36EOYLU41PIINLYPU9XRYM26)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sac,LXTH4JCE36EOYLU41PIINLYPU9XRYM26)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Story" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sac,EOYLU41PIILXTH4JCE36NLYPU9XRYM26)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/sac/Public/Folder 1/Folder 2" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sac,EOYLU41PIILXTH4JCE36NLYPU9XRYM26)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "json": { + "customProperties": { + "resourceType": "STORY", + "resourceSubtype": "APPLICATION", + "storyId": "STORY:t.4:EOYLU41PIILXTH4JCE36NLYPU9XRYM26", + "isMobile": "0" + }, + "externalUrl": "http://tenant/sap/fpa/ui/tenants/3c44c/bo/story/EOYLU41PIILXTH4JCE36NLYPU9XRYM26", + "title": "Name of the application", + "description": "Description of the application", + "charts": [], + "datasets": [ + "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.ANL8Q577BA2F73KU3VELDXGWZK:ANL8Q577BA2F73KU3VELDXGWZK,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.K73U3VELDXGWZKANL8Q577BA2F:K73U3VELDXGWZKANL8Q577BA2F,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:sac,t.4.DXGWZKANLK73U3VEL8Q577BA2F:DXGWZKANLK73U3VEL8Q577BA2F,PROD)" + ], + "lastModified": { + "created": { + "time": 1673279404272, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1673279414272, + "actor": "urn:li:corpuser:unknown" + } + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sac,EOYLU41PIILXTH4JCE36NLYPU9XRYM26)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sac,EOYLU41PIILXTH4JCE36NLYPU9XRYM26)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Application" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(sac,EOYLU41PIILXTH4JCE36NLYPU9XRYM26)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Public" + }, + { + "id": "Folder 1" + }, + { + "id": "Folder 2" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "sac-integration-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sac/test_sac.py b/metadata-ingestion/tests/integration/sac/test_sac.py new file mode 100644 index 00000000000000..2b6ca81700712e --- /dev/null +++ b/metadata-ingestion/tests/integration/sac/test_sac.py @@ -0,0 +1,316 @@ +from functools import partial +from typing import Dict +from urllib.parse import parse_qs + +import pytest + +from datahub.ingestion.run.pipeline import Pipeline +from tests.test_helpers import mce_helpers + +MOCK_TENANT_URL = "http://tenant" +MOCK_TOKEN_URL = "http://tenant.authentication/oauth/token" +MOCK_CLIENT_ID = "foo" +MOCK_CLIENT_SECRET = "bar" +MOCK_ACCESS_TOKEN = "foobaraccesstoken" + + +@pytest.mark.integration +def test_sac( + pytestconfig, + tmp_path, + requests_mock, + mock_time, +): + requests_mock.post( + MOCK_TOKEN_URL, + json=match_token_url, + ) + + test_resources_dir = pytestconfig.rootpath / "tests/integration/sac" + + with open(f"{test_resources_dir}/metadata.xml", mode="rb") as f: + content = f.read() + requests_mock.get( + f"{MOCK_TENANT_URL}/api/v1/$metadata", + content=partial(match_metadata, content=content), + ) + + requests_mock.get( + f"{MOCK_TENANT_URL}/api/v1/Resources?$format=json&$filter=isTemplate eq 0 and isSample eq 0 and isPublic eq 1 and ((resourceType eq 'STORY' and resourceSubtype eq '') or (resourceType eq 'STORY' and resourceSubtype eq 'APPLICATION'))&$select=resourceId,resourceType,resourceSubtype,storyId,name,description,createdTime,createdBy,modifiedBy,modifiedTime,openURL,ancestorPath,isMobile", + json=match_resources, + ) + + requests_mock.get( + f"{MOCK_TENANT_URL}/api/v1/Resources%28%27LXTH4JCE36EOYLU41PIINLYPU9XRYM26%27%29/resourceModels?$format=json&$select=modelId,name,description,externalId,connectionId,systemType", + json=partial(match_resource, resource_id="LXTH4JCE36EOYLU41PIINLYPU9XRYM26"), + ) + + requests_mock.get( + f"{MOCK_TENANT_URL}/api/v1/Resources%28%27EOYLU41PIILXTH4JCE36NLYPU9XRYM26%27%29/resourceModels?$format=json&$select=modelId,name,description,externalId,connectionId,systemType", + json=partial(match_resource, resource_id="EOYLU41PIILXTH4JCE36NLYPU9XRYM26"), + ) + + requests_mock.get( + f"{MOCK_TENANT_URL}/api/v1/dataimport/models", + json=match_models, + ) + + requests_mock.get( + f"{MOCK_TENANT_URL}/api/v1/dataimport/models/DXGWZKANLK73U3VEL8Q577BA2F/metadata", + json=match_model_metadata, + ) + + pipeline = Pipeline.create( + { + "run_id": "sac-integration-test", + "source": { + "type": "sac", + "config": { + "tenant_url": MOCK_TENANT_URL, + "token_url": MOCK_TOKEN_URL, + "client_id": MOCK_CLIENT_ID, + "client_secret": MOCK_CLIENT_SECRET, + }, + }, + "sink": { + "type": "file", + "config": {"filename": f"{tmp_path}/sac_mces.json"}, + }, + }, + ) + + pipeline.run() + pipeline.raise_from_status() + + mce_helpers.check_golden_file( + pytestconfig, + output_path=f"{tmp_path}/sac_mces.json", + golden_path=test_resources_dir / "sac_mces_golden.json", + ignore_paths=mce_helpers.IGNORE_PATH_TIMESTAMPS, + ) + + +def match_token_url(request, context): + form = parse_qs(request.text, strict_parsing=True) + + assert "grant_type" in form + assert len(form["grant_type"]) == 1 + assert form["grant_type"][0] == "client_credentials" + + assert "client_id" in form + assert len(form["client_id"]) == 1 + assert form["client_id"][0] == MOCK_CLIENT_ID + + assert "client_secret" in form + assert len(form["client_secret"]) == 1 + assert form["client_secret"][0] == MOCK_CLIENT_SECRET + + json = { + "access_token": MOCK_ACCESS_TOKEN, + "expires_in": 3599, + } + + return json + + +def check_authorization(headers: Dict[str, str]) -> None: + assert "Authorization" in headers + assert headers["Authorization"] == f"Bearer {MOCK_ACCESS_TOKEN}" + + assert "x-sap-sac-custom-auth" in headers + assert headers["x-sap-sac-custom-auth"] == "true" + + +def match_metadata(request, context, content): + check_authorization(request.headers) + + context.headers["content-type"] = "application/xml" + + return content + + +def match_resources(request, context): + check_authorization(request.headers) + + json = { + "d": { + "results": [ + { + "__metadata": { + "type": "sap.fpa.services.search.internal.ResourcesType", + "uri": "/api/v1/Resources('LXTH4JCE36EOYLU41PIINLYPU9XRYM26')", + }, + "name": "Name of the story", + "description": "Description of the story", + "resourceId": "LXTH4JCE36EOYLU41PIINLYPU9XRYM26", + "resourceType": "STORY", + "resourceSubtype": "", + "storyId": "STORY:t.4:LXTH4JCE36EOYLU41PIINLYPU9XRYM26", + "createdTime": "/Date(1667544309783)/", + "createdBy": "JOHN_DOE", + "modifiedBy": "JOHN_DOE", + "modifiedTime": "/Date(1673067981272)/", + "isMobile": 0, + "openURL": "/sap/fpa/ui/tenants/3c44c/bo/story/LXTH4JCE36EOYLU41PIINLYPU9XRYM26", + "ancestorPath": '["Public","Folder 1","Folder 2"]', + }, + { + "__metadata": { + "type": "sap.fpa.services.search.internal.ResourcesType", + "uri": "/api/v1/Resources('EOYLU41PIILXTH4JCE36NLYPU9XRYM26')", + }, + "name": "Name of the application", + "description": "Description of the application", + "resourceId": "EOYLU41PIILXTH4JCE36NLYPU9XRYM26", + "resourceType": "STORY", + "resourceSubtype": "APPLICATION", + "storyId": "STORY:t.4:EOYLU41PIILXTH4JCE36NLYPU9XRYM26", + "createdTime": "/Date(1673279404272)/", + "createdBy": "SYSTEM", + "modifiedBy": "$DELETED_USER$", + "modifiedTime": "/Date(1673279414272)/", + "isMobile": 0, + "openURL": "/sap/fpa/ui/tenants/3c44c/bo/story/EOYLU41PIILXTH4JCE36NLYPU9XRYM26", + "ancestorPath": '["Public","Folder 1","Folder 2"]', + }, + ], + }, + } + + return json + + +def match_resource(request, context, resource_id): + check_authorization(request.headers) + + json = { + "d": { + "results": [ + { + "__metadata": { + "type": "sap.fpa.services.search.internal.ModelsType", + "uri": f"/api/v1/Models(resourceId='{resource_id}',modelId='t.4.ANL8Q577BA2F73KU3VELDXGWZK%3AANL8Q577BA2F73KU3VELDXGWZK')", + }, + "modelId": "t.4.ANL8Q577BA2F73KU3VELDXGWZK:ANL8Q577BA2F73KU3VELDXGWZK", + "name": "Name of the first model (BW)", + "description": "Description of the first model which has a connection to a BW query", + "externalId": "query:[][][QUERY_TECHNICAL_NAME]", + "connectionId": "BW", + "systemType": "BW", + }, + { + "__metadata": { + "type": "sap.fpa.services.search.internal.ModelsType", + "uri": f"/api/v1/Models(resourceId='{resource_id}',modelId='t.4.K73U3VELDXGWZKANL8Q577BA2F%3AK73U3VELDXGWZKANL8Q577BA2F')", + }, + "modelId": "t.4.K73U3VELDXGWZKANL8Q577BA2F:K73U3VELDXGWZKANL8Q577BA2F", + "name": "Name of the second model (HANA)", + "description": "Description of the second model which has a connection to a HANA view", + "externalId": "view:[SCHEMA][NAMESPACE.SCHEMA][VIEW]", + "connectionId": "HANA", + "systemType": "HANA", + }, + { + "__metadata": { + "type": "sap.fpa.services.search.internal.ModelsType", + "uri": f"/api/v1/Models(resourceId='{resource_id}',modelId='t.4.DXGWZKANLK73U3VEL8Q577BA2F%3ADXGWZKANLK73U3VEL8Q577BA2F')", + }, + "modelId": "t.4.DXGWZKANLK73U3VEL8Q577BA2F:DXGWZKANLK73U3VEL8Q577BA2F", + "name": "Name of the third model (Import)", + "description": "Description of the third model which was imported", + "externalId": "", + "connectionId": "", + "systemType": None, + }, + ], + }, + } + + return json + + +def match_models(request, context): + check_authorization(request.headers) + + json = { + "models": [ + { + "modelID": "DXGWZKANLK73U3VEL8Q577BA2F", + "modelName": "Name of the third model (Import)", + "modelDescription": "Description of the third model which was imported", + "modelURL": f"{MOCK_TENANT_URL}/api/v1/dataimport/models/DXGWZKANLK73U3VEL8Q577BA2F", + }, + ], + } + + return json + + +def match_model_metadata(request, context): + check_authorization(request.headers) + + json = { + "factData": { + "keys": [ + "Account", + "FIELD1", + "FIELD2", + "FIELD3", + "Version", + ], + "columns": [ + { + "columnName": "Account", + "columnDataType": "string", + "maxLength": 256, + "isKey": True, + "propertyType": "PROPERTY", + "descriptionName": "Account", + }, + { + "columnName": "FIELD1", + "columnDataType": "string", + "maxLength": 256, + "isKey": True, + "propertyType": "PROPERTY", + "descriptionName": "FIELD1", + }, + { + "columnName": "FIELD2", + "columnDataType": "string", + "maxLength": 256, + "isKey": True, + "propertyType": "PROPERTY", + "descriptionName": "FIELD2", + }, + { + "columnName": "FIELD3", + "columnDataType": "string", + "maxLength": 256, + "isKey": True, + "propertyType": "DATE", + "descriptionName": "FIELD3", + }, + { + "columnName": "Version", + "columnDataType": "string", + "maxLength": 300, + "isKey": True, + "propertyType": "PROPERTY", + "descriptionName": "Version", + }, + { + "columnName": "SignedData", + "columnDataType": "decimal", + "maxLength": 32, + "precision": 31, + "scale": 7, + "isKey": False, + "propertyType": "PROPERTY", + "descriptionName": "SignedData", + }, + ], + }, + } + + return json diff --git a/metadata-service/war/src/main/resources/boot/data_platforms.json b/metadata-service/war/src/main/resources/boot/data_platforms.json index ab040ad2854ddb..4830311996fd94 100644 --- a/metadata-service/war/src/main/resources/boot/data_platforms.json +++ b/metadata-service/war/src/main/resources/boot/data_platforms.json @@ -664,5 +664,15 @@ "type": "OTHERS", "logoUrl": "/assets/platforms/sigmalogo.png" } + }, + { + "urn": "urn:li:dataPlatform:sac", + "aspect": { + "datasetNameDelimiter": ".", + "name": "sac", + "displayName": "SAP Analytics Cloud", + "type": "OTHERS", + "logoUrl": "/assets/platforms/saclogo.svg" + } } ] diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js b/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js index 8707f090acad36..95c63835902e8a 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js @@ -13,7 +13,7 @@ describe("ingestion source creation flow", () => { cy.goToIngestionPage(); cy.clickOptionWithId('[data-node-key="Sources"]'); cy.clickOptionWithTestId("create-ingestion-source-button"); - cy.clickOptionWithText("Snowflake"); + cy.clickOptionWithTextToScrollintoView("Snowflake"); cy.waitTextVisible("Snowflake Details"); cy.get("#account_id").type(accound_id); cy.get("#warehouse").type(warehouse_id); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js b/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js index dd331fbcbd5ae2..5a2b101ddd8bb7 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js @@ -30,7 +30,7 @@ describe("managing secrets for ingestion creation", () => { cy.goToIngestionPage(); cy.clickOptionWithId('[data-node-key="Sources"]'); cy.get("#ingestion-create-source").click(); - cy.clickOptionWithText("Snowflake"); + cy.clickOptionWithTextToScrollintoView("Snowflake"); cy.waitTextVisible("Snowflake Details"); cy.get("#account_id").type(accound_id); cy.get("#warehouse").type(warehouse_id); @@ -69,7 +69,7 @@ describe("managing secrets for ingestion creation", () => { // Verify secret is not present during ingestion source creation for password dropdown cy.clickOptionWithText("Create new source"); - cy.clickOptionWithText("Snowflake"); + cy.clickOptionWithTextToScrollintoView("Snowflake"); cy.waitTextVisible("Snowflake Details"); cy.get("#account_id").type(accound_id); cy.get("#warehouse").type(warehouse_id); From 414dc54b13d2155f5fd9352721443d1a1f4cb7f1 Mon Sep 17 00:00:00 2001 From: Peter Esselius Date: Tue, 27 Aug 2024 01:38:01 +0200 Subject: [PATCH 8/9] feat(ingest/superset): clickhousedb -> clickhouse mapping in superset ingestion (#11201) --- metadata-ingestion/src/datahub/ingestion/source/superset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 0656f13feba64b..e563a806446c43 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -233,6 +233,8 @@ def get_platform_from_database_id(self, database_id): platform_name = get_platform_from_sqlalchemy_uri(sqlalchemy_uri) if platform_name == "awsathena": return "athena" + if platform_name == "clickhousedb": + return "clickhouse" return platform_name @lru_cache(maxsize=None) From 56a563b8c5ab23c0a14d1af9d496def565056434 Mon Sep 17 00:00:00 2001 From: Jay Feldman <8128360+feldjay@users.noreply.github.com> Date: Tue, 27 Aug 2024 00:19:32 -0400 Subject: [PATCH 9/9] feat(ingest/bigquery): Add query job retries for transient errors (#11162) Co-authored-by: Gabe Lyons Co-authored-by: Gabe Lyons --- .../source/bigquery_v2/bigquery_schema.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py index ba8462a6f5ff32..f248533edec8d7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py @@ -6,6 +6,7 @@ from google.api_core import retry from google.cloud import bigquery, datacatalog_v1, resourcemanager_v3 +from google.cloud.bigquery import retry as bq_retry from google.cloud.bigquery.table import ( RowIterator, TableListItem, @@ -155,8 +156,23 @@ def __init__( self.datacatalog_client = datacatalog_client def get_query_result(self, query: str) -> RowIterator: + def _should_retry(exc: BaseException) -> bool: + logger.debug(f"Exception occured for job query. Reason: {exc}") + # Jobs sometimes fail with transient errors. + # This is not currently handled by the python-bigquery client. + # https://github.com/googleapis/python-bigquery/issues/23 + return "Retrying the job may solve the problem" in str(exc) + logger.debug(f"Query : {query}") - resp = self.bq_client.query(query) + resp = self.bq_client.query( + query, + job_retry=retry.Retry( + predicate=lambda exc: ( + bq_retry.DEFAULT_JOB_RETRY._predicate(exc) or _should_retry(exc) + ), + deadline=bq_retry.DEFAULT_JOB_RETRY._deadline, + ), + ) return resp.result() def get_projects(self, max_results_per_page: int = 100) -> List[BigqueryProject]: