diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/__tests__/utils.test.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/__tests__/utils.test.ts new file mode 100644 index 00000000000000..160074d94abac4 --- /dev/null +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/__tests__/utils.test.ts @@ -0,0 +1,26 @@ +import { validateURL } from '../../../utils'; + +describe('validateURL function', () => { + it('should resolve if the URL is valid', async () => { + const validator = validateURL('test'); + await expect(validator.validator(null, 'https://example.com')).resolves.toBeUndefined(); + await expect(validator.validator(null, 'http://example.com')).resolves.toBeUndefined(); + await expect(validator.validator(null, 'http://subdomain.example.com/path')).resolves.toBeUndefined(); + }); + + it('should reject if the URL is invalid', async () => { + const validator = validateURL('test url'); + await expect(validator.validator(null, 'http://example')).rejects.toThrowError('A valid test url is required.'); + await expect(validator.validator(null, 'example')).rejects.toThrowError('A valid test url is required.'); + await expect(validator.validator(null, 'http://')).rejects.toThrowError( + 'A valid test url is required.', + ); + }); + + it('should resolve if the value is empty', async () => { + const validator = validateURL('test'); + await expect(validator.validator(null, '')).resolves.toBeUndefined(); + await expect(validator.validator(null, undefined)).resolves.toBeUndefined(); + await expect(validator.validator(null, null)).resolves.toBeUndefined(); + }); +}); diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/azure.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/azure.ts new file mode 100644 index 00000000000000..9dfbaaae0a26dd --- /dev/null +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/azure.ts @@ -0,0 +1,171 @@ +import { validateURL } from '../../utils'; +import { RecipeField, FieldType, setListValuesOnRecipe } from './common'; + +export const AZURE_CLIENT_ID: RecipeField = { + name: 'client_id', + label: 'Client ID', + tooltip: 'Application ID. Found in your app registration on the Azure AD Portal.', + type: FieldType.TEXT, + fieldPath: 'source.config.client_id', + placeholder: '00000000-0000-0000-0000-000000000000', + required: true, + rules: null, +}; + +export const AZURE_TENANT_ID: RecipeField = { + name: 'tenant_id', + label: 'Tenant ID', + tooltip: 'Directory ID. Found in your app registration on the Azure AD Portal.', + type: FieldType.TEXT, + fieldPath: 'source.config.tenant_id', + placeholder: '00000000-0000-0000-0000-000000000000', + required: true, + rules: null, +}; + +export const AZURE_CLIENT_SECRET: RecipeField = { + name: 'client_secret', + label: 'Client Secret', + tooltip: 'The Azure client secret.', + type: FieldType.SECRET, + fieldPath: 'source.config.client_secret', + placeholder: '00000000-0000-0000-0000-000000000000', + required: true, + rules: null, +}; + +export const AZURE_REDIRECT_URL: RecipeField = { + name: 'redirect', + label: 'Redirect URL', + tooltip: 'Redirect URL. Found in your app registration on the Azure AD Portal.', + type: FieldType.TEXT, + fieldPath: 'source.config.redirect', + placeholder: 'https://login.microsoftonline.com/common/oauth2/nativeclient', + required: true, + rules: [() => validateURL('Redirect URL')], +};
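The pattern these tests exercise (added to `utils.ts` further down in this diff) ports to Python nearly verbatim, which is handy for probing edge cases outside Jest. A rough sketch, assuming Python's `re` treats these character classes the same way the JavaScript engine does:

```python
import re

# Port of the URL pattern from validateURL in utils.ts (illustrative only).
URL_PATTERN = re.compile(
    r"^(?:http(s)?://)?[\w.-]+(?:\.[\w.-]+)+[\w\-._~:/?#[\]@!$&'()*+,;=.]+$"
)


def is_url_valid(value) -> bool:
    # Mirrors the TS rule: empty values resolve, otherwise the regex decides.
    return not value or bool(URL_PATTERN.match(value))


assert is_url_valid("https://example.com")
assert is_url_valid("dev-35531955.okta.com")  # protocol is optional
assert not is_url_valid("http://example")  # no dot-separated domain part
assert is_url_valid("")  # empty input is left to the `required` flag
```

The empty-value escape hatch is what lets optional URL fields stay blank: presence is enforced separately by each field's `required` flag, not by the regex.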
+ +export const AZURE_AUTHORITY_URL: RecipeField = { + name: 'authority', + label: 'Authority URL', + tooltip: 'A URL that indicates a directory that MSAL can request tokens from.', + type: FieldType.TEXT, + fieldPath: 'source.config.authority', + placeholder: 'https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000', + required: true, + rules: [() => validateURL('Azure authority URL')], +}; + +export const AZURE_TOKEN_URL: RecipeField = { + name: 'token_url', + label: 'Token URL', + tooltip: + 'The URL used to acquire a token from Azure AD for authorizing requests. This source only works with the v1.0 endpoint.', + type: FieldType.TEXT, + fieldPath: 'source.config.token_url', + placeholder: 'https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000/oauth2/token', + required: true, + rules: [() => validateURL('Azure token URL')], +}; + +export const AZURE_GRAPH_URL: RecipeField = { + name: 'graph_url', + label: 'Graph URL', + tooltip: 'The Microsoft Graph API endpoint.', + type: FieldType.TEXT, + fieldPath: 'source.config.graph_url', + placeholder: 'https://graph.microsoft.com/v1.0', + required: true, + rules: [() => validateURL('Graph URL')], +}; + +export const AZURE_INGEST_USERS: RecipeField = { + name: 'ingest_users', + label: 'Ingest Users', + tooltip: 'Whether to ingest users from Azure AD.', + type: FieldType.BOOLEAN, + fieldPath: 'source.config.ingest_users', + rules: null, +}; + +export const AZURE_INGEST_GROUPS: RecipeField = { + name: 'ingest_groups', + label: 'Ingest Groups', + tooltip: 'Whether to ingest groups from Azure AD.', + type: FieldType.BOOLEAN, + fieldPath: 'source.config.ingest_groups', + rules: null, +}; + +const schemaAllowFieldPathGroup = 'source.config.groups_pattern.allow'; +export const GROUP_ALLOW: RecipeField = { + name: 'groups.allow', + label: 'Allow Patterns', + tooltip: + 'Only include specific groups by providing the name of a group, or a regular expression (regex). If not provided, all groups will be included.', + placeholder: 'group_pattern', + type: FieldType.LIST, + buttonLabel: 'Add pattern', + fieldPath: schemaAllowFieldPathGroup, + rules: null, + section: 'Group', + setValueOnRecipeOverride: (recipe: any, values: string[]) => + setListValuesOnRecipe(recipe, values, schemaAllowFieldPathGroup), +}; + +const schemaDenyFieldPathGroup = 'source.config.groups_pattern.deny'; +export const GROUP_DENY: RecipeField = { + name: 'groups.deny', + label: 'Deny Patterns', + tooltip: + 'Exclude specific groups by providing the name of a group, or a regular expression (regex). If not provided, no groups will be excluded. Deny patterns always take precedence over allow patterns.', + placeholder: 'group_pattern', + type: FieldType.LIST, + buttonLabel: 'Add pattern', + fieldPath: schemaDenyFieldPathGroup, + rules: null, + section: 'Group', + setValueOnRecipeOverride: (recipe: any, values: string[]) => + setListValuesOnRecipe(recipe, values, schemaDenyFieldPathGroup), +};
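On the Python ingestion side, allow/deny lists like these are conventionally modeled by `datahub.configuration.common.AllowDenyPattern`, where a deny match always wins. A small illustration of the semantics the tooltips describe, assuming that standard helper (this form code itself only collects the values):

```python
from datahub.configuration.common import AllowDenyPattern

# Allow every engineering group, but always drop the sandbox group.
pattern = AllowDenyPattern(allow=["eng-.*"], deny=["eng-sandbox"])

for group in ["eng-data", "eng-sandbox", "finance"]:
    print(group, pattern.allowed(group))
# eng-data True     -- matches an allow regex, no deny regex
# eng-sandbox False -- deny takes precedence over allow
# finance False     -- matches no allow regex
```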
+ +const schemaAllowFieldPathUser = 'source.config.users_pattern.allow'; +export const USER_ALLOW: RecipeField = { + name: 'user.allow', + label: 'Allow Patterns', + tooltip: + 'Only include specific users by providing the name of a user, or a regular expression (regex). If not provided, all users will be included.', + placeholder: 'user_pattern', + type: FieldType.LIST, + buttonLabel: 'Add pattern', + fieldPath: schemaAllowFieldPathUser, + rules: null, + section: 'User', + setValueOnRecipeOverride: (recipe: any, values: string[]) => + setListValuesOnRecipe(recipe, values, schemaAllowFieldPathUser), +}; + +const schemaDenyFieldPathUser = 'source.config.users_pattern.deny'; +export const USER_DENY: RecipeField = { + name: 'user.deny', + label: 'Deny Patterns', + tooltip: + 'Exclude specific users by providing the name of a user, or a regular expression (regex). If not provided, no users will be excluded. Deny patterns always take precedence over allow patterns.', + placeholder: 'user_pattern', + type: FieldType.LIST, + buttonLabel: 'Add pattern', + fieldPath: schemaDenyFieldPathUser, + rules: null, + section: 'User', + setValueOnRecipeOverride: (recipe: any, values: string[]) => + setListValuesOnRecipe(recipe, values, schemaDenyFieldPathUser), +}; + +export const SKIP_USERS_WITHOUT_GROUP: RecipeField = { + name: 'skip_users_without_a_group', + label: 'Skip users without group', + tooltip: 'Whether to skip ingesting Azure AD users that do not belong to any group.', + type: FieldType.BOOLEAN, + fieldPath: 'source.config.skip_users_without_a_group', + rules: null, +}; diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts index 844bf50926764a..6a5e6c9de2b96b 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts @@ -83,7 +83,7 @@ import { PROJECT_NAME, } from './lookml'; import { PRESTO, PRESTO_HOST_PORT, PRESTO_DATABASE, PRESTO_USERNAME, PRESTO_PASSWORD } from './presto'; -import { BIGQUERY_BETA, CSV, DBT_CLOUD, MYSQL, POWER_BI, UNITY_CATALOG, VERTICA } from '../constants'; +import { AZURE, BIGQUERY_BETA, CSV, DBT_CLOUD, MYSQL, OKTA, POWER_BI, UNITY_CATALOG, VERTICA } from '../constants'; import { BIGQUERY_BETA_PROJECT_ID, DATASET_ALLOW, DATASET_DENY, PROJECT_ALLOW, PROJECT_DENY } from './bigqueryBeta'; import { MYSQL_HOST_PORT, MYSQL_PASSWORD, MYSQL_USERNAME } from './mysql'; import { MSSQL, MSSQL_DATABASE, MSSQL_HOST_PORT, MSSQL_PASSWORD, MSSQL_USERNAME } from './mssql'; @@ -141,6 +141,36 @@ import { INCLUDE_PROJECTIONS_LINEAGE, } from './vertica'; import { CSV_ARRAY_DELIMITER, CSV_DELIMITER, CSV_FILE_URL, CSV_WRITE_SEMANTICS } from './csv'; +import { + INCLUDE_DEPROVISIONED_USERS, + INCLUDE_SUSPENDED_USERS, + INGEST_GROUPS, + INGEST_USERS, + OKTA_API_TOKEN, + OKTA_DOMAIN_URL, + POFILE_TO_GROUP, + POFILE_TO_GROUP_REGX_ALLOW, + POFILE_TO_GROUP_REGX_DENY, + POFILE_TO_USER, + POFILE_TO_USER_REGX_ALLOW, + POFILE_TO_USER_REGX_DENY, +
SKIP_USERS_WITHOUT_GROUP, +} from './okta'; +import { + AZURE_AUTHORITY_URL, + AZURE_CLIENT_ID, + AZURE_CLIENT_SECRET, + AZURE_GRAPH_URL, + AZURE_INGEST_GROUPS, + AZURE_INGEST_USERS, + AZURE_REDIRECT_URL, + AZURE_TENANT_ID, + AZURE_TOKEN_URL, + GROUP_ALLOW, + GROUP_DENY, + USER_ALLOW, + USER_DENY, +} from './azure'; export enum RecipeSections { Connection = 0, @@ -459,6 +489,36 @@ export const RECIPE_FIELDS: RecipeFields = { filterFields: [], advancedFields: [CSV_ARRAY_DELIMITER, CSV_DELIMITER, CSV_WRITE_SEMANTICS], }, + [OKTA]: { + fields: [OKTA_DOMAIN_URL, OKTA_API_TOKEN, POFILE_TO_USER, POFILE_TO_GROUP], + filterFields: [ + POFILE_TO_USER_REGX_ALLOW, + POFILE_TO_USER_REGX_DENY, + POFILE_TO_GROUP_REGX_ALLOW, + POFILE_TO_GROUP_REGX_DENY, + ], + advancedFields: [ + INGEST_USERS, + INGEST_GROUPS, + INCLUDE_DEPROVISIONED_USERS, + INCLUDE_SUSPENDED_USERS, + STATEFUL_INGESTION_ENABLED, + SKIP_USERS_WITHOUT_GROUP, + ], + }, + [AZURE]: { + fields: [ + AZURE_CLIENT_ID, + AZURE_TENANT_ID, + AZURE_CLIENT_SECRET, + AZURE_REDIRECT_URL, + AZURE_AUTHORITY_URL, + AZURE_TOKEN_URL, + AZURE_GRAPH_URL, + ], + filterFields: [GROUP_ALLOW, GROUP_DENY, USER_ALLOW, USER_DENY], + advancedFields: [AZURE_INGEST_USERS, AZURE_INGEST_GROUPS, STATEFUL_INGESTION_ENABLED, SKIP_USERS_WITHOUT_GROUP], + }, }; export const CONNECTORS_WITH_FORM = new Set(Object.keys(RECIPE_FIELDS)); diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/csv.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/csv.ts index fba4f3b9d01641..2cb3e7edc94d70 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/csv.ts +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/csv.ts @@ -1,18 +1,6 @@ +import { validateURL } from '../../utils'; import { RecipeField, FieldType } from './common'; -const validateURL = (fieldName) => { - return { - validator(_, value) { - const URLPattern = new RegExp(/^(?:http(s)?:\/\/)?[\w.-]+(?:\.[\w.-]+)+[\w\-._~:/?#[\]@!$&'()*+,;=.]+$/); - const isURLValid = URLPattern.test(value); - if (!value || isURLValid) { - return Promise.resolve(); - } - return Promise.reject(new Error(`A valid ${fieldName} is required.`)); - }, - }; -}; - export const CSV_FILE_URL: RecipeField = { name: 'filename', label: 'File URL', diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/okta.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/okta.ts new file mode 100644 index 00000000000000..6efee3769f908d --- /dev/null +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/okta.ts @@ -0,0 +1,153 @@ +import { validateURL } from '../../utils'; +import { RecipeField, FieldType, setListValuesOnRecipe } from './common'; + +export const OKTA_DOMAIN_URL: RecipeField = { + name: 'okta_domain', + label: 'Okta Domain URL', + tooltip: 'The location of your Okta Domain, without a protocol.', + type: FieldType.TEXT, + fieldPath: 'source.config.okta_domain', + placeholder: 'dev-35531955.okta.com', + required: true, + rules: [() => validateURL('Okta Domain URL')], +}; + +export const OKTA_API_TOKEN: RecipeField = { + name: 'okta_api_token', + label: 'Token', + tooltip: 'An API token generated for the DataHub application inside your Okta Developer Console.', + type: FieldType.SECRET, + fieldPath: 'source.config.okta_api_token', + placeholder: 'd0121d0000882411234e11166c6aaa23ed5d74e0', + rules: null, + required: true, +}; + +export const POFILE_TO_USER: RecipeField = { + name: 'okta_profile_to_username_attr', + label: 'Okta Profile to username attribute', + tooltip: + 'Which Okta User Profile attribute to use as input to the DataHub username mapping. Common values are login and email.', + type: FieldType.TEXT, + fieldPath: 'source.config.okta_profile_to_username_attr', + placeholder: 'email', + rules: null, +};
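This attribute field pairs with a regex on the ingestion side: roughly, the Okta source reads the configured attribute off each user profile and applies `okta_profile_to_username_regex` to derive the DataHub username (the recipe comments later in this diff show the defaults `login` and `([^@]+)`). An illustrative sketch of that mapping, not the connector's actual code:

```python
import re
from typing import Optional


def okta_profile_to_username(
    profile: dict, attr: str = "email", regex: str = "([^@]+)"
) -> Optional[str]:
    # Read the configured profile attribute, then take the first capture group.
    value = profile.get(attr)
    if not value:
        return None
    match = re.match(regex, value)
    return match.group(1) if match else None


profile = {"login": "jdoe@acme.com", "email": "jdoe@acme.com"}
print(okta_profile_to_username(profile))  # jdoe
```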
+ +export const POFILE_TO_GROUP: RecipeField = { + name: 'okta_profile_to_group_name_attr', + label: 'Okta Profile to group name attribute', + tooltip: 'Which Okta Group Profile attribute to use as input to the DataHub group name mapping.', + type: FieldType.TEXT, + fieldPath: 'source.config.okta_profile_to_group_name_attr', + placeholder: 'name', + rules: null, +}; + +const schemaAllowFieldPath = 'source.config.okta_profile_to_username_regex.allow'; +export const POFILE_TO_USER_REGX_ALLOW: RecipeField = { + name: 'user.allow', + label: 'Allow Patterns', + tooltip: + 'Only include specific users by providing a username, or a regular expression (regex). If not provided, all users will be included.', + placeholder: 'user_pattern', + type: FieldType.LIST, + buttonLabel: 'Add pattern', + fieldPath: schemaAllowFieldPath, + rules: null, + section: 'Okta Profile To User Attribute Regex', + setValueOnRecipeOverride: (recipe: any, values: string[]) => + setListValuesOnRecipe(recipe, values, schemaAllowFieldPath), +}; + +const schemaDenyFieldPath = 'source.config.okta_profile_to_username_regex.deny'; +export const POFILE_TO_USER_REGX_DENY: RecipeField = { + name: 'user.deny', + label: 'Deny Patterns', + tooltip: + 'Exclude specific users by providing a username, or a regular expression (regex). If not provided, no users will be excluded. Deny patterns always take precedence over allow patterns.', + placeholder: 'user_pattern', + type: FieldType.LIST, + buttonLabel: 'Add pattern', + fieldPath: schemaDenyFieldPath, + rules: null, + section: 'Okta Profile To User Attribute Regex', + setValueOnRecipeOverride: (recipe: any, values: string[]) => + setListValuesOnRecipe(recipe, values, schemaDenyFieldPath), +}; + +const schemaAllowFieldPathForGroup = 'source.config.okta_profile_to_group_name_regex.allow'; +export const POFILE_TO_GROUP_REGX_ALLOW: RecipeField = { + name: 'group.allow', + label: 'Allow Patterns', + tooltip: + 'Only include specific groups by providing a group name, or a regular expression (regex). If not provided, all groups will be included.', + placeholder: 'group_pattern', + type: FieldType.LIST, + buttonLabel: 'Add pattern', + fieldPath: schemaAllowFieldPathForGroup, + rules: null, + section: 'Okta Profile To Group Attribute Regex', + setValueOnRecipeOverride: (recipe: any, values: string[]) => + setListValuesOnRecipe(recipe, values, schemaAllowFieldPathForGroup), +}; + +const schemaDenyFieldPathForGroup = 'source.config.okta_profile_to_group_name_regex.deny'; +export const POFILE_TO_GROUP_REGX_DENY: RecipeField = { + name: 'group.deny', + label: 'Deny Patterns', + tooltip: + 'Exclude specific groups by providing a group name, or a regular expression (regex). If not provided, no groups will be excluded. Deny patterns always take precedence over allow patterns.', + placeholder: 'group_pattern', + type: FieldType.LIST, + buttonLabel: 'Add pattern', + fieldPath: schemaDenyFieldPathForGroup, + rules: null, + section: 'Okta Profile To Group Attribute Regex', + setValueOnRecipeOverride: (recipe: any, values: string[]) => + setListValuesOnRecipe(recipe, values, schemaDenyFieldPathForGroup), +};
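Each of these list fields writes back through `setListValuesOnRecipe` from `./common`, which is not shown in this diff. As a rough Python analogue of what such a helper does, set a list of values at a dotted path inside the recipe, clearing the key when the list is empty (the real TypeScript helper may differ in detail, e.g. by returning a new object rather than mutating):

```python
from typing import Any, List


def set_list_values_on_recipe(recipe: dict, values: List[str], field_path: str) -> dict:
    # Walk the dotted path, creating intermediate objects as needed.
    node: Any = recipe
    *parents, leaf = field_path.split(".")
    for key in parents:
        node = node.setdefault(key, {})
    if values:
        node[leaf] = values
    else:
        node.pop(leaf, None)  # an empty list removes the key entirely
    return recipe


recipe = {"source": {"type": "okta", "config": {}}}
set_list_values_on_recipe(
    recipe, ["eng-.*"], "source.config.okta_profile_to_group_name_regex.allow"
)
print(recipe["source"]["config"]["okta_profile_to_group_name_regex"])
# {'allow': ['eng-.*']}
```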
+export const INGEST_USERS: RecipeField = { + name: 'ingest_users', + label: 'Ingest Users', + tooltip: 'Whether users should be ingested into DataHub.', + type: FieldType.BOOLEAN, + fieldPath: 'source.config.ingest_users', + rules: null, +}; + +export const INGEST_GROUPS: RecipeField = { + name: 'ingest_groups', + label: 'Ingest Groups', + tooltip: 'Whether groups should be ingested into DataHub.', + type: FieldType.BOOLEAN, + fieldPath: 'source.config.ingest_groups', + rules: null, +}; + +export const INCLUDE_DEPROVISIONED_USERS: RecipeField = { + name: 'include_deprovisioned_users', + label: 'Include deprovisioned users', + tooltip: 'Whether to ingest users in the DEPROVISIONED state from Okta.', + type: FieldType.BOOLEAN, + fieldPath: 'source.config.include_deprovisioned_users', + rules: null, +}; +export const INCLUDE_SUSPENDED_USERS: RecipeField = { + name: 'include_suspended_users', + label: 'Include suspended users', + tooltip: 'Whether to ingest users in the SUSPENDED state from Okta.', + type: FieldType.BOOLEAN, + fieldPath: 'source.config.include_suspended_users', + rules: null, +}; + +export const SKIP_USERS_WITHOUT_GROUP: RecipeField = { + name: 'skip_users_without_a_group', + label: 'Skip users without group', + tooltip: 'Whether to skip ingesting Okta users that do not belong to any group.', + type: FieldType.BOOLEAN, + fieldPath: 'source.config.skip_users_without_a_group', + rules: null, +}; + diff --git a/datahub-web-react/src/app/ingest/source/builder/sources.json b/datahub-web-react/src/app/ingest/source/builder/sources.json index f4bfd45eb4f837..5d004abfa78d83 100644 --- a/datahub-web-react/src/app/ingest/source/builder/sources.json +++ b/datahub-web-react/src/app/ingest/source/builder/sources.json @@ -200,14 +200,14 @@ "name": "azure-ad", "displayName": "Azure AD", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/azure-ad/", - "recipe": "source:\n type: azure-ad\n config:\n client_id: # Your Azure Client ID, e.g. \"00000000-0000-0000-0000-000000000000\"\n tenant_id: # Your Azure Tenant ID, e.g. \"00000000-0000-0000-0000-000000000000\"\n # Add secret in Secrets Tab with this name\n client_secret: \"${AZURE_AD_CLIENT_SECRET}\"\n redirect: # Your Redirect URL, e.g. \"https://login.microsoftonline.com/common/oauth2/nativeclient\"\n authority: # Your Authority URL, e.g. \"https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000\"\n token_url: # Your Token URL, e.g. \"https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000/oauth2/token\"\n graph_url: # The Graph URL, e.g. \"https://graph.microsoft.com/v1.0\"\n \n # Optional flags to ingest users, groups, or both\n ingest_users: True\n ingest_groups: True\n \n # Optional Allow / Deny extraction of particular Groups\n # groups_pattern:\n # allow:\n # - \".*\"\n\n # Optional Allow / Deny extraction of particular Users.\n # users_pattern:\n # allow:\n # - \".*\"" + "recipe": "source:\n type: azure-ad\n config:\n client_id: # Your Azure Client ID, e.g. \"00000000-0000-0000-0000-000000000000\"\n tenant_id: # Your Azure Tenant ID, e.g.
\"00000000-0000-0000-0000-000000000000\"\n # Add secret in Secrets Tab with this name\n client_secret: \n redirect: # Your Redirect URL, e.g. \"https://login.microsoftonline.com/common/oauth2/nativeclient\"\n authority: # Your Authority URL, e.g. \"https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000\"\n token_url: # Your Token URL, e.g. \"https://login.microsoftonline.com/00000000-0000-0000-0000-000000000000/oauth2/token\"\n graph_url: # The Graph URL, e.g. \"https://graph.microsoft.com/v1.0\"\n \n # Optional flags to ingest users, groups, or both\n ingest_users: True\n ingest_groups: True\n \n # Optional Allow / Deny extraction of particular Groups\n # groups_pattern:\n # allow:\n # - \".*\"\n\n # Optional Allow / Deny extraction of particular Users.\n # users_pattern:\n # allow:\n # - \".*\"" }, { "urn": "urn:li:dataPlatform:okta", "name": "okta", "displayName": "Okta", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/okta/", - "recipe": "source:\n type: okta\n config:\n # Coordinates\n okta_domain: # Your Okta Domain, e.g. \"dev-35531955.okta.com\"\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n okta_api_token: \"${OKTA_API_TOKEN}\" # Your Okta API Token, e.g. \"11be4R_M2MzDqXawbTHfKGpKee0kuEOfX1RCQSRx99\"\n\n # Optional flags to ingest users, groups, or both\n ingest_users: True\n ingest_groups: True\n\n # Optional: Customize the mapping to DataHub Username from an attribute appearing in the Okta User\n # profile. Reference: https://developer.okta.com/docs/reference/api/users/\n # okta_profile_to_username_attr: str = \"login\"\n # okta_profile_to_username_regex: str = \"([^@]+)\"\n \n # Optional: Customize the mapping to DataHub Group from an attribute appearing in the Okta Group\n # profile. Reference: https://developer.okta.com/docs/reference/api/groups/\n # okta_profile_to_group_name_attr: str = \"name\"\n # okta_profile_to_group_name_regex: str = \"(.*)\"\n \n # Optional: Include deprovisioned or suspended Okta users in the ingestion.\n # include_deprovisioned_users = False\n # include_suspended_users = False" + "recipe": "source:\n type: okta\n config:\n # Coordinates\n okta_domain: # Your Okta Domain, e.g. \"dev-35531955.okta.com\"\n\n # Credentials\n # Add secret in Secrets Tab with relevant names for each variable\n okta_api_token: # Your Okta API Token, e.g. \"11be4R_M2MzDqXawbTHfKGpKee0kuEOfX1RCQSRx99\"\n\n # Optional flags to ingest users, groups, or both\n ingest_users: True\n ingest_groups: True\n\n # Optional: Customize the mapping to DataHub Username from an attribute appearing in the Okta User\n # profile. Reference: https://developer.okta.com/docs/reference/api/users/\n # okta_profile_to_username_attr: str = \"login\"\n # okta_profile_to_username_regex: str = \"([^@]+)\"\n \n # Optional: Customize the mapping to DataHub Group from an attribute appearing in the Okta Group\n # profile. 
Reference: https://developer.okta.com/docs/reference/api/groups/\n # okta_profile_to_group_name_attr: str = \"name\"\n # okta_profile_to_group_name_regex: str = \"(.*)\"\n \n # Optional: Include deprovisioned or suspended Okta users in the ingestion.\n # include_deprovisioned_users = False\n # include_suspended_users = False" }, { "urn": "urn:li:dataPlatform:vertica", diff --git a/datahub-web-react/src/app/ingest/source/utils.ts b/datahub-web-react/src/app/ingest/source/utils.ts index f789ed8434721d..e55d5a598d3d94 100644 --- a/datahub-web-react/src/app/ingest/source/utils.ts +++ b/datahub-web-react/src/app/ingest/source/utils.ts @@ -129,6 +129,20 @@ export const getExecutionRequestStatusDisplayColor = (status: string) => { ); }; +export const validateURL = (fieldName: string) => { + return { + validator(_, value) { + const URLPattern = new RegExp(/^(?:http(s)?:\/\/)?[\w.-]+(?:\.[\w.-]+)+[\w\-._~:/?#[\]@!$&'()*+,;=.]+$/); + const isURLValid = URLPattern.test(value); + if (!value || isURLValid) { + return Promise.resolve(); + } + return Promise.reject(new Error(`A valid ${fieldName} is required.`)); + }, + }; +}; + + const ENTITIES_WITH_SUBTYPES = new Set([ EntityType.Dataset.toLowerCase(), EntityType.Container.toLowerCase(), diff --git a/docs-website/yarn.lock b/docs-website/yarn.lock index c7ce27dd6431d0..44bc206728532b 100644 --- a/docs-website/yarn.lock +++ b/docs-website/yarn.lock @@ -4901,13 +4901,14 @@ es-module-lexer@^1.2.1: resolved "https://registry.yarnpkg.com/es-module-lexer/-/es-module-lexer-1.3.0.tgz#6be9c9e0b4543a60cd166ff6f8b4e9dae0b0c16f" integrity sha512-vZK7T0N2CBmBOixhmjdqx2gWVbFZ4DXZ/NyRMZVlJXPa7CyFS+/a4QQsDGDQy9ZfEzxFuNEsMLeQJnKP2p5/JA== -es5-ext@^0.10.35, es5-ext@^0.10.50: - version "0.10.62" - resolved "https://registry.yarnpkg.com/es5-ext/-/es5-ext-0.10.62.tgz#5e6adc19a6da524bf3d1e02bbc8960e5eb49a9a5" - integrity sha512-BHLqn0klhEpnOKSrzn/Xsz2UIW8j+cGmo9JLzr8BiUapV8hPL9+FliFqjwr9ngW7jWdnxv6eO+/LqyhJVqgrjA== +es5-ext@^0.10.35, es5-ext@^0.10.50, es5-ext@^0.10.62, es5-ext@~0.10.14: + version "0.10.63" + resolved "https://registry.yarnpkg.com/es5-ext/-/es5-ext-0.10.63.tgz#9c222a63b6a332ac80b1e373b426af723b895bd6" + integrity sha512-hUCZd2Byj/mNKjfP9jXrdVZ62B8KuA/VoK7X8nUh5qT+AxDmcbvZz041oDVZdbIN1qW6XY9VDNwzkvKnZvK2TQ== dependencies: es6-iterator "^2.0.3" es6-symbol "^3.1.3" + esniff "^2.0.1" next-tick "^1.1.0" es6-iterator@^2.0.3: @@ -4965,6 +4966,16 @@ eslint-scope@5.1.1: esrecurse "^4.3.0" estraverse "^4.1.1" +esniff@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/esniff/-/esniff-2.0.1.tgz#a4d4b43a5c71c7ec51c51098c1d8a29081f9b308" + integrity sha512-kTUIGKQ/mDPFoJ0oVfcmyJn4iBDRptjNVIzwIFR7tqWXdVI9xfA2RMwY/gbSpJG3lkdWNEjLap/NqVHZiJsdfg== + dependencies: + d "^1.0.1" + es5-ext "^0.10.62" + event-emitter "^0.3.5" + type "^2.7.2" + esprima@^4.0.0: version "4.0.1" resolved "https://registry.yarnpkg.com/esprima/-/esprima-4.0.1.tgz#13b04cdb3e6c5d19df91ab6987a8695619b0aa71" @@ -5010,6 +5021,14 @@ eval@^0.1.8: "@types/node" "*" require-like ">= 0.1.1" +event-emitter@^0.3.5: + version "0.3.5" + resolved "https://registry.yarnpkg.com/event-emitter/-/event-emitter-0.3.5.tgz#df8c69eef1647923c7157b9ce83840610b02cc39" + integrity sha512-D9rRn9y7kLPnJ+hMq7S/nhvoKwwvVJahBi2BPmx3bvbsEdK3W9ii8cBSGjP+72/LnM4n6fo3+dkCX5FeTQruXA== + dependencies: + d "1" + es5-ext "~0.10.14" + event-target-shim@^5.0.0: version "5.0.1" resolved "https://registry.yarnpkg.com/event-target-shim/-/event-target-shim-5.0.1.tgz#5d4d3ebdf9583d63a5333ce2deb7480ab2b05789" @@ -5259,9 +5278,9 @@ 
flux@^4.0.1: fbjs "^3.0.1" follow-redirects@^1.0.0, follow-redirects@^1.14.7: - version "1.15.4" - resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.15.4.tgz#cdc7d308bf6493126b17ea2191ea0ccf3e535adf" - integrity sha512-Cr4D/5wlrb0z9dgERpUL3LrmPKVDsETIJhaCMeDfuFYcqa5bldGV6wBsAN6X/vxlXQtFBMrXdXxdL8CbDTGniw== + version "1.15.6" + resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.15.6.tgz#7f815c0cda4249c74ff09e95ef97c23b5fd0399b" + integrity sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA== fork-ts-checker-webpack-plugin@^6.5.0: version "6.5.3" diff --git a/docs/api/tutorials/domains.md b/docs/api/tutorials/domains.md index 617864d233b7a6..800d3dbff5614a 100644 --- a/docs/api/tutorials/domains.md +++ b/docs/api/tutorials/domains.md @@ -79,6 +79,41 @@ You can now see `Marketing` domain has been created under `Govern > Domains`.

+### Creating a Nested Domain + +You can also create a nested domain, that is, a domain within another domain. + + + + +```graphql +mutation createDomain { + createDomain(input: { name: "Verticals", description: "An optional description", parentDomain: "urn:li:domain:marketing" }) +} +``` + + + + +```shell +curl --location --request POST 'http://localhost:8080/api/graphql' \ +--header 'Authorization: Bearer ' \ +--header 'Content-Type: application/json' \ +--data-raw '{ "query": "mutation createDomain { createDomain(input: { name: \"Verticals\", description: \"Entities related to the verticals sub-domain.\", parentDomain: \"urn:li:domain:marketing\" }) }", "variables":{}}' +``` + + + + +```python +{{ inline /metadata-ingestion/examples/library/create_nested_domain.py show_path_as_comment }} +``` + + + + +This mutation will create a new domain, "Verticals", nested under the "Marketing" domain. + ## Read Domains diff --git a/docs/domains.md b/docs/domains.md index 1b2ebc9d47f397..94ba5f9f961971 100644 --- a/docs/domains.md +++ b/docs/domains.md @@ -202,6 +202,16 @@ mutation createDomain { This query will return an `urn` which you can use to fetch the Domain details. +## Create a Nested Domain + +```graphql +mutation createDomain { + createDomain(input: { name: "Verticals", description: "An optional description", parentDomain: "urn:li:domain:marketing" }) +} +``` + +This mutation will create a new domain, "Verticals", nested under the "Marketing" domain. + **Fetching a Domain by Urn** ```graphql diff --git a/metadata-ingestion/examples/library/create_nested_domain.py b/metadata-ingestion/examples/library/create_nested_domain.py new file mode 100644 index 00000000000000..da6b10700439d2 --- /dev/null +++ b/metadata-ingestion/examples/library/create_nested_domain.py @@ -0,0 +1,27 @@ +import logging + +from datahub.emitter.mce_builder import make_domain_urn +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.emitter.rest_emitter import DatahubRestEmitter +from datahub.metadata.schema_classes import ChangeTypeClass, DomainPropertiesClass + +log = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + +domain_urn = make_domain_urn("marketing.verticals")  # urn of the new child domain, not the parent +domain_properties_aspect = DomainPropertiesClass( + name="Verticals", + description="Entities related to the verticals sub-domain", + parentDomain="urn:li:domain:marketing", +) + +event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper( + entityType="domain", + changeType=ChangeTypeClass.UPSERT, + entityUrn=domain_urn, + aspect=domain_properties_aspect, +) + +rest_emitter = DatahubRestEmitter(gms_server="http://localhost:8080") +rest_emitter.emit(event) +log.info(f"Created domain {domain_urn}") diff --git a/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py b/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py index a1498a6ca961e9..151904f93c9b20 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py +++ b/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py @@ -1,5 +1,6 @@ import json import logging +import time from pathlib import Path from typing import Dict, Iterable, List, Optional, Tuple, Union @@ -15,19 +16,31 @@ make_data_platform_urn, make_dataset_urn, make_schema_field_urn, + make_tag_urn, + make_term_urn, + make_user_urn, + validate_ownership_type, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.extractor.schema_util import avro_schema_to_mce_fields from datahub.ingestion.graph.client import DataHubGraph, get_default_graph from
datahub.metadata.schema_classes import ( + AuditStampClass, DatasetPropertiesClass, + GlobalTagsClass, + GlossaryTermAssociationClass, + GlossaryTermsClass, MetadataChangeProposalClass, OtherSchemaClass, + OwnerClass, + OwnershipClass, + OwnershipTypeClass, SchemaFieldClass, SchemaMetadataClass, StructuredPropertiesClass, StructuredPropertyValueAssignmentClass, SubTypesClass, + TagAssociationClass, UpstreamClass, ) from datahub.specific.dataset import DatasetPatchBuilder @@ -119,6 +132,16 @@ def file_must_be_avsc(cls, v): return v +class Ownership(ConfigModel): + id: str + type: str + + @validator("type") + def ownership_type_must_be_mappable_or_custom(cls, v: str) -> str: + _, _ = validate_ownership_type(v) + return v + + class StructuredPropertyValue(ConfigModel): value: Union[str, float, List[str], List[float]] created: Optional[str] @@ -137,6 +160,9 @@ class Dataset(BaseModel): properties: Optional[Dict[str, str]] subtype: Optional[str] subtypes: Optional[List[str]] + tags: Optional[List[str]] = None + glossaryTerms: Optional[List[str]] = None + owners: Optional[List[Union[str, Ownership]]] = None structured_properties: Optional[ Dict[str, Union[str, float, List[Union[str, float]]]] ] = None @@ -172,6 +198,28 @@ def platform_must_not_be_urn(cls, v): return v[len("urn:li:dataPlatform:") :] return v + def _mint_auditstamp(self, message: str) -> AuditStampClass: + return AuditStampClass( + time=int(time.time() * 1000.0), + actor="urn:li:corpuser:datahub", + message=message, + ) + + def _mint_owner(self, owner: Union[str, Ownership]) -> OwnerClass: + if isinstance(owner, str): + return OwnerClass( + owner=make_user_urn(owner), + type=OwnershipTypeClass.TECHNICAL_OWNER, + ) + else: + assert isinstance(owner, Ownership) + ownership_type, ownership_type_urn = validate_ownership_type(owner.type) + return OwnerClass( + owner=make_user_urn(owner.id), + type=ownership_type, + typeUrn=ownership_type_urn, + ) + @classmethod def from_yaml(cls, file: str) -> Iterable["Dataset"]: with open(file) as fp: @@ -220,11 +268,6 @@ def generate_mcp( ) assert field_urn.startswith("urn:li:schemaField:") if field.structured_properties: - # field_properties_flattened = ( - # Dataset.extract_structured_properties( - # field.structured_properties - # ) - # ) mcp = MetadataChangeProposalWrapper( entityUrn=field_urn, aspect=StructuredPropertiesClass( @@ -254,12 +297,43 @@ def generate_mcp( ) yield mcp + if self.tags: + mcp = MetadataChangeProposalWrapper( + entityUrn=self.urn, + aspect=GlobalTagsClass( + tags=[ + TagAssociationClass(tag=make_tag_urn(tag)) + for tag in self.tags + ] + ), + ) + yield mcp + + if self.glossaryTerms: + mcp = MetadataChangeProposalWrapper( + entityUrn=self.urn, + aspect=GlossaryTermsClass( + terms=[ + GlossaryTermAssociationClass( + urn=make_term_urn(term) + ) + for term in self.glossaryTerms + ], + auditStamp=self._mint_auditstamp("yaml"), + ), + ) + yield mcp + + if self.owners: + mcp = MetadataChangeProposalWrapper( + entityUrn=self.urn, + aspect=OwnershipClass( + owners=[self._mint_owner(o) for o in self.owners] + ), + ) + yield mcp + if self.structured_properties: - # structured_properties_flattened = ( - # Dataset.extract_structured_properties( - # self.structured_properties - # ) - # ) mcp = MetadataChangeProposalWrapper( entityUrn=self.urn, aspect=StructuredPropertiesClass( @@ -419,12 +493,34 @@ def _schema_from_schema_metadata( else: return None + @staticmethod + def extract_owners_if_exists( + owners: Optional[OwnershipClass], + ) -> Optional[List[Union[str, Ownership]]]: + 
yaml_owners: Optional[List[Union[str, Ownership]]] = None + if owners: + yaml_owners = [] + for o in owners.owners: + if o.type == OwnershipTypeClass.TECHNICAL_OWNER: + yaml_owners.append(o.owner) + elif o.type == OwnershipTypeClass.CUSTOM: + yaml_owners.append(Ownership(id=o.owner, type=str(o.typeUrn))) + else: + yaml_owners.append(Ownership(id=o.owner, type=str(o.type))) + return yaml_owners + @classmethod def from_datahub(cls, graph: DataHubGraph, urn: str) -> "Dataset": dataset_properties: Optional[DatasetPropertiesClass] = graph.get_aspect( urn, DatasetPropertiesClass ) subtypes: Optional[SubTypesClass] = graph.get_aspect(urn, SubTypesClass) + tags: Optional[GlobalTagsClass] = graph.get_aspect(urn, GlobalTagsClass) + glossary_terms: Optional[GlossaryTermsClass] = graph.get_aspect( + urn, GlossaryTermsClass + ) + owners: Optional[OwnershipClass] = graph.get_aspect(urn, OwnershipClass) + yaml_owners = Dataset.extract_owners_if_exists(owners) structured_properties: Optional[StructuredPropertiesClass] = graph.get_aspect( urn, StructuredPropertiesClass ) @@ -446,6 +542,11 @@ def from_datahub(cls, graph: DataHubGraph, urn: str) -> "Dataset": if dataset_properties and dataset_properties.name else None, schema=Dataset._schema_from_schema_metadata(graph, urn), + tags=[tag.tag for tag in tags.tags] if tags else None, + glossaryTerms=[term.urn for term in glossary_terms.terms] + if glossary_terms + else None, + owners=yaml_owners, properties=dataset_properties.customProperties if dataset_properties else None, diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py index 1c7d275c348672..73ce1ae2a6c1af 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py @@ -822,7 +822,7 @@ def stl_scan_based_lineage_query( WHERE qs.step_name = 'scan' AND qs.source = 'Redshift(local)' AND - qt.sequence < 320 AND -- See https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext + qt.sequence < 16 AND -- See https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext sti.database = '{db_name}' AND -- this was required to not retrieve some internal redshift tables, try removing to see what happens sui.user_name <> 'rdsdb' -- not entirely sure about this filter GROUP BY sti.schema, sti.table, qs.table_id, qs.query_id, sui.user_name @@ -909,7 +909,7 @@ def list_insert_create_queries_sql( cluster = '{db_name}' AND qd.start_time >= '{start_time}' AND qd.start_time < '{end_time}' AND - qt.sequence < 320 AND -- See https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext + qt.sequence < 16 AND -- See https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext ld.query_id IS NULL -- filter out queries which are also stored in SYS_LOAD_DETAIL ORDER BY target_table ASC ) @@ -996,7 +996,7 @@ def temp_table_ddl_query(start_time: datetime, end_time: datetime) -> str: query_type IN ('DDL', 'CTAS', 'OTHER', 'COMMAND') AND qh.start_time >= '{start_time_str}' AND qh.start_time < '{end_time_str}' - AND qt.sequence < 320 + AND qt.sequence < 16 GROUP BY qh.start_time, qh.session_id, qh.transaction_id, qh.user_id ORDER BY qh.start_time, qh.session_id, qh.transaction_id, qh.user_id ASC ) diff --git 
a/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py b/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py index 1d6fe9342b8060..ef7301238e4528 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py +++ b/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py @@ -155,11 +155,12 @@ def _get_channel_info( self, cursor: Optional[str] ) -> Tuple[List[MetadataWorkUnit], Optional[str]]: result_channels: List[MetadataWorkUnit] = [] - response = self.get_slack_client().conversations_list( - types="public_channel", - limit=self.config.channels_iteration_limit, - cursor=cursor, - ) + with self.rate_limiter: + response = self.get_slack_client().conversations_list( + types="public_channel", + limit=self.config.channels_iteration_limit, + cursor=cursor, + ) assert isinstance(response.data, dict) if not response.data["ok"]: self.report.report_failure( @@ -240,9 +241,10 @@ def get_public_channels(self) -> Iterable[MetadataWorkUnit]: def populate_user_profile(self, user_obj: CorpUser) -> None: try: # https://api.slack.com/methods/users.profile.get - user_profile_res = self.get_slack_client().users_profile_get( - user=user_obj.slack_id - ) + with self.rate_limiter: + user_profile_res = self.get_slack_client().users_profile_get( + user=user_obj.slack_id + ) user_profile = user_profile_res.get("profile", {}) user_obj.title = user_profile.get("title") user_obj.image_url = user_profile.get("image_192") @@ -257,9 +259,10 @@ def populate_slack_id_from_email(self, user_obj: CorpUser) -> None: return try: # https://api.slack.com/methods/users.lookupByEmail - user_info_res = self.get_slack_client().users_lookupByEmail( - email=user_obj.email - ) + with self.rate_limiter: + user_info_res = self.get_slack_client().users_lookupByEmail( + email=user_obj.email + ) user_info = user_info_res.get("user", {}) user_obj.slack_id = user_info.get("id") except Exception as e: diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java index 622f92b0bd7e93..2d82dc4001c78b 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java @@ -564,6 +564,10 @@ private LineageSearchResult getSearchResultInBatches( queryFrom = Math.max(0, from - resultForBatch.getNumEntities()); querySize = Math.max(0, size - resultForBatch.getEntities().size()); finalResult = merge(finalResult, resultForBatch); + + if (querySize == 0) { + break; + } } finalResult.getMetadata().getAggregations().add(0, DEGREE_FILTER_GROUP); @@ -843,6 +847,10 @@ private LineageScrollResult getScrollResultInBatches( urnToRelationship); querySize = Math.max(0, size - resultForBatch.getEntities().size()); finalResult = mergeScrollResult(finalResult, resultForBatch); + + if (querySize == 0) { + break; + } } finalResult.getMetadata().getAggregations().add(0, DEGREE_FILTER_GROUP);
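A note on the Slack source changes above: every Web API call is now wrapped in `with self.rate_limiter:`. The limiter's construction is not part of this diff; the shape required is simply a context manager that blocks until a call slot is free. A minimal sketch under that assumption (illustrative, not the source's actual implementation):

```python
import threading
import time


class SimpleRateLimiter:
    """Context manager allowing at most `max_calls` per `period` seconds."""

    def __init__(self, max_calls: int, period: float = 1.0) -> None:
        self.max_calls = max_calls
        self.period = period
        self._lock = threading.Lock()
        self._call_times = []  # monotonic timestamps of recent calls

    def __enter__(self) -> "SimpleRateLimiter":
        with self._lock:
            now = time.monotonic()
            # Drop timestamps that have aged out of the window.
            self._call_times = [t for t in self._call_times if now - t < self.period]
            if len(self._call_times) >= self.max_calls:
                # Sleep just long enough for the oldest call to age out.
                time.sleep(max(self.period - (now - self._call_times[0]), 0))
            self._call_times.append(time.monotonic())
        return self

    def __exit__(self, *exc) -> None:
        return None


rate_limiter = SimpleRateLimiter(max_calls=1, period=1.0)
with rate_limiter:
    pass  # e.g. client.users_profile_get(user=...)
```

Throttling at the call site keeps the pagination loops from tripping Slack's per-method rate limits, and the same limiter object can guard every endpoint the source touches.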