From 2c3943d72325833b9cdd467bcb5717f086d69da7 Mon Sep 17 00:00:00 2001 From: sid-acryl <155424659+sid-acryl@users.noreply.github.com> Date: Fri, 7 Jun 2024 21:46:28 +0530 Subject: [PATCH 1/5] fix(doc): Fix doc typo in transformer (#10658) --- .../docs/transformer/dataset_transformer.md | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/metadata-ingestion/docs/transformer/dataset_transformer.md b/metadata-ingestion/docs/transformer/dataset_transformer.md index 64d1438cfcc73d..00e4995218a710 100644 --- a/metadata-ingestion/docs/transformer/dataset_transformer.md +++ b/metadata-ingestion/docs/transformer/dataset_transformer.md @@ -126,7 +126,7 @@ transformers: |--------------------|----------|--------------|-------------|---------------------------------------------------------------------| | `owner_urns` | ✅ | list[string] | | List of owner urns. | | `ownership_type` | | string | "DATAOWNER" | ownership type of the owners (either as enum or ownership type urn) | -| `replace_existing` | | boolean | `false` | Whether to remove owners from entity sent by ingestion source. | +| `replace_existing` | | boolean | `false` | Whether to remove ownership from entity sent by ingestion source. | | `semantics` | | enum | `OVERWRITE` | Whether to OVERWRITE or PATCH the entity present on DataHub GMS. | For transformer behaviour on `replace_existing` and `semantics`, please refer section [Relationship Between replace_existing And semantics](#relationship-between-replace_existing-and-semantics). @@ -270,7 +270,7 @@ Note that whatever owners you send via `simple_remove_dataset_ownership` will ov |-----------------------------|----------|--------------|---------------|------------------------------------------------------------------| | `extract_tags_from` | ✅ | string | `urn` | Which field to extract tag from. Currently only `urn` is supported. 
| | `extract_tags_regex` | ✅ | string | `.*` | Regex to use to extract tag.| -| `replace_existing` | | boolean | `false` | Whether to remove owners from entity sent by ingestion source. | +| `replace_existing` | | boolean | `false` | Whether to remove globalTags from entity sent by ingestion source. | | `semantics` | | enum | `OVERWRITE` | Whether to OVERWRITE or PATCH the entity present on DataHub GMS. | Let’s suppose we’d like to add a dataset tags based on part of urn. To do so, we can use the `extract_dataset_tags` transformer that’s included in the ingestion framework. @@ -297,7 +297,7 @@ a tag called `USA-ops-team` and `Canada-marketing` will be added to them respect | Field | Required | Type | Default | Description | |-----------------------------|----------|--------------|---------------|------------------------------------------------------------------| | `tag_urns` | ✅ | list[string] | | List of globalTags urn. | -| `replace_existing` | | boolean | `false` | Whether to remove owners from entity sent by ingestion source. | +| `replace_existing` | | boolean | `false` | Whether to remove globalTags from entity sent by ingestion source. | | `semantics` | | enum | `OVERWRITE` | Whether to OVERWRITE or PATCH the entity present on DataHub GMS. | Let’s suppose we’d like to add a set of dataset tags. To do so, we can use the `simple_add_dataset_tags` transformer that’s included in the ingestion framework. @@ -350,7 +350,7 @@ The config, which we’d append to our ingestion recipe YAML, would look like th | Field | Required | Type | Default | Description | |-----------------------------|----------|----------------------|-------------|---------------------------------------------------------------------------------------| | `tag_pattern` | ✅ | map[regx, list[urn]] | | Entity urn with regular expression and list of tags urn apply to matching entity urn. | -| `replace_existing` | | boolean | `false` | Whether to remove owners from entity sent by ingestion source. 
| +| `replace_existing` | | boolean | `false` | Whether to remove globalTags from entity sent by ingestion source. | | `semantics` | | enum | `OVERWRITE` | Whether to OVERWRITE or PATCH the entity present on DataHub GMS. | Let’s suppose we’d like to append a series of tags to specific datasets. To do so, we can use the `pattern_add_dataset_tags` module that’s included in the ingestion framework. This will match the regex pattern to `urn` of the dataset and assign the respective tags urns given in the array. @@ -407,7 +407,7 @@ The config, which we’d append to our ingestion recipe YAML, would look like th | Field | Required | Type | Default | Description | |-----------------------------|----------|--------------------------------------------|---------------|----------------------------------------------------------------------------| | `get_tags_to_add` | ✅ | callable[[str], list[TagAssociationClass]] | | A function which takes entity urn as input and return TagAssociationClass. | -| `replace_existing` | | boolean | `false` | Whether to remove owners from entity sent by ingestion source. | +| `replace_existing` | | boolean | `false` | Whether to remove globalTags from entity sent by ingestion source. | | `semantics` | | enum | `OVERWRITE` | Whether to OVERWRITE or PATCH the entity present on DataHub GMS. | If you'd like to add more complex logic for assigning tags, you can use the more generic add_dataset_tags transformer, which calls a user-provided function to determine the tags for each dataset. @@ -477,7 +477,7 @@ Finally, you can install and use your custom transformer as [shown here](#instal | Field | Required | Type | Default | Description | |-----------------------------|----------|--------------|--------------|------------------------------------------------------------------| | `path_templates` | ✅ | list[string] | | List of path templates. | -| `replace_existing` | | boolean | `false` | Whether to remove owners from entity sent by ingestion source. 
| +| `replace_existing` | | boolean | `false` | Whether to remove browsePath from entity sent by ingestion source. | | `semantics` | | enum | `OVERWRITE` | Whether to OVERWRITE or PATCH the entity present on DataHub GMS. | If you would like to add to browse paths of dataset can use this transformer. There are 3 optional variables that you can use to get information from the dataset `urn`: @@ -562,7 +562,7 @@ In this case, the resulting dataset will have only 1 browse path, the one from t | Field | Required | Type | Default | Description | |-----------------------------|----------|--------------|---------------|------------------------------------------------------------------| | `term_urns` | ✅ | list[string] | | List of glossaryTerms urn. | -| `replace_existing` | | boolean | `false` | Whether to remove owners from entity sent by ingestion source. | +| `replace_existing` | | boolean | `false` | Whether to remove glossaryTerms from entity sent by ingestion source. | | `semantics` | | enum | `OVERWRITE` | Whether to OVERWRITE or PATCH the entity present on DataHub GMS. | We can use a similar convention to associate [Glossary Terms](../../../docs/generated/ingestion/sources/business-glossary.md) to datasets. @@ -617,7 +617,7 @@ The config, which we’d append to our ingestion recipe YAML, would look like th | Field | Required | Type | Default | Description | |-----------------------------|--------|----------------------|--------------|-------------------------------------------------------------------------------------------------| | `term_pattern` | ✅ | map[regx, list[urn]] | | entity urn with regular expression and list of glossaryTerms urn apply to matching entity urn. | -| `replace_existing` | | boolean | `false` | Whether to remove owners from entity sent by ingestion source. | +| `replace_existing` | | boolean | `false` | Whether to remove glossaryTerms from entity sent by ingestion source. 
| | `semantics` | | enum | `OVERWRITE` | Whether to OVERWRITE or PATCH the entity present on DataHub GMS. | We can add glossary terms to datasets based on a regex filter. @@ -673,7 +673,7 @@ We can add glossary terms to datasets based on a regex filter. | Field | Required | Type | Default | Description | |-----------------------------|---------|----------------------|-------------|------------------------------------------------------------------------------------------------| | `term_pattern` | ✅ | map[regx, list[urn]] | | entity urn with regular expression and list of glossaryTerms urn apply to matching entity urn. | -| `replace_existing` | | boolean | `false` | Whether to remove owners from entity sent by ingestion source. | +| `replace_existing` | | boolean | `false` | Whether to remove glossaryTerms from entity sent by ingestion source. | | `semantics` | | enum | `OVERWRITE` | Whether to OVERWRITE or PATCH the entity present on DataHub GMS. | We can add glossary terms to schema fields based on a regex filter. @@ -730,7 +730,7 @@ Note that only terms from the first matching pattern will be applied. | Field | Required | Type | Default | Description | |-----------------------------|----------|----------------------|-------------|---------------------------------------------------------------------------------------| | `tag_pattern` | ✅ | map[regx, list[urn]] | | entity urn with regular expression and list of tags urn apply to matching entity urn. | -| `replace_existing` | | boolean | `false` | Whether to remove owners from entity sent by ingestion source. | +| `replace_existing` | | boolean | `false` | Whether to remove globalTags from entity sent by ingestion source. | | `semantics` | | enum | `OVERWRITE` | Whether to OVERWRITE or PATCH the entity present on DataHub GMS. 
| @@ -790,7 +790,7 @@ The config would look like this: | Field | Required | Type | Default | Description | |--------------------|---------|----------------|-------------|------------------------------------------------------------------| | `properties` | ✅ | dict[str, str] | | Map of key value pair. | -| `replace_existing` | | boolean | `false` | Whether to remove owners from entity sent by ingestion source. | +| `replace_existing` | | boolean | `false` | Whether to remove datasetProperties from entity sent by ingestion source. | | `semantics` | | enum | `OVERWRITE` | Whether to OVERWRITE or PATCH the entity present on DataHub GMS. | `simple_add_dataset_properties` transformer assigns the properties to dataset entity from the configuration. @@ -849,7 +849,7 @@ overwrite the previous value. | Field | Required | Type | Default | Description | |--------------------------------|----------|--------------------------------------------|-------------|------------------------------------------------------------------| | `add_properties_resolver_class`| ✅ | Type[AddDatasetPropertiesResolverBase] | | A class extends from `AddDatasetPropertiesResolverBase` | -| `replace_existing` | | boolean | `false` | Whether to remove owners from entity sent by ingestion source. | +| `replace_existing` | | boolean | `false` | Whether to remove datasetProperties from entity sent by ingestion source. | | `semantics` | | enum | `OVERWRITE` | Whether to OVERWRITE or PATCH the entity present on DataHub GMS. | If you'd like to add more complex logic for assigning properties, you can use the `add_dataset_properties` transformer, which calls a user-provided class (that extends from `AddDatasetPropertiesResolverBase` class) to determine the properties for each dataset. 
@@ -948,7 +948,7 @@ transformers: | Field | Required | Type | Default | Description | |--------------------|----------|------------------------|---------------|------------------------------------------------------------------| | `domains` | ✅ | list[union[urn, str]] | | List of simple domain name or domain urns. | -| `replace_existing` | | boolean | `false` | Whether to remove owners from entity sent by ingestion source. | +| `replace_existing` | | boolean | `false` | Whether to remove domains from entity sent by ingestion source. | | `semantics` | | enum | `OVERWRITE` | Whether to OVERWRITE or PATCH the entity present on DataHub GMS. | For transformer behaviour on `replace_existing` and `semantics`, please refer section [Relationship Between replace_existing And semantics](#relationship-between-replace_existing-and-semantics). @@ -1008,7 +1008,7 @@ transformers: | Field | Required | Type | Default | Description | |----------------------------|-----------|---------------------------------|-----------------|----------------------------------------------------------------------------------------------------------------------------| | `domain_pattern` | ✅ | map[regx, list[union[urn, str]] | | dataset urn with regular expression and list of simple domain name or domain urn need to be apply on matching dataset urn. | -| `replace_existing` | | boolean | `false` | Whether to remove owners from entity sent by ingestion source. | +| `replace_existing` | | boolean | `false` | Whether to remove domains from entity sent by ingestion source. | | `semantics` | | enum | `OVERWRITE` | Whether to OVERWRITE or PATCH the entity present on DataHub GMS. | Let’s suppose we’d like to append a series of domain to specific datasets. To do so, we can use the pattern_add_dataset_domain transformer that’s included in the ingestion framework. 
From 54a2d2a23ef0d81cb8db30840fdd3a889371db4c Mon Sep 17 00:00:00 2001 From: Jay <159848059+jayacryl@users.noreply.github.com> Date: Fri, 7 Jun 2024 13:03:25 -0400 Subject: [PATCH 2/5] feat(graphql) data contract resolvers (#10632) --- .../linkedin/datahub/graphql/Constants.java | 1 + .../datahub/graphql/GmsGraphQLEngine.java | 62 ++- .../datacontract/DataContractUtils.java | 32 ++ .../EntityDataContractResolver.java | 96 +++++ .../UpsertDataContractResolver.java | 278 +++++++++++++ .../datacontract/DataContractMapper.java | 112 ++++++ .../types/datacontract/DataContractType.java | 84 ++++ .../src/main/resources/contract.graphql | 183 +++++++++ .../src/main/resources/entity.graphql | 5 + .../datacontract/DataContractUtilsTest.java | 63 +++ .../EntityDataContractResolverTest.java | 206 ++++++++++ .../UpsertDataContractResolverTest.java | 379 ++++++++++++++++++ .../datacontract/DataContractMapperTest.java | 180 +++++++++ .../datacontract/DataContractTypeTest.java | 152 +++++++ .../java/com/linkedin/metadata/Constants.java | 6 + .../authorization/PoliciesConfig.java | 6 + 16 files changed, 1844 insertions(+), 1 deletion(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/datacontract/DataContractUtils.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/datacontract/EntityDataContractResolver.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/datacontract/UpsertDataContractResolver.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datacontract/DataContractMapper.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datacontract/DataContractType.java create mode 100644 datahub-graphql-core/src/main/resources/contract.graphql create mode 100644 
datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/datacontract/DataContractUtilsTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/datacontract/EntityDataContractResolverTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/datacontract/UpsertDataContractResolverTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/datacontract/DataContractMapperTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/datacontract/DataContractTypeTest.java diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java index e55f1fd5ecf5bf..6fc6edc66f3572 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java @@ -22,6 +22,7 @@ private Constants() {} public static final String FORMS_SCHEMA_FILE = "forms.graphql"; public static final String ASSERTIONS_SCHEMA_FILE = "assertions.graphql"; public static final String INCIDENTS_SCHEMA_FILE = "incident.graphql"; + public static final String CONTRACTS_SCHEMA_FILE = "contract.graphql"; public static final String CONNECTIONS_SCHEMA_FILE = "connection.graphql"; public static final String BROWSE_PATH_DELIMITER = "/"; public static final String BROWSE_PATH_V2_DELIMITER = "␟"; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 50a73817678ee0..9290c409ac7b11 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -54,6 +54,7 @@ import 
com.linkedin.datahub.graphql.generated.DataJob; import com.linkedin.datahub.graphql.generated.DataJobInputOutput; import com.linkedin.datahub.graphql.generated.DataPlatformInstance; +import com.linkedin.datahub.graphql.generated.DataQualityContract; import com.linkedin.datahub.graphql.generated.Dataset; import com.linkedin.datahub.graphql.generated.DatasetStatsSummary; import com.linkedin.datahub.graphql.generated.Domain; @@ -64,6 +65,7 @@ import com.linkedin.datahub.graphql.generated.EntityRelationshipLegacy; import com.linkedin.datahub.graphql.generated.ForeignKeyConstraint; import com.linkedin.datahub.graphql.generated.FormActorAssignment; +import com.linkedin.datahub.graphql.generated.FreshnessContract; import com.linkedin.datahub.graphql.generated.GetRootGlossaryNodesResult; import com.linkedin.datahub.graphql.generated.GetRootGlossaryTermsResult; import com.linkedin.datahub.graphql.generated.GlossaryNode; @@ -102,6 +104,7 @@ import com.linkedin.datahub.graphql.generated.QuickFilter; import com.linkedin.datahub.graphql.generated.RecommendationContent; import com.linkedin.datahub.graphql.generated.ResolvedAuditStamp; +import com.linkedin.datahub.graphql.generated.SchemaContract; import com.linkedin.datahub.graphql.generated.SchemaField; import com.linkedin.datahub.graphql.generated.SchemaFieldEntity; import com.linkedin.datahub.graphql.generated.SearchAcrossLineageResult; @@ -141,6 +144,8 @@ import com.linkedin.datahub.graphql.resolvers.container.ParentContainersResolver; import com.linkedin.datahub.graphql.resolvers.dashboard.DashboardStatsSummaryResolver; import com.linkedin.datahub.graphql.resolvers.dashboard.DashboardUsageStatsResolver; +import com.linkedin.datahub.graphql.resolvers.datacontract.EntityDataContractResolver; +import com.linkedin.datahub.graphql.resolvers.datacontract.UpsertDataContractResolver; import com.linkedin.datahub.graphql.resolvers.dataproduct.BatchSetDataProductResolver; import 
com.linkedin.datahub.graphql.resolvers.dataproduct.CreateDataProductResolver; import com.linkedin.datahub.graphql.resolvers.dataproduct.DeleteDataProductResolver; @@ -746,6 +751,7 @@ public void configureRuntimeWiring(final RuntimeWiring.Builder builder) { configureDomainResolvers(builder); configureDataProductResolvers(builder); configureAssertionResolvers(builder); + configureContractResolvers(builder); configurePolicyResolvers(builder); configureDataProcessInstanceResolvers(builder); configureVersionedDatasetResolvers(builder); @@ -820,7 +826,8 @@ public GraphQLEngine.Builder builder() { .addSchema(fileBasedSchema(FORMS_SCHEMA_FILE)) .addSchema(fileBasedSchema(CONNECTIONS_SCHEMA_FILE)) .addSchema(fileBasedSchema(ASSERTIONS_SCHEMA_FILE)) - .addSchema(fileBasedSchema(INCIDENTS_SCHEMA_FILE)); + .addSchema(fileBasedSchema(INCIDENTS_SCHEMA_FILE)) + .addSchema(fileBasedSchema(CONTRACTS_SCHEMA_FILE)); for (GmsGraphQLPlugin plugin : this.graphQLPlugins) { List pluginSchemaFiles = plugin.getSchemaFiles(); @@ -2715,6 +2722,59 @@ private void configureAssertionResolvers(final RuntimeWiring.Builder builder) { "aspects", new WeaklyTypedAspectsResolver(entityClient, entityRegistry))); } + private void configureContractResolvers(final RuntimeWiring.Builder builder) { + builder.type( + "Dataset", + typeWiring -> + typeWiring.dataFetcher( + "contract", new EntityDataContractResolver(this.entityClient, this.graphClient))); + builder.type( + "FreshnessContract", + typeWiring -> + typeWiring.dataFetcher( + "assertion", + new LoadableTypeResolver<>( + getAssertionType(), + (env) -> { + final FreshnessContract contract = env.getSource(); + return contract.getAssertion() != null + ? 
contract.getAssertion().getUrn() + : null; + }))); + builder.type( + "DataQualityContract", + typeWiring -> + typeWiring.dataFetcher( + "assertion", + new LoadableTypeResolver<>( + getAssertionType(), + (env) -> { + final DataQualityContract contract = env.getSource(); + return contract.getAssertion() != null + ? contract.getAssertion().getUrn() + : null; + }))); + builder.type( + "SchemaContract", + typeWiring -> + typeWiring.dataFetcher( + "assertion", + new LoadableTypeResolver<>( + getAssertionType(), + (env) -> { + final SchemaContract contract = env.getSource(); + return contract.getAssertion() != null + ? contract.getAssertion().getUrn() + : null; + }))); + builder.type( + "Mutation", + typeWiring -> + typeWiring.dataFetcher( + "upsertDataContract", + new UpsertDataContractResolver(this.entityClient, this.graphClient))); + } + private void configurePolicyResolvers(final RuntimeWiring.Builder builder) { // Register resolvers for "resolvedUsers" and "resolvedGroups" field of the Policy type. 
builder.type( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/datacontract/DataContractUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/datacontract/DataContractUtils.java new file mode 100644 index 00000000000000..3dd7cd9df63838 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/datacontract/DataContractUtils.java @@ -0,0 +1,32 @@ +package com.linkedin.datahub.graphql.resolvers.datacontract; + +import com.datahub.authorization.ConjunctivePrivilegeGroup; +import com.datahub.authorization.DisjunctivePrivilegeGroup; +import com.google.common.collect.ImmutableList; +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.metadata.authorization.PoliciesConfig; +import javax.annotation.Nonnull; + +public class DataContractUtils { + + public static boolean canEditDataContract(@Nonnull QueryContext context, Urn entityUrn) { + final DisjunctivePrivilegeGroup orPrivilegeGroups = + new DisjunctivePrivilegeGroup( + ImmutableList.of( + AuthorizationUtils.ALL_PRIVILEGES_GROUP, + new ConjunctivePrivilegeGroup( + ImmutableList.of( + PoliciesConfig.EDIT_ENTITY_DATA_CONTRACT_PRIVILEGE.getType())))); + + return AuthorizationUtils.isAuthorized( + context.getAuthorizer(), + context.getActorUrn(), + entityUrn.getEntityType(), + entityUrn.toString(), + orPrivilegeGroups); + } + + private DataContractUtils() {} +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/datacontract/EntityDataContractResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/datacontract/EntityDataContractResolver.java new file mode 100644 index 00000000000000..338e7a0015b5a5 --- /dev/null +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/datacontract/EntityDataContractResolver.java @@ -0,0 +1,96 @@ +package com.linkedin.datahub.graphql.resolvers.datacontract; + +import com.google.common.collect.ImmutableList; +import com.linkedin.common.EntityRelationship; +import com.linkedin.common.EntityRelationships; +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.DataContract; +import com.linkedin.datahub.graphql.generated.Entity; +import com.linkedin.datahub.graphql.types.datacontract.DataContractMapper; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.graph.GraphClient; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import com.linkedin.r2.RemoteInvocationException; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.net.URISyntaxException; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class EntityDataContractResolver implements DataFetcher> { + static final String CONTRACT_FOR_RELATIONSHIP = "ContractFor"; + + private final EntityClient _entityClient; + private final GraphClient _graphClient; + + public EntityDataContractResolver( + final EntityClient entityClient, final GraphClient graphClient) { + _entityClient = Objects.requireNonNull(entityClient, "entityClient must not be null"); + _graphClient = Objects.requireNonNull(graphClient, "graphClient must not be null"); + } + + @Override + public CompletableFuture get(DataFetchingEnvironment environment) { + return CompletableFuture.supplyAsync( + () -> { + final QueryContext context = environment.getContext(); + final String entityUrn = ((Entity) environment.getSource()).getUrn(); 
+ + try { + // Step 1: Fetch the contract associated with the dataset. + final EntityRelationships relationships = + _graphClient.getRelatedEntities( + entityUrn, + ImmutableList.of(CONTRACT_FOR_RELATIONSHIP), + RelationshipDirection.INCOMING, + 0, + 1, + context.getActorUrn()); + + // If we found multiple contracts for same entity, we have an invalid system state. Log + // a warning. + if (relationships.getTotal() > 1) { + // Someone created 2 contracts for the same entity. Currently, we do not handle this + // in the UI. + log.warn( + String.format( + "Unexpectedly found multiple contracts (%s) for entity with urn %s! This may lead to inconsistent behavior.", + relationships.getRelationships(), entityUrn)); + } + + final List contractUrns = + relationships.getRelationships().stream() + .map(EntityRelationship::getEntity) + .collect(Collectors.toList()); + + if (!contractUrns.isEmpty()) { + final Urn contractUrn = contractUrns.get(0); + + // Step 2: Hydrate the contract entities based on the urns from step 1 + final EntityResponse entityResponse = + _entityClient.getV2( + context.getOperationContext(), + Constants.DATA_CONTRACT_ENTITY_NAME, + contractUrn, + null); + + if (entityResponse != null) { + // Step 3: Package and return result + return DataContractMapper.mapContract(entityResponse); + } + } + // No contract found + return null; + } catch (URISyntaxException | RemoteInvocationException e) { + throw new RuntimeException("Failed to retrieve Data Contract from GMS", e); + } + }); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/datacontract/UpsertDataContractResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/datacontract/UpsertDataContractResolver.java new file mode 100644 index 00000000000000..955a4ed0ee6b2b --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/datacontract/UpsertDataContractResolver.java @@ -0,0 +1,278 @@ +package 
com.linkedin.datahub.graphql.resolvers.datacontract; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; +import static com.linkedin.datahub.graphql.resolvers.mutate.MutationUtils.*; + +import com.datahub.authentication.Authentication; +import com.google.common.collect.ImmutableList; +import com.linkedin.common.EntityRelationships; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datacontract.DataContractProperties; +import com.linkedin.datacontract.DataContractState; +import com.linkedin.datacontract.DataContractStatus; +import com.linkedin.datacontract.DataQualityContract; +import com.linkedin.datacontract.DataQualityContractArray; +import com.linkedin.datacontract.FreshnessContract; +import com.linkedin.datacontract.FreshnessContractArray; +import com.linkedin.datacontract.SchemaContract; +import com.linkedin.datacontract.SchemaContractArray; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.exception.AuthorizationException; +import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode; +import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; +import com.linkedin.datahub.graphql.generated.DataContract; +import com.linkedin.datahub.graphql.generated.DataQualityContractInput; +import com.linkedin.datahub.graphql.generated.FreshnessContractInput; +import com.linkedin.datahub.graphql.generated.SchemaContractInput; +import com.linkedin.datahub.graphql.generated.UpsertDataContractInput; +import com.linkedin.datahub.graphql.types.datacontract.DataContractMapper; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.graph.GraphClient; +import com.linkedin.metadata.key.DataContractKey; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import com.linkedin.metadata.utils.EntityKeyUtils; +import 
com.linkedin.mxe.MetadataChangeProposal; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.util.List; +import java.util.Objects; +import java.util.UUID; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class UpsertDataContractResolver implements DataFetcher> { + + private static final DataContractState DEFAULT_CONTRACT_STATE = DataContractState.ACTIVE; + private static final String CONTRACT_RELATIONSHIP_TYPE = "ContractFor"; + private final EntityClient _entityClient; + private final GraphClient _graphClient; + + public UpsertDataContractResolver( + final EntityClient entityClient, final GraphClient graphClient) { + _entityClient = Objects.requireNonNull(entityClient, "entityClient cannot be null"); + _graphClient = Objects.requireNonNull(graphClient, "graphClient cannot be null"); + } + + @Override + public CompletableFuture get(final DataFetchingEnvironment environment) + throws Exception { + final QueryContext context = environment.getContext(); + final UpsertDataContractInput input = + bindArgument(environment.getArgument("input"), UpsertDataContractInput.class); + final Urn entityUrn = UrnUtils.getUrn(input.getEntityUrn()); + return CompletableFuture.supplyAsync( + () -> { + if (DataContractUtils.canEditDataContract(context, entityUrn)) { + + // Verify that the provided contract, dataset, assertions all exist as valid entities. + validateInput(entityUrn, input, context); + + // First determine if there is an existing data contract + final Urn maybeExistingContractUrn = + getEntityContractUrn(entityUrn, context.getAuthentication()); + + final DataContractProperties newProperties = mapInputToProperties(entityUrn, input); + final DataContractStatus newStatus = mapInputToStatus(input); + + final Urn urn = + maybeExistingContractUrn != null + ? 
maybeExistingContractUrn + : EntityKeyUtils.convertEntityKeyToUrn( + new DataContractKey() + .setId( + input.getId() != null + ? input.getId() + : UUID.randomUUID().toString()), + Constants.DATA_CONTRACT_ENTITY_NAME); + + final MetadataChangeProposal propertiesProposal = + buildMetadataChangeProposalWithUrn( + urn, Constants.DATA_CONTRACT_PROPERTIES_ASPECT_NAME, newProperties); + + final MetadataChangeProposal statusProposal = + buildMetadataChangeProposalWithUrn( + urn, Constants.DATA_CONTRACT_STATUS_ASPECT_NAME, newStatus); + + try { + _entityClient.batchIngestProposals( + context.getOperationContext(), + ImmutableList.of(propertiesProposal, statusProposal), + false); + + // Hydrate the contract entities based on the urns from step 1 + final EntityResponse entityResponse = + _entityClient.getV2( + context.getOperationContext(), + Constants.DATA_CONTRACT_ENTITY_NAME, + urn, + null); + + // Package and return result + return DataContractMapper.mapContract(entityResponse); + } catch (Exception e) { + throw new RuntimeException( + String.format("Failed to perform update against input %s", input.toString()), e); + } + } + throw new AuthorizationException( + "Unauthorized to perform this action. 
Please contact your DataHub administrator."); + }); + } + + private void validateInput( + @Nonnull final Urn entityUrn, + @Nonnull final UpsertDataContractInput input, + @Nonnull final QueryContext context) { + try { + + // Validate the target entity exists + if (!_entityClient.exists(context.getOperationContext(), entityUrn)) { + throw new DataHubGraphQLException( + String.format("Provided entity with urn %s does not exist!", entityUrn), + DataHubGraphQLErrorCode.BAD_REQUEST); + } + + // Verify Freshness assertions + if (input.getFreshness() != null) { + final List freshnessInputs = input.getFreshness(); + for (FreshnessContractInput freshnessInput : freshnessInputs) { + final Urn assertionUrn = UrnUtils.getUrn(freshnessInput.getAssertionUrn()); + if (!_entityClient.exists(context.getOperationContext(), assertionUrn)) { + throw new DataHubGraphQLException( + String.format("Provided assertion with urn %s does not exist!", assertionUrn), + DataHubGraphQLErrorCode.BAD_REQUEST); + } + } + } + + // Verify Schema assertions + if (input.getSchema() != null) { + final List schemaInputs = input.getSchema(); + for (SchemaContractInput schemaInput : schemaInputs) { + final Urn assertionUrn = UrnUtils.getUrn(schemaInput.getAssertionUrn()); + if (!_entityClient.exists(context.getOperationContext(), assertionUrn)) { + throw new DataHubGraphQLException( + String.format("Provided assertion with urn %s does not exist!", assertionUrn), + DataHubGraphQLErrorCode.BAD_REQUEST); + } + } + } + + // Verify DQ assertions + if (input.getDataQuality() != null) { + final List dqInputs = input.getDataQuality(); + for (DataQualityContractInput dqInput : dqInputs) { + final Urn assertionUrn = UrnUtils.getUrn(dqInput.getAssertionUrn()); + if (!_entityClient.exists(context.getOperationContext(), assertionUrn)) { + throw new DataHubGraphQLException( + String.format("Provided assertion with urn %s does not exist!", assertionUrn), + DataHubGraphQLErrorCode.BAD_REQUEST); + } + } + } + } catch 
(Exception e) { + if (e instanceof DataHubGraphQLException) { + throw (DataHubGraphQLException) e; + } else { + log.error( + "Failed to validate inputs provided when upserting data contract! Failing the create.", + e); + throw new DataHubGraphQLException( + "Failed to verify inputs. An unknown error occurred!", + DataHubGraphQLErrorCode.SERVER_ERROR); + } + } + } + + @Nullable + private Urn getEntityContractUrn(@Nonnull Urn entityUrn, @Nonnull Authentication authentication) { + EntityRelationships relationships = + _graphClient.getRelatedEntities( + entityUrn.toString(), + ImmutableList.of(CONTRACT_RELATIONSHIP_TYPE), + RelationshipDirection.INCOMING, + 0, + 1, + authentication.getActor().toUrnStr()); + + if (relationships.getTotal() > 1) { + // Bad state - There are multiple contracts for a single entity! Cannot update. + log.warn( + String.format( + "Unexpectedly found multiple contracts (%s) for entity with urn %s! This may lead to inconsistent behavior.", + relationships.getRelationships(), entityUrn)); + } + + if (relationships.getRelationships().size() == 1) { + return relationships.getRelationships().get(0).getEntity(); + } + // No Contract Found + return null; + } + + private DataContractProperties mapInputToProperties( + @Nonnull final Urn entityUrn, @Nonnull final UpsertDataContractInput input) { + final DataContractProperties result = new DataContractProperties(); + result.setEntity(entityUrn); + + // Construct the dataset contract. 
+ if (input.getFreshness() != null) { + result.setFreshness( + new FreshnessContractArray( + input.getFreshness().stream() + .map(this::mapFreshnessInput) + .collect(Collectors.toList()))); + } + + if (input.getSchema() != null) { + result.setSchema( + new SchemaContractArray( + input.getSchema().stream().map(this::mapSchemaInput).collect(Collectors.toList()))); + } + + if (input.getDataQuality() != null) { + result.setDataQuality( + new DataQualityContractArray( + input.getDataQuality().stream() + .map(this::mapDataQualityInput) + .collect(Collectors.toList()))); + } + + return result; + } + + private DataContractStatus mapInputToStatus(@Nonnull final UpsertDataContractInput input) { + final DataContractStatus result = new DataContractStatus(); + if (input.getState() != null) { + result.setState(DataContractState.valueOf(input.getState().toString())); + } else { + result.setState(DEFAULT_CONTRACT_STATE); + } + return result; + } + + private FreshnessContract mapFreshnessInput(@Nonnull final FreshnessContractInput input) { + final FreshnessContract result = new FreshnessContract(); + result.setAssertion(UrnUtils.getUrn(input.getAssertionUrn())); + return result; + } + + private SchemaContract mapSchemaInput(@Nonnull final SchemaContractInput input) { + final SchemaContract result = new SchemaContract(); + result.setAssertion(UrnUtils.getUrn(input.getAssertionUrn())); + return result; + } + + private DataQualityContract mapDataQualityInput(@Nonnull final DataQualityContractInput input) { + final DataQualityContract result = new DataQualityContract(); + result.setAssertion(UrnUtils.getUrn(input.getAssertionUrn())); + return result; + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datacontract/DataContractMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datacontract/DataContractMapper.java new file mode 100644 index 00000000000000..1fe65beed6e927 --- /dev/null +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datacontract/DataContractMapper.java @@ -0,0 +1,112 @@ +package com.linkedin.datahub.graphql.types.datacontract; + +import com.linkedin.datahub.graphql.generated.Assertion; +import com.linkedin.datahub.graphql.generated.DataContract; +import com.linkedin.datahub.graphql.generated.DataContractProperties; +import com.linkedin.datahub.graphql.generated.DataContractState; +import com.linkedin.datahub.graphql.generated.DataContractStatus; +import com.linkedin.datahub.graphql.generated.DataQualityContract; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.FreshnessContract; +import com.linkedin.datahub.graphql.generated.SchemaContract; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.metadata.Constants; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; + +public class DataContractMapper { + + public static DataContract mapContract(@Nonnull final EntityResponse entityResponse) { + final DataContract result = new DataContract(); + final EnvelopedAspectMap aspects = entityResponse.getAspects(); + + result.setUrn(entityResponse.getUrn().toString()); + result.setType(EntityType.DATA_CONTRACT); + + final EnvelopedAspect dataContractProperties = + aspects.get(Constants.DATA_CONTRACT_PROPERTIES_ASPECT_NAME); + if (dataContractProperties != null) { + result.setProperties( + mapProperties( + new com.linkedin.datacontract.DataContractProperties( + dataContractProperties.getValue().data()))); + } else { + throw new RuntimeException( + String.format("Data Contract does not exist!. 
urn: %s", entityResponse.getUrn())); + } + + final EnvelopedAspect dataContractStatus = + aspects.get(Constants.DATA_CONTRACT_STATUS_ASPECT_NAME); + if (dataContractStatus != null) { + result.setStatus( + mapStatus( + new com.linkedin.datacontract.DataContractStatus( + dataContractStatus.getValue().data()))); + } + + return result; + } + + private static DataContractProperties mapProperties( + final com.linkedin.datacontract.DataContractProperties properties) { + final DataContractProperties result = new DataContractProperties(); + result.setEntityUrn(properties.getEntity().toString()); + if (properties.hasSchema()) { + result.setSchema( + properties.getSchema().stream() + .map(DataContractMapper::mapSchemaContract) + .collect(Collectors.toList())); + } + if (properties.hasFreshness()) { + result.setFreshness( + properties.getFreshness().stream() + .map(DataContractMapper::mapFreshnessContract) + .collect(Collectors.toList())); + } + if (properties.hasDataQuality()) { + result.setDataQuality( + properties.getDataQuality().stream() + .map(DataContractMapper::mapDataQualityContract) + .collect(Collectors.toList())); + } + return result; + } + + private static DataContractStatus mapStatus( + final com.linkedin.datacontract.DataContractStatus status) { + final DataContractStatus result = new DataContractStatus(); + result.setState(DataContractState.valueOf(status.getState().toString())); + return result; + } + + private static SchemaContract mapSchemaContract( + final com.linkedin.datacontract.SchemaContract schemaContract) { + final SchemaContract result = new SchemaContract(); + final Assertion partialAssertion = new Assertion(); + partialAssertion.setUrn(schemaContract.getAssertion().toString()); + result.setAssertion(partialAssertion); + return result; + } + + private static FreshnessContract mapFreshnessContract( + final com.linkedin.datacontract.FreshnessContract freshnessContract) { + final FreshnessContract result = new FreshnessContract(); + final Assertion 
partialAssertion = new Assertion(); + partialAssertion.setUrn(freshnessContract.getAssertion().toString()); + result.setAssertion(partialAssertion); + return result; + } + + private static DataQualityContract mapDataQualityContract( + final com.linkedin.datacontract.DataQualityContract qualityContract) { + final DataQualityContract result = new DataQualityContract(); + final Assertion partialAssertion = new Assertion(); + partialAssertion.setUrn(qualityContract.getAssertion().toString()); + result.setAssertion(partialAssertion); + return result; + } + + private DataContractMapper() {} +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datacontract/DataContractType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datacontract/DataContractType.java new file mode 100644 index 00000000000000..7f1756610baf71 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/datacontract/DataContractType.java @@ -0,0 +1,84 @@ +package com.linkedin.datahub.graphql.types.datacontract; + +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.DataContract; +import com.linkedin.datahub.graphql.generated.Entity; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.Constants; +import graphql.execution.DataFetcherResult; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; + +public class DataContractType + implements com.linkedin.datahub.graphql.types.EntityType { + + static final Set 
ASPECTS_TO_FETCH = + ImmutableSet.of( + Constants.DATA_CONTRACT_KEY_ASPECT_NAME, + Constants.DATA_CONTRACT_PROPERTIES_ASPECT_NAME, + Constants.DATA_CONTRACT_STATUS_ASPECT_NAME); + private final EntityClient _entityClient; + + public DataContractType(final EntityClient entityClient) { + _entityClient = Objects.requireNonNull(entityClient, "entityClient must not be null"); + } + + @Override + public EntityType type() { + return EntityType.DATA_CONTRACT; + } + + @Override + public Function getKeyProvider() { + return Entity::getUrn; + } + + @Override + public Class objectClass() { + return DataContract.class; + } + + @Override + public List> batchLoad( + @Nonnull List urns, @Nonnull QueryContext context) throws Exception { + final List dataContractUrns = + urns.stream().map(UrnUtils::getUrn).collect(Collectors.toList()); + + try { + final Map entities = + _entityClient.batchGetV2( + context.getOperationContext(), + Constants.DATA_CONTRACT_ENTITY_NAME, + new HashSet<>(dataContractUrns), + ASPECTS_TO_FETCH); + + final List gmsResults = new ArrayList<>(); + for (Urn urn : dataContractUrns) { + gmsResults.add(entities.getOrDefault(urn, null)); + } + return gmsResults.stream() + .map( + gmsResult -> + gmsResult == null + ? null + : DataFetcherResult.newResult() + .data(DataContractMapper.mapContract(gmsResult)) + .build()) + .collect(Collectors.toList()); + } catch (Exception e) { + throw new RuntimeException("Failed to batch load Data Contracts", e); + } + } +} diff --git a/datahub-graphql-core/src/main/resources/contract.graphql b/datahub-graphql-core/src/main/resources/contract.graphql new file mode 100644 index 00000000000000..27d6510c89c24a --- /dev/null +++ b/datahub-graphql-core/src/main/resources/contract.graphql @@ -0,0 +1,183 @@ +extend type Mutation { + """ + Create or update a data contract for a given dataset. Requires the "Edit Data Contract" privilege for the provided dataset. 
+ """ + upsertDataContract(urn: String, input: UpsertDataContractInput!): DataContract! +} + +extend type Dataset { + """ + An optional Data Contract defined for the Dataset. + """ + contract: DataContract +} + +""" +A Data Contract Entity. A Data Contract is a verifiable group of assertions regarding various aspects of the data: its freshness (sla), +schema, and data quality or validity. This group of assertions represents a data owner's commitment to producing data that conforms to the agreed +upon contract. Each dataset can have a single contract. The contract can be in a "passing" or "violating" state, depending +on whether the assertions that compose the contract are passing or failing. +Note that the data contract entity is currently in early preview (beta). +""" +type DataContract implements Entity { + """ + A primary key of the data contract + """ + urn: String! + + """ + The standard entity type + """ + type: EntityType! + + """ + Properties describing the data contract + """ + properties: DataContractProperties + + """ + The status of the data contract + """ + status: DataContractStatus + + """ + List of relationships between the source Entity and some destination entities with a given types + """ + relationships(input: RelationshipsInput!): EntityRelationshipsResult +} + +type DataContractProperties { + """ + The urn of the related entity, e.g. the Dataset today. In the future, we may support additional contract entities. + """ + entityUrn: String! + + """ + The Freshness (SLA) portion of the contract. + As of today, it is expected that there will not be more than 1 Freshness contract. If there are, only the first will be displayed. + """ + freshness: [FreshnessContract!] + + """ + The schema / structural portion of the contract. + As of today, it is expected that there will not be more than 1 Schema contract. If there are, only the first will be displayed. + """ + schema: [SchemaContract!] + + """ + A set of data quality related contracts, e.g.
table and column-level contract constraints. + """ + dataQuality: [DataQualityContract!] +} + +""" +The state of the data contract +""" +enum DataContractState { + """ + The data contract is active. + """ + ACTIVE + + """ + The data contract is pending. Note that this symbol is currently experimental. + """ + PENDING +} + +type DataContractStatus { + """ + The state of the data contract + """ + state: DataContractState! +} + +type DataQualityContract { + """ + The assertion representing the data quality contract. + """ + assertion: Assertion! +} + +type SchemaContract { + """ + The assertion representing the schema contract. + """ + assertion: Assertion! +} + +type FreshnessContract { + """ + The assertion representing the Freshness contract. + """ + assertion: Assertion! +} + +""" +Input required to upsert a Data Contract entity for an asset +""" +input UpsertDataContractInput { + """ + The urn of the related entity. Dataset is the only entity type supported today. + """ + entityUrn: String! + + """ + The Freshness (SLA) portion of the contract. If not provided, this will be set to none. + For Dataset Contracts, it is expected that there will not be more than 1 Freshness contract. If there are, only the first will be displayed. + """ + freshness: [FreshnessContractInput!] + + """ + The schema / structural portion of the contract. If not provided, this will be set to none. + For Dataset Contracts, it is expected that there will not be more than 1 Schema contract. If there are, only the first will be displayed. + """ + schema: [SchemaContractInput!] + + """ + The data quality portion of the contract. If not provided, this will be set to none. + """ + dataQuality: [DataQualityContractInput!] + + """ + The state of the data contract. If not provided, it will be in ACTIVE mode by default. + """ + state: DataContractState + + """ + Optional ID of the contract you want to create. Only applicable if this is a create operation.
If not provided, a random + id will be generated for you. + """ + id: String +} + +""" +Input required to create a Freshness contract +""" +input FreshnessContractInput { + """ + The assertion monitoring this part of the data contract. Assertion must be of type Freshness. + """ + assertionUrn: String! +} + +""" +Input required to create a schema contract +""" +input SchemaContractInput { + """ + The assertion monitoring this part of the data contract. Assertion must be of type Data Schema. + """ + assertionUrn: String! +} + +""" +Input required to create a data quality contract +""" +input DataQualityContractInput { + """ + The assertion monitoring this part of the data contract. Assertion must be of type Dataset, Volume, Field / Column, or Custom SQL. + """ + assertionUrn: String! +} + diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 92d4a1723c0b61..98d47e2cd46266 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -1158,6 +1158,11 @@ enum EntityType { """ ROLE + """ + A data contract + """ + DATA_CONTRACT + """" An structured property on entities """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/datacontract/DataContractUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/datacontract/DataContractUtilsTest.java new file mode 100644 index 00000000000000..18ede7c306e424 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/datacontract/DataContractUtilsTest.java @@ -0,0 +1,63 @@ +package com.linkedin.datahub.graphql.resolvers.datacontract; + +import static org.mockito.Mockito.mock; + +import com.datahub.authentication.Actor; +import com.datahub.authentication.ActorType; +import com.datahub.authentication.Authentication; +import com.datahub.authorization.AuthorizationRequest; +import
com.datahub.authorization.AuthorizationResult; +import com.datahub.authorization.EntitySpec; +import com.datahub.plugins.auth.authorization.Authorizer; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import graphql.Assert; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.Optional; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +public class DataContractUtilsTest { + + @Test + public void testCanEditDataContract() { + Urn testUrn = UrnUtils.getUrn("urn:li:dataContract:test"); + boolean result = + DataContractUtils.canEditDataContract( + new QueryContext() { + @Override + public boolean isAuthenticated() { + return true; + } + + @Override + public Authentication getAuthentication() { + Authentication auth = new Authentication(new Actor(ActorType.USER, "test"), "TEST"); + return auth; + } + + @Override + public Authorizer getAuthorizer() { + Authorizer authorizer = mock(Authorizer.class); + Mockito.when(authorizer.authorize(Mockito.any(AuthorizationRequest.class))) + .thenReturn( + new AuthorizationResult( + new AuthorizationRequest( + "TEST", "test", Optional.of(new EntitySpec("dataset", "test"))), + AuthorizationResult.Type.ALLOW, + "TEST")); + return authorizer; + } + + @Override + public OperationContext getOperationContext() { + return TestOperationContexts.userContextNoSearchAuthorization( + getAuthorizer(), getAuthentication()); + } + }, + testUrn); + Assert.assertTrue(result); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/datacontract/EntityDataContractResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/datacontract/EntityDataContractResolverTest.java new file mode 100644 index 00000000000000..8b757a24d6566f --- /dev/null +++ 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/datacontract/EntityDataContractResolverTest.java @@ -0,0 +1,206 @@ +package com.linkedin.datahub.graphql.resolvers.datacontract; + +import static com.linkedin.datahub.graphql.resolvers.datacontract.EntityDataContractResolver.*; +import static org.mockito.ArgumentMatchers.nullable; +import static org.testng.Assert.*; + +import com.datahub.authentication.Authentication; +import com.google.common.collect.ImmutableList; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.EntityRelationship; +import com.linkedin.common.EntityRelationshipArray; +import com.linkedin.common.EntityRelationships; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datacontract.DataContractProperties; +import com.linkedin.datacontract.DataContractState; +import com.linkedin.datacontract.DataContractStatus; +import com.linkedin.datacontract.DataQualityContract; +import com.linkedin.datacontract.DataQualityContractArray; +import com.linkedin.datacontract.FreshnessContract; +import com.linkedin.datacontract.FreshnessContractArray; +import com.linkedin.datacontract.SchemaContract; +import com.linkedin.datacontract.SchemaContractArray; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.DataContract; +import com.linkedin.datahub.graphql.generated.Dataset; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.graph.GraphClient; +import com.linkedin.metadata.key.DataContractKey; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import graphql.schema.DataFetchingEnvironment; +import 
io.datahubproject.metadata.context.OperationContext; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +public class EntityDataContractResolverTest { + + private static final Urn TEST_DATASET_URN = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:snowflake,test,PROD)"); + private static final Urn TEST_DATA_CONTRACT_URN = UrnUtils.getUrn("urn:li:dataContract:test"); + private static final Urn TEST_QUALITY_ASSERTION_URN = UrnUtils.getUrn("urn:li:assertion:quality"); + private static final Urn TEST_FRESHNESS_ASSERTION_URN = + UrnUtils.getUrn("urn:li:assertion:freshness"); + private static final Urn TEST_SCHEMA_ASSERTION_URN = UrnUtils.getUrn("urn:li:assertion:schema"); + + @Test + public void testGetSuccessOneContract() throws Exception { + GraphClient mockGraphClient = Mockito.mock(GraphClient.class); + EntityClient mockClient = Mockito.mock(EntityClient.class); + + Mockito.when( + mockGraphClient.getRelatedEntities( + Mockito.eq(TEST_DATASET_URN.toString()), + Mockito.eq(ImmutableList.of(CONTRACT_FOR_RELATIONSHIP)), + Mockito.eq(RelationshipDirection.INCOMING), + Mockito.eq(0), + Mockito.eq(1), + Mockito.anyString())) + .thenReturn( + new EntityRelationships() + .setTotal(1) + .setCount(1) + .setStart(0) + .setRelationships( + new EntityRelationshipArray( + ImmutableList.of( + new EntityRelationship() + .setType(CONTRACT_FOR_RELATIONSHIP) + .setEntity(TEST_DATA_CONTRACT_URN) + .setCreated( + new AuditStamp() + .setActor(UrnUtils.getUrn("urn:li:corpuser:test")) + .setTime(0L)))))); + + Map dataContractAspects = new HashMap<>(); + + // 1. Key Aspect + dataContractAspects.put( + Constants.DATA_CONTRACT_KEY_ASPECT_NAME, + new com.linkedin.entity.EnvelopedAspect() + .setValue(new Aspect(new DataContractKey().setId("test").data()))); + + // 2. Properties Aspect. 
+ DataContractProperties expectedProperties = + new DataContractProperties() + .setEntity(TEST_DATASET_URN) + .setDataQuality( + new DataQualityContractArray( + ImmutableList.of( + new DataQualityContract().setAssertion(TEST_QUALITY_ASSERTION_URN)))) + .setFreshness( + new FreshnessContractArray( + ImmutableList.of( + new FreshnessContract().setAssertion(TEST_FRESHNESS_ASSERTION_URN)))) + .setSchema( + new SchemaContractArray( + ImmutableList.of( + new SchemaContract().setAssertion(TEST_SCHEMA_ASSERTION_URN)))); + + dataContractAspects.put( + Constants.DATA_CONTRACT_PROPERTIES_ASPECT_NAME, + new com.linkedin.entity.EnvelopedAspect().setValue(new Aspect(expectedProperties.data()))); + + // 3. Status Aspect + DataContractStatus expectedStatus = new DataContractStatus().setState(DataContractState.ACTIVE); + + dataContractAspects.put( + Constants.DATA_CONTRACT_STATUS_ASPECT_NAME, + new com.linkedin.entity.EnvelopedAspect().setValue(new Aspect(expectedStatus.data()))); + + Mockito.when( + mockClient.getV2( + nullable(OperationContext.class), + Mockito.eq(Constants.DATA_CONTRACT_ENTITY_NAME), + Mockito.eq(TEST_DATA_CONTRACT_URN), + Mockito.eq(null))) + .thenReturn( + new EntityResponse() + .setEntityName(Constants.DATA_CONTRACT_ENTITY_NAME) + .setUrn(TEST_DATA_CONTRACT_URN) + .setAspects(new EnvelopedAspectMap(dataContractAspects))); + + // Execute resolver + QueryContext mockContext = Mockito.mock(QueryContext.class); + Mockito.when(mockContext.getAuthentication()).thenReturn(Mockito.mock(Authentication.class)); + Mockito.when(mockContext.getActorUrn()).thenReturn("urn:li:corpuser:test"); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Dataset parentDataset = new Dataset(); + parentDataset.setUrn(TEST_DATASET_URN.toString()); + Mockito.when(mockEnv.getSource()).thenReturn(parentDataset); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + EntityDataContractResolver resolver = + new EntityDataContractResolver(mockClient, 
mockGraphClient); + DataContract result = resolver.get(mockEnv).get(); + + // Assert that the result we get matches the expectations. + assertEquals(result.getUrn(), TEST_DATA_CONTRACT_URN.toString()); + assertEquals(result.getType(), EntityType.DATA_CONTRACT); + + // Verify Properties + assertEquals(result.getProperties().getDataQuality().size(), 1); + assertEquals(result.getProperties().getFreshness().size(), 1); + assertEquals(result.getProperties().getSchema().size(), 1); + assertEquals( + result.getProperties().getDataQuality().get(0).getAssertion().getUrn(), + TEST_QUALITY_ASSERTION_URN.toString()); + assertEquals( + result.getProperties().getFreshness().get(0).getAssertion().getUrn(), + TEST_FRESHNESS_ASSERTION_URN.toString()); + assertEquals( + result.getProperties().getSchema().get(0).getAssertion().getUrn(), + TEST_SCHEMA_ASSERTION_URN.toString()); + + // Verify Status + assertEquals(result.getStatus().getState().toString(), expectedStatus.getState().toString()); + } + + @Test + public void testGetSuccessNoContracts() throws Exception { + GraphClient mockGraphClient = Mockito.mock(GraphClient.class); + EntityClient mockClient = Mockito.mock(EntityClient.class); + + Mockito.when( + mockGraphClient.getRelatedEntities( + Mockito.eq(TEST_DATASET_URN.toString()), + Mockito.eq(ImmutableList.of(CONTRACT_FOR_RELATIONSHIP)), + Mockito.eq(RelationshipDirection.INCOMING), + Mockito.eq(0), + Mockito.eq(1), + Mockito.anyString())) + .thenReturn( + new EntityRelationships() + .setTotal(0) + .setCount(0) + .setStart(0) + .setRelationships(new EntityRelationshipArray(Collections.emptyList()))); + + EntityDataContractResolver resolver = + new EntityDataContractResolver(mockClient, mockGraphClient); + + // Execute resolver + QueryContext mockContext = Mockito.mock(QueryContext.class); + Mockito.when(mockContext.getAuthentication()).thenReturn(Mockito.mock(Authentication.class)); + Mockito.when(mockContext.getActorUrn()).thenReturn("urn:li:corpuser:test"); + 
DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Dataset parentDataset = new Dataset(); + parentDataset.setUrn(TEST_DATASET_URN.toString()); + Mockito.when(mockEnv.getSource()).thenReturn(parentDataset); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + DataContract result = resolver.get(mockEnv).get(); + + assertNull(result); + Mockito.verifyNoMoreInteractions(mockClient); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/datacontract/UpsertDataContractResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/datacontract/UpsertDataContractResolverTest.java new file mode 100644 index 00000000000000..601fc56b251495 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/datacontract/UpsertDataContractResolverTest.java @@ -0,0 +1,379 @@ +package com.linkedin.datahub.graphql.resolvers.datacontract; + +import static com.linkedin.datahub.graphql.TestUtils.*; +import static com.linkedin.datahub.graphql.resolvers.datacontract.EntityDataContractResolver.*; +import static org.mockito.ArgumentMatchers.any; +import static org.testng.Assert.*; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.EntityRelationship; +import com.linkedin.common.EntityRelationshipArray; +import com.linkedin.common.EntityRelationships; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.StringMap; +import com.linkedin.datacontract.DataContractProperties; +import com.linkedin.datacontract.DataContractStatus; +import com.linkedin.datacontract.DataQualityContract; +import com.linkedin.datacontract.DataQualityContractArray; +import com.linkedin.datacontract.FreshnessContract; +import com.linkedin.datacontract.FreshnessContractArray; +import 
com.linkedin.datacontract.SchemaContract; +import com.linkedin.datacontract.SchemaContractArray; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.DataContract; +import com.linkedin.datahub.graphql.generated.DataContractState; +import com.linkedin.datahub.graphql.generated.DataQualityContractInput; +import com.linkedin.datahub.graphql.generated.FreshnessContractInput; +import com.linkedin.datahub.graphql.generated.SchemaContractInput; +import com.linkedin.datahub.graphql.generated.UpsertDataContractInput; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.AspectType; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.graph.GraphClient; +import com.linkedin.metadata.key.DataContractKey; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import com.linkedin.metadata.utils.EntityKeyUtils; +import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.r2.RemoteInvocationException; +import graphql.schema.DataFetchingEnvironment; +import io.datahubproject.metadata.context.OperationContext; +import java.util.Collections; +import java.util.concurrent.CompletionException; +import org.mockito.Mockito; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class UpsertDataContractResolverTest { + + private static final Urn TEST_CONTRACT_URN = UrnUtils.getUrn("urn:li:dataContract:test-id"); + private static final Urn TEST_DATASET_URN = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:snowflake,test,PROD)"); + private static final Urn TEST_FRESHNESS_ASSERTION_URN = + UrnUtils.getUrn("urn:li:assertion:freshness"); + private 
static final Urn TEST_SCHEMA_ASSERTION_URN = UrnUtils.getUrn("urn:li:assertion:schema"); + private static final Urn TEST_QUALITY_ASSERTION_URN = UrnUtils.getUrn("urn:li:assertion:quality"); + + private static final UpsertDataContractInput TEST_CREATE_INPUT = + new UpsertDataContractInput( + TEST_DATASET_URN.toString(), + ImmutableList.of(new FreshnessContractInput(TEST_FRESHNESS_ASSERTION_URN.toString())), + ImmutableList.of(new SchemaContractInput(TEST_SCHEMA_ASSERTION_URN.toString())), + ImmutableList.of(new DataQualityContractInput(TEST_QUALITY_ASSERTION_URN.toString())), + DataContractState.PENDING, + "test-id"); + + private static final UpsertDataContractInput TEST_VALID_UPDATE_INPUT = + new UpsertDataContractInput( + TEST_DATASET_URN.toString(), + ImmutableList.of(new FreshnessContractInput(TEST_FRESHNESS_ASSERTION_URN.toString())), + ImmutableList.of(new SchemaContractInput(TEST_SCHEMA_ASSERTION_URN.toString())), + ImmutableList.of(new DataQualityContractInput(TEST_QUALITY_ASSERTION_URN.toString())), + DataContractState.ACTIVE, + null); + + private static final Urn TEST_ACTOR_URN = UrnUtils.getUrn("urn:li:corpuser:test"); + + @Test + public void testGetSuccessCreate() throws Exception { + + // Expected results + final DataContractKey key = new DataContractKey(); + key.setId("test-id"); + final Urn dataContractUrn = + EntityKeyUtils.convertEntityKeyToUrn(key, Constants.DATA_CONTRACT_ENTITY_NAME); + + final DataContractStatus status = new DataContractStatus(); + status.setState(com.linkedin.datacontract.DataContractState.PENDING); + + final DataContractProperties props = new DataContractProperties(); + props.setEntity(TEST_DATASET_URN); + props.setDataQuality( + new DataQualityContractArray( + ImmutableList.of(new DataQualityContract().setAssertion(TEST_QUALITY_ASSERTION_URN)))); + props.setFreshness( + new FreshnessContractArray( + ImmutableList.of(new FreshnessContract().setAssertion(TEST_FRESHNESS_ASSERTION_URN)))); + props.setSchema( + new 
SchemaContractArray( + ImmutableList.of(new SchemaContract().setAssertion(TEST_SCHEMA_ASSERTION_URN)))); + + // Create resolver + EntityClient mockClient = Mockito.mock(EntityClient.class); + GraphClient mockGraphClient = Mockito.mock(GraphClient.class); + initMockGraphClient(mockGraphClient, null); + initMockEntityClient(mockClient, null, props); // No existing contract + UpsertDataContractResolver resolver = + new UpsertDataContractResolver(mockClient, mockGraphClient); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(TEST_CREATE_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + DataContract result = resolver.get(mockEnv).get(); + + final MetadataChangeProposal propertiesProposal = new MetadataChangeProposal(); + propertiesProposal.setEntityUrn(dataContractUrn); + propertiesProposal.setEntityType(Constants.DATA_CONTRACT_ENTITY_NAME); + propertiesProposal.setSystemMetadata( + new SystemMetadata().setProperties(new StringMap(ImmutableMap.of("appSource", "ui")))); + propertiesProposal.setAspectName(Constants.DATA_CONTRACT_PROPERTIES_ASPECT_NAME); + propertiesProposal.setAspect(GenericRecordUtils.serializeAspect(props)); + propertiesProposal.setChangeType(ChangeType.UPSERT); + + final MetadataChangeProposal statusProposal = new MetadataChangeProposal(); + statusProposal.setEntityUrn(dataContractUrn); + statusProposal.setEntityType(Constants.DATA_CONTRACT_ENTITY_NAME); + statusProposal.setSystemMetadata( + new SystemMetadata().setProperties(new StringMap(ImmutableMap.of("appSource", "ui")))); + statusProposal.setAspectName(Constants.DATA_CONTRACT_STATUS_ASPECT_NAME); + statusProposal.setAspect(GenericRecordUtils.serializeAspect(status)); + statusProposal.setChangeType(ChangeType.UPSERT); + + Mockito.verify(mockClient, Mockito.times(1)) + .batchIngestProposals( + 
any(OperationContext.class), + Mockito.eq(ImmutableList.of(propertiesProposal, statusProposal)), + Mockito.eq(false)); + + Assert.assertEquals(result.getUrn(), TEST_CONTRACT_URN.toString()); + } + + @Test + public void testGetSuccessUpdate() throws Exception { + + DataContractProperties props = new DataContractProperties(); + props.setEntity(TEST_DATASET_URN); + props.setDataQuality( + new DataQualityContractArray( + ImmutableList.of(new DataQualityContract().setAssertion(TEST_QUALITY_ASSERTION_URN)))); + props.setFreshness( + new FreshnessContractArray( + ImmutableList.of(new FreshnessContract().setAssertion(TEST_FRESHNESS_ASSERTION_URN)))); + props.setSchema( + new SchemaContractArray( + ImmutableList.of(new SchemaContract().setAssertion(TEST_SCHEMA_ASSERTION_URN)))); + + DataContractStatus status = new DataContractStatus(); + status.setState(com.linkedin.datacontract.DataContractState.ACTIVE); + + // Update resolver + EntityClient mockClient = Mockito.mock(EntityClient.class); + GraphClient mockGraphClient = Mockito.mock(GraphClient.class); + initMockGraphClient(mockGraphClient, TEST_CONTRACT_URN); + initMockEntityClient(mockClient, TEST_CONTRACT_URN, props); // Contract Exists + UpsertDataContractResolver resolver = + new UpsertDataContractResolver(mockClient, mockGraphClient); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(TEST_VALID_UPDATE_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + DataContract result = resolver.get(mockEnv).get(); + + final MetadataChangeProposal propertiesProposal = new MetadataChangeProposal(); + propertiesProposal.setEntityUrn(TEST_CONTRACT_URN); + propertiesProposal.setEntityType(Constants.DATA_CONTRACT_ENTITY_NAME); + propertiesProposal.setSystemMetadata( + new SystemMetadata().setProperties(new 
StringMap(ImmutableMap.of("appSource", "ui")))); + propertiesProposal.setAspectName(Constants.DATA_CONTRACT_PROPERTIES_ASPECT_NAME); + propertiesProposal.setAspect(GenericRecordUtils.serializeAspect(props)); + propertiesProposal.setChangeType(ChangeType.UPSERT); + + final MetadataChangeProposal statusProposal = new MetadataChangeProposal(); + statusProposal.setEntityUrn(TEST_CONTRACT_URN); + statusProposal.setEntityType(Constants.DATA_CONTRACT_ENTITY_NAME); + statusProposal.setSystemMetadata( + new SystemMetadata().setProperties(new StringMap(ImmutableMap.of("appSource", "ui")))); + statusProposal.setAspectName(Constants.DATA_CONTRACT_STATUS_ASPECT_NAME); + statusProposal.setAspect(GenericRecordUtils.serializeAspect(status)); + statusProposal.setChangeType(ChangeType.UPSERT); + + Mockito.verify(mockClient, Mockito.times(1)) + .batchIngestProposals( + any(OperationContext.class), + Mockito.eq(ImmutableList.of(propertiesProposal, statusProposal)), + Mockito.eq(false)); + + Assert.assertEquals(result.getUrn(), TEST_CONTRACT_URN.toString()); + } + + @Test + public void testGetFailureEntityDoesNotExist() throws Exception { + // Update resolver + EntityClient mockClient = Mockito.mock(EntityClient.class); + GraphClient mockGraphClient = Mockito.mock(GraphClient.class); + initMockGraphClient(mockGraphClient, TEST_CONTRACT_URN); + Mockito.when(mockClient.exists(any(OperationContext.class), Mockito.eq(TEST_DATASET_URN))) + .thenReturn(false); + UpsertDataContractResolver resolver = + new UpsertDataContractResolver(mockClient, mockGraphClient); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(TEST_CREATE_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + Assert.assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + } + + @Test + public void 
testGetFailureAssertionDoesNotExist() throws Exception { + // Update resolver + EntityClient mockClient = Mockito.mock(EntityClient.class); + GraphClient mockGraphClient = Mockito.mock(GraphClient.class); + initMockGraphClient(mockGraphClient, TEST_CONTRACT_URN); + Mockito.when(mockClient.exists(any(OperationContext.class), Mockito.eq(TEST_DATASET_URN))) + .thenReturn(true); + Mockito.when( + mockClient.exists( + any(OperationContext.class), Mockito.eq(TEST_FRESHNESS_ASSERTION_URN))) + .thenReturn(false); + Mockito.when( + mockClient.exists(any(OperationContext.class), Mockito.eq(TEST_QUALITY_ASSERTION_URN))) + .thenReturn(false); + Mockito.when( + mockClient.exists(any(OperationContext.class), Mockito.eq(TEST_SCHEMA_ASSERTION_URN))) + .thenReturn(false); + UpsertDataContractResolver resolver = + new UpsertDataContractResolver(mockClient, mockGraphClient); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(TEST_CREATE_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + Assert.assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + } + + @Test + public void testGetUnauthorized() throws Exception { + // Create resolver + EntityClient mockClient = Mockito.mock(EntityClient.class); + GraphClient mockGraphClient = Mockito.mock(GraphClient.class); + UpsertDataContractResolver resolver = + new UpsertDataContractResolver(mockClient, mockGraphClient); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + QueryContext mockContext = getMockDenyContext(); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(TEST_CREATE_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + Mockito.verify(mockClient, 
Mockito.times(0)) + .ingestProposal(any(OperationContext.class), Mockito.any()); + } + + @Test + public void testGetEntityClientException() throws Exception { + // Create resolver + EntityClient mockClient = Mockito.mock(EntityClient.class); + GraphClient mockGraphClient = Mockito.mock(GraphClient.class); + Mockito.doThrow(RemoteInvocationException.class) + .when(mockClient) + .ingestProposal(any(OperationContext.class), Mockito.any(), Mockito.eq(false)); + UpsertDataContractResolver resolver = + new UpsertDataContractResolver(mockClient, mockGraphClient); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + QueryContext mockContext = getMockAllowContext(); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(TEST_CREATE_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + } + + private void initMockGraphClient(GraphClient client, Urn existingContractUrn) { + if (existingContractUrn != null) { + Mockito.when( + client.getRelatedEntities( + Mockito.eq(TEST_DATASET_URN.toString()), + Mockito.eq(ImmutableList.of(CONTRACT_FOR_RELATIONSHIP)), + Mockito.eq(RelationshipDirection.INCOMING), + Mockito.eq(0), + Mockito.eq(1), + Mockito.anyString())) + .thenReturn( + new EntityRelationships() + .setTotal(1) + .setCount(1) + .setStart(0) + .setRelationships( + new EntityRelationshipArray( + ImmutableList.of( + new EntityRelationship() + .setEntity(existingContractUrn) + .setType(CONTRACT_FOR_RELATIONSHIP) + .setCreated( + new AuditStamp().setActor(TEST_ACTOR_URN).setTime(0L)))))); + } else { + Mockito.when( + client.getRelatedEntities( + Mockito.eq(TEST_DATASET_URN.toString()), + Mockito.eq(ImmutableList.of(CONTRACT_FOR_RELATIONSHIP)), + Mockito.eq(RelationshipDirection.INCOMING), + Mockito.eq(0), + Mockito.eq(1), + Mockito.anyString())) + .thenReturn( + new EntityRelationships() + .setTotal(0) + .setCount(0) + 
.setStart(0) + .setRelationships(new EntityRelationshipArray(Collections.emptyList()))); + } + } + + private void initMockEntityClient( + EntityClient client, Urn existingContractUrn, DataContractProperties newContractProperties) + throws Exception { + if (existingContractUrn != null) { + Mockito.when(client.exists(any(OperationContext.class), Mockito.eq(existingContractUrn))) + .thenReturn(true); + } + Mockito.when(client.exists(any(OperationContext.class), Mockito.eq(TEST_DATASET_URN))) + .thenReturn(true); + Mockito.when(client.exists(any(OperationContext.class), Mockito.eq(TEST_QUALITY_ASSERTION_URN))) + .thenReturn(true); + Mockito.when( + client.exists(any(OperationContext.class), Mockito.eq(TEST_FRESHNESS_ASSERTION_URN))) + .thenReturn(true); + Mockito.when(client.exists(any(OperationContext.class), Mockito.eq(TEST_SCHEMA_ASSERTION_URN))) + .thenReturn(true); + + Mockito.when( + client.getV2( + any(OperationContext.class), + Mockito.eq(Constants.DATA_CONTRACT_ENTITY_NAME), + Mockito.eq(TEST_CONTRACT_URN), + Mockito.eq(null))) + .thenReturn( + new EntityResponse() + .setUrn(TEST_CONTRACT_URN) + .setAspects( + new EnvelopedAspectMap( + ImmutableMap.of( + Constants.DATA_CONTRACT_PROPERTIES_ASPECT_NAME, + new EnvelopedAspect() + .setType(AspectType.VERSIONED) + .setName(Constants.DATA_CONTRACT_ENTITY_NAME) + .setValue(new Aspect(newContractProperties.data())))))); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/datacontract/DataContractMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/datacontract/DataContractMapperTest.java new file mode 100644 index 00000000000000..49f5a985ea4a3c --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/datacontract/DataContractMapperTest.java @@ -0,0 +1,180 @@ +package com.linkedin.datahub.graphql.types.datacontract; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; 
+import static org.testng.Assert.assertNull; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.StringMap; +import com.linkedin.datacontract.DataContractProperties; +import com.linkedin.datacontract.DataContractState; +import com.linkedin.datacontract.DataContractStatus; +import com.linkedin.datacontract.DataQualityContract; +import com.linkedin.datacontract.DataQualityContractArray; +import com.linkedin.datacontract.FreshnessContract; +import com.linkedin.datacontract.FreshnessContractArray; +import com.linkedin.datacontract.SchemaContract; +import com.linkedin.datacontract.SchemaContractArray; +import com.linkedin.datahub.graphql.generated.DataContract; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.metadata.Constants; +import java.util.Collections; +import org.testng.annotations.Test; + +public class DataContractMapperTest { + + @Test + public void testMapAllFields() throws Exception { + EntityResponse entityResponse = new EntityResponse(); + Urn urn = Urn.createFromString("urn:li:dataContract:1"); + Urn dataQualityAssertionUrn = Urn.createFromString("urn:li:assertion:quality"); + Urn dataQualityAssertionUrn2 = Urn.createFromString("urn:li:assertion:quality2"); + + Urn freshnessAssertionUrn = Urn.createFromString("urn:li:assertion:freshness"); + Urn schemaAssertionUrn = Urn.createFromString("urn:li:assertion:schema"); + Urn datasetUrn = + Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:snowflake,test,PROD)"); + entityResponse.setUrn(urn); + + EnvelopedAspect envelopedDataContractProperties = new EnvelopedAspect(); + DataContractProperties dataContractProperties = new DataContractProperties(); + dataContractProperties.setDataQuality( 
+ new DataQualityContractArray( + ImmutableList.of( + new DataQualityContract().setAssertion(dataQualityAssertionUrn), + new DataQualityContract().setAssertion(dataQualityAssertionUrn2)))); + dataContractProperties.setFreshness( + new FreshnessContractArray( + ImmutableList.of(new FreshnessContract().setAssertion(freshnessAssertionUrn)))); + dataContractProperties.setSchema( + new SchemaContractArray( + ImmutableList.of(new SchemaContract().setAssertion(schemaAssertionUrn)))); + + dataContractProperties.setEntity(datasetUrn); + + envelopedDataContractProperties.setValue(new Aspect(dataContractProperties.data())); + + EnvelopedAspect envelopedDataContractStatus = new EnvelopedAspect(); + DataContractStatus status = new DataContractStatus(); + status.setState(DataContractState.PENDING); + status.setCustomProperties(new StringMap(ImmutableMap.of("key", "value"))); + + envelopedDataContractStatus.setValue(new Aspect(status.data())); + entityResponse.setAspects( + new EnvelopedAspectMap( + Collections.singletonMap( + Constants.DATA_CONTRACT_PROPERTIES_ASPECT_NAME, envelopedDataContractProperties))); + + entityResponse.setAspects( + new EnvelopedAspectMap( + ImmutableMap.of( + Constants.DATA_CONTRACT_PROPERTIES_ASPECT_NAME, + envelopedDataContractProperties, + Constants.DATA_CONTRACT_STATUS_ASPECT_NAME, + envelopedDataContractStatus))); + + DataContract dataContract = DataContractMapper.mapContract(entityResponse); + assertNotNull(dataContract); + assertEquals(dataContract.getUrn(), urn.toString()); + assertEquals(dataContract.getType(), EntityType.DATA_CONTRACT); + assertEquals( + dataContract.getStatus().getState(), + com.linkedin.datahub.graphql.generated.DataContractState.PENDING); + assertEquals(dataContract.getProperties().getEntityUrn(), datasetUrn.toString()); + assertEquals(dataContract.getProperties().getDataQuality().size(), 2); + assertEquals( + dataContract.getProperties().getDataQuality().get(0).getAssertion().getUrn(), + 
dataQualityAssertionUrn.toString()); + assertEquals( + dataContract.getProperties().getDataQuality().get(1).getAssertion().getUrn(), + dataQualityAssertionUrn2.toString()); + assertEquals(dataContract.getProperties().getFreshness().size(), 1); + assertEquals( + dataContract.getProperties().getFreshness().get(0).getAssertion().getUrn(), + freshnessAssertionUrn.toString()); + assertEquals(dataContract.getProperties().getSchema().size(), 1); + assertEquals( + dataContract.getProperties().getSchema().get(0).getAssertion().getUrn(), + schemaAssertionUrn.toString()); + } + + @Test + public void testMapRequiredFields() throws Exception { + EntityResponse entityResponse = new EntityResponse(); + Urn urn = Urn.createFromString("urn:li:dataContract:1"); + Urn datasetUrn = + Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:snowflake,test,PROD)"); + entityResponse.setUrn(urn); + + EnvelopedAspect envelopedDataContractProperties = new EnvelopedAspect(); + DataContractProperties dataContractProperties = new DataContractProperties(); + dataContractProperties.setEntity(datasetUrn); + envelopedDataContractProperties.setValue(new Aspect(dataContractProperties.data())); + + EnvelopedAspect envelopedDataContractStatus = new EnvelopedAspect(); + DataContractStatus status = new DataContractStatus(); + status.setState(DataContractState.PENDING); + status.setCustomProperties(new StringMap(ImmutableMap.of("key", "value"))); + + envelopedDataContractStatus.setValue(new Aspect(status.data())); + entityResponse.setAspects( + new EnvelopedAspectMap( + Collections.singletonMap( + Constants.DATA_CONTRACT_PROPERTIES_ASPECT_NAME, envelopedDataContractProperties))); + + entityResponse.setAspects( + new EnvelopedAspectMap( + ImmutableMap.of( + Constants.DATA_CONTRACT_PROPERTIES_ASPECT_NAME, + envelopedDataContractProperties, + Constants.DATA_CONTRACT_STATUS_ASPECT_NAME, + envelopedDataContractStatus))); + + DataContract dataContract = DataContractMapper.mapContract(entityResponse); + 
assertNotNull(dataContract); + assertEquals(dataContract.getUrn(), urn.toString()); + assertEquals(dataContract.getType(), EntityType.DATA_CONTRACT); + assertEquals( + dataContract.getStatus().getState(), + com.linkedin.datahub.graphql.generated.DataContractState.PENDING); + assertEquals(dataContract.getProperties().getEntityUrn(), datasetUrn.toString()); + assertNull(dataContract.getProperties().getDataQuality()); + assertNull(dataContract.getProperties().getSchema()); + assertNull(dataContract.getProperties().getFreshness()); + } + + @Test + public void testMapNoStatus() throws Exception { + EntityResponse entityResponse = new EntityResponse(); + Urn urn = Urn.createFromString("urn:li:dataContract:1"); + Urn datasetUrn = + Urn.createFromString("urn:li:dataset:(urn:li:dataPlatform:snowflake,test,PROD)"); + entityResponse.setUrn(urn); + + EnvelopedAspect envelopedDataContractProperties = new EnvelopedAspect(); + DataContractProperties dataContractProperties = new DataContractProperties(); + dataContractProperties.setEntity(datasetUrn); + envelopedDataContractProperties.setValue(new Aspect(dataContractProperties.data())); + + entityResponse.setAspects( + new EnvelopedAspectMap( + ImmutableMap.of( + Constants.DATA_CONTRACT_PROPERTIES_ASPECT_NAME, envelopedDataContractProperties))); + + DataContract dataContract = DataContractMapper.mapContract(entityResponse); + assertNotNull(dataContract); + assertEquals(dataContract.getUrn(), urn.toString()); + assertEquals(dataContract.getType(), EntityType.DATA_CONTRACT); + assertNull(dataContract.getStatus()); + assertEquals(dataContract.getProperties().getEntityUrn(), datasetUrn.toString()); + assertNull(dataContract.getProperties().getDataQuality()); + assertNull(dataContract.getProperties().getSchema()); + assertNull(dataContract.getProperties().getFreshness()); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/datacontract/DataContractTypeTest.java 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/datacontract/DataContractTypeTest.java new file mode 100644 index 00000000000000..241775e5ab48cc --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/datacontract/DataContractTypeTest.java @@ -0,0 +1,152 @@ +package com.linkedin.datahub.graphql.types.datacontract; + +import static com.linkedin.datahub.graphql.TestUtils.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.nullable; +import static org.testng.Assert.*; + +import com.datahub.authentication.Authentication; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datacontract.DataContractProperties; +import com.linkedin.datacontract.DataContractState; +import com.linkedin.datacontract.DataContractStatus; +import com.linkedin.datacontract.DataQualityContract; +import com.linkedin.datacontract.DataQualityContractArray; +import com.linkedin.datacontract.FreshnessContract; +import com.linkedin.datacontract.FreshnessContractArray; +import com.linkedin.datacontract.SchemaContract; +import com.linkedin.datacontract.SchemaContractArray; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.DataContract; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.key.DataContractKey; +import com.linkedin.r2.RemoteInvocationException; +import graphql.execution.DataFetcherResult; +import io.datahubproject.metadata.context.OperationContext; +import 
java.util.HashSet; +import java.util.List; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +public class DataContractTypeTest { + + private static final Urn TEST_DATASET_URN = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:snowflake,test,PROD)"); + private static final Urn DATA_QUALITY_ASSERTION_URN = UrnUtils.getUrn("urn:li:assertion:quality"); + private static final Urn FRESHNESS_ASSERTION_URN = UrnUtils.getUrn("urn:li:assertion:freshness"); + private static final Urn SCHEMA_ASSERTION_URN = UrnUtils.getUrn("urn:li:assertion:schema"); + private static final String TEST_DATA_CONTRACT_1_URN = "urn:li:dataContract:id-1"; + private static final DataContractKey TEST_DATA_CONTRACT_1_KEY = + new DataContractKey().setId("id-1"); + private static final DataContractProperties TEST_DATA_CONTRACT_1_PROPERTIES = + new DataContractProperties() + .setEntity(TEST_DATASET_URN) + .setDataQuality( + new DataQualityContractArray( + ImmutableList.of( + new DataQualityContract().setAssertion(DATA_QUALITY_ASSERTION_URN)))) + .setFreshness( + new FreshnessContractArray( + ImmutableList.of(new FreshnessContract().setAssertion(FRESHNESS_ASSERTION_URN)))) + .setSchema( + new SchemaContractArray( + ImmutableList.of(new SchemaContract().setAssertion(SCHEMA_ASSERTION_URN)))); + private static final DataContractStatus TEST_DATA_CONTRACT_1_STATUS = + new DataContractStatus().setState(DataContractState.ACTIVE); + + private static final String TEST_DATA_CONTRACT_2_URN = "urn:li:dataContract:id-2"; + + @Test + public void testBatchLoad() throws Exception { + + EntityClient client = Mockito.mock(EntityClient.class); + + Urn dataContractUrn1 = Urn.createFromString(TEST_DATA_CONTRACT_1_URN); + Urn dataContractUrn2 = Urn.createFromString(TEST_DATA_CONTRACT_2_URN); + + Mockito.when( + client.batchGetV2( + any(OperationContext.class), + Mockito.eq(Constants.DATA_CONTRACT_ENTITY_NAME), + Mockito.eq(new HashSet<>(ImmutableSet.of(dataContractUrn1, dataContractUrn2))), + 
Mockito.eq(DataContractType.ASPECTS_TO_FETCH))) + .thenReturn( + ImmutableMap.of( + dataContractUrn1, + new EntityResponse() + .setEntityName(Constants.DATA_CONTRACT_ENTITY_NAME) + .setUrn(dataContractUrn1) + .setAspects( + new EnvelopedAspectMap( + ImmutableMap.of( + Constants.DATA_CONTRACT_KEY_ASPECT_NAME, + new EnvelopedAspect() + .setValue(new Aspect(TEST_DATA_CONTRACT_1_KEY.data())), + Constants.DATA_CONTRACT_PROPERTIES_ASPECT_NAME, + new EnvelopedAspect() + .setValue(new Aspect(TEST_DATA_CONTRACT_1_PROPERTIES.data())), + Constants.DATA_CONTRACT_STATUS_ASPECT_NAME, + new EnvelopedAspect() + .setValue(new Aspect(TEST_DATA_CONTRACT_1_STATUS.data()))))))); + + DataContractType type = new DataContractType(client); + + QueryContext mockContext = getMockAllowContext(); + List> result = + type.batchLoad( + ImmutableList.of(TEST_DATA_CONTRACT_1_URN, TEST_DATA_CONTRACT_2_URN), mockContext); + + // Verify response + Mockito.verify(client, Mockito.times(1)) + .batchGetV2( + any(OperationContext.class), + Mockito.eq(Constants.DATA_CONTRACT_ENTITY_NAME), + Mockito.eq(ImmutableSet.of(dataContractUrn1, dataContractUrn2)), + Mockito.eq(DataContractType.ASPECTS_TO_FETCH)); + + assertEquals(result.size(), 2); + + DataContract dataContract1 = result.get(0).getData(); + assertEquals(dataContract1.getUrn(), TEST_DATA_CONTRACT_1_URN); + assertEquals(dataContract1.getType(), EntityType.DATA_CONTRACT); + assertEquals(dataContract1.getProperties().getEntityUrn(), TEST_DATASET_URN.toString()); + assertEquals(dataContract1.getProperties().getDataQuality().size(), 1); + assertEquals(dataContract1.getProperties().getSchema().size(), 1); + assertEquals(dataContract1.getProperties().getFreshness().size(), 1); + + // Assert second element is null. 
+ assertNull(result.get(1)); + } + + @Test + public void testBatchLoadClientException() throws Exception { + EntityClient mockClient = Mockito.mock(EntityClient.class); + Mockito.doThrow(RemoteInvocationException.class) + .when(mockClient) + .batchGetV2( + nullable(OperationContext.class), + Mockito.anyString(), + Mockito.anySet(), + Mockito.anySet()); + DataContractType type = new DataContractType(mockClient); + + // Execute Batch load + QueryContext context = Mockito.mock(QueryContext.class); + Mockito.when(context.getAuthentication()).thenReturn(Mockito.mock(Authentication.class)); + assertThrows( + RuntimeException.class, + () -> + type.batchLoad( + ImmutableList.of(TEST_DATA_CONTRACT_1_URN, TEST_DATA_CONTRACT_2_URN), context)); + } +} diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index 79ae0fbeacd940..752bf44cf43549 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -363,6 +363,12 @@ public class Constants { public static final String DATAHUB_CONNECTION_ENTITY_NAME = "dataHubConnection"; public static final String DATAHUB_CONNECTION_DETAILS_ASPECT_NAME = "dataHubConnectionDetails"; + // Data Contracts + public static final String DATA_CONTRACT_ENTITY_NAME = "dataContract"; + public static final String DATA_CONTRACT_PROPERTIES_ASPECT_NAME = "dataContractProperties"; + public static final String DATA_CONTRACT_KEY_ASPECT_NAME = "dataContractKey"; + public static final String DATA_CONTRACT_STATUS_ASPECT_NAME = "dataContractStatus"; + // Relationships public static final String IS_MEMBER_OF_GROUP_RELATIONSHIP_NAME = "IsMemberOfGroup"; public static final String IS_MEMBER_OF_NATIVE_GROUP_RELATIONSHIP_NAME = "IsMemberOfNativeGroup"; diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java 
b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java index 06ac8b6f307161..24fa4ec080cfa0 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java @@ -347,6 +347,12 @@ public class PoliciesConfig { "Edit Dataset Queries", "The ability to edit the Queries for a Dataset."); + public static final Privilege EDIT_ENTITY_DATA_CONTRACT_PRIVILEGE = + Privilege.of( + "EDIT_ENTITY_DATA_CONTRACT", + "Edit Data Contract", + "The ability to edit the Data Contract for an entity."); + // Tag Privileges public static final Privilege EDIT_TAG_COLOR_PRIVILEGE = Privilege.of("EDIT_TAG_COLOR", "Edit Tag Color", "The ability to change the color of a Tag."); From 1b6763fdf7a181607729d78f3745c528a8e522a9 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Fri, 7 Jun 2024 13:37:49 -0500 Subject: [PATCH 3/5] fix(openapiv3): v3 scroll response fix (#10654) Co-authored-by: Kevin Chun --- .../openapi-analytics-servlet/build.gradle | 1 + .../openapi-entity-servlet/build.gradle | 1 + .../openapi-servlet/models/build.gradle | 8 + .../controller/GenericEntitiesController.java | 641 ++++++++++++++++ .../exception/UnauthorizedException.java | 0 .../openapi/models/GenericEntity.java | 7 + .../models/GenericEntityScrollResult.java | 3 + .../{v2 => }/models/GenericScrollResult.java | 2 +- .../v2/models/BatchGetUrnResponse.java | 2 +- .../models/GenericEntityScrollResultV2.java | 15 + ...enericEntity.java => GenericEntityV2.java} | 9 +- .../models/GenericEntityScrollResultV3.java | 15 + .../openapi/v3/models/GenericEntityV3.java | 77 ++ .../v2/controller/EntityController.java | 709 +++--------------- .../v2/controller/RelationshipController.java | 2 +- .../v2/controller/TimeseriesController.java | 2 +- .../v3/controller/EntityController.java | 203 ++++- 17 files changed, 1071 insertions(+), 
626 deletions(-) create mode 100644 metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java rename metadata-service/openapi-servlet/{ => models}/src/main/java/io/datahubproject/openapi/exception/UnauthorizedException.java (100%) create mode 100644 metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericEntity.java create mode 100644 metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericEntityScrollResult.java rename metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/{v2 => }/models/GenericScrollResult.java (79%) create mode 100644 metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericEntityScrollResultV2.java rename metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/{GenericEntity.java => GenericEntityV2.java} (90%) create mode 100644 metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityScrollResultV3.java create mode 100644 metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityV3.java diff --git a/metadata-service/openapi-analytics-servlet/build.gradle b/metadata-service/openapi-analytics-servlet/build.gradle index 7c6568fa78f642..3a879cb1b0071a 100644 --- a/metadata-service/openapi-analytics-servlet/build.gradle +++ b/metadata-service/openapi-analytics-servlet/build.gradle @@ -9,6 +9,7 @@ dependencies { implementation project(':metadata-service:auth-impl') implementation project(':metadata-service:factories') implementation project(':metadata-service:openapi-servlet') + implementation project(':metadata-service:openapi-servlet:models') implementation project(':metadata-models') implementation externalDependency.springBoot diff --git a/metadata-service/openapi-entity-servlet/build.gradle 
b/metadata-service/openapi-entity-servlet/build.gradle index 016ac6693f55b2..4c2d587a81fd78 100644 --- a/metadata-service/openapi-entity-servlet/build.gradle +++ b/metadata-service/openapi-entity-servlet/build.gradle @@ -9,6 +9,7 @@ dependencies { implementation project(':metadata-service:auth-impl') implementation project(':metadata-service:factories') implementation project(':metadata-service:openapi-servlet') + implementation project(':metadata-service:openapi-servlet:models') implementation project(':metadata-models') implementation externalDependency.servletApi diff --git a/metadata-service/openapi-servlet/models/build.gradle b/metadata-service/openapi-servlet/models/build.gradle index e4100b2d094e04..a0e1a553fe8146 100644 --- a/metadata-service/openapi-servlet/models/build.gradle +++ b/metadata-service/openapi-servlet/models/build.gradle @@ -6,6 +6,14 @@ dependencies { implementation project(':entity-registry') implementation project(':metadata-operation-context') implementation project(':metadata-auth:auth-api') + implementation project(':metadata-service:auth-impl') + implementation project(':metadata-io') + + implementation externalDependency.springWeb + implementation(externalDependency.springDocUI) { + exclude group: 'org.springframework.boot' + } + implementation externalDependency.swaggerAnnotations implementation externalDependency.jacksonDataBind implementation externalDependency.httpClient diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java new file mode 100644 index 00000000000000..a68d87434f7aa5 --- /dev/null +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java @@ -0,0 +1,641 @@ +package io.datahubproject.openapi.controller; + +import static 
com.linkedin.metadata.authorization.ApiOperation.CREATE; +import static com.linkedin.metadata.authorization.ApiOperation.DELETE; +import static com.linkedin.metadata.authorization.ApiOperation.EXISTS; +import static com.linkedin.metadata.authorization.ApiOperation.READ; +import static com.linkedin.metadata.authorization.ApiOperation.UPDATE; + +import com.datahub.authentication.Actor; +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationContext; +import com.datahub.authorization.AuthUtil; +import com.datahub.authorization.AuthorizerChain; +import com.datahub.util.RecordUtils; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.ByteString; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.batch.AspectsBatch; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.patch.GenericJsonPatch; +import com.linkedin.metadata.aspect.patch.template.common.GenericPatchTemplate; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.IngestResult; +import com.linkedin.metadata.entity.UpdateAspectResult; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.query.SearchFlags; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.query.filter.SortOrder; +import com.linkedin.metadata.search.ScrollResult; +import 
com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.metadata.search.SearchService; +import com.linkedin.metadata.utils.AuditStampUtils; +import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.metadata.utils.SearchUtil; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RequestContext; +import io.datahubproject.openapi.exception.UnauthorizedException; +import io.datahubproject.openapi.models.GenericEntity; +import io.datahubproject.openapi.models.GenericEntityScrollResult; +import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; +import java.lang.reflect.InvocationTargetException; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.DeleteMapping; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PatchMapping; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestMethod; +import org.springframework.web.bind.annotation.RequestParam; + +public abstract class GenericEntitiesController< + E extends GenericEntity, S extends GenericEntityScrollResult> { + protected static 
final SearchFlags DEFAULT_SEARCH_FLAGS = + new SearchFlags().setFulltext(false).setSkipAggregates(true).setSkipHighlighting(true); + + @Autowired protected EntityRegistry entityRegistry; + @Autowired protected SearchService searchService; + @Autowired protected EntityService entityService; + @Autowired protected AuthorizerChain authorizationChain; + @Autowired protected ObjectMapper objectMapper; + + @Qualifier("systemOperationContext") + @Autowired + protected OperationContext systemOperationContext; + + /** + * Returns scroll result entities + * + * @param searchEntities the entities to contain in the result + * @param aspectNames the aspect names present + * @param withSystemMetadata whether to include system metadata in the result + * @param scrollId the pagination token + * @return result containing entities/aspects + * @throws URISyntaxException parsing error + */ + protected abstract S buildScrollResult( + @Nonnull OperationContext opContext, + SearchEntityArray searchEntities, + Set aspectNames, + boolean withSystemMetadata, + @Nullable String scrollId) + throws URISyntaxException; + + protected abstract List buildEntityList( + @Nonnull OperationContext opContext, + List urns, + Set aspectNames, + boolean withSystemMetadata) + throws URISyntaxException; + + protected abstract List buildEntityList( + Set ingestResults, boolean withSystemMetadata); + + protected abstract E buildGenericEntity( + @Nonnull String aspectName, + @Nonnull UpdateAspectResult updateAspectResult, + boolean withSystemMetadata); + + protected abstract AspectsBatch toMCPBatch( + @Nonnull OperationContext opContext, String entityArrayList, Actor actor) + throws JsonProcessingException, URISyntaxException; + + @Tag(name = "Generic Entities", description = "API for interacting with generic entities.") + @GetMapping(value = "/{entityName}", produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "Scroll entities") + public ResponseEntity getEntities( + @PathVariable("entityName") 
String entityName, + @RequestParam(value = "aspectNames", defaultValue = "") Set aspectNames, + @RequestParam(value = "count", defaultValue = "10") Integer count, + @RequestParam(value = "query", defaultValue = "*") String query, + @RequestParam(value = "scrollId", required = false) String scrollId, + @RequestParam(value = "sort", required = false, defaultValue = "urn") String sortField, + @RequestParam(value = "sortOrder", required = false, defaultValue = "ASCENDING") + String sortOrder, + @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") + Boolean withSystemMetadata, + @RequestParam(value = "skipCache", required = false, defaultValue = "false") + Boolean skipCache) + throws URISyntaxException { + + EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); + Authentication authentication = AuthenticationContext.getAuthentication(); + + if (!AuthUtil.isAPIAuthorizedEntityType(authentication, authorizationChain, READ, entityName)) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); + } + + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder().buildOpenapi("getEntities", entityName), + authorizationChain, + authentication, + true); + + // TODO: support additional and multiple sort params + SortCriterion sortCriterion = SearchUtil.sortBy(sortField, SortOrder.valueOf(sortOrder)); + + ScrollResult result = + searchService.scrollAcrossEntities( + opContext + .withSearchFlags(flags -> DEFAULT_SEARCH_FLAGS) + .withSearchFlags(flags -> flags.setSkipCache(skipCache)), + List.of(entitySpec.getName()), + query, + null, + sortCriterion, + scrollId, + null, + count); + + if (!AuthUtil.isAPIAuthorizedResult(authentication, authorizationChain, result)) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); + } + + return ResponseEntity.ok( + 
buildScrollResult( + opContext, + result.getEntities(), + aspectNames, + withSystemMetadata, + result.getScrollId())); + } + + @Tag(name = "Generic Entities") + @GetMapping( + value = "/{entityName}/{entityUrn:urn:li:.+}", + produces = MediaType.APPLICATION_JSON_VALUE) + public ResponseEntity getEntity( + @PathVariable("entityName") String entityName, + @PathVariable("entityUrn") String entityUrn, + @RequestParam(value = "aspectNames", defaultValue = "") Set aspectNames, + @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") + Boolean withSystemMetadata) + throws URISyntaxException { + + Urn urn = UrnUtils.getUrn(entityUrn); + Authentication authentication = AuthenticationContext.getAuthentication(); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + authentication, authorizationChain, READ, List.of(urn))) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); + } + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder().buildOpenapi("getEntity", entityName), + authorizationChain, + authentication, + true); + + return ResponseEntity.of( + buildEntityList(opContext, List.of(urn), aspectNames, withSystemMetadata).stream() + .findFirst()); + } + + @Tag(name = "Generic Entities") + @RequestMapping( + value = "/{entityName}/{entityUrn}", + method = {RequestMethod.HEAD}) + @Operation(summary = "Entity exists") + public ResponseEntity headEntity( + @PathVariable("entityName") String entityName, @PathVariable("entityUrn") String entityUrn) { + + Urn urn = UrnUtils.getUrn(entityUrn); + Authentication authentication = AuthenticationContext.getAuthentication(); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + authentication, authorizationChain, EXISTS, List.of(urn))) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + EXISTS + " entities."); + } + OperationContext opContext = + 
OperationContext.asSession( + systemOperationContext, + RequestContext.builder().buildOpenapi("headEntity", entityName), + authorizationChain, + authentication, + true); + + return exists(opContext, urn, null) + ? ResponseEntity.noContent().build() + : ResponseEntity.notFound().build(); + } + + @Tag(name = "Generic Aspects", description = "API for generic aspects.") + @GetMapping( + value = "/{entityName}/{entityUrn}/{aspectName}", + produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "Get an entity's generic aspect.") + public ResponseEntity getAspect( + @PathVariable("entityName") String entityName, + @PathVariable("entityUrn") String entityUrn, + @PathVariable("aspectName") String aspectName, + @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") + Boolean withSystemMetadata) + throws URISyntaxException { + + Urn urn = UrnUtils.getUrn(entityUrn); + Authentication authentication = AuthenticationContext.getAuthentication(); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + authentication, authorizationChain, READ, List.of(urn))) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); + } + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder().buildOpenapi("getAspect", entityName), + authorizationChain, + authentication, + true); + + return ResponseEntity.of( + buildEntityList(opContext, List.of(urn), Set.of(aspectName), withSystemMetadata).stream() + .findFirst() + .flatMap( + e -> + e.getAspects().entrySet().stream() + .filter( + entry -> + entry.getKey().equals(lookupAspectSpec(urn, aspectName).getName())) + .map(Map.Entry::getValue) + .findFirst())); + } + + @Tag(name = "Generic Aspects") + @RequestMapping( + value = "/{entityName}/{entityUrn}/{aspectName}", + method = {RequestMethod.HEAD}) + @Operation(summary = "Whether an entity aspect exists.") + public ResponseEntity headAspect( + 
@PathVariable("entityName") String entityName, + @PathVariable("entityUrn") String entityUrn, + @PathVariable("aspectName") String aspectName) { + + Urn urn = UrnUtils.getUrn(entityUrn); + Authentication authentication = AuthenticationContext.getAuthentication(); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + authentication, authorizationChain, EXISTS, List.of(urn))) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + EXISTS + " entities."); + } + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder().buildOpenapi("headAspect", entityName), + authorizationChain, + authentication, + true); + + return exists(opContext, urn, lookupAspectSpec(urn, aspectName).getName()) + ? ResponseEntity.noContent().build() + : ResponseEntity.notFound().build(); + } + + @Tag(name = "Generic Entities") + @DeleteMapping(value = "/{entityName}/{entityUrn}") + @Operation(summary = "Delete an entity") + public void deleteEntity( + @PathVariable("entityName") String entityName, @PathVariable("entityUrn") String entityUrn) { + + EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); + Urn urn = UrnUtils.getUrn(entityUrn); + Authentication authentication = AuthenticationContext.getAuthentication(); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + authentication, authorizationChain, DELETE, List.of(urn))) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + DELETE + " entities."); + } + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder().buildOpenapi("deleteEntity", entityName), + authorizationChain, + authentication, + true); + + entityService.deleteAspect(opContext, entityUrn, entitySpec.getKeyAspectName(), Map.of(), true); + } + + @Tag(name = "Generic Entities") + @PostMapping(value = "/{entityName}", produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = 
"Create a batch of entities.") + public ResponseEntity> createEntity( + @PathVariable("entityName") String entityName, + @RequestParam(value = "async", required = false, defaultValue = "true") Boolean async, + @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") + Boolean withSystemMetadata, + @RequestBody @Nonnull String jsonEntityList) + throws URISyntaxException, JsonProcessingException { + + Authentication authentication = AuthenticationContext.getAuthentication(); + + if (!AuthUtil.isAPIAuthorizedEntityType( + authentication, authorizationChain, CREATE, entityName)) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + CREATE + " entities."); + } + + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder().buildOpenapi("createEntity", entityName), + authorizationChain, + authentication, + true); + + AspectsBatch batch = toMCPBatch(opContext, jsonEntityList, authentication.getActor()); + Set results = entityService.ingestProposal(opContext, batch, async); + + if (!async) { + return ResponseEntity.ok(buildEntityList(results, withSystemMetadata)); + } else { + return ResponseEntity.accepted().body(buildEntityList(results, withSystemMetadata)); + } + } + + @Tag(name = "Generic Aspects") + @DeleteMapping(value = "/{entityName}/{entityUrn}/{aspectName}") + @Operation(summary = "Delete an entity aspect.") + public void deleteAspect( + @PathVariable("entityName") String entityName, + @PathVariable("entityUrn") String entityUrn, + @PathVariable("aspectName") String aspectName) { + + Urn urn = UrnUtils.getUrn(entityUrn); + Authentication authentication = AuthenticationContext.getAuthentication(); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + authentication, authorizationChain, DELETE, List.of(urn))) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + DELETE + " entities."); + } + 
OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder().buildOpenapi("deleteAspect", entityName), + authorizationChain, + authentication, + true); + + entityService.deleteAspect( + opContext, entityUrn, lookupAspectSpec(urn, aspectName).getName(), Map.of(), true); + } + + @Tag(name = "Generic Aspects") + @PostMapping( + value = "/{entityName}/{entityUrn}/{aspectName}", + produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "Create an entity aspect.") + public ResponseEntity createAspect( + @PathVariable("entityName") String entityName, + @PathVariable("entityUrn") String entityUrn, + @PathVariable("aspectName") String aspectName, + @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") + Boolean withSystemMetadata, + @RequestParam(value = "createIfNotExists", required = false, defaultValue = "false") + Boolean createIfNotExists, + @RequestBody @Nonnull String jsonAspect) + throws URISyntaxException { + + Urn urn = UrnUtils.getUrn(entityUrn); + EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); + Authentication authentication = AuthenticationContext.getAuthentication(); + + if (!AuthUtil.isAPIAuthorizedEntityUrns( + authentication, authorizationChain, CREATE, List.of(urn))) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + CREATE + " entities."); + } + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder().buildOpenapi("createAspect", entityName), + authorizationChain, + authentication, + true); + + AspectSpec aspectSpec = lookupAspectSpec(entitySpec, aspectName); + ChangeMCP upsert = + toUpsertItem( + opContext.getRetrieverContext().get().getAspectRetriever(), + urn, + aspectSpec, + createIfNotExists, + jsonAspect, + authentication.getActor()); + + List results = + entityService.ingestAspects( + opContext, + AspectsBatchImpl.builder() + 
.retrieverContext(opContext.getRetrieverContext().get()) + .items(List.of(upsert)) + .build(), + true, + true); + + return ResponseEntity.of( + results.stream() + .findFirst() + .map(result -> buildGenericEntity(aspectName, result, withSystemMetadata))); + } + + @Tag(name = "Generic Aspects") + @PatchMapping( + value = "/{entityName}/{entityUrn}/{aspectName}", + consumes = "application/json-patch+json", + produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "Patch an entity aspect. (Experimental)") + public ResponseEntity patchAspect( + @PathVariable("entityName") String entityName, + @PathVariable("entityUrn") String entityUrn, + @PathVariable("aspectName") String aspectName, + @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") + Boolean withSystemMetadata, + @RequestBody @Nonnull GenericJsonPatch patch) + throws URISyntaxException, + NoSuchMethodException, + InvocationTargetException, + InstantiationException, + IllegalAccessException { + + Urn urn = UrnUtils.getUrn(entityUrn); + EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); + Authentication authentication = AuthenticationContext.getAuthentication(); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + authentication, authorizationChain, UPDATE, List.of(urn))) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + UPDATE + " entities."); + } + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder().buildOpenapi("patchAspect", entityName), + authorizationChain, + authentication, + true); + + AspectSpec aspectSpec = lookupAspectSpec(entitySpec, aspectName); + RecordTemplate currentValue = entityService.getAspect(opContext, urn, aspectSpec.getName(), 0); + + GenericPatchTemplate genericPatchTemplate = + GenericPatchTemplate.builder() + .genericJsonPatch(patch) + .templateType(aspectSpec.getDataTemplateClass()) + .templateDefault( + 
aspectSpec.getDataTemplateClass().getDeclaredConstructor().newInstance()) + .build(); + ChangeMCP upsert = + toUpsertItem( + opContext.getRetrieverContext().get().getAspectRetriever(), + UrnUtils.getUrn(entityUrn), + aspectSpec, + currentValue, + genericPatchTemplate, + authentication.getActor()); + + List results = + entityService.ingestAspects( + opContext, + AspectsBatchImpl.builder() + .retrieverContext(opContext.getRetrieverContext().get()) + .items(List.of(upsert)) + .build(), + true, + true); + + return ResponseEntity.of( + results.stream() + .findFirst() + .map(result -> buildGenericEntity(aspectSpec.getName(), result, withSystemMetadata))); + } + + protected Boolean exists(@Nonnull OperationContext opContext, Urn urn, @Nullable String aspect) { + return aspect == null + ? entityService.exists(opContext, urn, true) + : entityService.exists(opContext, urn, aspect, true); + } + + protected Set resolveAspectNames(Set urns, Set requestedAspectNames) { + if (requestedAspectNames.isEmpty()) { + return urns.stream() + .flatMap(u -> entityRegistry.getEntitySpec(u.getEntityType()).getAspectSpecs().stream()) + .collect(Collectors.toSet()); + } else { + // ensure key is always present + return Stream.concat( + urns.stream() + .flatMap( + urn -> + requestedAspectNames.stream() + .map(aspectName -> lookupAspectSpec(urn, aspectName))), + urns.stream() + .map(u -> entityRegistry.getEntitySpec(u.getEntityType()).getKeyAspectSpec())) + .collect(Collectors.toSet()); + } + } + + protected Map> toAspectMap( + Urn urn, List aspects, boolean withSystemMetadata) { + return aspects.stream() + .map( + a -> + Map.entry( + a.getName(), + Pair.of( + toRecordTemplate(lookupAspectSpec(urn, a.getName()), a), + withSystemMetadata ? 
a.getSystemMetadata() : null))) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } + + protected AspectSpec lookupAspectSpec(Urn urn, String aspectName) { + return lookupAspectSpec(entityRegistry.getEntitySpec(urn.getEntityType()), aspectName); + } + + protected RecordTemplate toRecordTemplate( + AspectSpec aspectSpec, EnvelopedAspect envelopedAspect) { + return RecordUtils.toRecordTemplate( + aspectSpec.getDataTemplateClass(), envelopedAspect.getValue().data()); + } + + protected ChangeMCP toUpsertItem( + @Nonnull AspectRetriever aspectRetriever, + Urn entityUrn, + AspectSpec aspectSpec, + Boolean createIfNotExists, + String jsonAspect, + Actor actor) + throws URISyntaxException { + return ChangeItemImpl.builder() + .urn(entityUrn) + .aspectName(aspectSpec.getName()) + .changeType(Boolean.TRUE.equals(createIfNotExists) ? ChangeType.CREATE : ChangeType.UPSERT) + .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr())) + .recordTemplate( + GenericRecordUtils.deserializeAspect( + ByteString.copyString(jsonAspect, StandardCharsets.UTF_8), + GenericRecordUtils.JSON, + aspectSpec)) + .build(aspectRetriever); + } + + protected ChangeMCP toUpsertItem( + @Nonnull AspectRetriever aspectRetriever, + @Nonnull Urn urn, + @Nonnull AspectSpec aspectSpec, + @Nullable RecordTemplate currentValue, + @Nonnull GenericPatchTemplate genericPatchTemplate, + @Nonnull Actor actor) { + return ChangeItemImpl.fromPatch( + urn, + aspectSpec, + currentValue, + genericPatchTemplate, + AuditStampUtils.createAuditStamp(actor.toUrnStr()), + aspectRetriever); + } + + /** + * Case-insensitive fallback + * + * @return + */ + protected static AspectSpec lookupAspectSpec(EntitySpec entitySpec, String aspectName) { + return entitySpec.getAspectSpec(aspectName) != null + ? 
entitySpec.getAspectSpec(aspectName) + : entitySpec.getAspectSpecs().stream() + .filter(aspec -> aspec.getName().toLowerCase().equals(aspectName)) + .findFirst() + .get(); + } +} diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/exception/UnauthorizedException.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/exception/UnauthorizedException.java similarity index 100% rename from metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/exception/UnauthorizedException.java rename to metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/exception/UnauthorizedException.java diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericEntity.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericEntity.java new file mode 100644 index 00000000000000..f25f8b89f80267 --- /dev/null +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericEntity.java @@ -0,0 +1,7 @@ +package io.datahubproject.openapi.models; + +import java.util.Map; + +public interface GenericEntity { + Map getAspects(); +} diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericEntityScrollResult.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericEntityScrollResult.java new file mode 100644 index 00000000000000..69b97956e0cf2f --- /dev/null +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericEntityScrollResult.java @@ -0,0 +1,3 @@ +package io.datahubproject.openapi.models; + +public interface GenericEntityScrollResult {} diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericScrollResult.java 
b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericScrollResult.java similarity index 79% rename from metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericScrollResult.java rename to metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericScrollResult.java index 2befc83c003634..7864af3bb4cdde 100644 --- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericScrollResult.java +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericScrollResult.java @@ -1,4 +1,4 @@ -package io.datahubproject.openapi.v2.models; +package io.datahubproject.openapi.models; import java.util.List; import lombok.Builder; diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/BatchGetUrnResponse.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/BatchGetUrnResponse.java index 628733e4fd4ae7..c1fd809ad3649d 100644 --- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/BatchGetUrnResponse.java +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/BatchGetUrnResponse.java @@ -16,5 +16,5 @@ public class BatchGetUrnResponse implements Serializable { @JsonProperty("entities") @Schema(description = "List of entity responses") - List entities; + List entities; } diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericEntityScrollResultV2.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericEntityScrollResultV2.java new file mode 100644 index 00000000000000..685f45c60dbdc8 --- /dev/null +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericEntityScrollResultV2.java 
@@ -0,0 +1,15 @@ +package io.datahubproject.openapi.v2.models; + +import io.datahubproject.openapi.models.GenericEntity; +import io.datahubproject.openapi.models.GenericEntityScrollResult; +import java.util.List; +import lombok.Builder; +import lombok.Data; + +@Data +@Builder +public class GenericEntityScrollResultV2 + implements GenericEntityScrollResult { + private String scrollId; + private List results; +} diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericEntity.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericEntityV2.java similarity index 90% rename from metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericEntity.java rename to metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericEntityV2.java index cb049c5ba131a8..85d404fb57e0e3 100644 --- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericEntity.java +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericEntityV2.java @@ -7,6 +7,7 @@ import com.linkedin.data.template.RecordTemplate; import com.linkedin.mxe.SystemMetadata; import com.linkedin.util.Pair; +import io.datahubproject.openapi.models.GenericEntity; import io.swagger.v3.oas.annotations.media.Schema; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -23,7 +24,7 @@ @JsonInclude(JsonInclude.Include.NON_NULL) @NoArgsConstructor(force = true, access = AccessLevel.PRIVATE) @AllArgsConstructor -public class GenericEntity { +public class GenericEntityV2 implements GenericEntity { @JsonProperty("urn") @Schema(description = "Urn of the entity") private String urn; @@ -32,9 +33,9 @@ public class GenericEntity { @Schema(description = "Map of aspect name to aspect") private Map aspects; - public static class GenericEntityBuilder { + public static 
class GenericEntityV2Builder { - public GenericEntity build( + public GenericEntityV2 build( ObjectMapper objectMapper, Map> aspects) { Map jsonObjectMap = aspects.entrySet().stream() @@ -63,7 +64,7 @@ public GenericEntity build( }) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - return new GenericEntity(urn, jsonObjectMap); + return new GenericEntityV2(urn, jsonObjectMap); } } } diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityScrollResultV3.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityScrollResultV3.java new file mode 100644 index 00000000000000..265095f0f2c6e8 --- /dev/null +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityScrollResultV3.java @@ -0,0 +1,15 @@ +package io.datahubproject.openapi.v3.models; + +import io.datahubproject.openapi.models.GenericEntity; +import io.datahubproject.openapi.models.GenericEntityScrollResult; +import java.util.List; +import lombok.Builder; +import lombok.Data; + +@Data +@Builder +public class GenericEntityScrollResultV3 + implements GenericEntityScrollResult { + private String scrollId; + private List entities; +} diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityV3.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityV3.java new file mode 100644 index 00000000000000..2e030390dd9cbd --- /dev/null +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityV3.java @@ -0,0 +1,77 @@ +package io.datahubproject.openapi.v3.models; + +import com.datahub.util.RecordUtils; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.common.urn.Urn; +import 
com.linkedin.data.template.RecordTemplate; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; +import io.datahubproject.openapi.models.GenericEntity; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.EqualsAndHashCode; + +@EqualsAndHashCode(callSuper = true) +@Data +@Builder +@JsonInclude(JsonInclude.Include.NON_NULL) +@AllArgsConstructor +public class GenericEntityV3 extends LinkedHashMap implements GenericEntity { + + public GenericEntityV3(Map m) { + super(m); + } + + @Override + public Map getAspects() { + return this; + } + + public static class GenericEntityV3Builder { + + public GenericEntityV3 build( + ObjectMapper objectMapper, + @Nonnull Urn urn, + Map> aspects) { + Map jsonObjectMap = + aspects.entrySet().stream() + .map( + e -> { + try { + Map valueMap = + Map.of( + "value", + objectMapper.readTree( + RecordUtils.toJsonString(e.getValue().getFirst()) + .getBytes(StandardCharsets.UTF_8))); + + if (e.getValue().getSecond() != null) { + return Map.entry( + e.getKey(), + Map.of( + "systemMetadata", e.getValue().getSecond(), + "value", valueMap.get("value"))); + } else { + return Map.entry(e.getKey(), Map.of("value", valueMap.get("value"))); + } + } catch (IOException ex) { + throw new RuntimeException(ex); + } + }) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + GenericEntityV3 genericEntityV3 = new GenericEntityV3(); + genericEntityV3.put("urn", urn.toString()); + genericEntityV3.putAll(jsonObjectMap); + return genericEntityV3; + } + } +} diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java 
index 41cf972079c25c..23cd89147173ad 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -1,63 +1,42 @@ package io.datahubproject.openapi.v2.controller; -import static com.linkedin.metadata.authorization.ApiOperation.CREATE; -import static com.linkedin.metadata.authorization.ApiOperation.DELETE; -import static com.linkedin.metadata.authorization.ApiOperation.EXISTS; import static com.linkedin.metadata.authorization.ApiOperation.READ; -import static com.linkedin.metadata.authorization.ApiOperation.UPDATE; import com.datahub.authentication.Actor; import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.authorization.AuthUtil; -import com.datahub.authorization.AuthorizerChain; -import com.datahub.util.RecordUtils; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.ByteString; import com.linkedin.data.template.RecordTemplate; import com.linkedin.entity.EnvelopedAspect; -import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.BatchItem; -import com.linkedin.metadata.aspect.batch.ChangeMCP; -import com.linkedin.metadata.aspect.patch.GenericJsonPatch; -import com.linkedin.metadata.aspect.patch.template.common.GenericPatchTemplate; import com.linkedin.metadata.entity.EntityApiUtils; -import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.IngestResult; import com.linkedin.metadata.entity.UpdateAspectResult; import 
com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.models.AspectSpec; -import com.linkedin.metadata.models.EntitySpec; -import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.query.SearchFlags; -import com.linkedin.metadata.query.filter.SortCriterion; -import com.linkedin.metadata.query.filter.SortOrder; -import com.linkedin.metadata.search.ScrollResult; import com.linkedin.metadata.search.SearchEntity; import com.linkedin.metadata.search.SearchEntityArray; -import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.utils.AuditStampUtils; import com.linkedin.metadata.utils.GenericRecordUtils; -import com.linkedin.metadata.utils.SearchUtil; import com.linkedin.mxe.SystemMetadata; import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RequestContext; +import io.datahubproject.openapi.controller.GenericEntitiesController; import io.datahubproject.openapi.exception.UnauthorizedException; import io.datahubproject.openapi.v2.models.BatchGetUrnRequest; import io.datahubproject.openapi.v2.models.BatchGetUrnResponse; -import io.datahubproject.openapi.v2.models.GenericEntity; -import io.datahubproject.openapi.v2.models.GenericScrollResult; +import io.datahubproject.openapi.v2.models.GenericEntityScrollResultV2; +import io.datahubproject.openapi.v2.models.GenericEntityV2; import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; -import java.lang.reflect.InvocationTargetException; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; @@ -69,100 +48,38 @@ import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; -import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; import 
lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; -import org.springframework.web.bind.annotation.DeleteMapping; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PatchMapping; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestBody; import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RequestMethod; -import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; @RestController @RequiredArgsConstructor @RequestMapping("/v2/entity") @Slf4j -public class EntityController { - private static final SearchFlags DEFAULT_SEARCH_FLAGS = - new SearchFlags().setFulltext(false).setSkipAggregates(true).setSkipHighlighting(true); - @Autowired private EntityRegistry entityRegistry; - @Autowired private SearchService searchService; - @Autowired private EntityService entityService; - @Autowired private AuthorizerChain authorizationChain; - @Autowired private ObjectMapper objectMapper; +public class EntityController + extends GenericEntitiesController< + GenericEntityV2, GenericEntityScrollResultV2> { - @Qualifier("systemOperationContext") - @Autowired - private OperationContext systemOperationContext; - - @Tag(name = "Generic Entities", description = "API for interacting with generic entities.") - @GetMapping(value = "/{entityName}", produces = MediaType.APPLICATION_JSON_VALUE) - @Operation(summary = "Scroll entities") - public ResponseEntity> getEntities( - @PathVariable("entityName") String entityName, - @RequestParam(value = "aspectNames", defaultValue 
= "") Set aspectNames, - @RequestParam(value = "count", defaultValue = "10") Integer count, - @RequestParam(value = "query", defaultValue = "*") String query, - @RequestParam(value = "scrollId", required = false) String scrollId, - @RequestParam(value = "sort", required = false, defaultValue = "urn") String sortField, - @RequestParam(value = "sortOrder", required = false, defaultValue = "ASCENDING") - String sortOrder, - @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") - Boolean withSystemMetadata) + @Override + public GenericEntityScrollResultV2 buildScrollResult( + @Nonnull OperationContext opContext, + SearchEntityArray searchEntities, + Set aspectNames, + boolean withSystemMetadata, + @Nullable String scrollId) throws URISyntaxException { - - EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); - Authentication authentication = AuthenticationContext.getAuthentication(); - - if (!AuthUtil.isAPIAuthorizedEntityType(authentication, authorizationChain, READ, entityName)) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); - } - - OperationContext opContext = - OperationContext.asSession( - systemOperationContext, - RequestContext.builder().buildOpenapi("getEntities", entityName), - authorizationChain, - authentication, - true); - - // TODO: support additional and multiple sort params - SortCriterion sortCriterion = SearchUtil.sortBy(sortField, SortOrder.valueOf(sortOrder)); - - ScrollResult result = - searchService.scrollAcrossEntities( - opContext.withSearchFlags(flags -> DEFAULT_SEARCH_FLAGS), - List.of(entitySpec.getName()), - query, - null, - sortCriterion, - scrollId, - null, - count); - - if (!AuthUtil.isAPIAuthorizedResult(authentication, authorizationChain, result)) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); - } - - return ResponseEntity.ok( - 
GenericScrollResult.builder() - .results( - toRecordTemplates(opContext, result.getEntities(), aspectNames, withSystemMetadata)) - .scrollId(result.getScrollId()) - .build()); + return GenericEntityScrollResultV2.builder() + .results(toRecordTemplates(opContext, searchEntities, aspectNames, withSystemMetadata)) + .scrollId(scrollId) + .build(); } @Tag(name = "Generic Entities") @@ -192,7 +109,7 @@ public ResponseEntity getEntityBatch( BatchGetUrnResponse.builder() .entities( new ArrayList<>( - toRecordTemplates( + buildEntityList( opContext, urns, new HashSet<>(request.getAspectNames()), @@ -200,506 +117,10 @@ public ResponseEntity getEntityBatch( .build())); } - @Tag(name = "Generic Entities") - @GetMapping( - value = "/{entityName}/{entityUrn:urn:li:.+}", - produces = MediaType.APPLICATION_JSON_VALUE) - public ResponseEntity getEntity( - @PathVariable("entityName") String entityName, - @PathVariable("entityUrn") String entityUrn, - @RequestParam(value = "aspectNames", defaultValue = "") Set aspectNames, - @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") - Boolean withSystemMetadata) - throws URISyntaxException { - - Urn urn = UrnUtils.getUrn(entityUrn); - Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityUrns( - authentication, authorizationChain, READ, List.of(urn))) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); - } - OperationContext opContext = - OperationContext.asSession( - systemOperationContext, - RequestContext.builder().buildOpenapi("getEntity", entityName), - authorizationChain, - authentication, - true); - - return ResponseEntity.of( - toRecordTemplates(opContext, List.of(urn), aspectNames, withSystemMetadata).stream() - .findFirst()); - } - - @Tag(name = "Generic Entities") - @RequestMapping( - value = "/{entityName}/{entityUrn}", - method = {RequestMethod.HEAD}) - 
@Operation(summary = "Entity exists") - public ResponseEntity headEntity( - @PathVariable("entityName") String entityName, @PathVariable("entityUrn") String entityUrn) { - - Urn urn = UrnUtils.getUrn(entityUrn); - Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityUrns( - authentication, authorizationChain, EXISTS, List.of(urn))) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + EXISTS + " entities."); - } - OperationContext opContext = - OperationContext.asSession( - systemOperationContext, - RequestContext.builder().buildOpenapi("headEntity", entityName), - authorizationChain, - authentication, - true); - - return exists(opContext, urn, null) - ? ResponseEntity.noContent().build() - : ResponseEntity.notFound().build(); - } - - @Tag(name = "Generic Aspects", description = "API for generic aspects.") - @GetMapping( - value = "/{entityName}/{entityUrn}/{aspectName}", - produces = MediaType.APPLICATION_JSON_VALUE) - @Operation(summary = "Get an entity's generic aspect.") - public ResponseEntity getAspect( - @PathVariable("entityName") String entityName, - @PathVariable("entityUrn") String entityUrn, - @PathVariable("aspectName") String aspectName, - @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") - Boolean withSystemMetadata) - throws URISyntaxException { - - Urn urn = UrnUtils.getUrn(entityUrn); - Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityUrns( - authentication, authorizationChain, READ, List.of(urn))) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); - } - OperationContext opContext = - OperationContext.asSession( - systemOperationContext, - RequestContext.builder().buildOpenapi("getAspect", entityName), - authorizationChain, - authentication, - true); - - return 
ResponseEntity.of( - toRecordTemplates(opContext, List.of(urn), Set.of(aspectName), withSystemMetadata).stream() - .findFirst() - .flatMap( - e -> - e.getAspects().entrySet().stream() - .filter( - entry -> - entry.getKey().equals(lookupAspectSpec(urn, aspectName).getName())) - .map(Map.Entry::getValue) - .findFirst())); - } - - @Tag(name = "Generic Aspects") - @RequestMapping( - value = "/{entityName}/{entityUrn}/{aspectName}", - method = {RequestMethod.HEAD}) - @Operation(summary = "Whether an entity aspect exists.") - public ResponseEntity headAspect( - @PathVariable("entityName") String entityName, - @PathVariable("entityUrn") String entityUrn, - @PathVariable("aspectName") String aspectName) { - - Urn urn = UrnUtils.getUrn(entityUrn); - Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityUrns( - authentication, authorizationChain, EXISTS, List.of(urn))) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + EXISTS + " entities."); - } - OperationContext opContext = - OperationContext.asSession( - systemOperationContext, - RequestContext.builder().buildOpenapi("headAspect", entityName), - authorizationChain, - authentication, - true); - - return exists(opContext, urn, lookupAspectSpec(urn, aspectName).getName()) - ? 
ResponseEntity.noContent().build() - : ResponseEntity.notFound().build(); - } - - @Tag(name = "Generic Entities") - @DeleteMapping(value = "/{entityName}/{entityUrn}") - @Operation(summary = "Delete an entity") - public void deleteEntity( - @PathVariable("entityName") String entityName, @PathVariable("entityUrn") String entityUrn) { - - EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); - Urn urn = UrnUtils.getUrn(entityUrn); - Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityUrns( - authentication, authorizationChain, DELETE, List.of(urn))) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + DELETE + " entities."); - } - OperationContext opContext = - OperationContext.asSession( - systemOperationContext, - RequestContext.builder().buildOpenapi("deleteEntity", entityName), - authorizationChain, - authentication, - true); - - entityService.deleteAspect(opContext, entityUrn, entitySpec.getKeyAspectName(), Map.of(), true); - } - - @Tag(name = "Generic Entities") - @PostMapping(value = "/{entityName}", produces = MediaType.APPLICATION_JSON_VALUE) - @Operation(summary = "Create a batch of entities.") - public ResponseEntity> createEntity( - @PathVariable("entityName") String entityName, - @RequestParam(value = "async", required = false, defaultValue = "true") Boolean async, - @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") - Boolean withSystemMetadata, - @RequestBody @Nonnull String jsonEntityList) - throws URISyntaxException, JsonProcessingException { - - Authentication authentication = AuthenticationContext.getAuthentication(); - - if (!AuthUtil.isAPIAuthorizedEntityType( - authentication, authorizationChain, CREATE, entityName)) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + CREATE + " entities."); - } - - OperationContext opContext = - 
OperationContext.asSession( - systemOperationContext, - RequestContext.builder().buildOpenapi("createEntity", entityName), - authorizationChain, - authentication, - true); - - AspectsBatch batch = toMCPBatch(opContext, jsonEntityList, authentication.getActor()); - Set results = entityService.ingestProposal(opContext, batch, async); - - if (!async) { - return ResponseEntity.ok(toEntityListResponse(results, withSystemMetadata)); - } else { - return ResponseEntity.accepted().body(toEntityListResponse(results, withSystemMetadata)); - } - } - - @Tag(name = "Generic Aspects") - @DeleteMapping(value = "/{entityName}/{entityUrn}/{aspectName}") - @Operation(summary = "Delete an entity aspect.") - public void deleteAspect( - @PathVariable("entityName") String entityName, - @PathVariable("entityUrn") String entityUrn, - @PathVariable("aspectName") String aspectName) { - - Urn urn = UrnUtils.getUrn(entityUrn); - Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityUrns( - authentication, authorizationChain, DELETE, List.of(urn))) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + DELETE + " entities."); - } - OperationContext opContext = - OperationContext.asSession( - systemOperationContext, - RequestContext.builder().buildOpenapi("deleteAspect", entityName), - authorizationChain, - authentication, - true); - - entityService.deleteAspect( - opContext, entityUrn, lookupAspectSpec(urn, aspectName).getName(), Map.of(), true); - } - - @Tag(name = "Generic Aspects") - @PostMapping( - value = "/{entityName}/{entityUrn}/{aspectName}", - produces = MediaType.APPLICATION_JSON_VALUE) - @Operation(summary = "Create an entity aspect.") - public ResponseEntity createAspect( - @PathVariable("entityName") String entityName, - @PathVariable("entityUrn") String entityUrn, - @PathVariable("aspectName") String aspectName, - @RequestParam(value = "systemMetadata", required = false, 
defaultValue = "false") - Boolean withSystemMetadata, - @RequestParam(value = "createIfNotExists", required = false, defaultValue = "false") - Boolean createIfNotExists, - @RequestBody @Nonnull String jsonAspect) - throws URISyntaxException { - - Urn urn = UrnUtils.getUrn(entityUrn); - EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); - Authentication authentication = AuthenticationContext.getAuthentication(); - - if (!AuthUtil.isAPIAuthorizedEntityUrns( - authentication, authorizationChain, CREATE, List.of(urn))) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + CREATE + " entities."); - } - OperationContext opContext = - OperationContext.asSession( - systemOperationContext, - RequestContext.builder().buildOpenapi("createAspect", entityName), - authorizationChain, - authentication, - true); - - AspectSpec aspectSpec = lookupAspectSpec(entitySpec, aspectName); - ChangeMCP upsert = - toUpsertItem( - opContext.getRetrieverContext().get().getAspectRetriever(), - urn, - aspectSpec, - createIfNotExists, - jsonAspect, - authentication.getActor()); - - List results = - entityService.ingestAspects( - opContext, - AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) - .items(List.of(upsert)) - .build(), - true, - true); - - return ResponseEntity.of( - results.stream() - .findFirst() - .map( - result -> - GenericEntity.builder() - .urn(result.getUrn().toString()) - .build( - objectMapper, - Map.of( - aspectName, - Pair.of( - result.getNewValue(), - withSystemMetadata ? result.getNewSystemMetadata() : null))))); - } - - @Tag(name = "Generic Aspects") - @PatchMapping( - value = "/{entityName}/{entityUrn}/{aspectName}", - consumes = "application/json-patch+json", - produces = MediaType.APPLICATION_JSON_VALUE) - @Operation(summary = "Patch an entity aspect. 
(Experimental)") - public ResponseEntity patchAspect( - @PathVariable("entityName") String entityName, - @PathVariable("entityUrn") String entityUrn, - @PathVariable("aspectName") String aspectName, - @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") - Boolean withSystemMetadata, - @RequestBody @Nonnull GenericJsonPatch patch) - throws URISyntaxException, - NoSuchMethodException, - InvocationTargetException, - InstantiationException, - IllegalAccessException { - - Urn urn = UrnUtils.getUrn(entityUrn); - EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); - Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityUrns( - authentication, authorizationChain, UPDATE, List.of(urn))) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + UPDATE + " entities."); - } - OperationContext opContext = - OperationContext.asSession( - systemOperationContext, - RequestContext.builder().buildOpenapi("patchAspect", entityName), - authorizationChain, - authentication, - true); - - AspectSpec aspectSpec = lookupAspectSpec(entitySpec, aspectName); - RecordTemplate currentValue = entityService.getAspect(opContext, urn, aspectSpec.getName(), 0); - - GenericPatchTemplate genericPatchTemplate = - GenericPatchTemplate.builder() - .genericJsonPatch(patch) - .templateType(aspectSpec.getDataTemplateClass()) - .templateDefault( - aspectSpec.getDataTemplateClass().getDeclaredConstructor().newInstance()) - .build(); - ChangeMCP upsert = - toUpsertItem( - opContext.getRetrieverContext().get().getAspectRetriever(), - UrnUtils.getUrn(entityUrn), - aspectSpec, - currentValue, - genericPatchTemplate, - authentication.getActor()); - - List results = - entityService.ingestAspects( - opContext, - AspectsBatchImpl.builder() - .retrieverContext(opContext.getRetrieverContext().get()) - .items(List.of(upsert)) - .build(), - true, - true); - - return 
ResponseEntity.of( - results.stream() - .findFirst() - .map( - result -> - GenericEntity.builder() - .urn(result.getUrn().toString()) - .build( - objectMapper, - Map.of( - aspectSpec.getName(), - Pair.of( - result.getNewValue(), - withSystemMetadata ? result.getNewSystemMetadata() : null))))); - } - - private List toRecordTemplates( - @Nonnull OperationContext opContext, - SearchEntityArray searchEntities, - Set aspectNames, - boolean withSystemMetadata) - throws URISyntaxException { - return toRecordTemplates( - opContext, - searchEntities.stream().map(SearchEntity::getEntity).collect(Collectors.toList()), - aspectNames, - withSystemMetadata); - } - - private Boolean exists(@Nonnull OperationContext opContext, Urn urn, @Nullable String aspect) { - return aspect == null - ? entityService.exists(opContext, urn, true) - : entityService.exists(opContext, urn, aspect, true); - } - - private List toRecordTemplates( - @Nonnull OperationContext opContext, - List urns, - Set aspectNames, - boolean withSystemMetadata) - throws URISyntaxException { - if (urns.isEmpty()) { - return List.of(); - } else { - Set urnsSet = new HashSet<>(urns); - - Map> aspects = - entityService.getLatestEnvelopedAspects( - opContext, - urnsSet, - resolveAspectNames(urnsSet, aspectNames).stream() - .map(AspectSpec::getName) - .collect(Collectors.toSet())); - - return urns.stream() - .map( - u -> - GenericEntity.builder() - .urn(u.toString()) - .build( - objectMapper, - toAspectMap(u, aspects.getOrDefault(u, List.of()), withSystemMetadata))) - .collect(Collectors.toList()); - } - } - - private Set resolveAspectNames(Set urns, Set requestedAspectNames) { - if (requestedAspectNames.isEmpty()) { - return urns.stream() - .flatMap(u -> entityRegistry.getEntitySpec(u.getEntityType()).getAspectSpecs().stream()) - .collect(Collectors.toSet()); - } else { - // ensure key is always present - return Stream.concat( - urns.stream() - .flatMap( - urn -> - requestedAspectNames.stream() - .map(aspectName -> 
lookupAspectSpec(urn, aspectName))), - urns.stream() - .map(u -> entityRegistry.getEntitySpec(u.getEntityType()).getKeyAspectSpec())) - .collect(Collectors.toSet()); - } - } - - private Map> toAspectMap( - Urn urn, List aspects, boolean withSystemMetadata) { - return aspects.stream() - .map( - a -> - Map.entry( - a.getName(), - Pair.of( - toRecordTemplate(lookupAspectSpec(urn, a.getName()), a), - withSystemMetadata ? a.getSystemMetadata() : null))) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - } - - private AspectSpec lookupAspectSpec(Urn urn, String aspectName) { - return lookupAspectSpec(entityRegistry.getEntitySpec(urn.getEntityType()), aspectName); - } - - private RecordTemplate toRecordTemplate(AspectSpec aspectSpec, EnvelopedAspect envelopedAspect) { - return RecordUtils.toRecordTemplate( - aspectSpec.getDataTemplateClass(), envelopedAspect.getValue().data()); - } - - private ChangeMCP toUpsertItem( - @Nonnull AspectRetriever aspectRetriever, - Urn entityUrn, - AspectSpec aspectSpec, - Boolean createIfNotExists, - String jsonAspect, - Actor actor) - throws URISyntaxException { - return ChangeItemImpl.builder() - .urn(entityUrn) - .aspectName(aspectSpec.getName()) - .changeType(Boolean.TRUE.equals(createIfNotExists) ? 
ChangeType.CREATE : ChangeType.UPSERT) - .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr())) - .recordTemplate( - GenericRecordUtils.deserializeAspect( - ByteString.copyString(jsonAspect, StandardCharsets.UTF_8), - GenericRecordUtils.JSON, - aspectSpec)) - .build(aspectRetriever); - } - - private ChangeMCP toUpsertItem( - @Nonnull AspectRetriever aspectRetriever, - @Nonnull Urn urn, - @Nonnull AspectSpec aspectSpec, - @Nullable RecordTemplate currentValue, - @Nonnull GenericPatchTemplate genericPatchTemplate, - @Nonnull Actor actor) { - return ChangeItemImpl.fromPatch( - urn, - aspectSpec, - currentValue, - genericPatchTemplate, - AuditStampUtils.createAuditStamp(actor.toUrnStr()), - aspectRetriever); - } - - private AspectsBatch toMCPBatch( + @Override + protected AspectsBatch toMCPBatch( @Nonnull OperationContext opContext, String entityArrayList, Actor actor) - throws JsonProcessingException, URISyntaxException { + throws JsonProcessingException { JsonNode entities = objectMapper.readTree(entityArrayList); List items = new LinkedList<>(); @@ -707,8 +128,14 @@ private AspectsBatch toMCPBatch( Iterator entityItr = entities.iterator(); while (entityItr.hasNext()) { JsonNode entity = entityItr.next(); + if (!entity.has("urn")) { + throw new IllegalArgumentException("Missing `urn` field"); + } Urn entityUrn = UrnUtils.getUrn(entity.get("urn").asText()); + if (!entity.has("aspects")) { + throw new IllegalArgumentException("Missing `aspects` field"); + } Iterator> aspectItr = entity.get("aspects").fields(); while (aspectItr.hasNext()) { Map.Entry aspect = aspectItr.next(); @@ -747,9 +174,71 @@ private AspectsBatch toMCPBatch( .build(); } - public List toEntityListResponse( + @Override + protected List buildEntityList( + @Nonnull OperationContext opContext, + List urns, + Set aspectNames, + boolean withSystemMetadata) + throws URISyntaxException { + if (urns.isEmpty()) { + return List.of(); + } else { + Set urnsSet = new HashSet<>(urns); + + Map> aspects = + 
entityService.getLatestEnvelopedAspects( + opContext, + urnsSet, + resolveAspectNames(urnsSet, aspectNames).stream() + .map(AspectSpec::getName) + .collect(Collectors.toSet())); + + return urns.stream() + .map( + u -> + GenericEntityV2.builder() + .urn(u.toString()) + .build( + objectMapper, + toAspectMap(u, aspects.getOrDefault(u, List.of()), withSystemMetadata))) + .collect(Collectors.toList()); + } + } + + @Override + protected GenericEntityV2 buildGenericEntity( + @Nonnull String aspectName, + @Nonnull UpdateAspectResult updateAspectResult, + boolean withSystemMetadata) { + return GenericEntityV2.builder() + .urn(updateAspectResult.getUrn().toString()) + .build( + objectMapper, + Map.of( + aspectName, + Pair.of( + updateAspectResult.getNewValue(), + withSystemMetadata ? updateAspectResult.getNewSystemMetadata() : null))); + } + + private List toRecordTemplates( + @Nonnull OperationContext opContext, + SearchEntityArray searchEntities, + Set aspectNames, + boolean withSystemMetadata) + throws URISyntaxException { + return buildEntityList( + opContext, + searchEntities.stream().map(SearchEntity::getEntity).collect(Collectors.toList()), + aspectNames, + withSystemMetadata); + } + + @Override + protected List buildEntityList( Set ingestResults, boolean withSystemMetadata) { - List responseList = new LinkedList<>(); + List responseList = new LinkedList<>(); Map> entityMap = ingestResults.stream().collect(Collectors.groupingBy(IngestResult::getUrn)); @@ -765,24 +254,10 @@ public List toEntityListResponse( withSystemMetadata ? 
ingest.getRequest().getSystemMetadata() : null))) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); responseList.add( - GenericEntity.builder() + GenericEntityV2.builder() .urn(urnAspects.getKey().toString()) .build(objectMapper, aspectsMap)); } return responseList; } - - /** - * Case-insensitive fallback - * - * @return - */ - private static AspectSpec lookupAspectSpec(EntitySpec entitySpec, String aspectName) { - return entitySpec.getAspectSpec(aspectName) != null - ? entitySpec.getAspectSpec(aspectName) - : entitySpec.getAspectSpecs().stream() - .filter(aspec -> aspec.getName().toLowerCase().equals(aspectName)) - .findFirst() - .get(); - } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/RelationshipController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/RelationshipController.java index ac0b9dd8c03ef2..3e46e10857fbd8 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/RelationshipController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/RelationshipController.java @@ -18,8 +18,8 @@ import com.linkedin.metadata.query.filter.RelationshipFilter; import com.linkedin.metadata.search.utils.QueryUtils; import io.datahubproject.openapi.exception.UnauthorizedException; +import io.datahubproject.openapi.models.GenericScrollResult; import io.datahubproject.openapi.v2.models.GenericRelationship; -import io.datahubproject.openapi.v2.models.GenericScrollResult; import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; import java.util.Arrays; diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimeseriesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimeseriesController.java index 267122d71a57bc..1c404006d97a46 100644 
--- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimeseriesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/TimeseriesController.java @@ -19,7 +19,7 @@ import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RequestContext; import io.datahubproject.openapi.exception.UnauthorizedException; -import io.datahubproject.openapi.v2.models.GenericScrollResult; +import io.datahubproject.openapi.models.GenericScrollResult; import io.datahubproject.openapi.v2.models.GenericTimeseriesAspect; import io.swagger.v3.oas.annotations.tags.Tag; import java.net.URISyntaxException; diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java index 689efbf8bc6ec7..20e917f1f452ea 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java @@ -1,5 +1,43 @@ package io.datahubproject.openapi.v3.controller; +import com.datahub.authentication.Actor; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.ByteString; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.metadata.aspect.batch.AspectsBatch; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.entity.EntityApiUtils; +import com.linkedin.metadata.entity.IngestResult; +import com.linkedin.metadata.entity.UpdateAspectResult; +import 
com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.search.SearchEntity; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.metadata.utils.AuditStampUtils; +import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.openapi.controller.GenericEntitiesController; +import io.datahubproject.openapi.v3.models.GenericEntityScrollResultV3; +import io.datahubproject.openapi.v3.models.GenericEntityV3; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.web.bind.annotation.RequestMapping; @@ -9,4 +47,167 @@ @RequiredArgsConstructor @RequestMapping("/v3/entity") @Slf4j -public class EntityController extends io.datahubproject.openapi.v2.controller.EntityController {} +public class EntityController + extends GenericEntitiesController< + GenericEntityV3, GenericEntityScrollResultV3> { + + @Override + public GenericEntityScrollResultV3 buildScrollResult( + @Nonnull OperationContext opContext, + SearchEntityArray searchEntities, + Set aspectNames, + boolean withSystemMetadata, + @Nullable String scrollId) + throws URISyntaxException { + return GenericEntityScrollResultV3.builder() + .entities(toRecordTemplates(opContext, searchEntities, aspectNames, withSystemMetadata)) + .scrollId(scrollId) + .build(); + } + + @Override + protected List buildEntityList( + @Nonnull 
OperationContext opContext, + List urns, + Set aspectNames, + boolean withSystemMetadata) + throws URISyntaxException { + if (urns.isEmpty()) { + return List.of(); + } else { + Set urnsSet = new HashSet<>(urns); + + Map> aspects = + entityService.getLatestEnvelopedAspects( + opContext, + urnsSet, + resolveAspectNames(urnsSet, aspectNames).stream() + .map(AspectSpec::getName) + .collect(Collectors.toSet())); + + return urns.stream() + .map( + u -> + GenericEntityV3.builder() + .build( + objectMapper, + u, + toAspectMap(u, aspects.getOrDefault(u, List.of()), withSystemMetadata))) + .collect(Collectors.toList()); + } + } + + @Override + protected List buildEntityList( + Set ingestResults, boolean withSystemMetadata) { + List responseList = new LinkedList<>(); + + Map> entityMap = + ingestResults.stream().collect(Collectors.groupingBy(IngestResult::getUrn)); + for (Map.Entry> urnAspects : entityMap.entrySet()) { + Map> aspectsMap = + urnAspects.getValue().stream() + .map( + ingest -> + Map.entry( + ingest.getRequest().getAspectName(), + Pair.of( + ingest.getRequest().getRecordTemplate(), + withSystemMetadata ? ingest.getRequest().getSystemMetadata() : null))) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + responseList.add( + GenericEntityV3.builder().build(objectMapper, urnAspects.getKey(), aspectsMap)); + } + return responseList; + } + + @Override + protected GenericEntityV3 buildGenericEntity( + @Nonnull String aspectName, + @Nonnull UpdateAspectResult updateAspectResult, + boolean withSystemMetadata) { + return GenericEntityV3.builder() + .build( + objectMapper, + updateAspectResult.getUrn(), + Map.of( + aspectName, + Pair.of( + updateAspectResult.getNewValue(), + withSystemMetadata ? 
updateAspectResult.getNewSystemMetadata() : null))); + } + + private List toRecordTemplates( + @Nonnull OperationContext opContext, + SearchEntityArray searchEntities, + Set aspectNames, + boolean withSystemMetadata) + throws URISyntaxException { + return buildEntityList( + opContext, + searchEntities.stream().map(SearchEntity::getEntity).collect(Collectors.toList()), + aspectNames, + withSystemMetadata); + } + + @Override + protected AspectsBatch toMCPBatch( + @Nonnull OperationContext opContext, String entityArrayList, Actor actor) + throws JsonProcessingException { + JsonNode entities = objectMapper.readTree(entityArrayList); + + List items = new LinkedList<>(); + if (entities.isArray()) { + Iterator entityItr = entities.iterator(); + while (entityItr.hasNext()) { + JsonNode entity = entityItr.next(); + if (!entity.has("urn")) { + throw new IllegalArgumentException("Missing `urn` field"); + } + Urn entityUrn = UrnUtils.getUrn(entity.get("urn").asText()); + + Iterator> aspectItr = entity.fields(); + while (aspectItr.hasNext()) { + Map.Entry aspect = aspectItr.next(); + + if ("urn".equals(aspect.getKey())) { + continue; + } + + AspectSpec aspectSpec = lookupAspectSpec(entityUrn, aspect.getKey()); + + if (aspectSpec != null) { + + SystemMetadata systemMetadata = null; + if (aspect.getValue().has("systemMetadata")) { + systemMetadata = + EntityApiUtils.parseSystemMetadata( + objectMapper.writeValueAsString(aspect.getValue().get("systemMetadata"))); + ((ObjectNode) aspect.getValue()).remove("systemMetadata"); + } + + ChangeItemImpl.ChangeItemImplBuilder builder = + ChangeItemImpl.builder() + .urn(entityUrn) + .aspectName(aspectSpec.getName()) + .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr())) + .systemMetadata(systemMetadata) + .recordTemplate( + GenericRecordUtils.deserializeAspect( + ByteString.copyString( + objectMapper.writeValueAsString(aspect.getValue()), + StandardCharsets.UTF_8), + GenericRecordUtils.JSON, + aspectSpec)); + + 
items.add(builder.build(opContext.getRetrieverContext().get().getAspectRetriever())); + } + } + } + } + return AspectsBatchImpl.builder() + .items(items) + .retrieverContext(opContext.getRetrieverContext().get()) + .build(); + } +} From 812bcbbfb98f03007a1e6ba2056627897a189c04 Mon Sep 17 00:00:00 2001 From: Kevin Chun Date: Fri, 7 Jun 2024 15:25:35 -0700 Subject: [PATCH 4/5] Use type: string for enum schemas (#10663) --- .../openapi/v3/OpenAPIV3Generator.java | 46 ++++++++++--------- .../openapi/v3/OpenAPIV3GeneratorTest.java | 5 ++ 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java index df3f6445a855a0..4966e618a16435 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java @@ -437,27 +437,31 @@ private static void addAspectSchemas(final Components components, final AspectSp final String newDefinition = definition.replaceAll("definitions", "components/schemas"); Schema s = Json.mapper().readValue(newDefinition, Schema.class); - Set requiredNames = - Optional.ofNullable(s.getRequired()) - .map(names -> Set.copyOf(names)) - .orElse(new HashSet()); - Map properties = - Optional.ofNullable(s.getProperties()).orElse(new HashMap<>()); - properties.forEach( - (name, schema) -> { - String $ref = schema.get$ref(); - boolean isNameRequired = requiredNames.contains(name); - if ($ref != null && !isNameRequired) { - // A non-required $ref property must be wrapped in a { allOf: [ $ref ] } - // object to allow the - // property to be marked as nullable - schema.setType(TYPE_OBJECT); - schema.set$ref(null); - schema.setAllOf(List.of(new Schema().$ref($ref))); - } - schema.setNullable(!isNameRequired); - }); - 
+ // Set enums to "string". + if (s.getEnum() != null && !s.getEnum().isEmpty()) { + s.setType("string"); + } else { + Set requiredNames = + Optional.ofNullable(s.getRequired()) + .map(names -> Set.copyOf(names)) + .orElse(new HashSet()); + Map properties = + Optional.ofNullable(s.getProperties()).orElse(new HashMap<>()); + properties.forEach( + (name, schema) -> { + String $ref = schema.get$ref(); + boolean isNameRequired = requiredNames.contains(name); + if ($ref != null && !isNameRequired) { + // A non-required $ref property must be wrapped in a { allOf: [ $ref ] } + // object to allow the + // property to be marked as nullable + schema.setType(TYPE_OBJECT); + schema.set$ref(null); + schema.setAllOf(List.of(new Schema().$ref($ref))); + } + schema.setNullable(!isNameRequired); + }); + } components.addSchemas(n, s); } catch (Exception e) { throw new RuntimeException(e); diff --git a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java index 0ce62f5cb10f64..10b75fd7faed37 100644 --- a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java +++ b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java @@ -83,5 +83,10 @@ public void testOpenApiSpecBuilder() throws Exception { List.of(new Schema().$ref("#/components/schemas/SystemMetadata")), systemMetadata.getAllOf()); assertTrue(systemMetadata.getNullable()); + + // Assert enum property is string. 
+ Schema fabricType = openAPI.getComponents().getSchemas().get("FabricType"); + assertEquals("string", fabricType.getType()); + assertFalse(fabricType.getEnum().isEmpty()); } } From 177a50f9a77f6fd85b053bc655e2d0202759a07e Mon Sep 17 00:00:00 2001 From: dushayntAW <158567391+dushayntAW@users.noreply.github.com> Date: Mon, 10 Jun 2024 11:54:20 +0200 Subject: [PATCH 5/5] fix(ingestion/airflow-plugin): airflow remove old tasks (#10485) --- docs/lineage/airflow.md | 31 +++++++ .../datahub_listener.py | 90 +++++++++++++++++++ 2 files changed, 121 insertions(+) diff --git a/docs/lineage/airflow.md b/docs/lineage/airflow.md index 1745c23cb1923b..a36f3bbd4bd16a 100644 --- a/docs/lineage/airflow.md +++ b/docs/lineage/airflow.md @@ -233,6 +233,37 @@ You can also create a custom extractor to extract lineage from any operator. Thi See this [example PR](https://github.com/datahub-project/datahub/pull/10452) which adds a custom extractor for the `BigQueryInsertJobOperator` operator. +## Cleanup obsolete pipelines and tasks from Datahub + +There might be a case where the DAGs are removed from the Airflow but the corresponding pipelines and tasks are still there in the Datahub, let's call such pipelines and tasks, `obsolete pipelines and tasks` + +Following are the steps to clean them up from the datahub: +- create a DAG named `Datahub_Cleanup`, i.e. 
+ +```python +from datetime import datetime + +from airflow import DAG +from airflow.operators.bash import BashOperator + +from datahub_airflow_plugin.entities import Dataset, Urn + +with DAG( + "Datahub_Cleanup", + start_date=datetime(2024, 1, 1), + schedule_interval=None, + catchup=False, +) as dag: + task = BashOperator( + task_id="cleanup_obsolete_data", + dag=dag, + bash_command="echo 'cleaning up the obsolete data from datahub'", + ) + +``` +- ingest this DAG, and it will remove all the obsolete pipelines and tasks from the Datahub based on the `cluster` value set in the `airflow.cfg` + + ## Emit Lineage Directly If you can't use the plugin or annotate inlets/outlets, you can also emit lineage using the `DatahubEmitterOperator`. diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py index ed155a35a925c4..40c36d6106e2b6 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py @@ -1,3 +1,4 @@ +import asyncio import copy import functools import logging @@ -8,12 +9,15 @@ import airflow import datahub.emitter.mce_builder as builder +from airflow.models.serialized_dag import SerializedDagModel from datahub.api.entities.datajob import DataJob from datahub.api.entities.dataprocess.dataprocess_instance import InstanceRunResult from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.rest_emitter import DatahubRestEmitter from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.schema_classes import ( + DataFlowKeyClass, + DataJobKeyClass, FineGrainedLineageClass, FineGrainedLineageDownstreamTypeClass, FineGrainedLineageUpstreamTypeClass, @@ -68,6 +72,7 @@ def hookimpl(f: _F) -> _F: # type: ignore[misc] # noqa: F811 "1", ) _RUN_IN_THREAD_TIMEOUT = 30 
+_DATAHUB_CLEANUP_DAG = "Datahub_Cleanup" def get_airflow_plugin_listener() -> Optional["DataHubListener"]: @@ -542,6 +547,81 @@ def on_dag_start(self, dag_run: "DagRun") -> None: self.emitter.emit(event) + if dag.dag_id == _DATAHUB_CLEANUP_DAG: + assert self.graph + + logger.debug("Initiating the cleanup of obsselete data from datahub") + + # get all ingested dataflow and datajob + ingested_dataflow_urns = list( + self.graph.get_urns_by_filter( + platform="airflow", + entity_types=["dataFlow"], + ) + ) + ingested_datajob_urns = list( + self.graph.get_urns_by_filter( + platform="airflow", entity_types=["dataJob"] + ) + ) + + # filter the ingested dataflow and datajob based on the cluster + filtered_ingested_dataflow_urns: List = [] + filtered_ingested_datajob_urns: List = [] + + for ingested_dataflow_urn in ingested_dataflow_urns: + data_flow_aspect = self.graph.get_aspect( + entity_urn=ingested_dataflow_urn, aspect_type=DataFlowKeyClass + ) + if ( + data_flow_aspect is not None + and data_flow_aspect.flowId != _DATAHUB_CLEANUP_DAG + and data_flow_aspect is not None + and data_flow_aspect.cluster == self.config.cluster + ): + filtered_ingested_dataflow_urns.append(ingested_dataflow_urn) + + for ingested_datajob_urn in ingested_datajob_urns: + data_job_aspect = self.graph.get_aspect( + entity_urn=ingested_datajob_urn, aspect_type=DataJobKeyClass + ) + if ( + data_job_aspect is not None + and data_job_aspect.flow in filtered_ingested_dataflow_urns + ): + filtered_ingested_datajob_urns.append(ingested_datajob_urn) + + # get all airflow dags + all_airflow_dags = SerializedDagModel.read_all_dags().values() + + airflow_flow_urns: List = [] + airflow_job_urns: List = [] + + for dag in all_airflow_dags: + flow_urn = builder.make_data_flow_urn( + orchestrator="airflow", + flow_id=dag.dag_id, + cluster=self.config.cluster, + ) + airflow_flow_urns.append(flow_urn) + + for task in dag.tasks: + airflow_job_urns.append( + builder.make_data_job_urn_with_flow(str(flow_urn), 
task.task_id) + ) + + obsolete_pipelines = set(filtered_ingested_dataflow_urns) - set( + airflow_flow_urns + ) + obsolete_tasks = set(filtered_ingested_datajob_urns) - set(airflow_job_urns) + + obsolete_urns = obsolete_pipelines.union(obsolete_tasks) + + asyncio.run(self._soft_delete_obsolete_urns(obsolete_urns=obsolete_urns)) + + logger.debug(f"total pipelines removed = {len(obsolete_pipelines)}") + logger.debug(f"total tasks removed = {len(obsolete_tasks)}") + if HAS_AIRFLOW_DAG_LISTENER_API: @hookimpl @@ -578,3 +658,13 @@ def on_dataset_changed(self, dataset: "Dataset") -> None: logger.debug( f"DataHub listener got notification about dataset change for {dataset}" ) + + async def _soft_delete_obsolete_urns(self, obsolete_urns): + delete_tasks = [self._delete_obsolete_data(urn) for urn in obsolete_urns] + await asyncio.gather(*delete_tasks) + + async def _delete_obsolete_data(self, obsolete_urn): + assert self.graph + + if self.graph.exists(str(obsolete_urn)): + self.graph.soft_delete_entity(str(obsolete_urn))