From 623b6f9e455f3f2712ec1762c683a0c494c135a8 Mon Sep 17 00:00:00 2001 From: Kevin Chun Date: Wed, 10 Jul 2024 07:00:15 -0700 Subject: [PATCH 1/9] Batch get entity endpoints (#10880) --- .../openapi/client/OpenApiClient.java | 4 +- .../{v2 => }/models/BatchGetUrnRequest.java | 2 +- .../{v2 => }/models/BatchGetUrnResponse.java | 6 +- .../controller/GenericEntitiesController.java | 49 +++++++++- .../v2/controller/EntityController.java | 62 ------------ .../openapi/v3/OpenAPIV3Generator.java | 94 +++++++++++++++++++ 6 files changed, 146 insertions(+), 71 deletions(-) rename metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/{v2 => }/models/BatchGetUrnRequest.java (95%) rename metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/{v2 => }/models/BatchGetUrnResponse.java (78%) diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/client/OpenApiClient.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/client/OpenApiClient.java index 267d95f1dddbfb..c5d0a28ed4ea40 100644 --- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/client/OpenApiClient.java +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/client/OpenApiClient.java @@ -2,8 +2,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import io.datahubproject.metadata.context.OperationContext; -import io.datahubproject.openapi.v2.models.BatchGetUrnRequest; -import io.datahubproject.openapi.v2.models.BatchGetUrnResponse; +import io.datahubproject.openapi.models.BatchGetUrnRequest; +import io.datahubproject.openapi.models.BatchGetUrnResponse; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/BatchGetUrnRequest.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/BatchGetUrnRequest.java similarity index 95% rename from metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/BatchGetUrnRequest.java rename to metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/BatchGetUrnRequest.java index 02afe8d40dd528..8bae8fb08e9b95 100644 --- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/BatchGetUrnRequest.java +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/BatchGetUrnRequest.java @@ -1,4 +1,4 @@ -package io.datahubproject.openapi.v2.models; +package io.datahubproject.openapi.models; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/BatchGetUrnResponse.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/BatchGetUrnResponse.java similarity index 78% rename from metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/BatchGetUrnResponse.java rename to metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/BatchGetUrnResponse.java index c1fd809ad3649d..baa9b31ad0055d 100644 --- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/BatchGetUrnResponse.java +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/BatchGetUrnResponse.java @@ -1,4 +1,4 @@ -package io.datahubproject.openapi.v2.models; +package io.datahubproject.openapi.models; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; @@ -13,8 +13,8 @@ @Builder @JsonInclude(JsonInclude.Include.NON_NULL) @JsonDeserialize(builder = BatchGetUrnResponse.BatchGetUrnResponseBuilder.class) -public class BatchGetUrnResponse implements Serializable { +public class BatchGetUrnResponse implements Serializable { @JsonProperty("entities") @Schema(description = "List of entity responses") - List entities; + List entities; } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java index dd5a81a0d81ecb..08f7e45a7bad30 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java @@ -16,6 +16,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.ByteString; import com.linkedin.data.template.RecordTemplate; import com.linkedin.entity.EnvelopedAspect; @@ -48,6 +49,8 @@ import io.datahubproject.metadata.context.RequestContext; import io.datahubproject.openapi.exception.InvalidUrnException; import io.datahubproject.openapi.exception.UnauthorizedException; +import io.datahubproject.openapi.models.BatchGetUrnRequest; +import io.datahubproject.openapi.models.BatchGetUrnResponse; import io.datahubproject.openapi.models.GenericEntity; import io.datahubproject.openapi.models.GenericEntityScrollResult; import io.swagger.v3.oas.annotations.Operation; @@ -56,9 +59,7 @@ import java.lang.reflect.InvocationTargetException; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; @@ -426,6 +427,48 @@ public ResponseEntity> createEntity( } } + @Tag(name = "Generic Entities") + @PostMapping(value = "/batch/{entityName}", produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "Get a batch of entities") + public ResponseEntity> getEntityBatch( + HttpServletRequest httpServletRequest, + @PathVariable("entityName") String entityName, + @RequestBody BatchGetUrnRequest request) + throws URISyntaxException { + + List urns = request.getUrns().stream().map(UrnUtils::getUrn).collect(Collectors.toList()); + + Authentication authentication = AuthenticationContext.getAuthentication(); + if (!AuthUtil.isAPIAuthorizedEntityUrns(authentication, authorizationChain, READ, urns)) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); + } + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi( + authentication.getActor().toUrnStr(), + httpServletRequest, + "getEntityBatch", + entityName), + authorizationChain, + authentication, + true); + + return ResponseEntity.of( + Optional.of( + BatchGetUrnResponse.builder() + .entities( + new ArrayList<>( + buildEntityList( + opContext, + urns, + new HashSet<>(request.getAspectNames()), + request.isWithSystemMetadata()))) + .build())); + } + @Tag(name = "Generic Aspects") @DeleteMapping(value = "/{entityName}/{entityUrn:urn:li:.+}/{aspectName}") @Operation(summary = "Delete an entity aspect.") diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java index b40aba2d2908fc..bb63d5cb9571b7 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -1,15 +1,9 @@ package io.datahubproject.openapi.v2.controller; -import static com.linkedin.metadata.authorization.ApiOperation.READ; - import com.datahub.authentication.Actor; -import com.datahub.authentication.Authentication; -import com.datahub.authentication.AuthenticationContext; -import com.datahub.authorization.AuthUtil; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.linkedin.common.urn.Urn; -import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.ByteString; import com.linkedin.data.template.RecordTemplate; import com.linkedin.entity.EnvelopedAspect; @@ -28,37 +22,23 @@ import com.linkedin.mxe.SystemMetadata; import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; -import io.datahubproject.metadata.context.RequestContext; import io.datahubproject.openapi.controller.GenericEntitiesController; import io.datahubproject.openapi.exception.InvalidUrnException; -import io.datahubproject.openapi.exception.UnauthorizedException; -import io.datahubproject.openapi.v2.models.BatchGetUrnRequest; -import io.datahubproject.openapi.v2.models.BatchGetUrnResponse; import io.datahubproject.openapi.v2.models.GenericEntityScrollResultV2; import io.datahubproject.openapi.v2.models.GenericEntityV2; -import io.swagger.v3.oas.annotations.Operation; -import io.swagger.v3.oas.annotations.tags.Tag; -import jakarta.servlet.http.HttpServletRequest; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; -import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.springframework.http.MediaType; -import org.springframework.http.ResponseEntity; -import org.springframework.web.bind.annotation.PathVariable; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestBody; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; @@ -84,48 +64,6 @@ public GenericEntityScrollResultV2 buildScrollResult( .build(); } - @Tag(name = "Generic Entities") - @PostMapping(value = "/batch/{entityName}", produces = MediaType.APPLICATION_JSON_VALUE) - @Operation(summary = "Get a batch of entities") - public ResponseEntity getEntityBatch( - HttpServletRequest httpServletRequest, - @PathVariable("entityName") String entityName, - @RequestBody BatchGetUrnRequest request) - throws URISyntaxException { - - List urns = request.getUrns().stream().map(UrnUtils::getUrn).collect(Collectors.toList()); - - Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityUrns(authentication, authorizationChain, READ, urns)) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); - } - OperationContext opContext = - OperationContext.asSession( - systemOperationContext, - RequestContext.builder() - .buildOpenapi( - authentication.getActor().toUrnStr(), - httpServletRequest, - "getEntityBatch", - entityName), - authorizationChain, - authentication, - true); - - return ResponseEntity.of( - Optional.of( - BatchGetUrnResponse.builder() - .entities( - new ArrayList<>( - buildEntityList( - opContext, - urns, - new HashSet<>(request.getAspectNames()), - request.isWithSystemMetadata()))) - .build())); - } - @Override protected AspectsBatch toMCPBatch( @Nonnull OperationContext opContext, String entityArrayList, Actor actor) diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java index 7c94ccf630a78a..66b94218eb7d3a 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java @@ -107,6 +107,8 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { entityName + ENTITY_RESPONSE_SUFFIX, buildEntitySchema(e, aspectNames, true)); components.addSchemas( "Scroll" + entityName + ENTITY_RESPONSE_SUFFIX, buildEntityScrollSchema(e)); + components.addSchemas( + "BatchGet" + entityName + ENTITY_RESPONSE_SUFFIX, buildEntityBatchGetSchema(e)); }); // Parameters entityRegistry.getEntitySpecs().values().stream() @@ -127,6 +129,9 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { paths.addPathItem( String.format("/v3/entity/%s", e.getName().toLowerCase()), buildListEntityPath(e)); + paths.addPathItem( + String.format("/v3/entity/batch/%s", e.getName().toLowerCase()), + buildBatchGetEntityPath(e)); paths.addPathItem( String.format("/v3/entity/%s/{urn}", e.getName().toLowerCase()), buildSingleEntityPath(e)); @@ -360,6 +365,74 @@ private static PathItem buildListEntityPath(final EntitySpec entity) { return result; } + private static PathItem buildBatchGetEntityPath(final EntitySpec entity) { + final String upperFirst = toUpperFirst(entity.getName()); + final PathItem result = new PathItem(); + // Post Operation + final List aspectNames = + entity.getAspectSpecs().stream() + .map(AspectSpec::getName) + .distinct() + .collect(Collectors.toList()); + if (aspectNames.isEmpty()) { + aspectNames.add(entity.getKeyAspectName()); + } + final Schema aspectNamesSchema = + new Schema() + .type(TYPE_ARRAY) + .description("List of aspect names to get") + .items( + new Schema() + .type(TYPE_STRING) + ._enum(aspectNames) + ._default(aspectNames.stream().findFirst().orElse(null))); + final Content requestBatch = + new Content() + .addMediaType( + "application/json", + new MediaType() + .schema( + new Schema() + .properties( + Map.of( + "urns", + new Schema() + .type(TYPE_ARRAY) + .items( + new Schema() + .type(TYPE_STRING) + .description("List of urns to get")), + "aspectNames", aspectNamesSchema, + "withSystemMetadata", + new Schema().type(TYPE_BOOLEAN)._default(true))))); + final ApiResponse apiResponse = + new ApiResponse() + .description("Create a batch of " + entity.getName() + " entities.") + .content( + new Content() + .addMediaType( + "application/json", + new MediaType() + .schema( + new Schema<>() + .$ref( + String.format( + "#/components/schemas/BatchGet%s%s", + upperFirst, ENTITY_RESPONSE_SUFFIX))))); + result.setPost( + new Operation() + .summary("Batch Get " + upperFirst + " entities.") + .tags(List.of(entity.getName() + " Entity")) + .requestBody( + new RequestBody() + .description("Batch Get " + entity.getName() + " entities.") + .required(true) + .content(requestBatch)) + .responses(new ApiResponses().addApiResponse("200", apiResponse))); + + return result; + } + private static void addExtraParameters(final Components components) { components.addParameters( "ScrollId" + MODEL_VERSION, @@ -556,6 +629,27 @@ private static Schema buildEntityScrollSchema(final EntitySpec entity) { toUpperFirst(entity.getName()), ENTITY_RESPONSE_SUFFIX)))); } + private static Schema buildEntityBatchGetSchema(final EntitySpec entity) { + return new Schema<>() + .type(TYPE_OBJECT) + .description("Batch get " + toUpperFirst(entity.getName()) + " objects.") + .required(List.of("entities")) + .addProperty( + "entities", + new Schema() + .properties( + Map.of( + "urn", + new Schema().type(TYPE_STRING).description("Entity key urn"), + "aspects", + new Schema<>() + .description(toUpperFirst(entity.getName()) + " object.") + .$ref( + String.format( + "#/components/schemas/%s%s", + toUpperFirst(entity.getName()), ENTITY_RESPONSE_SUFFIX))))); + } + private static Schema buildAspectRef(final String aspect, final boolean withSystemMetadata) { // A non-required $ref property must be wrapped in a { allOf: [ $ref ] } // object to allow the From 5327f80cf784610f3cdc6f476a25fa4d021457e4 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 11 Jul 2024 10:52:58 -0500 Subject: [PATCH 2/9] feat(system): support conditional write semantics (#10868) --- docs/advanced/mcp-mcl.md | 45 +- docs/api/openapi/openapi-usage-guide.md | 9 +- docs/how/updating-datahub.md | 32 + .../metadata/aspect/AspectRetriever.java | 18 + .../aspect/EnvelopedSystemAspect.java | 62 + .../metadata/aspect/SystemAspect.java | 14 + .../metadata/aspect/batch/MCPItem.java | 26 +- .../validation/AspectValidationException.java | 67 +- .../ValidationExceptionCollection.java | 2 +- .../validation/ConditionalWriteValidator.java | 225 ++++ .../ConditionalWriteValidatorTest.java | 1098 +++++++++++++++++ .../metadata/aspect/MockAspectRetriever.java | 31 + .../test/metadata/aspect/batch/TestMCP.java | 15 + .../aspect/batch/TestSystemAspect.java | 26 + .../resources/MetadataChangeProposal.avsc | 15 +- .../resources/MetadataChangeProposal.avsc | 15 +- .../java/openlineage-converter/build.gradle | 4 +- .../openlineage/HdfsPathDatasetTest.java | 18 +- .../entity/ebean/batch/ChangeItemImpl.java | 61 +- .../aspect/utils/DefaultAspectsUtil.java | 11 +- .../client/EntityClientAspectRetriever.java | 24 + .../entity/EntityServiceAspectRetriever.java | 30 + .../metadata/entity/EntityServiceImpl.java | 142 ++- .../linkedin/metadata/entity/EntityUtils.java | 75 +- .../metadata/AspectGenerationUtils.java | 16 +- .../metadata/entity/EntityServiceTest.java | 103 +- .../StructuredPropertiesValidatorTest.java | 4 +- .../CustomDataQualityRulesValidator.java | 8 +- .../CustomDataQualityRulesValidator.java | 8 +- .../linkedin/mxe/MetadataChangeProposal.pdl | 6 +- .../com/linkedin/mxe/SystemMetadata.pdl | 7 + .../src/main/resources/entity-registry.yml | 12 + .../context/TestOperationContexts.java | 8 + .../v2/delegates/EntityApiDelegateImpl.java | 6 +- .../openapi/client/OpenApiClient.java | 17 +- .../openapi/models/GenericAspect.java | 16 + .../openapi/models/GenericEntity.java | 4 +- .../models/GenericEntityScrollResult.java | 2 +- .../models/BatchGetUrnRequestV2.java} | 6 +- .../models/BatchGetUrnResponseV2.java} | 9 +- .../openapi/v2/models/GenericAspectV2.java | 32 + .../models/GenericEntityScrollResultV2.java | 7 +- .../openapi/v2/models/GenericEntityV2.java | 17 +- .../openapi/v3/models/GenericAspectV3.java | 22 + .../models/GenericEntityScrollResultV3.java | 7 +- .../openapi/v3/models/GenericEntityV3.java | 46 +- .../controller/GenericEntitiesController.java | 154 ++- .../v2/controller/EntityController.java | 110 +- .../openapi/v3/OpenAPIV3Generator.java | 147 ++- .../v3/controller/EntityController.java | 152 ++- .../com.linkedin.entity.aspects.snapshot.json | 15 +- ...com.linkedin.entity.entities.snapshot.json | 5 + ...m.linkedin.entity.entitiesV2.snapshot.json | 5 + ...n.entity.entitiesVersionedV2.snapshot.json | 5 + .../linkedin/entity/client/EntityClient.java | 11 + .../metadata/entity/EntityService.java | 15 + .../metadata/utils/GenericRecordUtils.java | 26 + .../utils/metrics/ExceptionUtils.java | 43 + 58 files changed, 2750 insertions(+), 366 deletions(-) create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/EnvelopedSystemAspect.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/ConditionalWriteValidator.java create mode 100644 entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/ConditionalWriteValidatorTest.java create mode 100644 entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestSystemAspect.java create mode 100644 metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericAspect.java rename metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/{models/BatchGetUrnRequest.java => v2/models/BatchGetUrnRequestV2.java} (81%) rename metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/{models/BatchGetUrnResponse.java => v2/models/BatchGetUrnResponseV2.java} (57%) create mode 100644 metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericAspectV2.java create mode 100644 metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericAspectV3.java create mode 100644 metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/ExceptionUtils.java diff --git a/docs/advanced/mcp-mcl.md b/docs/advanced/mcp-mcl.md index 235c9a85ec8da1..2b2d2885428b56 100644 --- a/docs/advanced/mcp-mcl.md +++ b/docs/advanced/mcp-mcl.md @@ -60,10 +60,14 @@ record MetadataChangeProposal { **/ systemMetadata: optional SystemMetadata + /** + * Headers - intended to mimic http headers + */ + headers: optional map[string, string] } ``` -Each proposal comprises of the following: +Each proposal is comprised of the following: 1. entityType @@ -82,12 +86,13 @@ Each proposal comprises of the following: Type of change you are proposing: one of - UPSERT: Insert if not exists, update otherwise - - CREATE: Insert if not exists, fail otherwise + - CREATE: Insert aspect if not exists, fail otherwise + - CREATE_ENTITY: Insert if entity does not exist, fail otherwise - UPDATE: Update if exists, fail otherwise - DELETE: Delete - PATCH: Patch the aspect instead of doing a full replace - Only UPSERT, CREATE, DELETE, PATCH are supported as of now. + Only UPSERT, CREATE, CREATE_ENTITY, DELETE, PATCH are supported as of now. 5. aspectName @@ -110,6 +115,10 @@ Each proposal comprises of the following: Extra metadata about the proposal like run_id or updated timestamp. +8. headers + + Optional headers which are meant to mimic http headers. These are currently used for implementing conditional write logic. + GMS processes the proposal and produces the Metadata Change Log, which looks like this. ```protobuf @@ -156,4 +165,32 @@ entities: keyAspect: datasetKey aspects: - datasetProfile -``` \ No newline at end of file +``` + +## Features + +### Conditional Writes + +Conditional write semantics use extra information contained in the MCP `headers` field to possibly avoid writing new aspects +if the conditions are not met. + +#### If-Version-Match + +Each time an aspect is updated a `version` is incremented to represent the change to the aspect. This `version` is stored and returned +in `SystemMetadata`. + +A writer can provide a header with the expected `version` when initiating the request. If the expected `version` does not +match the actual `version` stored in the database, the write will fail. This prevents overwriting an aspect that has +been modified by another process. + +#### If-Modified-Since / If-Unmodified-Since + +A writer may also specify time-based conditions using http header semantics. Similar to version based conditional writes +this method can be used to prevent the write if the target aspect was modified after a reading the aspect. Per the +http specification dates must comply with ISO-8601 standard. + +`If-Unmodified-Since`: +A writer can specify that the aspect must NOT have been modified after a specific time, following [If-Unmodified-Since](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Unmodified-Since) http headers. + +`If-Modified-Since` +A writer can specify that the aspect must have been modified after a specific time, following [If-Modified-Since](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since) http headers. diff --git a/docs/api/openapi/openapi-usage-guide.md b/docs/api/openapi/openapi-usage-guide.md index f33c20c91dacb7..9c6d0bfa12fc18 100644 --- a/docs/api/openapi/openapi-usage-guide.md +++ b/docs/api/openapi/openapi-usage-guide.md @@ -119,7 +119,7 @@ curl --location --request POST 'localhost:8080/openapi/entities/v1/' \ The second POST example will write the update ONLY if the entity doesn't exist. If the entity does exist the command will return an error instead of overwriting the entity. -In this example we've added an additional URL parameter `createEntityIfNotExists=true` +In this example we've added a URL parameter `createEntityIfNotExists=true` ```shell curl --location --request POST 'localhost:8080/openapi/entities/v1/?createEntityIfNotExists=true' \ @@ -582,3 +582,10 @@ public class Main { } } ``` + +## OpenAPI v3 Features + +### Conditional Writes + +All the create/POST endpoints for aspects support `headers` in the POST body to support batch APIs. See the docs in the +[MetadataChangeProposal](../../advanced/mcp-mcl.md) section for the use of these headers to support conditional writes semantics. \ No newline at end of file diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 421812e5cca9bc..ef9990ca3804eb 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -22,6 +22,38 @@ This file documents any backwards-incompatible changes in DataHub and assists pe - Protobuf CLI will no longer create binary encoded protoc custom properties. Flag added `-protocProp` in case this behavior is required. +- #10868 - OpenAPI V3 - Creation of aspects will need to be wrapped within a `value` key and the API is now symmetric with respect to input and outputs. + +Example Global Tags Aspect: + +Previous: +```json +{ + "tags": [ + { + "tag": "string", + "context": "string" + } + ] +} +``` + +New (optional fields `systemMetadata` and `headers`): + +```json +{ + "value": { + "tags": [ + { + "tag": "string", + "context": "string" + } + ] + }, + "systemMetadata": {}, + "headers": {} +} +``` ### Potential Downtime diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/AspectRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/AspectRetriever.java index ad47562662fd6e..e34df7db481189 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/AspectRetriever.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/AspectRetriever.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.aspect; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.entity.Aspect; @@ -31,6 +32,23 @@ default Aspect getLatestAspectObject(@Nonnull final Urn urn, @Nonnull final Stri @Nonnull Map> getLatestAspectObjects(Set urns, Set aspectNames); + @Nullable + default SystemAspect getLatestSystemAspect( + @Nonnull final Urn urn, @Nonnull final String aspectName) { + return getLatestSystemAspects(ImmutableMap.of(urn, ImmutableSet.of(aspectName))) + .getOrDefault(urn, Collections.emptyMap()) + .get(aspectName); + } + + /** + * Returns for each URN, the map of aspectName to Aspect + * + * @param urnAspectNames urns and aspect names to fetch + * @return urn to aspect name and values + */ + @Nonnull + Map> getLatestSystemAspects(Map> urnAspectNames); + @Nonnull default Map entityExists(Set urns) { Set keyAspectNames = diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/EnvelopedSystemAspect.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/EnvelopedSystemAspect.java new file mode 100644 index 00000000000000..b4b8e095a38ac2 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/EnvelopedSystemAspect.java @@ -0,0 +1,62 @@ +package com.linkedin.metadata.aspect; + +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.mxe.SystemMetadata; +import java.sql.Timestamp; +import java.time.Instant; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Getter; + +/** Delegate to restli class */ +public class EnvelopedSystemAspect implements SystemAspect { + + public static SystemAspect of( + @Nonnull Urn urn, @Nonnull EnvelopedAspect envelopedAspect, @Nonnull EntitySpec entitySpec) { + return new EnvelopedSystemAspect(urn, envelopedAspect, entitySpec); + } + + @Getter @Nonnull private final Urn urn; + @Nonnull private final EnvelopedAspect envelopedAspect; + @Getter @Nonnull private final EntitySpec entitySpec; + @Getter @Nonnull private final AspectSpec aspectSpec; + + public EnvelopedSystemAspect( + @Nonnull Urn urn, @Nonnull EnvelopedAspect envelopedAspect, @Nonnull EntitySpec entitySpec) { + this.urn = urn; + this.envelopedAspect = envelopedAspect; + this.entitySpec = entitySpec; + this.aspectSpec = this.entitySpec.getAspectSpec(envelopedAspect.getName()); + } + + @Nullable + @Override + public RecordTemplate getRecordTemplate() { + return envelopedAspect.getValue(); + } + + @Nullable + @Override + public SystemMetadata getSystemMetadata() { + return envelopedAspect.getSystemMetadata(); + } + + @Override + public long getVersion() { + return envelopedAspect.getVersion(); + } + + @Override + public Timestamp getCreatedOn() { + return Timestamp.from(Instant.ofEpochMilli(envelopedAspect.getCreated().getTime())); + } + + @Override + public String getCreatedBy() { + return envelopedAspect.getCreated().getActor().toString(); + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/SystemAspect.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/SystemAspect.java index e83414c8c23a85..4c9bf3d4fdbc78 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/SystemAspect.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/SystemAspect.java @@ -2,7 +2,9 @@ import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.UrnUtils; +import com.linkedin.mxe.SystemMetadata; import java.sql.Timestamp; +import java.util.Optional; import javax.annotation.Nonnull; /** @@ -22,4 +24,16 @@ default AuditStamp getAuditStamp() { .setActor(UrnUtils.getUrn(getCreatedBy())) .setTime(getCreatedOn().getTime()); } + + /** + * If aspect version exists in system metadata, return it + * + * @return version of the aspect + */ + default Optional getSystemMetadataVersion() { + return Optional.ofNullable(getSystemMetadata()) + .filter(SystemMetadata::hasVersion) + .map(SystemMetadata::getVersion) + .map(Long::parseLong); + } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCPItem.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCPItem.java index 96b2752516e604..afe11f56cce507 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCPItem.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCPItem.java @@ -5,18 +5,42 @@ import com.linkedin.metadata.aspect.patch.template.AspectTemplateEngine; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.mxe.MetadataChangeProposal; +import java.util.Collections; +import java.util.Map; +import java.util.Optional; import java.util.Set; +import javax.annotation.Nonnull; import javax.annotation.Nullable; /** Represents a proposal to write to the primary data store which may be represented by an MCP */ public interface MCPItem extends BatchItem { Set CHANGE_TYPES = - ImmutableSet.of(ChangeType.UPSERT, ChangeType.CREATE, ChangeType.CREATE_ENTITY); + ImmutableSet.of( + ChangeType.UPSERT, ChangeType.UPDATE, ChangeType.CREATE, ChangeType.CREATE_ENTITY); @Nullable MetadataChangeProposal getMetadataChangeProposal(); + @Nonnull + default Map getHeaders() { + if (getMetadataChangeProposal() != null && getMetadataChangeProposal().getHeaders() != null) { + return getMetadataChangeProposal().getHeaders(); + } + return Collections.emptyMap(); + } + + default boolean hasHeader(@Nonnull String headerName) { + return getHeaders().keySet().stream().anyMatch(hdr -> hdr.equalsIgnoreCase(headerName)); + } + + default Optional getHeader(@Nonnull String headerName) { + return getHeaders().entrySet().stream() + .filter(entry -> entry.getKey().equalsIgnoreCase(headerName)) + .map(Map.Entry::getValue) + .findAny(); + } + /** * Validates that a change type is valid for the given aspect * diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectValidationException.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectValidationException.java index f83642c5eed9ec..dd8798ee89ae6b 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectValidationException.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectValidationException.java @@ -1,12 +1,16 @@ package com.linkedin.metadata.aspect.plugins.validation; import com.linkedin.common.urn.Urn; +import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.util.Pair; -import java.util.Objects; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import lombok.EqualsAndHashCode; +import lombok.Value; +@Value +@EqualsAndHashCode(callSuper = false) public class AspectValidationException extends Exception { public static AspectValidationException forItem(BatchItem item, String msg) { @@ -14,46 +18,55 @@ public static AspectValidationException forItem(BatchItem item, String msg) { } public static AspectValidationException forItem(BatchItem item, String msg, Exception e) { - return new AspectValidationException(item.getUrn(), item.getAspectName(), msg, e); + return new AspectValidationException( + item.getChangeType(), item.getUrn(), item.getAspectName(), msg, SubType.VALIDATION, e); } - @Nonnull private final Urn entityUrn; - @Nonnull private final String aspectName; - @Nullable private final String msg; + public static AspectValidationException forPrecondition(BatchItem item, String msg) { + return forPrecondition(item, msg, null); + } + + public static AspectValidationException forPrecondition(BatchItem item, String msg, Exception e) { + return new AspectValidationException( + item.getChangeType(), item.getUrn(), item.getAspectName(), msg, SubType.PRECONDITION, e); + } - public AspectValidationException(@Nonnull Urn entityUrn, @Nonnull String aspectName, String msg) { - this(entityUrn, aspectName, msg, null); + @Nonnull ChangeType changeType; + @Nonnull Urn entityUrn; + @Nonnull String aspectName; + @Nonnull SubType subType; + @Nullable String msg; + + public AspectValidationException( + @Nonnull ChangeType changeType, + @Nonnull Urn entityUrn, + @Nonnull String aspectName, + String msg, + SubType subType) { + this(changeType, entityUrn, aspectName, msg, subType, null); } public AspectValidationException( - @Nonnull Urn entityUrn, @Nonnull String aspectName, @Nonnull String msg, Exception e) { + @Nonnull ChangeType changeType, + @Nonnull Urn entityUrn, + @Nonnull String aspectName, + @Nonnull String msg, + @Nullable SubType subType, + Exception e) { super(msg, e); + this.changeType = changeType; this.entityUrn = entityUrn; this.aspectName = aspectName; this.msg = msg; + this.subType = subType != null ? subType : SubType.VALIDATION; } - public Pair getExceptionKey() { + public Pair getAspectGroup() { return Pair.of(entityUrn, aspectName); } - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - AspectValidationException that = (AspectValidationException) o; - - if (!entityUrn.equals(that.entityUrn)) return false; - if (!aspectName.equals(that.aspectName)) return false; - return Objects.equals(msg, that.msg); - } - - @Override - public int hashCode() { - int result = entityUrn.hashCode(); - result = 31 * result + aspectName.hashCode(); - result = 31 * result + (msg != null ? msg.hashCode() : 0); - return result; + public static enum SubType { + VALIDATION, + PRECONDITION } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/ValidationExceptionCollection.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/ValidationExceptionCollection.java index 559fa85cff04c4..007c196156b124 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/ValidationExceptionCollection.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/ValidationExceptionCollection.java @@ -20,7 +20,7 @@ public static ValidationExceptionCollection newCollection() { } public void addException(AspectValidationException exception) { - super.computeIfAbsent(exception.getExceptionKey(), key -> new HashSet<>()).add(exception); + super.computeIfAbsent(exception.getAspectGroup(), key -> new HashSet<>()).add(exception); } public void addException(BatchItem item, String message) { diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/ConditionalWriteValidator.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/ConditionalWriteValidator.java new file mode 100644 index 00000000000000..9927ca4c5a0985 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/ConditionalWriteValidator.java @@ -0,0 +1,225 @@ +package com.linkedin.metadata.aspect.validation; + +import com.google.common.collect.ImmutableSet; +import com.google.common.net.HttpHeaders; +import com.linkedin.common.urn.Urn; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.SystemAspect; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; +import com.linkedin.util.Pair; +import java.time.Instant; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; + +/** + * Implements conditional write semantics. + * + *

1. If an aspect change request includes a SystemMetadata section which includes a value for + * nextAspectVersion, then it must match the current SystemMetadata's value for nextAspectVersion or + * prevent performing the write operation. + * + *

2. Headers are included with the MCP for the following time-based checks * If-Modified-Since * + * If-Unmodified-Since + */ +@Setter +@Getter +@Accessors(chain = true) +public class ConditionalWriteValidator extends AspectPayloadValidator { + public static final String DEFAULT_ASPECT_VERSION = "1"; + public static final long DEFAULT_LAST_MODIFIED_TIME = Long.MIN_VALUE; + public static final String HTTP_HEADER_IF_VERSION_MATCH = "If-Version-Match"; + public static final Set CREATE_CHANGE_TYPES = + ImmutableSet.of(ChangeType.CREATE, ChangeType.CREATE_ENTITY); + + @Nonnull private AspectPluginConfig config; + + private static boolean hasTimePrecondition(ChangeMCP item) { + return item.getHeader(HttpHeaders.IF_MODIFIED_SINCE).isPresent() + || item.getHeader(HttpHeaders.IF_UNMODIFIED_SINCE).isPresent(); + } + + private static boolean hasVersionPrecondition(ChangeMCP item) { + return item.getHeader(HTTP_HEADER_IF_VERSION_MATCH).isPresent(); + } + + private static boolean isApplicableFilter(ChangeMCP item) { + if (ChangeType.RESTATE.equals(item.getChangeType())) { + return false; + } + + return hasTimePrecondition(item) || hasVersionPrecondition(item); + } + + @Override + protected Stream validatePreCommitAspects( + @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) { + ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection(); + AspectRetriever aspectRetriever = retrieverContext.getAspectRetriever(); + + List applicableMCPs = + changeMCPs.stream() + .filter(ConditionalWriteValidator::isApplicableFilter) + .collect(Collectors.toList()); + + // Batch lookup missing data + Map> missingDataUrnAspects = + applicableMCPs.stream() + // create change types not expected to have previous data + .filter(item -> !CREATE_CHANGE_TYPES.contains(item.getChangeType())) + .filter(item -> item.getPreviousSystemAspect() == null) + .collect( + Collectors.groupingBy( + ChangeMCP::getUrn, + Collectors.mapping(ChangeMCP::getAspectName, Collectors.toSet()))); + final Map> resolvedData = + aspectRetriever.getLatestSystemAspects(missingDataUrnAspects); + + for (ChangeMCP item : applicableMCPs) { + // Validate aspect version precondition + if (hasVersionPrecondition(item)) { + item.getHeader(HTTP_HEADER_IF_VERSION_MATCH) + .flatMap( + headerValue -> + validateVersionPrecondition( + item, Pair.of(HTTP_HEADER_IF_VERSION_MATCH, headerValue), resolvedData)) + .ifPresent(exceptions::addException); + } + + // Validate modified time conditional + if (hasTimePrecondition(item)) { + item.getHeader(HttpHeaders.IF_MODIFIED_SINCE) + .flatMap( + headerValue -> + validateTimePrecondition( + item, Pair.of(HttpHeaders.IF_MODIFIED_SINCE, headerValue), resolvedData)) + .ifPresent(exceptions::addException); + item.getHeader(HttpHeaders.IF_UNMODIFIED_SINCE) + .flatMap( + headerValue -> + validateTimePrecondition( + item, Pair.of(HttpHeaders.IF_UNMODIFIED_SINCE, headerValue), resolvedData)) + .ifPresent(exceptions::addException); + } + } + + return exceptions.streamAllExceptions(); + } + + private static Optional validateVersionPrecondition( + ChangeMCP item, + Pair header, + Map> resolvedData) { + final String actualAspectVersion; + switch (item.getChangeType()) { + case CREATE: + case CREATE_ENTITY: + actualAspectVersion = DEFAULT_ASPECT_VERSION; + break; + default: + actualAspectVersion = + resolvePreviousSystemAspect(item, resolvedData) + .map( + prevSystemAspect -> { + if (prevSystemAspect.getSystemMetadataVersion().isPresent()) { + return String.valueOf(prevSystemAspect.getSystemMetadataVersion().get()); + } else { + return String.valueOf(Math.max(1, prevSystemAspect.getVersion())); + } + }) + .orElse(DEFAULT_ASPECT_VERSION); + break; + } + + if (!header.getSecond().equals(actualAspectVersion)) { + return Optional.of( + AspectValidationException.forPrecondition( + item, + String.format( + "Expected version %s, actual version %s", + header.getSecond(), actualAspectVersion))); + } + + return Optional.empty(); + } + + private static Optional validateTimePrecondition( + ChangeMCP item, + Pair header, + Map> resolvedData) { + + final long lastModifiedTimeMs; + switch (item.getChangeType()) { + case CREATE: + case CREATE_ENTITY: + lastModifiedTimeMs = DEFAULT_LAST_MODIFIED_TIME; + break; + default: + lastModifiedTimeMs = + resolvePreviousSystemAspect(item, resolvedData) + .map(systemAspect -> systemAspect.getAuditStamp().getTime()) + .orElse(DEFAULT_LAST_MODIFIED_TIME); + break; + } + + long headerValueEpochMs = Instant.parse(header.getValue()).toEpochMilli(); + + switch (header.getKey()) { + case HttpHeaders.IF_MODIFIED_SINCE: + return lastModifiedTimeMs > headerValueEpochMs + ? Optional.empty() + : Optional.of( + AspectValidationException.forPrecondition( + item, + String.format( + "Item last modified %s <= %s (epoch ms)", + lastModifiedTimeMs, headerValueEpochMs))); + case HttpHeaders.IF_UNMODIFIED_SINCE: + return lastModifiedTimeMs <= headerValueEpochMs + ? Optional.empty() + : Optional.of( + AspectValidationException.forPrecondition( + item, + String.format( + "Item last modified %s > %s (epoch ms)", + lastModifiedTimeMs, headerValueEpochMs))); + default: + return Optional.empty(); + } + } + + private static Optional resolvePreviousSystemAspect( + ChangeMCP item, Map> resolvedData) { + if (item.getPreviousSystemAspect() != null) { + return Optional.of(item.getPreviousSystemAspect()); + } + if (resolvedData.getOrDefault(item.getUrn(), Collections.emptyMap()).get(item.getAspectName()) + != null) { + return Optional.of(resolvedData.get(item.getUrn()).get(item.getAspectName())); + } + return Optional.empty(); + } + + @Override + protected Stream validateProposedAspects( + @Nonnull Collection mcpItems, + @Nonnull RetrieverContext retrieverContext) { + return Stream.empty(); + } +} diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/ConditionalWriteValidatorTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/ConditionalWriteValidatorTest.java new file mode 100644 index 00000000000000..b62a95d8ab91c2 --- /dev/null +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/ConditionalWriteValidatorTest.java @@ -0,0 +1,1098 @@ +package com.linkedin.metadata.aspect.validators; + +import static com.linkedin.metadata.aspect.validation.ConditionalWriteValidator.HTTP_HEADER_IF_VERSION_MATCH; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; + +import com.google.common.net.HttpHeaders; +import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.StringMap; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.aspect.validation.ConditionalWriteValidator; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.test.metadata.aspect.batch.TestMCP; +import com.linkedin.test.metadata.aspect.batch.TestSystemAspect; +import java.sql.Timestamp; +import java.time.Instant; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +public class ConditionalWriteValidatorTest { + private EntityRegistry entityRegistry; + private RetrieverContext mockRetrieverContext; + private static final List supportedChangeTypes = + List.of( + ChangeType.CREATE, + ChangeType.CREATE_ENTITY, + ChangeType.DELETE, + ChangeType.UPSERT, + ChangeType.UPDATE, + ChangeType.RESTATE, + ChangeType.PATCH); + + private static final AspectPluginConfig validatorConfig = + AspectPluginConfig.builder() + .supportedOperations( + supportedChangeTypes.stream().map(Object::toString).collect(Collectors.toList())) + .className(ConditionalWriteValidator.class.getName()) + .supportedEntityAspectNames(List.of(AspectPluginConfig.EntityAspectName.ALL)) + .enabled(true) + .build(); + + @BeforeTest + public void init() { + entityRegistry = new TestEntityRegistry(); + AspectRetriever mockAspectRetriever = mock(AspectRetriever.class); + when(mockAspectRetriever.getEntityRegistry()).thenReturn(entityRegistry); + GraphRetriever mockGraphRetriever = mock(GraphRetriever.class); + mockRetrieverContext = mock(RetrieverContext.class); + when(mockRetrieverContext.getAspectRetriever()).thenReturn(mockAspectRetriever); + when(mockRetrieverContext.getGraphRetriever()).thenReturn(mockGraphRetriever); + } + + @Test + public void testNextVersionSuccess() { + ConditionalWriteValidator test = new ConditionalWriteValidator().setConfig(validatorConfig); + Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); + + for (ChangeType changeType : supportedChangeTypes) { + final ChangeMCP testMCP; + switch (changeType) { + case RESTATE: + case DELETE: + case CREATE_ENTITY: + case CREATE: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected + .headers(Map.of(HTTP_HEADER_IF_VERSION_MATCH, "1")) + .build(); + break; + default: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected + .headers(Map.of(HTTP_HEADER_IF_VERSION_MATCH, "1")) + .previousSystemAspect( + TestSystemAspect.builder() + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + // Previous / actual + .systemMetadata(new SystemMetadata().setVersion("1")) + .build()) + .build(); + break; + } + + Set exceptions = + test.validatePreCommit(List.of(testMCP), mockRetrieverContext) + .collect(Collectors.toSet()); + + assertEquals(Set.of(), exceptions, "Expected no exceptions for change type " + changeType); + } + } + + @Test + public void testNoSystemMetadataNextVersionNextVersionSuccess() { + ConditionalWriteValidator test = new ConditionalWriteValidator().setConfig(validatorConfig); + Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); + + for (ChangeType changeType : supportedChangeTypes) { + final ChangeMCP testMCP; + switch (changeType) { + default: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected + .headers(Map.of(HTTP_HEADER_IF_VERSION_MATCH, "1")) + .previousSystemAspect( + TestSystemAspect.builder() + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + // Missing previous system metadata, expect fallback to version + .version(0) + .build()) + .build(); + break; + } + + Set exceptions = + test.validatePreCommit(List.of(testMCP), mockRetrieverContext) + .collect(Collectors.toSet()); + + assertEquals(Set.of(), exceptions, "Expected no exceptions for change type " + changeType); + } + } + + @Test + public void testNoPreviousVersionsLookupSchemaMetadataNextVersionSuccess() { + ConditionalWriteValidator test = new ConditionalWriteValidator().setConfig(validatorConfig); + Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); + + // Prepare mock lookup based on version + when(mockRetrieverContext + .getAspectRetriever() + .getLatestSystemAspects(eq(Map.of(testEntityUrn, Set.of("status"))))) + .thenReturn( + Map.of( + testEntityUrn, + Map.of( + "status", + TestSystemAspect.builder() + .systemMetadata( + new SystemMetadata().setVersion("2")) // expected next version 2 + .build()))); + + for (ChangeType changeType : supportedChangeTypes) { + final ChangeMCP testMCP; + switch (changeType) { + case CREATE: + case CREATE_ENTITY: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected is always 1 + .headers(Map.of(HTTP_HEADER_IF_VERSION_MATCH, "1")) + .build(); + break; + default: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected + .headers(Map.of(HTTP_HEADER_IF_VERSION_MATCH, "2")) + .build(); + break; + } + + Set exceptions = + test.validatePreCommit(List.of(testMCP), mockRetrieverContext) + .collect(Collectors.toSet()); + + assertEquals(Set.of(), exceptions, "Expected no exceptions for change type " + changeType); + } + } + + @Test + public void testNoPreviousVersionsLookupVersionNextVersionSuccess() { + ConditionalWriteValidator test = new ConditionalWriteValidator().setConfig(validatorConfig); + Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); + + // Prepare mock lookup based on version + when(mockRetrieverContext + .getAspectRetriever() + .getLatestSystemAspects(eq(Map.of(testEntityUrn, Set.of("status"))))) + .thenReturn( + Map.of( + testEntityUrn, + Map.of( + "status", + TestSystemAspect.builder() + .version(2) // expected next version 2 + .build()))); + + for (ChangeType changeType : supportedChangeTypes) { + final ChangeMCP testMCP; + switch (changeType) { + case CREATE: + case CREATE_ENTITY: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected is always 1 + .headers(Map.of(HTTP_HEADER_IF_VERSION_MATCH, "1")) + .build(); + break; + default: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected + .headers(Map.of(HTTP_HEADER_IF_VERSION_MATCH, "2")) + .build(); + break; + } + + Set exceptions = + test.validatePreCommit(List.of(testMCP), mockRetrieverContext) + .collect(Collectors.toSet()); + + assertEquals(Set.of(), exceptions, "Expected no exceptions for change type " + changeType); + } + } + + @Test + public void testNextVersionFail() { + ConditionalWriteValidator test = new ConditionalWriteValidator().setConfig(validatorConfig); + Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); + + for (ChangeType changeType : supportedChangeTypes) { + final ChangeMCP testMCP; + switch (changeType) { + case RESTATE: + case DELETE: + case CREATE_ENTITY: + case CREATE: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Incorrect Expected + .headers(Map.of(HTTP_HEADER_IF_VERSION_MATCH, "2")) + .build(); + break; + default: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Incorrect Expected + .headers(Map.of(HTTP_HEADER_IF_VERSION_MATCH, "2")) + .previousSystemAspect( + TestSystemAspect.builder() + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + // Previous / actual + .systemMetadata(new SystemMetadata().setVersion("3")) + .build()) + .build(); + break; + } + + Set exceptions = + test.validatePreCommit(List.of(testMCP), mockRetrieverContext) + .collect(Collectors.toSet()); + + switch (changeType) { + case RESTATE: + assertEquals(Set.of(), exceptions, "Expected no exception for change type RESTATE"); + break; + case CREATE: + case CREATE_ENTITY: + case DELETE: + assertEquals(exceptions.size(), 1, "Expected exception for change type " + changeType); + assertEquals( + exceptions.stream().findFirst().get().getMessage(), + "Expected version 2, actual version 1"); + break; + default: + assertEquals(exceptions.size(), 1, "Expected exception for change type " + changeType); + assertEquals( + exceptions.stream().findFirst().get().getMessage(), + "Expected version 2, actual version 3"); + break; + } + } + } + + @Test + public void testNoSystemMetadataNextVersionNextVersionFail() { + ConditionalWriteValidator test = new ConditionalWriteValidator().setConfig(validatorConfig); + Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); + + for (ChangeType changeType : supportedChangeTypes) { + final ChangeMCP testMCP; + switch (changeType) { + default: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected + .headers(Map.of(HTTP_HEADER_IF_VERSION_MATCH, "2")) + .previousSystemAspect( + TestSystemAspect.builder() + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + // Missing previous system metadata, expect fallback to version + .version(3) + .build()) + .build(); + break; + } + + Set exceptions = + test.validatePreCommit(List.of(testMCP), mockRetrieverContext) + .collect(Collectors.toSet()); + + switch (changeType) { + case RESTATE: + assertEquals(Set.of(), exceptions, "Expected no exception for change type RESTATE"); + break; + case CREATE: + case CREATE_ENTITY: + assertEquals(exceptions.size(), 1, "Expected exception for change type " + changeType); + assertEquals( + exceptions.stream().findFirst().get().getMessage(), + "Expected version 2, actual version 1"); + break; + default: + assertEquals(exceptions.size(), 1, "Expected exception for change type " + changeType); + assertEquals( + exceptions.stream().findFirst().get().getMessage(), + "Expected version 2, actual version 3"); + break; + } + } + } + + @Test + public void testNoPreviousVersionsLookupSchemaMetadataNextVersionFail() { + ConditionalWriteValidator test = new ConditionalWriteValidator().setConfig(validatorConfig); + Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); + + // Prepare mock lookup based on version + when(mockRetrieverContext + .getAspectRetriever() + .getLatestSystemAspects(eq(Map.of(testEntityUrn, Set.of("status"))))) + .thenReturn( + Map.of( + testEntityUrn, + Map.of( + "status", + TestSystemAspect.builder() + .systemMetadata( + new SystemMetadata().setVersion("3")) // expected next version 3 + .build()))); + + for (ChangeType changeType : supportedChangeTypes) { + final ChangeMCP testMCP; + switch (changeType) { + case CREATE: + case CREATE_ENTITY: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected is always 1 + .headers(Map.of(HTTP_HEADER_IF_VERSION_MATCH, "2")) + .build(); + break; + default: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected + .headers(Map.of(HTTP_HEADER_IF_VERSION_MATCH, "2")) + .build(); + break; + } + + Set exceptions = + test.validatePreCommit(List.of(testMCP), mockRetrieverContext) + .collect(Collectors.toSet()); + + switch (changeType) { + case RESTATE: + assertEquals(Set.of(), exceptions, "Expected no exception for change type RESTATE"); + break; + case CREATE: + case CREATE_ENTITY: + assertEquals(exceptions.size(), 1, "Expected exception for change type " + changeType); + assertEquals( + exceptions.stream().findFirst().get().getMessage(), + "Expected version 2, actual version 1"); + break; + default: + assertEquals(exceptions.size(), 1, "Expected exception for change type " + changeType); + assertEquals( + exceptions.stream().findFirst().get().getMessage(), + "Expected version 2, actual version 3"); + break; + } + } + } + + @Test + public void testNoPreviousVersionsLookupVersionNextVersionFail() { + ConditionalWriteValidator test = new ConditionalWriteValidator().setConfig(validatorConfig); + Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); + + // Prepare mock lookup based on version + when(mockRetrieverContext + .getAspectRetriever() + .getLatestSystemAspects(eq(Map.of(testEntityUrn, Set.of("status"))))) + .thenReturn( + Map.of( + testEntityUrn, + Map.of( + "status", + TestSystemAspect.builder() + .version(3) // expected next version 3 + .build()))); + + for (ChangeType changeType : supportedChangeTypes) { + final ChangeMCP testMCP; + switch (changeType) { + case CREATE: + case CREATE_ENTITY: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected is always 1 + .headers(Map.of(HTTP_HEADER_IF_VERSION_MATCH, "2")) + .build(); + break; + default: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected + .headers(Map.of(HTTP_HEADER_IF_VERSION_MATCH, "2")) + .build(); + break; + } + + Set exceptions = + test.validatePreCommit(List.of(testMCP), mockRetrieverContext) + .collect(Collectors.toSet()); + + switch (changeType) { + case RESTATE: + assertEquals(Set.of(), exceptions, "Expected no exception for change type RESTATE"); + break; + case CREATE: + case CREATE_ENTITY: + assertEquals(exceptions.size(), 1, "Expected exception for change type " + changeType); + assertEquals( + exceptions.stream().findFirst().get().getMessage(), + "Expected version 2, actual version 1"); + break; + default: + assertEquals(exceptions.size(), 1, "Expected exception for change type " + changeType); + assertEquals( + exceptions.stream().findFirst().get().getMessage(), + "Expected version 2, actual version 3"); + break; + } + } + } + + @Test + public void testModifiedSinceSuccess() { + ConditionalWriteValidator test = new ConditionalWriteValidator().setConfig(validatorConfig); + Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); + + StringMap headers = + new StringMap( + Map.of( + HttpHeaders.IF_UNMODIFIED_SINCE, "2024-07-03T00:00:00Z", + HttpHeaders.IF_MODIFIED_SINCE, "2024-07-01T00:00:00Z")); + Timestamp modified = Timestamp.from(Instant.parse("2024-07-02T00:00:00Z")); + + for (ChangeType changeType : supportedChangeTypes) { + final ChangeMCP testMCP; + switch (changeType) { + case RESTATE: + case CREATE_ENTITY: + case CREATE: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected + .metadataChangeProposal( + new MetadataChangeProposal() + .setHeaders( + new StringMap( + Map.of( + HttpHeaders.IF_UNMODIFIED_SINCE, + headers.get(HttpHeaders.IF_UNMODIFIED_SINCE))))) + .build(); + break; + default: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected + .metadataChangeProposal(new MetadataChangeProposal().setHeaders(headers)) + .previousSystemAspect( + TestSystemAspect.builder() + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + // Previous / actual + .createdOn(modified) + .createdBy("urn:li:corpuser:test") + .build()) + .build(); + break; + } + + Set exceptions = + test.validatePreCommit(List.of(testMCP), mockRetrieverContext) + .collect(Collectors.toSet()); + + assertEquals(Set.of(), exceptions, "Expected no exceptions for change type " + changeType); + } + } + + @Test + public void testNoPreviousLookupAuditStampModifiedSinceSuccess() { + ConditionalWriteValidator test = new ConditionalWriteValidator().setConfig(validatorConfig); + Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); + + StringMap headers = + new StringMap( + Map.of( + HttpHeaders.IF_UNMODIFIED_SINCE, "2024-07-03T00:00:00Z", + HttpHeaders.IF_MODIFIED_SINCE, "2024-07-01T00:00:00Z")); + Timestamp modified = Timestamp.from(Instant.parse("2024-07-02T00:00:00Z")); + + // Prepare mock lookup + when(mockRetrieverContext + .getAspectRetriever() + .getLatestSystemAspects(eq(Map.of(testEntityUrn, Set.of("status"))))) + .thenReturn( + Map.of( + testEntityUrn, + Map.of( + "status", + TestSystemAspect.builder() + .createdOn(modified) + .createdBy("urn:li:corpuser:test") + .build()))); + + for (ChangeType changeType : supportedChangeTypes) { + final ChangeMCP testMCP; + switch (changeType) { + case CREATE: + case CREATE_ENTITY: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + .metadataChangeProposal( + new MetadataChangeProposal() + .setHeaders( + new StringMap( + Map.of( + HttpHeaders.IF_UNMODIFIED_SINCE, + headers.get(HttpHeaders.IF_UNMODIFIED_SINCE))))) + .build(); + break; + default: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected + .metadataChangeProposal(new MetadataChangeProposal().setHeaders(headers)) + .build(); + break; + } + + Set exceptions = + test.validatePreCommit(List.of(testMCP), mockRetrieverContext) + .collect(Collectors.toSet()); + + assertEquals(Set.of(), exceptions, "Expected no exceptions for change type " + changeType); + } + } + + @Test + public void testModifiedSinceBeforeRangeFail() { + ConditionalWriteValidator test = new ConditionalWriteValidator().setConfig(validatorConfig); + Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); + + StringMap headers = + new StringMap( + Map.of( + HttpHeaders.IF_UNMODIFIED_SINCE, "2024-07-03T00:00:00Z", + HttpHeaders.IF_MODIFIED_SINCE, "2024-07-01T00:00:00Z")); + Timestamp modified = Timestamp.from(Instant.parse("2024-06-30T00:00:00Z")); + + for (ChangeType changeType : supportedChangeTypes) { + final ChangeMCP testMCP; + switch (changeType) { + case RESTATE: + case CREATE_ENTITY: + case CREATE: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected + .metadataChangeProposal( + new MetadataChangeProposal() + .setHeaders( + new StringMap( + Map.of( + HttpHeaders.IF_UNMODIFIED_SINCE, + headers.get(HttpHeaders.IF_UNMODIFIED_SINCE))))) + .build(); + break; + default: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected + .metadataChangeProposal(new MetadataChangeProposal().setHeaders(headers)) + .previousSystemAspect( + TestSystemAspect.builder() + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + // Previous / actual + .createdOn(modified) + .createdBy("urn:li:corpuser:test") + .build()) + .build(); + break; + } + + Set exceptions = + test.validatePreCommit(List.of(testMCP), mockRetrieverContext) + .collect(Collectors.toSet()); + + switch (changeType) { + case CREATE: + case CREATE_ENTITY: + case RESTATE: + assertEquals(Set.of(), exceptions, "Expected no exception for change type " + changeType); + break; + default: + assertEquals(exceptions.size(), 1, "Expected exception for change type " + changeType); + assertEquals( + exceptions.stream().findFirst().get().getMessage(), + "Item last modified 1719705600000 <= 1719792000000 (epoch ms)"); + break; + } + } + } + + @Test + public void testModifiedSinceAfterRangeFail() { + ConditionalWriteValidator test = new ConditionalWriteValidator().setConfig(validatorConfig); + Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); + + StringMap headers = + new StringMap( + Map.of( + HttpHeaders.IF_UNMODIFIED_SINCE, "2024-07-03T00:00:00Z", + HttpHeaders.IF_MODIFIED_SINCE, "2024-07-01T00:00:00Z")); + Timestamp modified = Timestamp.from(Instant.parse("2024-07-04T00:00:00Z")); + + for (ChangeType changeType : supportedChangeTypes) { + final ChangeMCP testMCP; + switch (changeType) { + case RESTATE: + case CREATE_ENTITY: + case CREATE: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected + .metadataChangeProposal( + new MetadataChangeProposal() + .setHeaders( + new StringMap( + Map.of( + HttpHeaders.IF_UNMODIFIED_SINCE, + headers.get(HttpHeaders.IF_UNMODIFIED_SINCE))))) + .build(); + break; + default: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected + .metadataChangeProposal(new MetadataChangeProposal().setHeaders(headers)) + .previousSystemAspect( + TestSystemAspect.builder() + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + // Previous / actual + .createdOn(modified) + .createdBy("urn:li:corpuser:test") + .build()) + .build(); + break; + } + + Set exceptions = + test.validatePreCommit(List.of(testMCP), mockRetrieverContext) + .collect(Collectors.toSet()); + + switch (changeType) { + case CREATE: + case CREATE_ENTITY: + case RESTATE: + assertEquals(Set.of(), exceptions, "Expected no exception for change type " + changeType); + break; + default: + assertEquals(exceptions.size(), 1, "Expected exception for change type " + changeType); + assertEquals( + exceptions.stream().findFirst().get().getMessage(), + "Item last modified 1720051200000 > 1719964800000 (epoch ms)"); + break; + } + } + } + + @Test + public void testNoPreviousLookupAuditStampModifiedSinceBeforeRangeFail() { + ConditionalWriteValidator test = new ConditionalWriteValidator().setConfig(validatorConfig); + Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); + + StringMap headers = + new StringMap( + Map.of( + HttpHeaders.IF_UNMODIFIED_SINCE, "2024-07-03T00:00:00Z", + HttpHeaders.IF_MODIFIED_SINCE, "2024-07-01T00:00:00Z")); + Timestamp modified = Timestamp.from(Instant.parse("2024-06-30T00:00:00Z")); + + // Prepare mock lookup + when(mockRetrieverContext + .getAspectRetriever() + .getLatestSystemAspects(eq(Map.of(testEntityUrn, Set.of("status"))))) + .thenReturn( + Map.of( + testEntityUrn, + Map.of( + "status", + TestSystemAspect.builder() + .createdOn(modified) + .createdBy("urn:li:corpuser:test") + .build()))); + + for (ChangeType changeType : supportedChangeTypes) { + final ChangeMCP testMCP; + switch (changeType) { + case CREATE: + case CREATE_ENTITY: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + .metadataChangeProposal( + new MetadataChangeProposal() + .setHeaders( + new StringMap( + Map.of( + HttpHeaders.IF_UNMODIFIED_SINCE, + headers.get(HttpHeaders.IF_UNMODIFIED_SINCE))))) + .build(); + break; + default: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected + .metadataChangeProposal(new MetadataChangeProposal().setHeaders(headers)) + .build(); + break; + } + + Set exceptions = + test.validatePreCommit(List.of(testMCP), mockRetrieverContext) + .collect(Collectors.toSet()); + + switch (changeType) { + case CREATE: + case CREATE_ENTITY: + case RESTATE: + assertEquals(Set.of(), exceptions, "Expected no exception for change type " + changeType); + break; + default: + assertEquals(exceptions.size(), 1, "Expected exception for change type " + changeType); + assertEquals( + exceptions.stream().findFirst().get().getMessage(), + "Item last modified 1719705600000 <= 1719792000000 (epoch ms)"); + break; + } + } + } + + @Test + public void testNoPreviousLookupAuditStampModifiedSinceAfterRangeFail() { + ConditionalWriteValidator test = new ConditionalWriteValidator().setConfig(validatorConfig); + Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); + + StringMap headers = + new StringMap( + Map.of( + HttpHeaders.IF_UNMODIFIED_SINCE, "2024-07-03T00:00:00Z", + HttpHeaders.IF_MODIFIED_SINCE, "2024-07-01T00:00:00Z")); + Timestamp modified = Timestamp.from(Instant.parse("2024-07-04T00:00:00Z")); + + // Prepare mock lookup + when(mockRetrieverContext + .getAspectRetriever() + .getLatestSystemAspects(eq(Map.of(testEntityUrn, Set.of("status"))))) + .thenReturn( + Map.of( + testEntityUrn, + Map.of( + "status", + TestSystemAspect.builder() + .createdOn(modified) + .createdBy("urn:li:corpuser:test") + .build()))); + + for (ChangeType changeType : supportedChangeTypes) { + final ChangeMCP testMCP; + switch (changeType) { + case CREATE: + case CREATE_ENTITY: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + .metadataChangeProposal( + new MetadataChangeProposal() + .setHeaders( + new StringMap( + Map.of( + HttpHeaders.IF_UNMODIFIED_SINCE, + headers.get(HttpHeaders.IF_UNMODIFIED_SINCE))))) + .build(); + break; + default: + testMCP = + TestMCP.builder() + .changeType(changeType) + .urn(testEntityUrn) + .entitySpec(entityRegistry.getEntitySpec(testEntityUrn.getEntityType())) + .aspectSpec( + entityRegistry + .getEntitySpec(testEntityUrn.getEntityType()) + .getAspectSpec("status")) + .recordTemplate(new Status().setRemoved(false)) + // Expected + .metadataChangeProposal(new MetadataChangeProposal().setHeaders(headers)) + .build(); + break; + } + + Set exceptions = + test.validatePreCommit(List.of(testMCP), mockRetrieverContext) + .collect(Collectors.toSet()); + + switch (changeType) { + case CREATE: + case CREATE_ENTITY: + case RESTATE: + assertEquals(Set.of(), exceptions, "Expected no exception for change type " + changeType); + break; + default: + assertEquals(exceptions.size(), 1, "Expected exception for change type " + changeType); + assertEquals( + exceptions.stream().findFirst().get().getMessage(), + "Item last modified 1720051200000 > 1719964800000 (epoch ms)"); + break; + } + } + } +} diff --git a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java index 62e22efa9da165..65705f15022b6b 100644 --- a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java +++ b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java @@ -6,9 +6,14 @@ import com.linkedin.data.template.RecordTemplate; import com.linkedin.entity.Aspect; import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.SystemAspect; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.mxe.SystemMetadata; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.test.metadata.aspect.batch.TestSystemAspect; import com.linkedin.util.Pair; +import java.sql.Timestamp; +import java.time.Instant; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -19,6 +24,7 @@ public class MockAspectRetriever implements AspectRetriever { private final Map> data; + private final Map> systemData = new HashMap<>(); public MockAspectRetriever(@Nonnull Map> data) { this.data = @@ -39,6 +45,21 @@ public MockAspectRetriever(@Nonnull Map> data) { }) .collect(Collectors.toMap(Pair::getKey, Pair::getValue)))) .collect(Collectors.toMap(Pair::getKey, Pair::getValue))); + for (Map.Entry> urnEntry : this.data.entrySet()) { + for (Map.Entry aspectEntry : urnEntry.getValue().entrySet()) { + systemData + .computeIfAbsent(urnEntry.getKey(), urn -> new HashMap<>()) + .computeIfAbsent( + aspectEntry.getKey(), + aspectName -> + TestSystemAspect.builder() + .urn(urnEntry.getKey()) + .version(0) + .systemMetadata(new SystemMetadata().setVersion("1")) + .createdOn(Timestamp.from(Instant.now())) + .build()); + } + } } public MockAspectRetriever( @@ -60,6 +81,16 @@ public Map> getLatestAspectObjects( .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); } + @Nonnull + @Override + public Map> getLatestSystemAspects( + Map> urnAspectNames) { + return urnAspectNames.keySet().stream() + .filter(systemData::containsKey) + .map(urn -> Pair.of(urn, systemData.get(urn))) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); + } + @Nonnull @Override public EntityRegistry getEntityRegistry() { diff --git a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java index 1e1efe4238187b..e562390a959a74 100644 --- a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java +++ b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java @@ -20,7 +20,10 @@ import com.linkedin.test.metadata.aspect.TestEntityRegistry; import java.net.URISyntaxException; import java.util.Collection; +import java.util.Map; +import java.util.Optional; import java.util.Set; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.Builder; @@ -119,10 +122,22 @@ public static Set ofOneMCP( private MetadataChangeProposal metadataChangeProposal; @Setter private SystemAspect previousSystemAspect; @Setter private long nextAspectVersion; + @Setter private Map headers; @Nonnull @Override public SystemAspect getSystemAspect(@Nullable Long nextAspectVersion) { return null; } + + @Override + public Map getHeaders() { + return Optional.ofNullable(metadataChangeProposal) + .filter(MetadataChangeProposal::hasHeaders) + .map( + mcp -> + mcp.getHeaders().entrySet().stream() + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))) + .orElse(headers); + } } diff --git a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestSystemAspect.java b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestSystemAspect.java new file mode 100644 index 00000000000000..54afdd214f2b23 --- /dev/null +++ b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestSystemAspect.java @@ -0,0 +1,26 @@ +package com.linkedin.test.metadata.aspect.batch; + +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.SystemAspect; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.mxe.SystemMetadata; +import java.sql.Timestamp; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; + +@Builder +@Getter +@EqualsAndHashCode +public class TestSystemAspect implements SystemAspect { + private Urn urn; + private long version; + private RecordTemplate recordTemplate; + private SystemMetadata systemMetadata; + private EntitySpec entitySpec; + private AspectSpec aspectSpec; + private Timestamp createdOn; + private String createdBy; +} diff --git a/metadata-integration/java/datahub-client/src/main/resources/MetadataChangeProposal.avsc b/metadata-integration/java/datahub-client/src/main/resources/MetadataChangeProposal.avsc index 12ace42f3af273..126d5cf6bcee38 100644 --- a/metadata-integration/java/datahub-client/src/main/resources/MetadataChangeProposal.avsc +++ b/metadata-integration/java/datahub-client/src/main/resources/MetadataChangeProposal.avsc @@ -167,9 +167,22 @@ } ], "doc" : "Additional properties", "default" : null + }, { + "name" : "version", + "type" : [ "null", "string" ], + "doc" : "Aspect version\n Initial implementation will use the aspect version's number, however stored as\n a string in the case where a different aspect versioning scheme is later adopted.", + "default" : null } ] } ], - "doc" : "A string->string map of custom properties that one might want to attach to an event", + "doc" : "System properties that one might want to attach to an event", + "default" : null + }, { + "name" : "headers", + "type" : [ "null", { + "type" : "map", + "values" : "string" + } ], + "doc" : "Headers - intended to mimic http headers", "default" : null } ] } \ No newline at end of file diff --git a/metadata-integration/java/datahub-event/src/main/resources/MetadataChangeProposal.avsc b/metadata-integration/java/datahub-event/src/main/resources/MetadataChangeProposal.avsc index 12ace42f3af273..126d5cf6bcee38 100644 --- a/metadata-integration/java/datahub-event/src/main/resources/MetadataChangeProposal.avsc +++ b/metadata-integration/java/datahub-event/src/main/resources/MetadataChangeProposal.avsc @@ -167,9 +167,22 @@ } ], "doc" : "Additional properties", "default" : null + }, { + "name" : "version", + "type" : [ "null", "string" ], + "doc" : "Aspect version\n Initial implementation will use the aspect version's number, however stored as\n a string in the case where a different aspect versioning scheme is later adopted.", + "default" : null } ] } ], - "doc" : "A string->string map of custom properties that one might want to attach to an event", + "doc" : "System properties that one might want to attach to an event", + "default" : null + }, { + "name" : "headers", + "type" : [ "null", { + "type" : "map", + "values" : "string" + } ], + "doc" : "Headers - intended to mimic http headers", "default" : null } ] } \ No newline at end of file diff --git a/metadata-integration/java/openlineage-converter/build.gradle b/metadata-integration/java/openlineage-converter/build.gradle index 4fb35f6b3c563c..145a88229ad4af 100644 --- a/metadata-integration/java/openlineage-converter/build.gradle +++ b/metadata-integration/java/openlineage-converter/build.gradle @@ -24,9 +24,7 @@ dependencies { implementation externalDependency.json - testImplementation externalDependency.junit - // Use JUnit Jupiter for testing. - testImplementation 'org.junit.jupiter:junit-jupiter:5.9.2' + testImplementation externalDependency.testng } jacocoTestReport { diff --git a/metadata-integration/java/openlineage-converter/src/test/java/io/datahubproject/openlineage/HdfsPathDatasetTest.java b/metadata-integration/java/openlineage-converter/src/test/java/io/datahubproject/openlineage/HdfsPathDatasetTest.java index 3693ddc15e6f06..82ba172f841918 100644 --- a/metadata-integration/java/openlineage-converter/src/test/java/io/datahubproject/openlineage/HdfsPathDatasetTest.java +++ b/metadata-integration/java/openlineage-converter/src/test/java/io/datahubproject/openlineage/HdfsPathDatasetTest.java @@ -13,8 +13,8 @@ import java.util.LinkedList; import java.util.List; import java.util.Optional; -import org.junit.Assert; -import org.junit.Test; +import org.testng.Assert; +import org.testng.annotations.Test; public class HdfsPathDatasetTest { @@ -42,7 +42,7 @@ public void testPathSpecList() "s3", Collections.singletonList( PathSpec.builder() - .env("PROD") + .env(Optional.of("PROD")) .platform("s3") .pathSpecList( new LinkedList<>( @@ -89,7 +89,7 @@ public void testNoMatchPathSpecList() "s3", Collections.singletonList( PathSpec.builder() - .env("PROD") + .env(Optional.of("PROD")) .platform("s3") .pathSpecList( new LinkedList<>( @@ -121,7 +121,7 @@ public void testPathSpecListPlatformInstance() "s3", Collections.singletonList( PathSpec.builder() - .env("PROD") + .env(Optional.of("PROD")) .platform("s3") .pathSpecList( new LinkedList<>( @@ -154,7 +154,7 @@ public void testPathSpecListPathSpecPlatformInstance() "s3", Collections.singletonList( PathSpec.builder() - .env("PROD") + .env(Optional.of("PROD")) .platform("s3") .platformInstance(Optional.of("s3Instance")) .pathSpecList( @@ -187,7 +187,7 @@ public void testPathAliasList() "s3", Collections.singletonList( PathSpec.builder() - .env("PROD") + .env(Optional.of("PROD")) .platform("s3") .pathSpecList( new LinkedList<>( @@ -219,7 +219,7 @@ public void testGcsNoPathSpecList() "s3", Collections.singletonList( PathSpec.builder() - .env("PROD") + .env(Optional.of("PROD")) .platform("gcs") .pathSpecList( new LinkedList<>( @@ -249,7 +249,7 @@ public void testGcsPathSpecList() "s3", Collections.singletonList( PathSpec.builder() - .env("PROD") + .env(Optional.of("PROD")) .platform("gcs") .pathSpecList( new LinkedList<>( diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java index 30e9251982f10f..6f45a36d1daf46 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java @@ -4,6 +4,7 @@ import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; +import com.linkedin.data.template.StringMap; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.SystemAspect; @@ -23,7 +24,10 @@ import com.linkedin.mxe.SystemMetadata; import java.io.IOException; import java.sql.Timestamp; +import java.util.Map; import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.Builder; @@ -69,7 +73,7 @@ public static ChangeItemImpl fromPatch( @Nonnull private final RecordTemplate recordTemplate; - @Nonnull private final SystemMetadata systemMetadata; + @Nonnull private SystemMetadata systemMetadata; @Nonnull private final AuditStamp auditStamp; @@ -80,19 +84,44 @@ public static ChangeItemImpl fromPatch( @Nonnull private final AspectSpec aspectSpec; @Setter @Nullable private SystemAspect previousSystemAspect; - @Setter private long nextAspectVersion; + private long nextAspectVersion; + private final Map headers; + + @Override + public void setNextAspectVersion(long nextAspectVersion) { + this.nextAspectVersion = nextAspectVersion; + try { + this.systemMetadata = new SystemMetadata(getSystemMetadata().copy().data()); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + this.systemMetadata.setVersion(String.valueOf(nextAspectVersion + 1)); + } @Nonnull @Override - public SystemAspect getSystemAspect(@Nullable Long version) { + public SystemAspect getSystemAspect(@Nullable Long nextAspectVersion) { EntityAspect entityAspect = new EntityAspect(); entityAspect.setAspect(getAspectName()); entityAspect.setMetadata(EntityApiUtils.toJsonAspect(getRecordTemplate())); entityAspect.setUrn(getUrn().toString()); - entityAspect.setVersion(version == null ? getNextAspectVersion() : version); + entityAspect.setVersion(nextAspectVersion == null ? getNextAspectVersion() : nextAspectVersion); entityAspect.setCreatedOn(new Timestamp(getAuditStamp().getTime())); entityAspect.setCreatedBy(getAuditStamp().getActor().toString()); - entityAspect.setSystemMetadata(EntityApiUtils.toJsonAspect(getSystemMetadata())); + if (nextAspectVersion != null) { + // Apply version to system metadata (copy to ensure we don't pollute shared systemMetadata + // objects across aspects) + SystemMetadata updatedSystemMetadata = null; + try { + updatedSystemMetadata = new SystemMetadata(getSystemMetadata().copy().data()); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + updatedSystemMetadata.setVersion(String.valueOf(nextAspectVersion + 1)); + entityAspect.setSystemMetadata(EntityApiUtils.toJsonAspect(updatedSystemMetadata)); + } else { + entityAspect.setSystemMetadata(EntityApiUtils.toJsonAspect(getSystemMetadata())); + } return EntityAspect.EntitySystemAspect.builder() .build(getEntitySpec(), getAspectSpec(), entityAspect); } @@ -112,10 +141,24 @@ public MetadataChangeProposal getMetadataChangeProposal() { mcp.setEntityKeyAspect( GenericRecordUtils.serializeAspect( EntityKeyUtils.convertUrnToEntityKey(getUrn(), entitySpec.getKeyAspectSpec()))); + if (!headers.isEmpty()) { + mcp.setHeaders(new StringMap(headers)); + } return mcp; } } + @Override + public Map getHeaders() { + return Optional.ofNullable(metadataChangeProposal) + .filter(MetadataChangeProposal::hasHeaders) + .map( + mcp -> + mcp.getHeaders().entrySet().stream() + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))) + .orElse(headers); + } + public static class ChangeItemImplBuilder { // Ensure use of other builders @@ -133,6 +176,11 @@ public ChangeItemImpl build(AspectRetriever aspectRetriever) { // Apply change type default this.changeType = validateOrDefaultChangeType(changeType); + // Apply empty headers + if (this.headers == null) { + this.headers = Map.of(); + } + ValidationApiUtils.validateUrn(aspectRetriever.getEntityRegistry(), this.urn); log.debug("entity type = {}", this.urn.getEntityType()); @@ -156,7 +204,8 @@ public ChangeItemImpl build(AspectRetriever aspectRetriever) { this.entitySpec, this.aspectSpec, this.previousSystemAspect, - this.nextAspectVersion); + this.nextAspectVersion, + this.headers); } public static ChangeItemImpl build( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java index 6f0cd51af0793b..a4b2e991b6e1e0 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java @@ -12,6 +12,7 @@ import com.linkedin.common.BrowsePathsV2; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; +import com.linkedin.data.template.SetMode; import com.linkedin.data.template.StringArray; import com.linkedin.dataplatform.DataPlatformInfo; import com.linkedin.entity.EntityResponse; @@ -29,6 +30,7 @@ import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.GenericAspect; import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.mxe.SystemMetadata; import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; import java.util.Collection; @@ -348,7 +350,14 @@ public static MetadataChangeProposal getProposalFromAspectForDefault( // Set fields determined from original if (templateItem.getSystemMetadata() != null) { - proposal.setSystemMetadata(templateItem.getSystemMetadata()); + SystemMetadata systemMetadata = null; + try { + systemMetadata = new SystemMetadata(templateItem.getSystemMetadata().copy().data()); + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + systemMetadata.setVersion(null, SetMode.REMOVE_IF_NULL); + proposal.setSystemMetadata(systemMetadata); } if (templateItem.getUrn() != null) { proposal.setEntityUrn(templateItem.getUrn()); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java index 3ee382ab0d74f3..bba8324d0c5612 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/EntityClientAspectRetriever.java @@ -4,12 +4,15 @@ import com.linkedin.entity.Aspect; import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.metadata.aspect.CachingAspectRetriever; +import com.linkedin.metadata.aspect.SystemAspect; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.r2.RemoteInvocationException; import io.datahubproject.metadata.context.OperationContext; import java.net.URISyntaxException; +import java.util.Collection; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.Builder; @@ -52,4 +55,25 @@ public Map> getLatestAspectObjects( } } } + + @Nonnull + @Override + public Map> getLatestSystemAspects( + Map> urnAspectNames) { + if (urnAspectNames.isEmpty()) { + return Map.of(); + } else { + try { + // TODO: This generates over-fetching if not all aspects are needed for each URN + return entityClient.getLatestSystemAspect( + systemOperationContext, + urnAspectNames.keySet(), + urnAspectNames.values().stream() + .flatMap(Collection::stream) + .collect(Collectors.toSet())); + } catch (RemoteInvocationException | URISyntaxException e) { + throw new RuntimeException(e); + } + } + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java index 53772207585341..626a1f72f5fb73 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceAspectRetriever.java @@ -1,15 +1,19 @@ package com.linkedin.metadata.entity; import static com.linkedin.metadata.utils.GenericRecordUtils.entityResponseToAspectMap; +import static com.linkedin.metadata.utils.GenericRecordUtils.entityResponseToSystemAspectMap; import com.linkedin.common.urn.Urn; import com.linkedin.entity.Aspect; import com.linkedin.metadata.aspect.CachingAspectRetriever; +import com.linkedin.metadata.aspect.SystemAspect; import com.linkedin.metadata.models.registry.EntityRegistry; import io.datahubproject.metadata.context.OperationContext; import java.net.URISyntaxException; +import java.util.Collection; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.Builder; @@ -48,4 +52,30 @@ public Map> getLatestAspectObjects( } } } + + @Nonnull + @Override + public Map> getLatestSystemAspects( + Map> urnAspectNames) { + if (urnAspectNames.isEmpty()) { + return Map.of(); + } else { + String entityName = + urnAspectNames.keySet().stream().findFirst().map(Urn::getEntityType).get(); + try { + // TODO - This causes over-fetching if not all aspects are required for every URN + return entityResponseToSystemAspectMap( + entityService.getEntitiesV2( + systemOperationContext, + entityName, + urnAspectNames.keySet(), + urnAspectNames.values().stream() + .flatMap(Collection::stream) + .collect(Collectors.toSet())), + entityRegistry); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 34c836d760a7d7..6f2f95b8b115ab 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -10,6 +10,7 @@ import static com.linkedin.metadata.utils.PegasusUtils.constructMCL; import static com.linkedin.metadata.utils.PegasusUtils.getDataTemplateClassFromSchema; import static com.linkedin.metadata.utils.PegasusUtils.urnToEntityName; +import static com.linkedin.metadata.utils.metrics.ExceptionUtils.collectMetrics; import com.codahale.metrics.Timer; import com.datahub.util.RecordUtils; @@ -408,23 +409,102 @@ public Map> getLatestEnvelopedAspects( @Nonnull OperationContext opContext, @Nonnull Set urns, @Nonnull Set aspectNames, - boolean alwaysIncludeKeyAspect) - throws URISyntaxException { + boolean alwaysIncludeKeyAspect) { - final Set dbKeys = + return getEnvelopedVersionedAspects( + opContext, urns.stream() .map( urn -> - aspectNames.stream() - .map( - aspectName -> - new EntityAspectIdentifier( - urn.toString(), aspectName, ASPECT_LATEST_VERSION)) - .collect(Collectors.toList())) - .flatMap(List::stream) + Map.entry( + urn, + aspectNames.stream() + .map(aspectName -> Map.entry(aspectName, 0L)) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)))) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)), + alwaysIncludeKeyAspect); + } + + @Override + public Map> getEnvelopedVersionedAspects( + @Nonnull OperationContext opContext, + @Nonnull Map> urnAspectVersions, + boolean alwaysIncludeKeyAspect) { + + // we will always need to fetch latest aspects in case the requested version is version 0 being + // requested with version != 0 + Map>> withLatest = + urnAspectVersions.entrySet().stream() + .map( + entry -> + Map.entry( + entry.getKey(), + entry.getValue().entrySet().stream() + .map( + aspectEntry -> + Map.entry( + aspectEntry.getKey(), + Stream.of(0L, aspectEntry.getValue()) + .collect(Collectors.toSet()))) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)))) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + Map> latestResult = + getEnvelopedVersionedAspectsInternal(opContext, withLatest, alwaysIncludeKeyAspect); + + return latestResult.entrySet().stream() + .collect( + Collectors.toMap( + Map.Entry::getKey, + a -> + a.getValue().stream() + .filter( + v -> + matchVersion(v, urnAspectVersions.get(a.getKey()).get(v.getName()))) + .collect(Collectors.toList()))); + } + + private static boolean matchVersion( + @Nonnull EnvelopedAspect envelopedAspect, @Nullable Long expectedVersion) { + if (expectedVersion == null) { + return true; + } + if (Objects.equals(envelopedAspect.getVersion(GetMode.NULL), expectedVersion)) { + return true; + } + if (envelopedAspect.hasSystemMetadata() + && envelopedAspect.getSystemMetadata().hasVersion() + && envelopedAspect.getSystemMetadata().getVersion() != null) { + return Objects.equals( + Long.parseLong(envelopedAspect.getSystemMetadata().getVersion()), expectedVersion); + } + + return false; + } + + private Map> getEnvelopedVersionedAspectsInternal( + @Nonnull OperationContext opContext, + @Nonnull Map>> urnAspectVersions, + boolean alwaysIncludeKeyAspect) { + final Set dbKeys = + urnAspectVersions.entrySet().stream() + .flatMap( + entry -> { + Urn urn = entry.getKey(); + return entry.getValue().entrySet().stream() + .flatMap( + aspectNameVersion -> + aspectNameVersion.getValue().stream() + .map( + version -> + new EntityAspectIdentifier( + urn.toString(), + aspectNameVersion.getKey(), + version))); + }) .collect(Collectors.toSet()); - return getCorrespondingAspects(opContext, dbKeys, urns, alwaysIncludeKeyAspect); + return getCorrespondingAspects(opContext, dbKeys, alwaysIncludeKeyAspect); } /** @@ -481,22 +561,17 @@ public Map> getVersionedEnvelopedAspects( .flatMap(List::stream) .collect(Collectors.toSet())); - return getCorrespondingAspects( - opContext, - dbKeys, - versionedUrns.stream() - .map(versionedUrn -> versionedUrn.getUrn().toString()) - .map(UrnUtils::getUrn) - .collect(Collectors.toSet()), - alwaysIncludeKeyAspect); + return getCorrespondingAspects(opContext, dbKeys, alwaysIncludeKeyAspect); } private Map> getCorrespondingAspects( @Nonnull OperationContext opContext, Set dbKeys, - Set urns, boolean alwaysIncludeKeyAspect) { + Set urns = + dbKeys.stream().map(dbKey -> UrnUtils.getUrn(dbKey.getUrn())).collect(Collectors.toSet()); + final Map envelopedAspectMap = getEnvelopedAspects(opContext, dbKeys); @@ -779,9 +854,9 @@ private List ingestAspectsToLocalDB( EntityUtils.toSystemAspects( opContext.getRetrieverContext().get(), aspectDao.getLatestAspects(urnAspects)); - // read #2 + // read #2 (potentially) final Map> nextVersions = - aspectDao.getNextVersions(urnAspects); + EntityUtils.calculateNextVersions(aspectDao, latestAspects, urnAspects); // 1. Convert patches to full upserts // 2. Run any entity/aspect level hooks @@ -796,10 +871,13 @@ private List ingestAspectsToLocalDB( EntityUtils.toSystemAspects( opContext.getRetrieverContext().get(), aspectDao.getLatestAspects(updatedItems.getFirst())); - Map> newNextVersions = - aspectDao.getNextVersions(updatedItems.getFirst()); // merge updatedLatestAspects = AspectsBatch.merge(latestAspects, newLatestAspects); + + Map> newNextVersions = + EntityUtils.calculateNextVersions( + aspectDao, updatedLatestAspects, updatedItems.getFirst()); + // merge updatedNextVersions = AspectsBatch.merge(nextVersions, newNextVersions); } else { updatedLatestAspects = latestAspects; @@ -836,16 +914,16 @@ private List ingestAspectsToLocalDB( }) .collect(Collectors.toList()); + // No changes, return + if (changeMCPs.isEmpty()) { + return Collections.emptyList(); + } + // do final pre-commit checks with previous aspect value ValidationExceptionCollection exceptions = AspectsBatch.validatePreCommit(changeMCPs, opContext.getRetrieverContext().get()); if (!exceptions.isEmpty()) { - throw new ValidationException(exceptions.toString()); - } - - // No changes, return - if (changeMCPs.isEmpty()) { - return Collections.emptyList(); + throw new ValidationException(collectMetrics(exceptions).toString()); } // Database Upsert results @@ -2132,7 +2210,7 @@ private RollbackResult deleteAspectWithoutMCL( ValidationExceptionCollection exceptions = AspectsBatch.validateProposed(List.of(deleteItem), opContext.getRetrieverContext().get()); if (!exceptions.isEmpty()) { - throw new ValidationException(exceptions.toString()); + throw new ValidationException(collectMetrics(exceptions).toString()); } final RollbackResult result = @@ -2206,7 +2284,7 @@ private RollbackResult deleteAspectWithoutMCL( .collect(Collectors.toList()), opContext.getRetrieverContext().get()); if (!preCommitExceptions.isEmpty()) { - throw new ValidationException(preCommitExceptions.toString()); + throw new ValidationException(collectMetrics(preCommitExceptions).toString()); } // 5. Apply deletes and fix up latest row diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java index e542b10af4ddc6..7842365ce429be 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java @@ -34,7 +34,9 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; @@ -159,7 +161,7 @@ static EntityResponse toEntityResponse( * Prefer batched interfaces * * @param entityAspect optional entity aspect - * @param aspectRetriever + * @param retrieverContext * @return */ public static Optional toSystemAspect( @@ -175,7 +177,7 @@ public static Optional toSystemAspect( * translate that into our java classes * * @param rawAspects `Map>` - * @param aspectRetriever used for read mutations + * @param retrieverContext used for read mutations * @return the java map for the given database object map */ @Nonnull @@ -278,4 +280,73 @@ public static List toSystemAspects( return systemAspects; } + + /** + * Use the precalculated next version from system metadata if it exists, otherwise lookup the next + * version the normal way from the database + * + * @param aspectDao database access + * @param latestAspects aspect version 0 with system metadata + * @param urnAspects urn/aspects which we need next version information for + * @return map of the urn/aspect to the next aspect version + */ + public static Map> calculateNextVersions( + AspectDao aspectDao, + Map> latestAspects, + Map> urnAspects) { + Map> precalculatedVersions = + latestAspects.entrySet().stream() + .map( + entry -> + Map.entry( + entry.getKey(), convertSystemAspectToNextVersionMap(entry.getValue()))) + .filter(entry -> !entry.getValue().isEmpty()) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + Map> missingAspectVersions = + urnAspects.entrySet().stream() + .flatMap( + entry -> + entry.getValue().stream() + .map(aspectName -> Pair.of(entry.getKey(), aspectName))) + .filter( + urnAspectName -> + !precalculatedVersions + .getOrDefault(urnAspectName.getKey(), Map.of()) + .containsKey(urnAspectName.getValue())) + .collect( + Collectors.groupingBy( + Pair::getKey, Collectors.mapping(Pair::getValue, Collectors.toSet()))); + Map> databaseVersions = + missingAspectVersions.isEmpty() + ? Map.of() + : aspectDao.getNextVersions(missingAspectVersions); + + // stitch back together the precalculated and database versions + return Stream.concat( + precalculatedVersions.entrySet().stream(), databaseVersions.entrySet().stream()) + .collect( + Collectors.toMap( + Map.Entry::getKey, + Map.Entry::getValue, + (m1, m2) -> + Stream.concat(m1.entrySet().stream(), m2.entrySet().stream()) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)))); + } + + /** + * Given a map of aspect name to system aspect, extract the next version if it exists + * + * @param aspectMap aspect name to system aspect map + * @return aspect name to next aspect version + */ + private static Map convertSystemAspectToNextVersionMap( + Map aspectMap) { + return aspectMap.entrySet().stream() + .filter(entry -> entry.getValue().getVersion() == 0) + .map(entry -> Map.entry(entry.getKey(), entry.getValue().getSystemMetadataVersion())) + .filter(entry -> entry.getValue().isPresent()) + .map(entry -> Map.entry(entry.getKey(), entry.getValue().get())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/AspectGenerationUtils.java b/metadata-io/src/test/java/com/linkedin/metadata/AspectGenerationUtils.java index 9a64e397c9184e..b9a1817f476fba 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/AspectGenerationUtils.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/AspectGenerationUtils.java @@ -6,6 +6,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.RecordTemplate; +import com.linkedin.data.template.SetMode; import com.linkedin.dataset.UpstreamArray; import com.linkedin.dataset.UpstreamLineage; import com.linkedin.identity.CorpUserInfo; @@ -14,6 +15,7 @@ import com.linkedin.metadata.utils.PegasusUtils; import com.linkedin.mxe.SystemMetadata; import javax.annotation.Nonnull; +import javax.annotation.Nullable; public class AspectGenerationUtils { @@ -29,18 +31,28 @@ public static SystemMetadata createSystemMetadata() { return createSystemMetadata(1625792689, "run-123"); } + @Nonnull + public static SystemMetadata createSystemMetadata(long nextAspectVersion) { + return createSystemMetadata( + 1625792689, "run-123", "run-123", String.valueOf(nextAspectVersion)); + } + @Nonnull public static SystemMetadata createSystemMetadata(long lastObserved, @Nonnull String runId) { - return createSystemMetadata(lastObserved, runId, runId); + return createSystemMetadata(lastObserved, runId, runId, null); } @Nonnull public static SystemMetadata createSystemMetadata( - long lastObserved, @Nonnull String runId, @Nonnull String lastRunId) { + long lastObserved, + @Nonnull String runId, + @Nonnull String lastRunId, + @Nullable String version) { SystemMetadata metadata = new SystemMetadata(); metadata.setLastObserved(lastObserved); metadata.setRunId(runId); metadata.setLastRunId(lastRunId); + metadata.setVersion(version, SetMode.IGNORE_NULL); return metadata; } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index 91b01c55aac396..33598be8fc72b2 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -520,7 +520,7 @@ public void testReingestAspectsGetLatestAspects() throws Exception { String aspectName1 = AspectGenerationUtils.getAspectName(writeAspect1); pairToIngest.add(getAspectRecordPair(writeAspect1, CorpUserInfo.class)); - SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(); + SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(1); _entityServiceImpl.ingestAspects( opContext, entityUrn, pairToIngest, TEST_AUDIT_STAMP, metadata1); @@ -586,10 +586,12 @@ public void testReingestLineageAspect() throws Exception { String aspectName1 = AspectGenerationUtils.getAspectName(upstreamLineage); pairToIngest.add(getAspectRecordPair(upstreamLineage, UpstreamLineage.class)); - SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(); - _entityServiceImpl.ingestAspects( - opContext, entityUrn, pairToIngest, TEST_AUDIT_STAMP, metadata1); + opContext, + entityUrn, + pairToIngest, + TEST_AUDIT_STAMP, + AspectGenerationUtils.createSystemMetadata()); final MetadataChangeLog initialChangeLog = new MetadataChangeLog(); initialChangeLog.setEntityType(entityUrn.getEntityType()); @@ -601,7 +603,7 @@ public void testReingestLineageAspect() throws Exception { GenericAspect aspect = GenericRecordUtils.serializeAspect(pairToIngest.get(0).getSecond()); initialChangeLog.setAspect(aspect); - initialChangeLog.setSystemMetadata(metadata1); + initialChangeLog.setSystemMetadata(AspectGenerationUtils.createSystemMetadata(1)); initialChangeLog.setEntityKeyAspect( GenericRecordUtils.serializeAspect( EntityKeyUtils.convertUrnToEntityKey( @@ -615,9 +617,10 @@ public void testReingestLineageAspect() throws Exception { restateChangeLog.setAspectName(aspectName1); restateChangeLog.setCreated(TEST_AUDIT_STAMP); restateChangeLog.setAspect(aspect); - restateChangeLog.setSystemMetadata(metadata1); + restateChangeLog.setSystemMetadata(AspectGenerationUtils.createSystemMetadata(1)); restateChangeLog.setPreviousAspectValue(aspect); - restateChangeLog.setPreviousSystemMetadata(simulatePullFromDB(metadata1, SystemMetadata.class)); + restateChangeLog.setPreviousSystemMetadata( + simulatePullFromDB(AspectGenerationUtils.createSystemMetadata(1), SystemMetadata.class)); restateChangeLog.setEntityKeyAspect( GenericRecordUtils.serializeAspect( EntityKeyUtils.convertUrnToEntityKey( @@ -638,7 +641,11 @@ public void testReingestLineageAspect() throws Exception { clearInvocations(_mockProducer); _entityServiceImpl.ingestAspects( - opContext, entityUrn, pairToIngest, TEST_AUDIT_STAMP, metadata1); + opContext, + entityUrn, + pairToIngest, + TEST_AUDIT_STAMP, + AspectGenerationUtils.createSystemMetadata()); verify(_mockProducer, times(1)) .produceMetadataChangeLog( @@ -658,7 +665,7 @@ public void testReingestLineageProposal() throws Exception { final UpstreamLineage upstreamLineage = AspectGenerationUtils.createUpstreamLineage(); String aspectName1 = AspectGenerationUtils.getAspectName(upstreamLineage); - SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(); + SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(1); MetadataChangeProposal mcp1 = new MetadataChangeProposal(); mcp1.setEntityType(entityUrn.getEntityType()); GenericAspect genericAspect = GenericRecordUtils.serializeAspect(upstreamLineage); @@ -687,7 +694,7 @@ public void testReingestLineageProposal() throws Exception { restateChangeLog.setAspectName(aspectName1); restateChangeLog.setCreated(TEST_AUDIT_STAMP); restateChangeLog.setAspect(genericAspect); - restateChangeLog.setSystemMetadata(metadata1); + restateChangeLog.setSystemMetadata(AspectGenerationUtils.createSystemMetadata(1)); restateChangeLog.setPreviousAspectValue(genericAspect); restateChangeLog.setPreviousSystemMetadata(simulatePullFromDB(metadata1, SystemMetadata.class)); @@ -1177,8 +1184,10 @@ public void testIngestGetLatestAspect() throws AssertionError { CorpUserInfo writeAspect1 = AspectGenerationUtils.createCorpUserInfo("email@test.com"); String aspectName = AspectGenerationUtils.getAspectName(writeAspect1); - SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(1625792689, "run-123"); - SystemMetadata metadata2 = AspectGenerationUtils.createSystemMetadata(1635792689, "run-456"); + SystemMetadata metadata1 = + AspectGenerationUtils.createSystemMetadata(1625792689, "run-123", "run-123", "1"); + SystemMetadata metadata2 = + AspectGenerationUtils.createSystemMetadata(1635792689, "run-456", "run-456", "2"); List items = List.of( @@ -1287,8 +1296,10 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception { CorpUserInfo writeAspect1 = AspectGenerationUtils.createCorpUserInfo("email@test.com"); String aspectName = AspectGenerationUtils.getAspectName(writeAspect1); - SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(1625792689, "run-123"); - SystemMetadata metadata2 = AspectGenerationUtils.createSystemMetadata(1635792689, "run-456"); + SystemMetadata metadata1 = + AspectGenerationUtils.createSystemMetadata(1625792689, "run-123", "run-123", "1"); + SystemMetadata metadata2 = + AspectGenerationUtils.createSystemMetadata(1635792689, "run-456", "run-456", "2"); List items = List.of( @@ -1384,7 +1395,7 @@ public void testIngestSameAspect() throws AssertionError { SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(1625792689, "run-123"); SystemMetadata metadata2 = AspectGenerationUtils.createSystemMetadata(1635792689, "run-456"); SystemMetadata metadata3 = - AspectGenerationUtils.createSystemMetadata(1635792689, "run-123", "run-456"); + AspectGenerationUtils.createSystemMetadata(1635792689, "run-123", "run-456", "1"); List items = List.of( @@ -1488,8 +1499,6 @@ public void testIngestSameAspect() throws AssertionError { public void testRetention() throws AssertionError { Urn entityUrn = UrnUtils.getUrn("urn:li:corpuser:test1"); - SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(); - String aspectName = AspectGenerationUtils.getAspectName(new CorpUserInfo()); // Ingest CorpUserInfo Aspect @@ -1509,42 +1518,42 @@ public void testRetention() throws AssertionError { .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect1) - .systemMetadata(metadata1) + .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect1a) - .systemMetadata(metadata1) + .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect1b) - .systemMetadata(metadata1) + .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2) - .systemMetadata(metadata1) + .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2a) - .systemMetadata(metadata1) + .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2b) - .systemMetadata(metadata1) + .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( @@ -1585,14 +1594,14 @@ public void testRetention() throws AssertionError { .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect1c) - .systemMetadata(metadata1) + .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2c) - .systemMetadata(metadata1) + .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .auditStamp(TEST_AUDIT_STAMP) .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( @@ -1634,32 +1643,60 @@ public void testRetention() throws AssertionError { public void testIngestAspectIfNotPresent() throws AssertionError { Urn entityUrn = UrnUtils.getUrn("urn:li:corpuser:test1"); - SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(); - String aspectName = AspectGenerationUtils.getAspectName(new CorpUserInfo()); // Ingest CorpUserInfo Aspect CorpUserInfo writeAspect1 = AspectGenerationUtils.createCorpUserInfo("email@test.com"); _entityServiceImpl.ingestAspectIfNotPresent( - opContext, entityUrn, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); + opContext, + entityUrn, + aspectName, + writeAspect1, + TEST_AUDIT_STAMP, + AspectGenerationUtils.createSystemMetadata()); CorpUserInfo writeAspect1a = AspectGenerationUtils.createCorpUserInfo("email_a@test.com"); _entityServiceImpl.ingestAspectIfNotPresent( - opContext, entityUrn, aspectName, writeAspect1a, TEST_AUDIT_STAMP, metadata1); + opContext, + entityUrn, + aspectName, + writeAspect1a, + TEST_AUDIT_STAMP, + AspectGenerationUtils.createSystemMetadata()); CorpUserInfo writeAspect1b = AspectGenerationUtils.createCorpUserInfo("email_b@test.com"); _entityServiceImpl.ingestAspectIfNotPresent( - opContext, entityUrn, aspectName, writeAspect1b, TEST_AUDIT_STAMP, metadata1); + opContext, + entityUrn, + aspectName, + writeAspect1b, + TEST_AUDIT_STAMP, + AspectGenerationUtils.createSystemMetadata()); String aspectName2 = AspectGenerationUtils.getAspectName(new Status()); // Ingest Status Aspect Status writeAspect2 = new Status().setRemoved(true); _entityServiceImpl.ingestAspectIfNotPresent( - opContext, entityUrn, aspectName2, writeAspect2, TEST_AUDIT_STAMP, metadata1); + opContext, + entityUrn, + aspectName2, + writeAspect2, + TEST_AUDIT_STAMP, + AspectGenerationUtils.createSystemMetadata()); Status writeAspect2a = new Status().setRemoved(false); _entityServiceImpl.ingestAspectIfNotPresent( - opContext, entityUrn, aspectName2, writeAspect2a, TEST_AUDIT_STAMP, metadata1); + opContext, + entityUrn, + aspectName2, + writeAspect2a, + TEST_AUDIT_STAMP, + AspectGenerationUtils.createSystemMetadata()); Status writeAspect2b = new Status().setRemoved(true); _entityServiceImpl.ingestAspectIfNotPresent( - opContext, entityUrn, aspectName2, writeAspect2b, TEST_AUDIT_STAMP, metadata1); + opContext, + entityUrn, + aspectName2, + writeAspect2b, + TEST_AUDIT_STAMP, + AspectGenerationUtils.createSystemMetadata()); assertEquals(_entityServiceImpl.getAspect(opContext, entityUrn, aspectName, 0), writeAspect1); assertEquals(_entityServiceImpl.getAspect(opContext, entityUrn, aspectName2, 0), writeAspect2); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/StructuredPropertiesValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/StructuredPropertiesValidatorTest.java index 7a2b8fd69f3686..0ab5b68327f2fc 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/StructuredPropertiesValidatorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/StructuredPropertiesValidatorTest.java @@ -389,7 +389,7 @@ public void testValidateImmutableMutation() throws URISyntaxException { .collect(Collectors.toList()); Assert.assertEquals(exceptions.size(), 1, "Expected rejected mutation of immutable property."); - Assert.assertEquals(exceptions.get(0).getExceptionKey().getKey(), TEST_DATASET_URN); + Assert.assertEquals(exceptions.get(0).getAspectGroup().getKey(), TEST_DATASET_URN); Assert.assertTrue( exceptions.get(0).getMessage().contains("Cannot mutate an immutable property")); } @@ -484,7 +484,7 @@ public void testValidateImmutableDelete() throws URISyntaxException { .collect(Collectors.toList()); Assert.assertEquals(exceptions.size(), 1, "Expected rejected delete of immutable property."); - Assert.assertEquals(exceptions.get(0).getExceptionKey().getKey(), TEST_DATASET_URN); + Assert.assertEquals(exceptions.get(0).getAspectGroup().getKey(), TEST_DATASET_URN); Assert.assertTrue( exceptions.get(0).getMessage().contains("Cannot delete an immutable property")); } diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/spring/validation/CustomDataQualityRulesValidator.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/spring/validation/CustomDataQualityRulesValidator.java index 3cd1acaf9645d1..5181d749ba3fcc 100644 --- a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/spring/validation/CustomDataQualityRulesValidator.java +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/spring/validation/CustomDataQualityRulesValidator.java @@ -49,8 +49,7 @@ protected Stream validateProposedAspects( DataQualityRules rules = new DataQualityRules(item.getRecordTemplate().data()); // Enforce at least 1 rule return rules.getRules().isEmpty() - ? new AspectValidationException( - item.getUrn(), item.getAspectName(), "At least one rule is required.") + ? AspectValidationException.forItem(item, "At least one rule is required.") : null; }) .filter(Objects::nonNull); @@ -79,9 +78,8 @@ protected Stream validatePreCommitAspects( if (!newFieldTypeMap .getOrDefault(oldRule.getField(), oldRule.getType()) .equals(oldRule.getType())) { - return new AspectValidationException( - changeMCP.getUrn(), - changeMCP.getAspectName(), + return AspectValidationException.forItem( + changeMCP, String.format( "Field type mismatch. Field: %s Old: %s New: %s", oldRule.getField(), diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java index b95d3381d9c8f9..590e09cd816c5f 100644 --- a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java @@ -26,8 +26,7 @@ protected Stream validateProposedAspects( DataQualityRules rules = new DataQualityRules(item.getRecordTemplate().data()); // Enforce at least 1 rule return rules.getRules().isEmpty() - ? new AspectValidationException( - item.getUrn(), item.getAspectName(), "At least one rule is required.") + ? AspectValidationException.forItem(item, "At least one rule is required.") : null; }) .filter(Objects::nonNull); @@ -56,9 +55,8 @@ protected Stream validatePreCommitAspects( if (!newFieldTypeMap .getOrDefault(oldRule.getField(), oldRule.getType()) .equals(oldRule.getType())) { - return new AspectValidationException( - changeMCP.getUrn(), - changeMCP.getAspectName(), + return AspectValidationException.forItem( + changeMCP, String.format( "Field type mismatch. Field: %s Old: %s New: %s", oldRule.getField(), diff --git a/metadata-models/src/main/pegasus/com/linkedin/mxe/MetadataChangeProposal.pdl b/metadata-models/src/main/pegasus/com/linkedin/mxe/MetadataChangeProposal.pdl index 3d540f58a686dc..f4b3bc2a292d3e 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/mxe/MetadataChangeProposal.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/mxe/MetadataChangeProposal.pdl @@ -47,8 +47,12 @@ record MetadataChangeProposal { aspect: optional GenericAspect /** - * A string->string map of custom properties that one might want to attach to an event + * System properties that one might want to attach to an event **/ systemMetadata: optional SystemMetadata + /** + * Headers - intended to mimic http headers + */ + headers: optional map[string, string] } diff --git a/metadata-models/src/main/pegasus/com/linkedin/mxe/SystemMetadata.pdl b/metadata-models/src/main/pegasus/com/linkedin/mxe/SystemMetadata.pdl index 101557fca7949d..94c8aa77aa5d02 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/mxe/SystemMetadata.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/mxe/SystemMetadata.pdl @@ -38,4 +38,11 @@ record SystemMetadata { * Additional properties */ properties: optional map[string, string] + + /** + * Aspect version + * Initial implementation will use the aspect version's number, however stored as + * a string in the case where a different aspect versioning scheme is later adopted. + */ + version: optional string } diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 6a6683418bf386..2e713ee4104bf3 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -619,6 +619,18 @@ plugins: supportedEntityAspectNames: - entityName: '*' aspectName: '*' + - className: 'com.linkedin.metadata.aspect.validation.ConditionalWriteValidator' + enabled: true + supportedOperations: + - CREATE + - CREATE_ENTITY + - DELETE + - UPSERT + - UPDATE + - PATCH + supportedEntityAspectNames: + - entityName: '*' + aspectName: '*' mcpSideEffects: - className: 'com.linkedin.metadata.structuredproperties.hooks.PropertyDefinitionDeleteSideEffect' packageScan: diff --git a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java index 0e8c165468a4b4..e54c040fe13b58 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java @@ -10,6 +10,7 @@ import com.linkedin.entity.Aspect; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.SystemAspect; import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; import com.linkedin.metadata.entity.SearchRetriever; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; @@ -269,6 +270,13 @@ public Map> getLatestAspectObjects( return Map.of(); } + @Nonnull + @Override + public Map> getLatestSystemAspects( + Map> urnAspectNames) { + return Map.of(); + } + @Nonnull @Override public EntityRegistry getEntityRegistry() { diff --git a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImpl.java b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImpl.java index 12caba3e39dd68..9e671bfb7e01e5 100644 --- a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImpl.java +++ b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/v2/delegates/EntityApiDelegateImpl.java @@ -70,12 +70,16 @@ import javax.annotation.Nullable; import javax.validation.Valid; import javax.validation.constraints.Min; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; import lombok.extern.slf4j.Slf4j; import org.springframework.http.HttpEntity; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @Slf4j +@Accessors(chain = true) public class EntityApiDelegateImpl { private final OperationContext systemOperationContext; private final EntityRegistry _entityRegistry; @@ -86,7 +90,7 @@ public class EntityApiDelegateImpl { private final Class _reqClazz; private final Class _respClazz; private final Class _scrollRespClazz; - private final HttpServletRequest request; + @Setter @Getter private HttpServletRequest request; private static final String BUSINESS_ATTRIBUTE_ERROR_MESSAGE = "business attribute is disabled, enable it using featureflag : BUSINESS_ATTRIBUTE_ENTITY_ENABLED"; diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/client/OpenApiClient.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/client/OpenApiClient.java index c5d0a28ed4ea40..bdea174faa2430 100644 --- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/client/OpenApiClient.java +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/client/OpenApiClient.java @@ -2,8 +2,8 @@ import com.fasterxml.jackson.databind.ObjectMapper; import io.datahubproject.metadata.context.OperationContext; -import io.datahubproject.openapi.models.BatchGetUrnRequest; -import io.datahubproject.openapi.models.BatchGetUrnResponse; +import io.datahubproject.openapi.v2.models.BatchGetUrnRequestV2; +import io.datahubproject.openapi.v2.models.BatchGetUrnResponseV2; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; @@ -40,15 +40,16 @@ public OpenApiClient( this.systemOperationContext = systemOperationContext; } - public BatchGetUrnResponse getBatchUrnsSystemAuth(String entityName, BatchGetUrnRequest request) { + public BatchGetUrnResponseV2 getBatchUrnsSystemAuth( + String entityName, BatchGetUrnRequestV2 request) { return getBatchUrns( entityName, request, systemOperationContext.getSystemAuthentication().get().getCredentials()); } - public BatchGetUrnResponse getBatchUrns( - String entityName, BatchGetUrnRequest request, String authCredentials) { + public BatchGetUrnResponseV2 getBatchUrns( + String entityName, BatchGetUrnRequestV2 request, String authCredentials) { String url = (useSsl ? "https://" : "http://") + gmsHost + ":" + gmsPort + OPENAPI_PATH + entityName; HttpPost httpPost = new HttpPost(url); @@ -65,8 +66,8 @@ public BatchGetUrnResponse getBatchUrns( } } - private static BatchGetUrnResponse mapResponse(ClassicHttpResponse response) { - BatchGetUrnResponse serializedResponse; + private static BatchGetUrnResponseV2 mapResponse(ClassicHttpResponse response) { + BatchGetUrnResponseV2 serializedResponse; try { ByteArrayOutputStream result = new ByteArrayOutputStream(); InputStream contentStream = response.getEntity().getContent(); @@ -78,7 +79,7 @@ private static BatchGetUrnResponse mapResponse(ClassicHttpResponse response) { } serializedResponse = OBJECT_MAPPER.readValue( - result.toString(StandardCharsets.UTF_8), BatchGetUrnResponse.class); + result.toString(StandardCharsets.UTF_8), BatchGetUrnResponseV2.class); } catch (IOException e) { log.error("Wasn't able to convert response into expected type.", e); throw new RuntimeException(e); diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericAspect.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericAspect.java new file mode 100644 index 00000000000000..eb558644004943 --- /dev/null +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericAspect.java @@ -0,0 +1,16 @@ +package io.datahubproject.openapi.models; + +import java.util.Map; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +public interface GenericAspect { + @Nonnull + Map getValue(); + + @Nullable + Map getSystemMetadata(); + + @Nullable + Map getHeaders(); +} diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericEntity.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericEntity.java index f25f8b89f80267..fcb626ca60fc1e 100644 --- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericEntity.java +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericEntity.java @@ -2,6 +2,6 @@ import java.util.Map; -public interface GenericEntity { - Map getAspects(); +public interface GenericEntity { + Map getAspects(); } diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericEntityScrollResult.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericEntityScrollResult.java index 69b97956e0cf2f..59ea624e05c70b 100644 --- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericEntityScrollResult.java +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/GenericEntityScrollResult.java @@ -1,3 +1,3 @@ package io.datahubproject.openapi.models; -public interface GenericEntityScrollResult {} +public interface GenericEntityScrollResult> {} diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/BatchGetUrnRequest.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/BatchGetUrnRequestV2.java similarity index 81% rename from metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/BatchGetUrnRequest.java rename to metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/BatchGetUrnRequestV2.java index 8bae8fb08e9b95..bb3bcd29abda7c 100644 --- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/BatchGetUrnRequest.java +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/BatchGetUrnRequestV2.java @@ -1,4 +1,4 @@ -package io.datahubproject.openapi.models; +package io.datahubproject.openapi.v2.models; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; @@ -13,9 +13,9 @@ @Value @EqualsAndHashCode @Builder -@JsonDeserialize(builder = BatchGetUrnRequest.BatchGetUrnRequestBuilder.class) +@JsonDeserialize(builder = BatchGetUrnRequestV2.BatchGetUrnRequestV2Builder.class) @JsonInclude(JsonInclude.Include.NON_NULL) -public class BatchGetUrnRequest implements Serializable { +public class BatchGetUrnRequestV2 implements Serializable { @JsonProperty("urns") @Schema(required = true, description = "The list of urns to get.") List urns; diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/BatchGetUrnResponse.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/BatchGetUrnResponseV2.java similarity index 57% rename from metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/BatchGetUrnResponse.java rename to metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/BatchGetUrnResponseV2.java index baa9b31ad0055d..391a1e8627642f 100644 --- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/models/BatchGetUrnResponse.java +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/BatchGetUrnResponseV2.java @@ -1,8 +1,10 @@ -package io.datahubproject.openapi.models; +package io.datahubproject.openapi.v2.models; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import io.datahubproject.openapi.models.GenericAspect; +import io.datahubproject.openapi.models.GenericEntity; import io.swagger.v3.oas.annotations.media.Schema; import java.io.Serializable; import java.util.List; @@ -12,8 +14,9 @@ @Value @Builder @JsonInclude(JsonInclude.Include.NON_NULL) -@JsonDeserialize(builder = BatchGetUrnResponse.BatchGetUrnResponseBuilder.class) -public class BatchGetUrnResponse implements Serializable { +@JsonDeserialize(builder = BatchGetUrnResponseV2.BatchGetUrnResponseV2Builder.class) +public class BatchGetUrnResponseV2> + implements Serializable { @JsonProperty("entities") @Schema(description = "List of entity responses") List entities; diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericAspectV2.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericAspectV2.java new file mode 100644 index 00000000000000..dd50f8d8b2a5e8 --- /dev/null +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericAspectV2.java @@ -0,0 +1,32 @@ +package io.datahubproject.openapi.v2.models; + +import io.datahubproject.openapi.models.GenericAspect; +import java.util.LinkedHashMap; +import java.util.Map; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +public class GenericAspectV2 extends LinkedHashMap implements GenericAspect { + + public GenericAspectV2(Map m) { + super(m); + } + + @Nonnull + @Override + public Map getValue() { + return this; + } + + @Nullable + @Override + public Map getSystemMetadata() { + return null; + } + + @Nullable + @Override + public Map getHeaders() { + return null; + } +} diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericEntityScrollResultV2.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericEntityScrollResultV2.java index 685f45c60dbdc8..221c4a519168c4 100644 --- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericEntityScrollResultV2.java +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericEntityScrollResultV2.java @@ -1,6 +1,5 @@ package io.datahubproject.openapi.v2.models; -import io.datahubproject.openapi.models.GenericEntity; import io.datahubproject.openapi.models.GenericEntityScrollResult; import java.util.List; import lombok.Builder; @@ -8,8 +7,8 @@ @Data @Builder -public class GenericEntityScrollResultV2 - implements GenericEntityScrollResult { +public class GenericEntityScrollResultV2 + implements GenericEntityScrollResult { private String scrollId; - private List results; + private List results; } diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericEntityV2.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericEntityV2.java index 85d404fb57e0e3..83c23f3552409b 100644 --- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericEntityV2.java +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v2/models/GenericEntityV2.java @@ -24,20 +24,20 @@ @JsonInclude(JsonInclude.Include.NON_NULL) @NoArgsConstructor(force = true, access = AccessLevel.PRIVATE) @AllArgsConstructor -public class GenericEntityV2 implements GenericEntity { +public class GenericEntityV2 implements GenericEntity { @JsonProperty("urn") @Schema(description = "Urn of the entity") private String urn; @JsonProperty("aspects") @Schema(description = "Map of aspect name to aspect") - private Map aspects; + private Map aspects; public static class GenericEntityV2Builder { public GenericEntityV2 build( ObjectMapper objectMapper, Map> aspects) { - Map jsonObjectMap = + Map jsonObjectMap = aspects.entrySet().stream() .map( e -> { @@ -52,11 +52,14 @@ public GenericEntityV2 build( if (e.getValue().getSecond() != null) { return Map.entry( e.getKey(), - Map.of( - "systemMetadata", e.getValue().getSecond(), - "value", valueMap.get("value"))); + new GenericAspectV2( + Map.of( + "systemMetadata", e.getValue().getSecond(), + "value", valueMap.get("value")))); } else { - return Map.entry(e.getKey(), Map.of("value", valueMap.get("value"))); + return Map.entry( + e.getKey(), + new GenericAspectV2(Map.of("value", valueMap.get("value")))); } } catch (IOException ex) { throw new RuntimeException(ex); diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericAspectV3.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericAspectV3.java new file mode 100644 index 00000000000000..4db2c3288d1547 --- /dev/null +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericAspectV3.java @@ -0,0 +1,22 @@ +package io.datahubproject.openapi.v3.models; + +import com.fasterxml.jackson.annotation.JsonInclude; +import io.datahubproject.openapi.models.GenericAspect; +import java.util.Map; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Value; + +@EqualsAndHashCode +@Value +@Builder +@JsonInclude(JsonInclude.Include.NON_NULL) +@AllArgsConstructor +public class GenericAspectV3 implements GenericAspect { + @Nonnull Map value; + @Nullable Map systemMetadata; + @Nullable Map headers; +} diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityScrollResultV3.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityScrollResultV3.java index 265095f0f2c6e8..b2d096e8140bd2 100644 --- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityScrollResultV3.java +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityScrollResultV3.java @@ -1,6 +1,5 @@ package io.datahubproject.openapi.v3.models; -import io.datahubproject.openapi.models.GenericEntity; import io.datahubproject.openapi.models.GenericEntityScrollResult; import java.util.List; import lombok.Builder; @@ -8,8 +7,8 @@ @Data @Builder -public class GenericEntityScrollResultV3 - implements GenericEntityScrollResult { +public class GenericEntityScrollResultV3 + implements GenericEntityScrollResult { private String scrollId; - private List entities; + private List entities; } diff --git a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityV3.java b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityV3.java index 2e030390dd9cbd..3af3b25028fadc 100644 --- a/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityV3.java +++ b/metadata-service/openapi-servlet/models/src/main/java/io/datahubproject/openapi/v3/models/GenericEntityV3.java @@ -2,6 +2,7 @@ import com.datahub.util.RecordUtils; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; @@ -24,15 +25,18 @@ @Builder @JsonInclude(JsonInclude.Include.NON_NULL) @AllArgsConstructor -public class GenericEntityV3 extends LinkedHashMap implements GenericEntity { +public class GenericEntityV3 extends LinkedHashMap + implements GenericEntity { public GenericEntityV3(Map m) { super(m); } @Override - public Map getAspects() { - return this; + public Map getAspects() { + return this.entrySet().stream() + .filter(entry -> !"urn".equals(entry.getKey())) + .collect(Collectors.toMap(Map.Entry::getKey, entry -> (GenericAspectV3) entry.getValue())); } public static class GenericEntityV3Builder { @@ -41,27 +45,29 @@ public GenericEntityV3 build( ObjectMapper objectMapper, @Nonnull Urn urn, Map> aspects) { - Map jsonObjectMap = + Map jsonObjectMap = aspects.entrySet().stream() .map( - e -> { + entry -> { try { - Map valueMap = - Map.of( - "value", - objectMapper.readTree( - RecordUtils.toJsonString(e.getValue().getFirst()) - .getBytes(StandardCharsets.UTF_8))); + String aspectName = entry.getKey(); + Map aspectValue = + objectMapper.readValue( + RecordUtils.toJsonString(entry.getValue().getFirst()) + .getBytes(StandardCharsets.UTF_8), + new TypeReference<>() {}); + Map systemMetadata = + entry.getValue().getSecond() != null + ? objectMapper.convertValue( + entry.getValue().getSecond(), new TypeReference<>() {}) + : null; - if (e.getValue().getSecond() != null) { - return Map.entry( - e.getKey(), - Map.of( - "systemMetadata", e.getValue().getSecond(), - "value", valueMap.get("value"))); - } else { - return Map.entry(e.getKey(), Map.of("value", valueMap.get("value"))); - } + return Map.entry( + aspectName, + GenericAspectV3.builder() + .value(aspectValue) + .systemMetadata(systemMetadata) + .build()); } catch (IOException ex) { throw new RuntimeException(ex); } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java index 08f7e45a7bad30..6a6230622f44ff 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java @@ -16,7 +16,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; -import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.ByteString; import com.linkedin.data.template.RecordTemplate; import com.linkedin.entity.EnvelopedAspect; @@ -49,8 +48,7 @@ import io.datahubproject.metadata.context.RequestContext; import io.datahubproject.openapi.exception.InvalidUrnException; import io.datahubproject.openapi.exception.UnauthorizedException; -import io.datahubproject.openapi.models.BatchGetUrnRequest; -import io.datahubproject.openapi.models.BatchGetUrnResponse; +import io.datahubproject.openapi.models.GenericAspect; import io.datahubproject.openapi.models.GenericEntity; import io.datahubproject.openapi.models.GenericEntityScrollResult; import io.swagger.v3.oas.annotations.Operation; @@ -79,7 +77,9 @@ import org.springframework.web.bind.annotation.RequestParam; public abstract class GenericEntitiesController< - E extends GenericEntity, S extends GenericEntityScrollResult> { + A extends GenericAspect, + E extends GenericEntity, + S extends GenericEntityScrollResult> { public static final String NOT_FOUND_HEADER = "Not-Found-Reason"; protected static final SearchFlags DEFAULT_SEARCH_FLAGS = new SearchFlags().setFulltext(false).setSkipAggregates(true).setSkipHighlighting(true); @@ -112,11 +112,30 @@ protected abstract S buildScrollResult( @Nullable String scrollId) throws URISyntaxException; - protected abstract List buildEntityList( + protected List buildEntityList( @Nonnull OperationContext opContext, List urns, Set aspectNames, boolean withSystemMetadata) + throws URISyntaxException { + Map> versionMap = + resolveAspectNames( + urns.stream() + .map( + urn -> + Map.entry( + urn, + aspectNames.stream() + .map(aspectName -> Map.entry(aspectName, 0L)) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)))) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); + return buildEntityVersionedAspectList(opContext, versionMap, withSystemMetadata); + } + + protected abstract List buildEntityVersionedAspectList( + @Nonnull OperationContext opContext, + Map> urnAspectVersions, + boolean withSystemMetadata) throws URISyntaxException; protected abstract List buildEntityList( @@ -283,11 +302,12 @@ public ResponseEntity headEntity( value = "/{entityName}/{entityUrn:urn:li:.+}/{aspectName}", produces = MediaType.APPLICATION_JSON_VALUE) @Operation(summary = "Get an entity's generic aspect.") - public ResponseEntity getAspect( + public ResponseEntity getAspect( HttpServletRequest request, @PathVariable("entityName") String entityName, @PathVariable("entityUrn") String entityUrn, @PathVariable("aspectName") String aspectName, + @RequestParam(value = "version", required = false, defaultValue = "0") Long version, @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") Boolean withSystemMetadata) throws URISyntaxException { @@ -309,13 +329,21 @@ public ResponseEntity getAspect( authentication, true); - return buildEntityList(opContext, List.of(urn), Set.of(aspectName), withSystemMetadata).stream() + final List resultList; + if (version == 0) { + resultList = buildEntityList(opContext, List.of(urn), Set.of(aspectName), withSystemMetadata); + } else { + resultList = + buildEntityVersionedAspectList( + opContext, Map.of(urn, Map.of(aspectName, version)), withSystemMetadata); + } + + return resultList.stream() .findFirst() .flatMap( e -> e.getAspects().entrySet().stream() - .filter( - entry -> entry.getKey().equals(lookupAspectSpec(urn, aspectName).getName())) + .filter(entry -> entry.getKey().equalsIgnoreCase(aspectName)) .map(Map.Entry::getValue) .findFirst()) .map(ResponseEntity::ok) @@ -427,48 +455,6 @@ public ResponseEntity> createEntity( } } - @Tag(name = "Generic Entities") - @PostMapping(value = "/batch/{entityName}", produces = MediaType.APPLICATION_JSON_VALUE) - @Operation(summary = "Get a batch of entities") - public ResponseEntity> getEntityBatch( - HttpServletRequest httpServletRequest, - @PathVariable("entityName") String entityName, - @RequestBody BatchGetUrnRequest request) - throws URISyntaxException { - - List urns = request.getUrns().stream().map(UrnUtils::getUrn).collect(Collectors.toList()); - - Authentication authentication = AuthenticationContext.getAuthentication(); - if (!AuthUtil.isAPIAuthorizedEntityUrns(authentication, authorizationChain, READ, urns)) { - throw new UnauthorizedException( - authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); - } - OperationContext opContext = - OperationContext.asSession( - systemOperationContext, - RequestContext.builder() - .buildOpenapi( - authentication.getActor().toUrnStr(), - httpServletRequest, - "getEntityBatch", - entityName), - authorizationChain, - authentication, - true); - - return ResponseEntity.of( - Optional.of( - BatchGetUrnResponse.builder() - .entities( - new ArrayList<>( - buildEntityList( - opContext, - urns, - new HashSet<>(request.getAspectNames()), - request.isWithSystemMetadata()))) - .build())); - } - @Tag(name = "Generic Aspects") @DeleteMapping(value = "/{entityName}/{entityUrn:urn:li:.+}/{aspectName}") @Operation(summary = "Delete an entity aspect.") @@ -648,23 +634,55 @@ protected Boolean exists( opContext, urn, aspect, includeSoftDelete != null ? includeSoftDelete : false); } - protected Set resolveAspectNames(Set urns, Set requestedAspectNames) { - if (requestedAspectNames.isEmpty()) { - return urns.stream() - .flatMap(u -> entityRegistry.getEntitySpec(u.getEntityType()).getAspectSpecs().stream()) - .collect(Collectors.toSet()); - } else { - // ensure key is always present - return Stream.concat( - urns.stream() - .flatMap( - urn -> - requestedAspectNames.stream() - .map(aspectName -> lookupAspectSpec(urn, aspectName))), - urns.stream() - .map(u -> entityRegistry.getEntitySpec(u.getEntityType()).getKeyAspectSpec())) - .collect(Collectors.toSet()); - } + /** + * Given a map with aspect names from the API, normalized them into actual aspect names (casing + * fixes) + * + * @param requestedAspectNames requested aspects + * @return updated map + * @param map values + */ + protected Map> resolveAspectNames( + Map> requestedAspectNames) { + return requestedAspectNames.entrySet().stream() + .map( + entry -> { + final Urn urn = entry.getKey(); + final Set requestedNames; + if (entry.getValue().isEmpty()) { + requestedNames = + entityRegistry.getEntitySpec(urn.getEntityType()).getAspectSpecs().stream() + .map(AspectSpec::getName) + .collect(Collectors.toSet()); + } else { + // add key aspect + requestedNames = + Stream.concat( + entry.getValue().keySet().stream(), + Stream.of( + entityRegistry + .getEntitySpec(urn.getEntityType()) + .getKeyAspectName())) + .collect(Collectors.toSet()); + } + final Map normalizedNames = + requestedNames.stream() + .map( + requestAspectName -> + Map.entry( + requestAspectName, + lookupAspectSpec(urn, requestAspectName).getName())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + return Map.entry( + urn, + entry.getValue().entrySet().stream() + .map( + reqEntry -> + Map.entry( + normalizedNames.get(reqEntry.getKey()), reqEntry.getValue())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); + }) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } protected Map> toAspectMap( diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java index bb63d5cb9571b7..92a4cb2bd79f38 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -1,9 +1,15 @@ package io.datahubproject.openapi.v2.controller; +import static com.linkedin.metadata.authorization.ApiOperation.READ; + import com.datahub.authentication.Actor; +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationContext; +import com.datahub.authorization.AuthUtil; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.ByteString; import com.linkedin.data.template.RecordTemplate; import com.linkedin.entity.EnvelopedAspect; @@ -22,23 +28,38 @@ import com.linkedin.mxe.SystemMetadata; import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RequestContext; import io.datahubproject.openapi.controller.GenericEntitiesController; import io.datahubproject.openapi.exception.InvalidUrnException; +import io.datahubproject.openapi.exception.UnauthorizedException; +import io.datahubproject.openapi.v2.models.BatchGetUrnRequestV2; +import io.datahubproject.openapi.v2.models.BatchGetUrnResponseV2; +import io.datahubproject.openapi.v2.models.GenericAspectV2; import io.datahubproject.openapi.v2.models.GenericEntityScrollResultV2; import io.datahubproject.openapi.v2.models.GenericEntityV2; +import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; +import jakarta.servlet.http.HttpServletRequest; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; @@ -48,17 +69,59 @@ @Slf4j public class EntityController extends GenericEntitiesController< - GenericEntityV2, GenericEntityScrollResultV2> { + GenericAspectV2, GenericEntityV2, GenericEntityScrollResultV2> { + + @Tag(name = "Generic Entities") + @PostMapping(value = "/batch/{entityName}", produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "Get a batch of entities") + public ResponseEntity> getEntityBatch( + HttpServletRequest httpServletRequest, + @PathVariable("entityName") String entityName, + @RequestBody BatchGetUrnRequestV2 request) + throws URISyntaxException { + + List urns = request.getUrns().stream().map(UrnUtils::getUrn).collect(Collectors.toList()); + + Authentication authentication = AuthenticationContext.getAuthentication(); + if (!AuthUtil.isAPIAuthorizedEntityUrns(authentication, authorizationChain, READ, urns)) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); + } + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi( + authentication.getActor().toUrnStr(), + httpServletRequest, + "getEntityBatch", + entityName), + authorizationChain, + authentication, + true); + + return ResponseEntity.of( + Optional.of( + BatchGetUrnResponseV2.builder() + .entities( + new ArrayList<>( + buildEntityList( + opContext, + urns, + new HashSet<>(request.getAspectNames()), + request.isWithSystemMetadata()))) + .build())); + } @Override - public GenericEntityScrollResultV2 buildScrollResult( + public GenericEntityScrollResultV2 buildScrollResult( @Nonnull OperationContext opContext, SearchEntityArray searchEntities, Set aspectNames, boolean withSystemMetadata, @Nullable String scrollId) throws URISyntaxException { - return GenericEntityScrollResultV2.builder() + return GenericEntityScrollResultV2.builder() .results(toRecordTemplates(opContext, searchEntities, aspectNames, withSystemMetadata)) .scrollId(scrollId) .build(); @@ -122,35 +185,24 @@ protected AspectsBatch toMCPBatch( } @Override - protected List buildEntityList( + protected List buildEntityVersionedAspectList( @Nonnull OperationContext opContext, - List urns, - Set aspectNames, + Map> urnAspectVersions, boolean withSystemMetadata) throws URISyntaxException { - if (urns.isEmpty()) { - return List.of(); - } else { - Set urnsSet = new HashSet<>(urns); - - Map> aspects = - entityService.getLatestEnvelopedAspects( - opContext, - urnsSet, - resolveAspectNames(urnsSet, aspectNames).stream() - .map(AspectSpec::getName) - .collect(Collectors.toSet())); - - return urns.stream() - .map( - u -> - GenericEntityV2.builder() - .urn(u.toString()) - .build( - objectMapper, - toAspectMap(u, aspects.getOrDefault(u, List.of()), withSystemMetadata))) - .collect(Collectors.toList()); - } + Map> aspects = + entityService.getEnvelopedVersionedAspects( + opContext, resolveAspectNames(urnAspectVersions), true); + + return urnAspectVersions.keySet().stream() + .map( + u -> + GenericEntityV2.builder() + .urn(u.toString()) + .build( + objectMapper, + toAspectMap(u, aspects.getOrDefault(u, List.of()), withSystemMetadata))) + .collect(Collectors.toList()); } @Override diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java index 66b94218eb7d3a..3a93eb304b8f80 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java @@ -41,6 +41,7 @@ public class OpenAPIV3Generator { private static final String NAME_PATH = "path"; private static final String NAME_SYSTEM_METADATA = "systemMetadata"; private static final String NAME_ASYNC = "async"; + private static final String NAME_VERSION = "version"; private static final String NAME_SCROLL_ID = "scrollId"; private static final String NAME_INCLUDE_SOFT_DELETE = "includeSoftDelete"; private static final String PROPERTY_VALUE = "value"; @@ -108,7 +109,8 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { components.addSchemas( "Scroll" + entityName + ENTITY_RESPONSE_SUFFIX, buildEntityScrollSchema(e)); components.addSchemas( - "BatchGet" + entityName + ENTITY_RESPONSE_SUFFIX, buildEntityBatchGetSchema(e)); + "BatchGet" + entityName + ENTITY_REQUEST_SUFFIX, + buildEntityBatchGetRequestSchema(e, aspectNames)); }); // Parameters entityRegistry.getEntitySpecs().values().stream() @@ -130,7 +132,7 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { String.format("/v3/entity/%s", e.getName().toLowerCase()), buildListEntityPath(e)); paths.addPathItem( - String.format("/v3/entity/batch/%s", e.getName().toLowerCase()), + String.format("/v3/entity/%s/batchGet", e.getName().toLowerCase()), buildBatchGetEntityPath(e)); paths.addPathItem( String.format("/v3/entity/%s/{urn}", e.getName().toLowerCase()), @@ -369,23 +371,6 @@ private static PathItem buildBatchGetEntityPath(final EntitySpec entity) { final String upperFirst = toUpperFirst(entity.getName()); final PathItem result = new PathItem(); // Post Operation - final List aspectNames = - entity.getAspectSpecs().stream() - .map(AspectSpec::getName) - .distinct() - .collect(Collectors.toList()); - if (aspectNames.isEmpty()) { - aspectNames.add(entity.getKeyAspectName()); - } - final Schema aspectNamesSchema = - new Schema() - .type(TYPE_ARRAY) - .description("List of aspect names to get") - .items( - new Schema() - .type(TYPE_STRING) - ._enum(aspectNames) - ._default(aspectNames.stream().findFirst().orElse(null))); final Content requestBatch = new Content() .addMediaType( @@ -393,42 +378,47 @@ private static PathItem buildBatchGetEntityPath(final EntitySpec entity) { new MediaType() .schema( new Schema() - .properties( - Map.of( - "urns", - new Schema() - .type(TYPE_ARRAY) - .items( - new Schema() - .type(TYPE_STRING) - .description("List of urns to get")), - "aspectNames", aspectNamesSchema, - "withSystemMetadata", - new Schema().type(TYPE_BOOLEAN)._default(true))))); - final ApiResponse apiResponse = + .type(TYPE_ARRAY) + .items( + new Schema() + .$ref( + String.format( + "#/components/schemas/%s%s", + "BatchGet" + upperFirst, ENTITY_REQUEST_SUFFIX))))); + final ApiResponse apiBatchGetResponse = new ApiResponse() - .description("Create a batch of " + entity.getName() + " entities.") + .description("Get a batch of " + entity.getName() + " entities.") .content( new Content() .addMediaType( "application/json", new MediaType() .schema( - new Schema<>() - .$ref( - String.format( - "#/components/schemas/BatchGet%s%s", - upperFirst, ENTITY_RESPONSE_SUFFIX))))); + new Schema() + .type(TYPE_ARRAY) + .items( + new Schema<>() + .$ref( + String.format( + "#/components/schemas/%s%s", + upperFirst, ENTITY_RESPONSE_SUFFIX)))))); result.setPost( new Operation() .summary("Batch Get " + upperFirst + " entities.") .tags(List.of(entity.getName() + " Entity")) + .parameters( + List.of( + new Parameter() + .in(NAME_QUERY) + .name("systemMetadata") + .description("Include systemMetadata with response.") + .schema(new Schema().type(TYPE_BOOLEAN)._default(false)))) .requestBody( new RequestBody() .description("Batch Get " + entity.getName() + " entities.") .required(true) .content(requestBatch)) - .responses(new ApiResponses().addApiResponse("200", apiResponse))); + .responses(new ApiResponses().addApiResponse("200", apiBatchGetResponse))); return result; } @@ -581,7 +571,30 @@ private static Schema buildAspectRefResponseSchema(final String aspectName) { } private static Schema buildAspectRefRequestSchema(final String aspectName) { - return new Schema<>().$ref(PATH_DEFINITIONS + aspectName); + final Schema result = + new Schema<>() + .type(TYPE_OBJECT) + .description(ASPECT_DESCRIPTION) + .required(List.of(PROPERTY_VALUE)) + .addProperty(PROPERTY_VALUE, new Schema<>().$ref(PATH_DEFINITIONS + aspectName)); + result.addProperty( + "systemMetadata", + new Schema<>() + .type(TYPE_OBJECT) + .allOf(List.of(new Schema().$ref(PATH_DEFINITIONS + "SystemMetadata"))) + .description("System metadata for the aspect.") + .nullable(true)); + + Schema stringTypeSchema = new Schema<>(); + stringTypeSchema.setType(TYPE_STRING); + result.addProperty( + "headers", + new Schema<>() + .type(TYPE_OBJECT) + .additionalProperties(stringTypeSchema) + .description("System headers for the operation.") + .nullable(true)); + return result; } private static Schema buildEntitySchema( @@ -629,25 +642,37 @@ private static Schema buildEntityScrollSchema(final EntitySpec entity) { toUpperFirst(entity.getName()), ENTITY_RESPONSE_SUFFIX)))); } - private static Schema buildEntityBatchGetSchema(final EntitySpec entity) { + private static Schema buildEntityBatchGetRequestSchema( + final EntitySpec entity, Set aspectNames) { + + final Schema stringTypeSchema = new Schema<>(); + stringTypeSchema.setType(TYPE_STRING); + final Map headers = + Map.of( + "headers", + new Schema<>() + .type(TYPE_OBJECT) + .additionalProperties(stringTypeSchema) + .description("System headers for the operation.") + .nullable(true)); + + final Map properties = + entity.getAspectSpecMap().entrySet().stream() + .filter(a -> aspectNames.contains(a.getValue().getName())) + .collect( + Collectors.toMap( + Map.Entry::getKey, a -> new Schema().type(TYPE_OBJECT).properties(headers))); + properties.put( + PROPERTY_URN, + new Schema<>().type(TYPE_STRING).description("Unique id for " + entity.getName())); + + properties.put(entity.getKeyAspectName(), new Schema().type(TYPE_OBJECT).properties(headers)); + return new Schema<>() .type(TYPE_OBJECT) - .description("Batch get " + toUpperFirst(entity.getName()) + " objects.") - .required(List.of("entities")) - .addProperty( - "entities", - new Schema() - .properties( - Map.of( - "urn", - new Schema().type(TYPE_STRING).description("Entity key urn"), - "aspects", - new Schema<>() - .description(toUpperFirst(entity.getName()) + " object.") - .$ref( - String.format( - "#/components/schemas/%s%s", - toUpperFirst(entity.getName()), ENTITY_RESPONSE_SUFFIX))))); + .description(toUpperFirst(entity.getName()) + " object.") + .required(List.of(PROPERTY_URN)) + .properties(properties); } private static Schema buildAspectRef(final String aspect, final boolean withSystemMetadata) { @@ -710,6 +735,12 @@ private static PathItem buildSingleEntityAspectPath( .name(NAME_SYSTEM_METADATA) .description("Include systemMetadata with response.") .schema(new Schema().type(TYPE_BOOLEAN)._default(false)); + final Parameter versionParameter = + new Parameter() + .in(NAME_QUERY) + .name(NAME_VERSION) + .description("Return a specific aspect version.") + .schema(new Schema().type(TYPE_INTEGER)._default(0).minimum(BigDecimal.ZERO)); final ApiResponse successApiResponse = new ApiResponse() .description("Success") @@ -728,7 +759,7 @@ private static PathItem buildSingleEntityAspectPath( new Operation() .summary(String.format("Get %s for %s.", aspect, entity.getName())) .tags(tags) - .parameters(List.of(getParameter)) + .parameters(List.of(getParameter, versionParameter)) .responses(new ApiResponses().addApiResponse("200", successApiResponse)); // Head Operation final ApiResponse successHeadResponse = diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java index be5558f821d4ff..84cc4858d5fe47 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java @@ -1,7 +1,14 @@ package io.datahubproject.openapi.v3.controller; +import static com.linkedin.metadata.aspect.validation.ConditionalWriteValidator.HTTP_HEADER_IF_VERSION_MATCH; +import static com.linkedin.metadata.authorization.ApiOperation.READ; + import com.datahub.authentication.Actor; +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationContext; +import com.datahub.authorization.AuthUtil; import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.linkedin.common.urn.Urn; @@ -23,24 +30,36 @@ import com.linkedin.mxe.SystemMetadata; import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RequestContext; import io.datahubproject.openapi.controller.GenericEntitiesController; import io.datahubproject.openapi.exception.InvalidUrnException; +import io.datahubproject.openapi.exception.UnauthorizedException; +import io.datahubproject.openapi.v3.models.GenericAspectV3; import io.datahubproject.openapi.v3.models.GenericEntityScrollResultV3; import io.datahubproject.openapi.v3.models.GenericEntityV3; +import io.swagger.v3.oas.annotations.Operation; +import io.swagger.v3.oas.annotations.tags.Tag; +import jakarta.servlet.http.HttpServletRequest; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; -import java.util.HashSet; +import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; @RestController("EntityControllerV3") @@ -49,44 +68,74 @@ @Slf4j public class EntityController extends GenericEntitiesController< - GenericEntityV3, GenericEntityScrollResultV3> { + GenericAspectV3, GenericEntityV3, GenericEntityScrollResultV3> { + + @Tag(name = "Generic Entities") + @PostMapping(value = "/{entityName}/batchGet", produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "Get a batch of entities") + public ResponseEntity> getEntityBatch( + HttpServletRequest httpServletRequest, + @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") + Boolean withSystemMetadata, + @RequestBody @Nonnull String jsonEntityList) + throws URISyntaxException, JsonProcessingException { + + Map> requestMap = toEntityVersionRequest(jsonEntityList); + + Authentication authentication = AuthenticationContext.getAuthentication(); + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi( + authentication.getActor().toUrnStr(), + httpServletRequest, + "getEntityBatch", + requestMap.keySet().stream() + .map(Urn::getEntityType) + .collect(Collectors.toSet())), + authorizationChain, + authentication, + true); + + if (!AuthUtil.isAPIAuthorizedEntityUrns( + authentication, authorizationChain, READ, requestMap.keySet())) { + throw new UnauthorizedException( + authentication.getActor().toUrnStr() + " is unauthorized to " + READ + " entities."); + } + + return ResponseEntity.of( + Optional.of(buildEntityVersionedAspectList(opContext, requestMap, withSystemMetadata))); + } @Override - public GenericEntityScrollResultV3 buildScrollResult( + public GenericEntityScrollResultV3 buildScrollResult( @Nonnull OperationContext opContext, SearchEntityArray searchEntities, Set aspectNames, boolean withSystemMetadata, @Nullable String scrollId) throws URISyntaxException { - return GenericEntityScrollResultV3.builder() + return GenericEntityScrollResultV3.builder() .entities(toRecordTemplates(opContext, searchEntities, aspectNames, withSystemMetadata)) .scrollId(scrollId) .build(); } @Override - protected List buildEntityList( + protected List buildEntityVersionedAspectList( @Nonnull OperationContext opContext, - List urns, - Set aspectNames, + Map> urnAspectVersions, boolean withSystemMetadata) throws URISyntaxException { - if (urns.isEmpty()) { + if (urnAspectVersions.isEmpty()) { return List.of(); } else { - Set urnsSet = new HashSet<>(urns); - Map> aspects = - entityService.getLatestEnvelopedAspects( - opContext, - urnsSet, - resolveAspectNames(urnsSet, aspectNames).stream() - .map(AspectSpec::getName) - .collect(Collectors.toSet()), - false); - - return urns.stream() + entityService.getEnvelopedVersionedAspects( + opContext, resolveAspectNames(urnAspectVersions), false); + + return urnAspectVersions.keySet().stream() .filter(urn -> aspects.containsKey(urn) && !aspects.get(urn).isEmpty()) .map( u -> @@ -149,6 +198,62 @@ private List toRecordTemplates( withSystemMetadata); } + private Map> toEntityVersionRequest(@Nonnull String entityArrayList) + throws JsonProcessingException, InvalidUrnException { + JsonNode entities = objectMapper.readTree(entityArrayList); + + Map> items = new HashMap<>(); + if (entities.isArray()) { + Iterator entityItr = entities.iterator(); + while (entityItr.hasNext()) { + JsonNode entity = entityItr.next(); + if (!entity.has("urn")) { + throw new IllegalArgumentException("Missing `urn` field"); + } + Urn entityUrn = validatedUrn(entity.get("urn").asText()); + items.putIfAbsent(entityUrn, new HashMap<>()); + + Iterator> aspectItr = entity.fields(); + while (aspectItr.hasNext()) { + Map.Entry aspect = aspectItr.next(); + + if ("urn".equals(aspect.getKey())) { + continue; + } + + AspectSpec aspectSpec = lookupAspectSpec(entityUrn, aspect.getKey()); + + if (aspectSpec != null) { + + Map headers = null; + if (aspect.getValue().has("headers")) { + headers = + objectMapper.convertValue( + aspect.getValue().get("headers"), new TypeReference<>() {}); + items + .get(entityUrn) + .put( + aspectSpec.getName(), + Long.parseLong(headers.getOrDefault(HTTP_HEADER_IF_VERSION_MATCH, "0"))); + } else { + items.get(entityUrn).put(aspectSpec.getName(), 0L); + } + } + } + + // handle no aspects specified, default latest version + if (items.get(entityUrn).isEmpty()) { + for (AspectSpec aspectSpec : + entityRegistry.getEntitySpec(entityUrn.getEntityType()).getAspectSpecs()) { + items.get(entityUrn).put(aspectSpec.getName(), 0L); + } + } + } + } + + return items; + } + @Override protected AspectsBatch toMCPBatch( @Nonnull OperationContext opContext, String entityArrayList, Actor actor) @@ -184,6 +289,12 @@ protected AspectsBatch toMCPBatch( objectMapper.writeValueAsString(aspect.getValue().get("systemMetadata"))); ((ObjectNode) aspect.getValue()).remove("systemMetadata"); } + Map headers = null; + if (aspect.getValue().has("headers")) { + headers = + objectMapper.convertValue( + aspect.getValue().get("headers"), new TypeReference<>() {}); + } ChangeItemImpl.ChangeItemImplBuilder builder = ChangeItemImpl.builder() @@ -191,10 +302,11 @@ protected AspectsBatch toMCPBatch( .aspectName(aspectSpec.getName()) .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr())) .systemMetadata(systemMetadata) + .headers(headers) .recordTemplate( GenericRecordUtils.deserializeAspect( ByteString.copyString( - objectMapper.writeValueAsString(aspect.getValue()), + objectMapper.writeValueAsString(aspect.getValue().get("value")), StandardCharsets.UTF_8), GenericRecordUtils.JSON, aspectSpec)); diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index c40137b265cff0..72578be8c54d07 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -101,6 +101,11 @@ }, "doc" : "Additional properties", "optional" : true + }, { + "name" : "version", + "type" : "string", + "doc" : "Aspect version\n Initial implementation will use the aspect version's number, however stored as\n a string in the case where a different aspect versioning scheme is later adopted.", + "optional" : true } ] } } ] @@ -4052,7 +4057,15 @@ }, { "name" : "systemMetadata", "type" : "SystemMetadata", - "doc" : "A string->string map of custom properties that one might want to attach to an event\n", + "doc" : "System properties that one might want to attach to an event\n", + "optional" : true + }, { + "name" : "headers", + "type" : { + "type" : "map", + "values" : "string" + }, + "doc" : "Headers - intended to mimic http headers", "optional" : true } ] }, "com.linkedin.mxe.SystemMetadata", "com.linkedin.schema.ArrayType", "com.linkedin.schema.BinaryJsonSchema", "com.linkedin.schema.BooleanType", "com.linkedin.schema.BytesType", "com.linkedin.schema.DatasetFieldForeignKey", "com.linkedin.schema.DateType", "com.linkedin.schema.EditableSchemaFieldInfo", "com.linkedin.schema.EditableSchemaMetadata", "com.linkedin.schema.EnumType", "com.linkedin.schema.EspressoSchema", "com.linkedin.schema.FixedType", "com.linkedin.schema.ForeignKeyConstraint", "com.linkedin.schema.ForeignKeySpec", "com.linkedin.schema.KafkaSchema", "com.linkedin.schema.KeyValueSchema", "com.linkedin.schema.MapType", "com.linkedin.schema.MySqlDDL", "com.linkedin.schema.NullType", "com.linkedin.schema.NumberType", "com.linkedin.schema.OracleDDL", "com.linkedin.schema.OrcSchema", "com.linkedin.schema.OtherSchema", "com.linkedin.schema.PrestoDDL", "com.linkedin.schema.RecordType", "com.linkedin.schema.SchemaField", "com.linkedin.schema.SchemaFieldDataType", "com.linkedin.schema.SchemaMetadata", "com.linkedin.schema.SchemaMetadataKey", "com.linkedin.schema.Schemaless", "com.linkedin.schema.StringType", "com.linkedin.schema.TimeType", "com.linkedin.schema.UnionType", "com.linkedin.schema.UrnForeignKey", "com.linkedin.tag.TagProperties" ], diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index aeb5fbef5af2f2..9b93f1184cd59f 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -6469,6 +6469,11 @@ }, "doc" : "Additional properties", "optional" : true + }, { + "name" : "version", + "type" : "string", + "doc" : "Aspect version\n Initial implementation will use the aspect version's number, however stored as\n a string in the case where a different aspect versioning scheme is later adopted.", + "optional" : true } ] }, "com.linkedin.policy.DataHubActorFilter", "com.linkedin.policy.DataHubPolicyInfo", "com.linkedin.policy.DataHubResourceFilter", "com.linkedin.policy.PolicyMatchCondition", "com.linkedin.policy.PolicyMatchCriterion", "com.linkedin.policy.PolicyMatchFilter", "com.linkedin.retention.DataHubRetentionConfig", "com.linkedin.retention.Retention", "com.linkedin.retention.TimeBasedRetention", "com.linkedin.retention.VersionBasedRetention", "com.linkedin.schema.ArrayType", "com.linkedin.schema.BinaryJsonSchema", "com.linkedin.schema.BooleanType", "com.linkedin.schema.BytesType", "com.linkedin.schema.DatasetFieldForeignKey", "com.linkedin.schema.DateType", "com.linkedin.schema.EditableSchemaFieldInfo", "com.linkedin.schema.EditableSchemaMetadata", "com.linkedin.schema.EnumType", "com.linkedin.schema.EspressoSchema", "com.linkedin.schema.FixedType", "com.linkedin.schema.ForeignKeyConstraint", "com.linkedin.schema.ForeignKeySpec", "com.linkedin.schema.KafkaSchema", "com.linkedin.schema.KeyValueSchema", "com.linkedin.schema.MapType", "com.linkedin.schema.MySqlDDL", "com.linkedin.schema.NullType", "com.linkedin.schema.NumberType", "com.linkedin.schema.OracleDDL", "com.linkedin.schema.OrcSchema", "com.linkedin.schema.OtherSchema", "com.linkedin.schema.PrestoDDL", "com.linkedin.schema.RecordType", "com.linkedin.schema.SchemaField", "com.linkedin.schema.SchemaFieldDataType", "com.linkedin.schema.SchemaMetadata", "com.linkedin.schema.SchemaMetadataKey", "com.linkedin.schema.Schemaless", "com.linkedin.schema.StringType", "com.linkedin.schema.TimeType", "com.linkedin.schema.UnionType", "com.linkedin.schema.UrnForeignKey", "com.linkedin.tag.TagProperties" ], "schema" : { diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesV2.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesV2.snapshot.json index 3eac87e268f5d4..9bf7f97b34be18 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesV2.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesV2.snapshot.json @@ -146,6 +146,11 @@ }, "doc" : "Additional properties", "optional" : true + }, { + "name" : "version", + "type" : "string", + "doc" : "Aspect version\n Initial implementation will use the aspect version's number, however stored as\n a string in the case where a different aspect versioning scheme is later adopted.", + "optional" : true } ] }, "doc" : "The system metadata for this aspect\n", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesVersionedV2.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesVersionedV2.snapshot.json index 1733537e68f305..ef72ecb23d5bbe 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesVersionedV2.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entitiesVersionedV2.snapshot.json @@ -155,6 +155,11 @@ }, "doc" : "Additional properties", "optional" : true + }, { + "name" : "version", + "type" : "string", + "doc" : "Aspect version\n Initial implementation will use the aspect version's number, however stored as\n a string in the case where a different aspect versioning scheme is later adopted.", + "optional" : true } ] }, "doc" : "The system metadata for this aspect\n", diff --git a/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java b/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java index a7980d0d5c99f0..8821143cde6cc3 100644 --- a/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java +++ b/metadata-service/restli-client-api/src/main/java/com/linkedin/entity/client/EntityClient.java @@ -1,6 +1,7 @@ package com.linkedin.entity.client; import static com.linkedin.metadata.utils.GenericRecordUtils.entityResponseToAspectMap; +import static com.linkedin.metadata.utils.GenericRecordUtils.entityResponseToSystemAspectMap; import com.datahub.plugins.auth.authorization.Authorizer; import com.linkedin.common.VersionedUrn; @@ -12,6 +13,7 @@ import com.linkedin.entity.Entity; import com.linkedin.entity.EntityResponse; import com.linkedin.metadata.aspect.EnvelopedAspect; +import com.linkedin.metadata.aspect.SystemAspect; import com.linkedin.metadata.aspect.VersionedAspect; import com.linkedin.metadata.browse.BrowseResult; import com.linkedin.metadata.browse.BrowseResultV2; @@ -602,4 +604,13 @@ default Map> getLatestAspects( String entityName = urns.stream().findFirst().map(Urn::getEntityType).get(); return entityResponseToAspectMap(batchGetV2(opContext, entityName, urns, aspectNames)); } + + @Nonnull + default Map> getLatestSystemAspect( + @Nonnull OperationContext opContext, @Nonnull Set urns, @Nonnull Set aspectNames) + throws RemoteInvocationException, URISyntaxException { + String entityName = urns.stream().findFirst().map(Urn::getEntityType).get(); + return entityResponseToSystemAspectMap( + batchGetV2(opContext, entityName, urns, aspectNames), opContext.getEntityRegistry()); + } } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java index ac4c6895b757b6..283cff5d2e19da 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java @@ -256,6 +256,21 @@ Map> getLatestEnvelopedAspects( boolean alwaysIncludeKeyAspect) throws URISyntaxException; + /** + * Retrieve the specified aspect versions for the given URNs + * + * @param opContext operation context + * @param urnAspectVersions map of the urn's aspect versions + * @param alwaysIncludeKeyAspect whether to include the key aspect + * @return enveloped aspects with the specific version + * @throws URISyntaxException + */ + Map> getEnvelopedVersionedAspects( + @Nonnull OperationContext opContext, + @Nonnull Map> urnAspectVersions, + boolean alwaysIncludeKeyAspect) + throws URISyntaxException; + @Deprecated default Map> getLatestEnvelopedAspects( @Nonnull OperationContext opContext, @Nonnull Set urns, @Nonnull Set aspectNames) diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/GenericRecordUtils.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/GenericRecordUtils.java index ae061a2d0c0905..7974d239a25bc4 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/utils/GenericRecordUtils.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/GenericRecordUtils.java @@ -6,7 +6,10 @@ import com.linkedin.data.template.RecordTemplate; import com.linkedin.entity.Aspect; import com.linkedin.entity.EntityResponse; +import com.linkedin.metadata.aspect.EnvelopedSystemAspect; +import com.linkedin.metadata.aspect.SystemAspect; import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.mxe.GenericAspect; import com.linkedin.mxe.GenericPayload; import java.nio.charset.StandardCharsets; @@ -87,4 +90,27 @@ public static Map> entityResponseToAspectMap( .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)))) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } + + @Nonnull + public static Map> entityResponseToSystemAspectMap( + Map inputMap, @Nonnull EntityRegistry entityRegistry) { + return inputMap.entrySet().stream() + .map( + entry -> + Map.entry( + entry.getKey(), + entry.getValue().getAspects().entrySet().stream() + .filter(aspectEntry -> aspectEntry.getValue() != null) + .map( + aspectEntry -> + Map.entry( + aspectEntry.getKey(), + EnvelopedSystemAspect.of( + entry.getKey(), + aspectEntry.getValue(), + entityRegistry.getEntitySpec( + entry.getKey().getEntityType())))) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)))) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } } diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/ExceptionUtils.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/ExceptionUtils.java new file mode 100644 index 00000000000000..b3bad8e8242188 --- /dev/null +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/metrics/ExceptionUtils.java @@ -0,0 +1,43 @@ +package com.linkedin.metadata.utils.metrics; + +import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; + +public class ExceptionUtils { + private static final String BASE_NAME = "metadata.validation.exception"; + private static final String DELIMITER = "."; + + private ExceptionUtils() {} + + public static ValidationExceptionCollection collectMetrics( + final ValidationExceptionCollection exceptions) { + exceptions + .streamAllExceptions() + .forEach( + exception -> { + String subTypeBaseName = + String.join( + DELIMITER, BASE_NAME, exception.getSubType().toString().toLowerCase()); + // subtype count + MetricUtils.counter(subTypeBaseName).inc(exceptions.size()); + // Change type count + MetricUtils.counter( + String.join( + DELIMITER, + subTypeBaseName, + exception.getChangeType().toString().toLowerCase())) + .inc(); + // Entity count + MetricUtils.counter( + String.join( + DELIMITER, + subTypeBaseName, + exception.getEntityUrn().getEntityType().toLowerCase())) + .inc(); + // Aspect count + MetricUtils.counter( + String.join(DELIMITER, subTypeBaseName, exception.getAspectName())) + .inc(); + }); + return exceptions; + } +} From e5d42715f09751eedc6211aa3eabad25efbc162e Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 11 Jul 2024 13:23:57 -0700 Subject: [PATCH 3/9] fix(build): upgrade vercel builds to Node 20.x (#10890) --- docs-website/vercel-setup.sh | 10 +++++----- metadata-ingestion/scripts/install_deps.sh | 3 ++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/docs-website/vercel-setup.sh b/docs-website/vercel-setup.sh index 01a1ee65b34f01..4bb40eaddf4775 100755 --- a/docs-website/vercel-setup.sh +++ b/docs-website/vercel-setup.sh @@ -4,6 +4,10 @@ set -euxo pipefail ./metadata-ingestion/scripts/install_deps.sh +# Set up java version for gradle +yum install java-17-amazon-corretto -y +java --version + # Build python from source. # Amazon Linux 2 has Python 3.8, but it's version of OpenSSL is super old and hence it # doesn't work with the packages we use. As such, we have to build Python from source. @@ -11,8 +15,7 @@ set -euxo pipefail # for reuse. yum groupinstall "Development Tools" -y -yum erase openssl-devel -y -yum install openssl11 openssl11-devel libffi-devel bzip2-devel wget nodejs -y +yum install openssl openssl-devel libffi-devel bzip2-devel wget nodejs -y wget https://www.python.org/ftp/python/3.10.11/Python-3.10.11.tgz tar -xf Python-3.10.11.tgz @@ -29,6 +32,3 @@ rm "$py3" ln "$(which python3.10)" "$py3" python3 --version -# Set up java version for gradle -yum install java-17-amazon-corretto -java --version \ No newline at end of file diff --git a/metadata-ingestion/scripts/install_deps.sh b/metadata-ingestion/scripts/install_deps.sh index bae0278056ebbd..80a07cb04cb447 100755 --- a/metadata-ingestion/scripts/install_deps.sh +++ b/metadata-ingestion/scripts/install_deps.sh @@ -18,7 +18,8 @@ else sqlite-devel \ xz-devel \ libxml2-devel \ - libxslt-devel + libxslt-devel \ + krb5-devel else $sudo_cmd apt-get update && $sudo_cmd apt-get install -y \ python3-ldap \ From 418f7e05cd299482db8d9f9219da96dbf3a436af Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 11 Jul 2024 13:24:08 -0700 Subject: [PATCH 4/9] feat(ingest/lookml): shallow clone repos (#10888) --- .../ingestion/source/git/git_import.py | 37 +++++++++++++------ 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/git/git_import.py b/metadata-ingestion/src/datahub/ingestion/source/git/git_import.py index 2122374c1e404e..6440ec8e5d7877 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/git/git_import.py +++ b/metadata-ingestion/src/datahub/ingestion/source/git/git_import.py @@ -21,6 +21,8 @@ def __init__(self, tmp_dir: str, skip_known_host_verification: bool = True): def clone( self, ssh_key: Optional[SecretStr], repo_url: str, branch: Optional[str] = None ) -> Path: + # Note: this does a shallow clone. + unique_dir = str(uuid4()) keys_dir = f"{self.tmp_dir}/{unique_dir}/keys" checkout_dir = f"{self.tmp_dir}/{unique_dir}/checkout" @@ -55,20 +57,33 @@ def clone( ) logger.debug(f"ssh_command={git_ssh_cmd}") - logger.info( - f"⏳ Cloning repo '{self.sanitize_repo_url(repo_url)}', this can take some time..." - ) - self.last_repo_cloned = git.Repo.clone_from( - repo_url, - checkout_dir, - env=dict(GIT_SSH_COMMAND=git_ssh_cmd), - ) - logger.info("✅ Cloning complete!") - - if branch is not None: + if branch is None: + logger.info( + f"⏳ Cloning repo '{self.sanitize_repo_url(repo_url)}' (default branch), this can take some time..." + ) + self.last_repo_cloned = git.Repo.clone_from( + repo_url, + checkout_dir, + env=dict(GIT_SSH_COMMAND=git_ssh_cmd), + depth=1, + ) + else: + # Because we accept branch names, tags, and commit hashes in the branch parameter, + # we can't just use the --branch flag of Git clone. Doing a blobless clone allows + # us to quickly checkout the right commit. + logger.info( + f"⏳ Cloning repo '{self.sanitize_repo_url(repo_url)}' (branch: {branch}), this can take some time..." + ) + self.last_repo_cloned = git.Repo.clone_from( + repo_url, + checkout_dir, + env=dict(GIT_SSH_COMMAND=git_ssh_cmd), + filter="blob:none", + ) logger.info(f"Checking out branch {branch}") self.last_repo_cloned.git.checkout(branch) + logger.info("✅ Cloning complete!") return pathlib.Path(checkout_dir) def get_last_repo_cloned(self) -> Optional[git.Repo]: From 44930dfd1e62add3b516b9f1254cb693922e9b4d Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 11 Jul 2024 13:25:13 -0700 Subject: [PATCH 5/9] fix(ingest/looker): add missing dependency (#10876) --- metadata-ingestion/setup.py | 1 + .../src/datahub/ingestion/source/looker/looker_config.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 5ff62dd02fbc3f..7552650fa40ef3 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -166,6 +166,7 @@ *sqlglot_lib, "GitPython>2", "python-liquid", + *sqlglot_lib, } bigquery_common = { diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py index 5b774012e70dac..4ad9635069b156 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py @@ -256,7 +256,7 @@ class LookerDashboardSourceConfig( "ingest dashboards in the Shared folder space.", ) max_threads: int = Field( - os.cpu_count() or 40, + default_factory=lambda: os.cpu_count() or 40, description="Max parallelism for Looker API calls. Defaults to cpuCount or 40", ) external_base_url: Optional[str] = Field( From 82bd3c248fd0164aa0c525a79604968010731e0f Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 11 Jul 2024 13:26:57 -0700 Subject: [PATCH 6/9] fix(ingest): only populate audit stamps where accurate (#10604) --- .../src/datahub/ingestion/source/metabase.py | 2 +- .../src/datahub/ingestion/source/redash.py | 4 +- .../ingestion/source/redshift/redshift.py | 6 +-- .../source/snowflake/snowflake_schema_gen.py | 6 +-- .../src/datahub/ingestion/source/superset.py | 12 ++---- .../metabase/metabase_mces_golden.json | 12 +++--- .../superset/golden_test_ingest.json | 42 +++++++++++-------- .../superset/golden_test_stateful_ingest.json | 20 ++++----- .../tests/unit/test_redash_source.py | 20 +++------ 9 files changed, 54 insertions(+), 70 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py index e401f60445d1bf..49fa9dab5f1d8d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metabase.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py @@ -449,7 +449,7 @@ def construct_card_from_api_data(self, card_data: dict) -> Optional[ChartSnapsho f"{last_edit_by.get('timestamp')}" ) last_modified = ChangeAuditStamps( - created=AuditStamp(time=modified_ts, actor=modified_actor), + created=None, lastModified=AuditStamp(time=modified_ts, actor=modified_actor), ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redash.py b/metadata-ingestion/src/datahub/ingestion/source/redash.py index b833a3691c0818..c7a3f25e947dc5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redash.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redash.py @@ -555,7 +555,7 @@ def _get_dashboard_snapshot(self, dashboard_data, redash_version): title = dashboard_data.get("name", "") last_modified = ChangeAuditStamps( - created=AuditStamp(time=modified_ts, actor=modified_actor), + created=None, lastModified=AuditStamp(time=modified_ts, actor=modified_actor), ) @@ -694,7 +694,7 @@ def _get_chart_snapshot(self, query_data: Dict, viz_data: Dict) -> ChartSnapshot title = f"{query_data.get('name')} {viz_data.get('name', '')}" last_modified = ChangeAuditStamps( - created=AuditStamp(time=modified_ts, actor=modified_actor), + created=None, lastModified=AuditStamp(time=modified_ts, actor=modified_actor), ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index 6be7eedf976bd7..68821662762b63 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -800,11 +800,7 @@ def gen_dataset_workunits( lastModified=( TimeStamp(time=int(table.last_altered.timestamp() * 1000)) if table.last_altered - else ( - TimeStamp(time=int(table.created.timestamp() * 1000)) - if table.created - else None - ) + else None ), description=table.comment, qualifiedName=str(datahub_dataset_name), diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py index b6f16cd671b8d3..ac2a3ced5a232c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py @@ -762,11 +762,7 @@ def get_dataset_properties( lastModified=( TimeStamp(time=int(table.last_altered.timestamp() * 1000)) if table.last_altered is not None - else ( - TimeStamp(time=int(table.created.timestamp() * 1000)) - if table.created is not None - else None - ) + else None ), description=table.comment, qualifiedName=f"{db_name}.{schema_name}.{table.name}", diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 9559b8e84b85da..dd2dc3301d80ea 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -273,11 +273,9 @@ def construct_dashboard_from_api_data(self, dashboard_data): dp.parse(dashboard_data.get("changed_on_utc", "now")).timestamp() * 1000 ) title = dashboard_data.get("dashboard_title", "") - # note: the API does not currently supply created_by usernames due to a bug, but we are required to - # provide a created AuditStamp to comply with ChangeAuditStamp model. For now, I sub in the last - # modified actor urn + # note: the API does not currently supply created_by usernames due to a bug last_modified = ChangeAuditStamps( - created=AuditStamp(time=modified_ts, actor=modified_actor), + created=None, lastModified=AuditStamp(time=modified_ts, actor=modified_actor), ) dashboard_url = f"{self.config.display_uri}{dashboard_data.get('url', '')}" @@ -380,11 +378,9 @@ def construct_chart_from_chart_data(self, chart_data): ) title = chart_data.get("slice_name", "") - # note: the API does not currently supply created_by usernames due to a bug, but we are required to - # provide a created AuditStamp to comply with ChangeAuditStamp model. For now, I sub in the last - # modified actor urn + # note: the API does not currently supply created_by usernames due to a bug last_modified = ChangeAuditStamps( - created=AuditStamp(time=modified_ts, actor=modified_actor), + created=None, lastModified=AuditStamp(time=modified_ts, actor=modified_actor), ) chart_type = chart_type_from_viz_type.get(chart_data.get("viz_type", "")) diff --git a/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json b/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json index 96ba9fd9c674f2..976fe5f77c929b 100644 --- a/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json +++ b/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json @@ -15,8 +15,8 @@ "description": "", "lastModified": { "created": { - "time": 1639417592792, - "actor": "urn:li:corpuser:admin@metabase.com" + "time": 0, + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 1639417592792, @@ -82,8 +82,8 @@ "description": "", "lastModified": { "created": { - "time": 1636614000000, - "actor": "urn:li:corpuser:admin@metabase.com" + "time": 0, + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 1636614000000, @@ -140,8 +140,8 @@ "description": "", "lastModified": { "created": { - "time": 1685628119636, - "actor": "urn:li:corpuser:john.doe@example.com" + "time": 0, + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 1685628119636, diff --git a/metadata-ingestion/tests/integration/superset/golden_test_ingest.json b/metadata-ingestion/tests/integration/superset/golden_test_ingest.json index 74312940f06e78..767b85a72b975a 100644 --- a/metadata-ingestion/tests/integration/superset/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/superset/golden_test_ingest.json @@ -28,8 +28,8 @@ "datasets": [], "lastModified": { "created": { - "time": 1586847600000, - "actor": "urn:li:corpuser:test_username_1" + "time": 0, + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 1586847600000, @@ -44,7 +44,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-test" + "runId": "superset-test", + "lastRunId": "no-run-id-provided" } }, { @@ -74,8 +75,8 @@ "datasets": [], "lastModified": { "created": { - "time": 1586847600000, - "actor": "urn:li:corpuser:test_username_2" + "time": 0, + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 1586847600000, @@ -90,7 +91,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-test" + "runId": "superset-test", + "lastRunId": "no-run-id-provided" } }, { @@ -114,8 +116,8 @@ "description": "", "lastModified": { "created": { - "time": 1586847600000, - "actor": "urn:li:corpuser:test_username_1" + "time": 0, + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 1586847600000, @@ -136,7 +138,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-test" + "runId": "superset-test", + "lastRunId": "no-run-id-provided" } }, { @@ -160,8 +163,8 @@ "description": "", "lastModified": { "created": { - "time": 1586847600000, - "actor": "urn:li:corpuser:test_username_1" + "time": 0, + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 1586847600000, @@ -182,7 +185,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-test" + "runId": "superset-test", + "lastRunId": "no-run-id-provided" } }, { @@ -206,8 +210,8 @@ "description": "", "lastModified": { "created": { - "time": 1586847600000, - "actor": "urn:li:corpuser:test_username_2" + "time": 0, + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 1586847600000, @@ -228,7 +232,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-test" + "runId": "superset-test", + "lastRunId": "no-run-id-provided" } }, { @@ -252,8 +257,8 @@ "description": "", "lastModified": { "created": { - "time": 1586847600000, - "actor": "urn:li:corpuser:test_username_2" + "time": 0, + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 1586847600000, @@ -274,7 +279,8 @@ }, "systemMetadata": { "lastObserved": 1586847600000, - "runId": "superset-test" + "runId": "superset-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/superset/golden_test_stateful_ingest.json b/metadata-ingestion/tests/integration/superset/golden_test_stateful_ingest.json index 0ff90cea7bccc5..0f207990179793 100644 --- a/metadata-ingestion/tests/integration/superset/golden_test_stateful_ingest.json +++ b/metadata-ingestion/tests/integration/superset/golden_test_stateful_ingest.json @@ -28,8 +28,8 @@ "datasets": [], "lastModified": { "created": { - "time": 1586847600000, - "actor": "urn:li:corpuser:test_username_1" + "time": 0, + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 1586847600000, @@ -70,8 +70,8 @@ "description": "", "lastModified": { "created": { - "time": 1586847600000, - "actor": "urn:li:corpuser:test_username_1" + "time": 0, + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 1586847600000, @@ -118,8 +118,8 @@ "description": "", "lastModified": { "created": { - "time": 1586847600000, - "actor": "urn:li:corpuser:test_username_1" + "time": 0, + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 1586847600000, @@ -166,8 +166,8 @@ "description": "", "lastModified": { "created": { - "time": 1586847600000, - "actor": "urn:li:corpuser:test_username_2" + "time": 0, + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 1586847600000, @@ -214,8 +214,8 @@ "description": "", "lastModified": { "created": { - "time": 1586847600000, - "actor": "urn:li:corpuser:test_username_2" + "time": 0, + "actor": "urn:li:corpuser:unknown" }, "lastModified": { "time": 1586847600000, diff --git a/metadata-ingestion/tests/unit/test_redash_source.py b/metadata-ingestion/tests/unit/test_redash_source.py index 925c41689c6cc1..2982fe76c4d4e7 100644 --- a/metadata-ingestion/tests/unit/test_redash_source.py +++ b/metadata-ingestion/tests/unit/test_redash_source.py @@ -489,9 +489,7 @@ def test_get_dashboard_snapshot_before_v10(): ], datasets=[], lastModified=ChangeAuditStamps( - created=AuditStamp( - time=1628882055288, actor="urn:li:corpuser:unknown" - ), + created=None, lastModified=AuditStamp( time=1628882055288, actor="urn:li:corpuser:unknown" ), @@ -521,9 +519,7 @@ def test_get_dashboard_snapshot_after_v10(): ], datasets=[], lastModified=ChangeAuditStamps( - created=AuditStamp( - time=1628882055288, actor="urn:li:corpuser:unknown" - ), + created=None, lastModified=AuditStamp( time=1628882055288, actor="urn:li:corpuser:unknown" ), @@ -551,9 +547,7 @@ def test_get_known_viz_chart_snapshot(mocked_data_source): title="My Query Chart", description="", lastModified=ChangeAuditStamps( - created=AuditStamp( - time=1628882022544, actor="urn:li:corpuser:unknown" - ), + created=None, lastModified=AuditStamp( time=1628882022544, actor="urn:li:corpuser:unknown" ), @@ -584,9 +578,7 @@ def test_get_unknown_viz_chart_snapshot(mocked_data_source): title="My Query Sankey", description="", lastModified=ChangeAuditStamps( - created=AuditStamp( - time=1628882009571, actor="urn:li:corpuser:unknown" - ), + created=None, lastModified=AuditStamp( time=1628882009571, actor="urn:li:corpuser:unknown" ), @@ -711,9 +703,7 @@ def test_get_chart_snapshot_parse_table_names_from_sql(mocked_data_source): title="My Query Chart", description="", lastModified=ChangeAuditStamps( - created=AuditStamp( - time=1628882022544, actor="urn:li:corpuser:unknown" - ), + created=None, lastModified=AuditStamp( time=1628882022544, actor="urn:li:corpuser:unknown" ), From 351e434856acf50a1c897681d8ff0539d49a32b6 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 11 Jul 2024 16:32:16 -0700 Subject: [PATCH 7/9] fix(ingest/dbt): always encode tag urns (#10799) --- .../src/datahub/emitter/mce_builder.py | 2 +- .../src/datahub/ingestion/source/dbt/dbt_common.py | 13 ++++++++----- metadata-ingestion/tests/unit/test_dbt_source.py | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py index 545cf755a6fc02..31c7518152ef22 100644 --- a/metadata-ingestion/src/datahub/emitter/mce_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py @@ -476,7 +476,7 @@ def get_or_add_aspect(mce: MetadataChangeEventClass, default: Aspect) -> Aspect: def make_global_tag_aspect_with_tag_list(tags: List[str]) -> GlobalTagsClass: return GlobalTagsClass( - tags=[TagAssociationClass(f"urn:li:tag:{tag}") for tag in tags] + tags=[TagAssociationClass(make_tag_urn(tag)) for tag in tags] ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 962c91fb166266..f56784ab1f0822 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -1483,7 +1483,7 @@ def get_patched_mce(self, mce): transformed_tag_list = self.get_transformed_tags_by_prefix( tag_aspect.tags, mce.proposedSnapshot.urn, - mce_builder.make_tag_urn(self.config.tag_prefix), + tag_prefix=self.config.tag_prefix, ) tag_aspect.tags = transformed_tag_list @@ -1874,16 +1874,19 @@ def get_transformed_tags_by_prefix( self, new_tags: List[TagAssociationClass], entity_urn: str, - tags_prefix_filter: str, + tag_prefix: str, ) -> List[TagAssociationClass]: tag_set = {new_tag.tag for new_tag in new_tags} if self.ctx.graph: existing_tags_class = self.ctx.graph.get_tags(entity_urn) if existing_tags_class and existing_tags_class.tags: - for exiting_tag in existing_tags_class.tags: - if not exiting_tag.tag.startswith(tags_prefix_filter): - tag_set.add(exiting_tag.tag) + for existing_tag in existing_tags_class.tags: + if tag_prefix and existing_tag.tag.startswith( + mce_builder.make_tag_urn(tag_prefix) + ): + continue + tag_set.add(existing_tag.tag) return [TagAssociationClass(tag) for tag in sorted(tag_set)] # This method attempts to read-modify and return the glossary terms of a dataset. diff --git a/metadata-ingestion/tests/unit/test_dbt_source.py b/metadata-ingestion/tests/unit/test_dbt_source.py index 0206f2e280ef2d..99387ab4e6ae48 100644 --- a/metadata-ingestion/tests/unit/test_dbt_source.py +++ b/metadata-ingestion/tests/unit/test_dbt_source.py @@ -148,7 +148,7 @@ def test_dbt_source_patching_tags(): ["new_non_dbt", "dbt:new_dbt"] ) transformed_tags = source.get_transformed_tags_by_prefix( - new_tag_aspect.tags, "urn:li:dataset:dummy", "urn:li:tag:dbt:" + new_tag_aspect.tags, "urn:li:dataset:dummy", "dbt:" ) expected_tags = { "urn:li:tag:new_non_dbt", From 89bda5bdd9888ce780fa6ba5856105934c351bb5 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 11 Jul 2024 16:32:37 -0700 Subject: [PATCH 8/9] fix(ingest/redshift): handle multiline alter table commands (#10727) --- metadata-ingestion/setup.py | 1 - .../ingestion/source/redshift/lineage.py | 20 ++++++++++++++----- .../ingestion/source/redshift/report.py | 1 + 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 7552650fa40ef3..5ff62dd02fbc3f 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -166,7 +166,6 @@ *sqlglot_lib, "GitPython>2", "python-liquid", - *sqlglot_lib, } bigquery_common = { diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py index 852deac13e5168..dadb06b6a95e26 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py @@ -53,6 +53,7 @@ ) from datahub.metadata.urns import DatasetUrn from datahub.sql_parsing.schema_resolver import SchemaResolver +from datahub.sql_parsing.sqlglot_utils import get_dialect, parse_statement from datahub.utilities import memory_footprint from datahub.utilities.dedup_list import deduplicate_list @@ -128,7 +129,7 @@ def parse_alter_table_rename(default_schema: str, query: str) -> Tuple[str, str, Parses an ALTER TABLE ... RENAME TO ... query and returns the schema, previous table name, and new table name. """ - parsed_query = sqlglot.parse_one(query, dialect="redshift") + parsed_query = parse_statement(query, dialect=get_dialect("redshift")) assert isinstance(parsed_query, sqlglot.exp.AlterTable) prev_name = parsed_query.this.name rename_clause = parsed_query.args["actions"][0] @@ -865,10 +866,19 @@ def _process_table_renames( for rename_row in RedshiftDataDictionary.get_alter_table_commands( connection, query ): - schema, prev_name, new_name = parse_alter_table_rename( - default_schema=self.config.default_schema, - query=rename_row.query_text, - ) + # Redshift's system table has some issues where it encodes newlines as \n instead a proper + # newline character. This can cause issues in our parser. + query_text = rename_row.query_text.replace("\\n", "\n") + + try: + schema, prev_name, new_name = parse_alter_table_rename( + default_schema=self.config.default_schema, + query=query_text, + ) + except ValueError as e: + logger.info(f"Failed to parse alter table rename: {e}") + self.report.num_alter_table_parse_errors += 1 + continue prev_urn = make_dataset_urn_with_platform_instance( platform=LineageDatasetPlatform.REDSHIFT.value, diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py index 3012f4949baeb4..ff28ed2c5e849c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py @@ -45,6 +45,7 @@ class RedshiftReport( num_lineage_dropped_not_support_copy_path: int = 0 num_lineage_processed_temp_tables = 0 num_lineage_dropped_s3_path: int = 0 + num_alter_table_parse_errors: int = 0 lineage_start_time: Optional[datetime] = None lineage_end_time: Optional[datetime] = None From aa92a99130b1636d25d104a6e645066bda78242c Mon Sep 17 00:00:00 2001 From: sid-acryl <155424659+sid-acryl@users.noreply.github.com> Date: Fri, 12 Jul 2024 13:17:53 +0530 Subject: [PATCH 9/9] fix(ingestion/looker): column name missing in explore (#10892) --- .../ingestion/source/looker/looker_common.py | 34 +++++++------------ .../ingestion/source/looker/lookml_config.py | 2 +- 2 files changed, 13 insertions(+), 23 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index ce135f90e828b5..1f54767de5a68b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -347,26 +347,20 @@ def _form_field_name( model_name: str, upstream_views_file_path: Dict[str, Optional[str]], config: LookerCommonConfig, + remove_variant: bool = False, ) -> Optional[ColumnRef]: assert self.field.name is not None if len(self.field.name.split(".")) != 2: return None # Inconsistent info received - assert self.explore.name - - view_name: Optional[str] = ( - self.explore.name - if self.field.original_view is not None - else self.field.original_view - ) + view_name: Optional[str] = self.explore.name + if self.field.original_view is not None: + view_name = self.field.original_view field_name = self.field.name.split(".")[1] - if ( - self.field.field_group_variant is not None - and self.field.field_group_variant.lower() in field_name.lower() - ): + if remove_variant and self.field.field_group_variant is not None: # remove variant at the end. +1 for "_" field_name = field_name[ : -(len(self.field.field_group_variant.lower()) + 1) @@ -381,7 +375,7 @@ def _form_field_name( file_path: Optional[str] = ( upstream_views_file_path.get(view_name) - if upstream_views_file_path.get(view_name) is not None + if upstream_views_file_path.get(view_name) else ViewFieldValue.NOT_AVAILABLE.value ) @@ -413,7 +407,7 @@ def upstream( ) -> Optional[ColumnRef]: assert self.field.name is not None - if self.field.dimension_group is None: # It is not part of Dimensional Group + if self.field.dimension_group is None or self.field.field_group_variant is None: return self._form_field_name( view_project_map, explore_project_name, @@ -422,15 +416,6 @@ def upstream( config, ) - if self.field.field_group_variant is None: - return self._form_field_name( - view_project_map, - explore_project_name, - model_name, - upstream_views_file_path, - config, - ) # Variant i.e. Month, Day, Year ... is not available - if self.field.type is None or not self.field.type.startswith("date_"): return self._form_field_name( view_project_map, @@ -456,6 +441,7 @@ def upstream( model_name, upstream_views_file_path, config, + remove_variant=True, ) @@ -999,12 +985,15 @@ def from_api( # noqa: C901 view_fields: List[ViewField] = [] field_name_vs_raw_explore_field: Dict = {} + if explore.fields is not None: + if explore.fields.dimensions is not None: for dim_field in explore.fields.dimensions: if dim_field.name is None: continue else: + field_name_vs_raw_explore_field[dim_field.name] = dim_field view_fields.append( @@ -1045,6 +1034,7 @@ def from_api( # noqa: C901 if measure_field.name is None: continue else: + field_name_vs_raw_explore_field[ measure_field.name ] = measure_field diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py index 937c75b7eaf35a..aa5719547c03ed 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py @@ -121,7 +121,7 @@ class LookMLSourceConfig( AllowDenyPattern.allow_all(), description="List of regex patterns for LookML views to include in the extraction.", ) - parse_table_names_from_sql: bool = Field(False, description="See note below.") + parse_table_names_from_sql: bool = Field(True, description="See note below.") sql_parser: str = Field( "datahub.utilities.sql_parser.DefaultSQLParser", description="See note below." )