diff --git a/build.gradle b/build.gradle index 5cf43755fceffe..a78d308c617c0b 100644 --- a/build.gradle +++ b/build.gradle @@ -247,6 +247,7 @@ project.ext.externalDependency = [ 'springActuator': "org.springframework.boot:spring-boot-starter-actuator:$springBootVersion", 'swaggerAnnotations': 'io.swagger.core.v3:swagger-annotations:2.2.15', 'swaggerCli': 'io.swagger.codegen.v3:swagger-codegen-cli:3.0.46', + 'swaggerCore': 'io.swagger.core.v3:swagger-core:2.2.7', 'springBootAutoconfigureJdk11': 'org.springframework.boot:spring-boot-autoconfigure:2.7.18', 'testng': 'org.testng:testng:7.8.0', 'testContainers': 'org.testcontainers:testcontainers:' + testContainersVersion, diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/StructuredPropertyValue.tsx b/datahub-web-react/src/app/entity/shared/tabs/Properties/StructuredPropertyValue.tsx index a8b4e6607b25ea..b1a01f2b69fe18 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Properties/StructuredPropertyValue.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/StructuredPropertyValue.tsx @@ -1,7 +1,7 @@ import Icon from '@ant-design/icons/lib/components/Icon'; -import React from 'react'; +import React, { useState } from 'react'; import Highlight from 'react-highlighter'; -import { Typography } from 'antd'; +import { Button, Typography } from 'antd'; import styled from 'styled-components'; import { ValueColumnData } from './types'; import { ANTD_GRAY } from '../../constants'; @@ -30,14 +30,27 @@ const IconWrapper = styled.span` margin-right: 4px; `; +const StyledButton = styled(Button)` + margin-top: 2px; +`; + interface Props { value: ValueColumnData; isRichText?: boolean; filterText?: string; } +const MAX_CHARACTERS = 200; + export default function StructuredPropertyValue({ value, isRichText, filterText }: Props) { const entityRegistry = useEntityRegistry(); + const [showMore, setShowMore] = useState(false); + + const toggleShowMore = () => { + setShowMore(!showMore); + }; + + const valueAsString = value?.value?.toString() ?? ''; return ( @@ -60,7 +73,16 @@ export default function StructuredPropertyValue({ value, isRichText, filterText {isRichText ? ( ) : ( - {value.value?.toString()} + <> + + {showMore ? valueAsString : valueAsString?.substring(0, MAX_CHARACTERS)} + + {valueAsString?.length > MAX_CHARACTERS && ( + + {showMore ? 'Show less' : 'Show more'} + + )} + )} )} diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 34398bc8c6661b..fe1b979ab332b9 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -187,6 +187,7 @@ module.exports = { }, { "Managed DataHub Release History": [ + "docs/managed-datahub/release-notes/v_0_3_1", "docs/managed-datahub/release-notes/v_0_2_16", "docs/managed-datahub/release-notes/v_0_2_15", "docs/managed-datahub/release-notes/v_0_2_14", diff --git a/docs/managed-datahub/release-notes/v_0_3_1.md b/docs/managed-datahub/release-notes/v_0_3_1.md new file mode 100644 index 00000000000000..7d1d4c1a6713fb --- /dev/null +++ b/docs/managed-datahub/release-notes/v_0_3_1.md @@ -0,0 +1,16 @@ +# v0.3.1 +--- + +Release Availability Date +--- +8-Apr-2024 + +Recommended CLI/SDK +--- +- `v0.13.1.2` with release notes at https://github.com/acryldata/datahub/releases/tag/v0.13.1.2 + +If you are using an older CLI/SDK version then please upgrade it. This applies for all CLI/SDK usages, if you are using it through your terminal, github actions, airflow, in python SDK somewhere, Java SKD etc. This is a strong recommendation to upgrade as we keep on pushing fixes in the CLI and it helps us support you better. + +## Release Changelog +--- +- Since `v0.2.16` these changes from OSS DataHub https://github.com/datahub-project/datahub/compare/55bc955304c4c192c04a0393a47355a295f5770a...57de905c66b6992aefb2051708fa83898fa82cec have been pulled in. diff --git a/entity-registry/build.gradle b/entity-registry/build.gradle index 66e4ad4b930e07..0434829e31aec3 100644 --- a/entity-registry/build.gradle +++ b/entity-registry/build.gradle @@ -35,5 +35,11 @@ dependencies { testAnnotationProcessor externalDependency.lombok testImplementation externalDependency.classGraph + testImplementation externalDependency.swaggerCore + testImplementation spec.product.pegasus.dataAvro + testImplementation('io.acryl:json-schema-avro:0.2.2') { + exclude group: 'com.fasterxml.jackson.core', module: 'jackson-databind' + } + } compileTestJava.dependsOn tasks.getByPath(':entity-registry:custom-test-model:modelDeploy') diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java index 0dcd0420d4df82..6a733cc23f3957 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java @@ -73,6 +73,13 @@ public MergedEntityRegistry apply(EntityRegistry patchEntityRegistry) validationResult.validationFailures.stream().collect(Collectors.joining("\n")))); } + // Merge Aspect Specs + // (Fixed issue where custom defined aspects are not included in the API specification.) + // + if (!patchEntityRegistry.getAspectSpecs().isEmpty()) { + _aspectNameToSpec.putAll(patchEntityRegistry.getAspectSpecs()); + } + // Merge Entity Specs for (Map.Entry e2Entry : patchEntityRegistry.getEntitySpecs().entrySet()) { if (entityNameToSpec.containsKey(e2Entry.getKey())) { diff --git a/entity-registry/src/test/java/com/linkedin/metadata/models/OpenApiSpecBuilderTest.java b/entity-registry/src/test/java/com/linkedin/metadata/models/OpenApiSpecBuilderTest.java new file mode 100644 index 00000000000000..c482b75956c191 --- /dev/null +++ b/entity-registry/src/test/java/com/linkedin/metadata/models/OpenApiSpecBuilderTest.java @@ -0,0 +1,580 @@ +package com.linkedin.metadata.models; + +import static org.testng.Assert.assertEquals; + +import com.datahub.test.TestEntityProfile; +import com.fasterxml.jackson.databind.JsonNode; +import com.github.fge.processing.ProcessingUtil; +import com.google.common.collect.ImmutableSet; +import com.linkedin.data.avro.SchemaTranslator; +import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.models.registry.MergedEntityRegistry; +import com.linkedin.metadata.models.registry.PluginEntityRegistryLoader; +import com.linkedin.metadata.models.registry.TestConstants; +import io.swagger.v3.core.util.Json; +import io.swagger.v3.core.util.Yaml; +import io.swagger.v3.oas.models.Components; +import io.swagger.v3.oas.models.OpenAPI; +import io.swagger.v3.oas.models.Operation; +import io.swagger.v3.oas.models.PathItem; +import io.swagger.v3.oas.models.Paths; +import io.swagger.v3.oas.models.info.Info; +import io.swagger.v3.oas.models.media.Content; +import io.swagger.v3.oas.models.media.MediaType; +import io.swagger.v3.oas.models.media.Schema; +import io.swagger.v3.oas.models.parameters.Parameter; +import io.swagger.v3.oas.models.parameters.RequestBody; +import io.swagger.v3.oas.models.responses.ApiResponse; +import io.swagger.v3.oas.models.responses.ApiResponses; +import java.math.BigDecimal; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +@SuppressWarnings({"rawtypes", "unchecked"}) +public class OpenApiSpecBuilderTest { + private static final String MODEL_VERSION = "_test"; + private static final String TYPE_OBJECT = "object"; + private static final String TYPE_BOOLEAN = "boolean"; + private static final String TYPE_STRING = "string"; + private static final String TYPE_ARRAY = "array"; + private static final String TYPE_INTEGER = "integer"; + private static final String NAME_QUERY = "query"; + private static final String NAME_SYSTEM_METADATA = "systemMetadata"; + private static final String NAME_SCROLL_ID = "scrollId"; + private static final String PROPERTY_VALUE = "value"; + private static final String PROPERTY_URN = "urn"; + private static final String PATH_DEFINITIONS = "#/components/schemas/"; + private static final String FORMAT_PATH_DEFINITIONS = "#/components/schemas/%s%s"; + private static final String ASPECT_DESCRIPTION = "Aspect wrapper object."; + private static final String REQUEST_SUFFIX = "Request" + MODEL_VERSION; + private static final String RESPONSE_SUFFIX = "Response" + MODEL_VERSION; + + private static final String ASPECT_REQUEST_SUFFIX = "Aspect" + REQUEST_SUFFIX; + private static final String ASPECT_RESPONSE_SUFFIX = "Aspect" + RESPONSE_SUFFIX; + private static final String ENTITY_REQUEST_SUFFIX = "Entity" + REQUEST_SUFFIX; + private static final String ENTITY_RESPONSE_SUFFIX = "Entity" + RESPONSE_SUFFIX; + private static final ImmutableSet SUPPORTED_ASPECT_PATHS = + ImmutableSet.builder() + .add("domains") + .add("ownership") + .add("deprecation") + .add("status") + .add("globalTags") + .add("glossaryTerms") + .add("dataContractInfo") + .add("browsePathsV2") + .add("datasetProperties") + .add("editableDatasetProperties") + .add("chartInfo") + .add("editableChartProperties") + .add("dashboardInfo") + .add("editableDashboardProperties") + .add("notebookInfo") + .add("editableNotebookProperties") + .add("dataProductProperties") + .add("institutionalMemory") + .build(); + + @BeforeTest + public void disableAssert() { + PathSpecBasedSchemaAnnotationVisitor.class + .getClassLoader() + .setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); + } + + @Test + public void testOpenApiSpecBuilder() throws Exception { + ConfigEntityRegistry configEntityRegistry = + new ConfigEntityRegistry( + TestEntityProfile.class.getClassLoader().getResourceAsStream("entity-registry.yml")); + MergedEntityRegistry er = new MergedEntityRegistry(configEntityRegistry); + new PluginEntityRegistryLoader(TestConstants.BASE_DIRECTORY, 60) + .withBaseRegistry(er) + .start(true); + + OpenAPI openAPI = generateOpenApiSpec(er); + String openapiYaml = Yaml.pretty(openAPI); + Files.write( + Path.of(getClass().getResource("/").getPath(), "open-api.yaml"), + openapiYaml.getBytes(StandardCharsets.UTF_8)); + + assertEquals(openAPI.getComponents().getSchemas().size(), 914); + assertEquals(openAPI.getComponents().getParameters().size(), 56); + assertEquals(openAPI.getPaths().size(), 102); + } + + private OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { + final Set aspectNames = entityRegistry.getAspectSpecs().keySet(); + final Set entityNames = + entityRegistry.getEntitySpecs().values().stream() + .filter(e -> aspectNames.contains(e.getKeyAspectName())) + .map(EntitySpec::getName) + .collect(Collectors.toSet()); + final Set definitionNames = + Stream.concat(aspectNames.stream(), entityNames.stream()).collect(Collectors.toSet()); + // Info + final Info info = new Info(); + info.setTitle("Entity API"); + info.setDescription("This is a service for DataHub Entities."); + info.setVersion("v3"); + // Components + final Components components = new Components(); + // --> Aspect components + // TODO: Correct handling of SystemMetadata and SortOrder + components.addSchemas("SystemMetadata", new Schema().type(TYPE_STRING)); + components.addSchemas("SortOrder", new Schema()._enum(List.of("ASCENDING", "DESCENDING"))); + entityRegistry + .getAspectSpecs() + .values() + .forEach( + a -> { + final String upperAspectName = a.getPegasusSchema().getName(); + addAspectSchemas(components, a); + components.addSchemas( + upperAspectName + ASPECT_REQUEST_SUFFIX, + buildAspectRefSchema(upperAspectName, false)); + components.addSchemas( + upperAspectName + ASPECT_RESPONSE_SUFFIX, + buildAspectRefSchema(upperAspectName, true)); + }); + // --> Entity components + entityRegistry.getEntitySpecs().values().stream() + .filter(e -> aspectNames.contains(e.getKeyAspectName())) + .forEach( + e -> { + final String entityName = toUpperFirst(e.getName()); + components.addSchemas( + entityName + ENTITY_REQUEST_SUFFIX, buildEntitySchema(e, aspectNames, false)); + components.addSchemas( + entityName + ENTITY_RESPONSE_SUFFIX, buildEntitySchema(e, aspectNames, true)); + components.addSchemas( + "Scroll" + entityName + ENTITY_RESPONSE_SUFFIX, buildEntityScrollSchema(e)); + }); + // Parameters + entityRegistry.getEntitySpecs().values().stream() + .filter(e -> definitionNames.contains(e.getKeyAspectName())) + .forEach( + e -> { + final String parameterName = toUpperFirst(e.getName()) + "Aspects"; + components.addParameters( + parameterName + MODEL_VERSION, buildParameterSchema(e, definitionNames)); + }); + addExtraParameters(components); + // Path + final Paths paths = new Paths(); + entityRegistry.getEntitySpecs().values().stream() + .filter(e -> definitionNames.contains(e.getName())) + .forEach( + e -> { + paths.addPathItem( + String.format("/%s", e.getName().toLowerCase()), buildListEntityPath(e)); + paths.addPathItem( + String.format("/%s/{urn}", e.getName().toLowerCase()), buildListEntityPath(e)); + }); + entityRegistry.getEntitySpecs().values().stream() + .filter(e -> definitionNames.contains(e.getName())) + .forEach( + e -> { + e.getAspectSpecs().stream() + .filter(a -> SUPPORTED_ASPECT_PATHS.contains(a.getName())) + .filter(a -> definitionNames.contains(toUpperFirst(a.getName()))) + .forEach( + a -> + paths.addPathItem( + String.format( + "/%s/{urn}/%s", + e.getName().toLowerCase(), a.getName().toLowerCase()), + buildSingleEntityAspectPath( + e, a.getName(), a.getPegasusSchema().getName()))); + }); + return new OpenAPI().openapi("3.0.1").info(info).paths(paths).components(components); + } + + private PathItem buildListEntityPath(final EntitySpec entity) { + final String upperFirst = toUpperFirst(entity.getName()); + final String aspectParameterName = upperFirst + "Aspects"; + final PathItem result = new PathItem(); + final List parameters = + List.of( + new Parameter() + .in(NAME_QUERY) + .name("systemMetadata") + .description("Include systemMetadata with response.") + .schema(new Schema().type(TYPE_BOOLEAN)._default(false)), + new Parameter() + .$ref( + String.format( + "#/components/parameters/%s", aspectParameterName + MODEL_VERSION)), + new Parameter().$ref("#/components/parameters/PaginationCount" + MODEL_VERSION), + new Parameter().$ref("#/components/parameters/ScrollId" + MODEL_VERSION), + new Parameter().$ref("#/components/parameters/SortBy" + MODEL_VERSION), + new Parameter().$ref("#/components/parameters/SortOrder" + MODEL_VERSION), + new Parameter().$ref("#/components/parameters/ScrollQuery" + MODEL_VERSION)); + final ApiResponse successApiResponse = + new ApiResponse() + .description("Success") + .content( + new Content() + .addMediaType( + "application/json", + new MediaType() + .schema( + new Schema() + .$ref( + String.format( + "#/components/schemas/Scroll%s%s", + upperFirst, ENTITY_RESPONSE_SUFFIX))))); + result.setGet( + new Operation() + .summary(String.format("Scroll %s.", upperFirst)) + .operationId("scroll") + .parameters(parameters) + .tags(List.of(entity.getName() + " Entity")) + .responses(new ApiResponses().addApiResponse("200", successApiResponse))); + final Content requestContent = + new Content() + .addMediaType( + "application/json", + new MediaType() + .schema( + new Schema() + .type(TYPE_ARRAY) + .items( + new Schema() + .$ref( + String.format( + "#/components/schemas/%s%s", + upperFirst, ENTITY_REQUEST_SUFFIX))))); + final ApiResponse apiResponse = + new ApiResponse() + .description("Create " + entity.getName() + " entities.") + .content( + new Content() + .addMediaType( + "application/json", + new MediaType() + .schema( + new Schema() + .type(TYPE_ARRAY) + .items( + new Schema<>() + .$ref( + String.format( + "#/components/schemas/%s%s", + upperFirst, ENTITY_RESPONSE_SUFFIX)))))); + result.setPost( + new Operation() + .summary("Create " + upperFirst) + .operationId("create") + .tags(List.of(entity.getName() + " Entity")) + .requestBody( + new RequestBody() + .description("Create " + entity.getName() + " entities.") + .required(true) + .content(requestContent)) + .responses(new ApiResponses().addApiResponse("201", apiResponse))); + return result; + } + + private void addExtraParameters(final Components components) { + components.addParameters( + "ScrollId" + MODEL_VERSION, + new Parameter() + .in(NAME_QUERY) + .name(NAME_SCROLL_ID) + .description("Scroll pagination token.") + .schema(new Schema().type(TYPE_STRING))); + components.addParameters( + "SortBy" + MODEL_VERSION, + new Parameter() + .in(NAME_QUERY) + .name("sort") + .explode(true) + .description("Sort fields for pagination.") + .example(PROPERTY_URN) + .schema( + new Schema() + .type(TYPE_ARRAY) + ._default(PROPERTY_URN) + .items( + new Schema<>() + .type(TYPE_STRING) + ._enum(List.of(PROPERTY_URN)) + ._default(PROPERTY_URN)))); + components.addParameters( + "SortOrder" + MODEL_VERSION, + new Parameter() + .in(NAME_QUERY) + .name("sortOrder") + .explode(true) + .description("Sort direction field for pagination.") + .example("ASCENDING") + .schema(new Schema()._default("ASCENDING").$ref("#/components/schemas/SortOrder"))); + components.addParameters( + "PaginationCount" + MODEL_VERSION, + new Parameter() + .in(NAME_QUERY) + .name("count") + .description("Number of items per page.") + .example("10") + .schema(new Schema().type(TYPE_INTEGER)._default(10).minimum(new BigDecimal(1)))); + components.addParameters( + "ScrollQuery" + MODEL_VERSION, + new Parameter() + .in(NAME_QUERY) + .name(NAME_QUERY) + .description("Structured search query.") + .example("*") + .schema(new Schema().type(TYPE_STRING)._default("*"))); + } + + private Parameter buildParameterSchema( + final EntitySpec entity, final Set definitionNames) { + final List aspectNames = + entity.getAspectSpecs().stream() + .map(AspectSpec::getName) + .filter(definitionNames::contains) // Only if aspect is defined + .distinct() + .collect(Collectors.toList()); + if (aspectNames.isEmpty()) { + aspectNames.add(entity.getKeyAspectName()); + } + final Schema schema = + new Schema() + .type(TYPE_ARRAY) + .items(new Schema().type(TYPE_STRING)._enum(aspectNames)._default(aspectNames)); + return new Parameter() + .in(NAME_QUERY) + .name("aspects") + .explode(true) + .description("Aspects to include in response.") + .example(aspectNames) + .schema(schema); + } + + private void addAspectSchemas(final Components components, final AspectSpec aspect) { + final org.apache.avro.Schema avroSchema = + SchemaTranslator.dataToAvroSchema(aspect.getPegasusSchema().getDereferencedDataSchema()); + try { + final JsonNode apiSchema = ProcessingUtil.buildResult(avroSchema.toString()); + final JsonNode definitions = apiSchema.get("definitions"); + definitions + .fieldNames() + .forEachRemaining( + n -> { + try { + final String definition = Json.mapper().writeValueAsString(definitions.get(n)); + final String newDefinition = + definition.replaceAll("definitions", "components/schemas"); + Schema s = Json.mapper().readValue(newDefinition, Schema.class); + components.addSchemas(n, s); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private Schema buildAspectRefSchema(final String aspectName, final boolean withSystemMetadata) { + final Schema result = + new Schema<>() + .type(TYPE_OBJECT) + .description(ASPECT_DESCRIPTION) + .required(List.of(PROPERTY_VALUE)) + .addProperty(PROPERTY_VALUE, new Schema<>().$ref(PATH_DEFINITIONS + aspectName)); + if (withSystemMetadata) { + result.addProperty( + "systemMetadata", + new Schema<>() + .$ref(PATH_DEFINITIONS + "SystemMetadata") + .description("System metadata for the aspect.")); + } + return result; + } + + private Schema buildEntitySchema( + final EntitySpec entity, Set aspectNames, final boolean withSystemMetadata) { + final Map properties = + entity.getAspectSpecMap().entrySet().stream() + .filter(a -> aspectNames.contains(a.getValue().getName())) + .collect( + Collectors.toMap( + Map.Entry::getKey, + a -> + buildAspectRef( + a.getValue().getPegasusSchema().getName(), withSystemMetadata))); + properties.put( + PROPERTY_URN, + new Schema<>().type(TYPE_STRING).description("Unique id for " + entity.getName())); + properties.put( + entity.getKeyAspectName(), + buildAspectRef(entity.getKeyAspectSpec().getPegasusSchema().getName(), withSystemMetadata)); + return new Schema<>() + .type(TYPE_OBJECT) + .description(toUpperFirst(entity.getName()) + " object.") + .required(List.of(PROPERTY_URN)) + .properties(properties); + } + + private Schema buildEntityScrollSchema(final EntitySpec entity) { + return new Schema<>() + .type(TYPE_OBJECT) + .description("Scroll across " + toUpperFirst(entity.getName()) + " objects.") + .required(List.of("entities")) + .addProperty( + NAME_SCROLL_ID, + new Schema<>().type(TYPE_STRING).description("Scroll id for pagination.")) + .addProperty( + "entities", + new Schema<>() + .type(TYPE_ARRAY) + .description(toUpperFirst(entity.getName()) + " object.") + .items( + new Schema<>() + .$ref( + String.format( + "#/components/schemas/%s%s", + toUpperFirst(entity.getName()), ENTITY_RESPONSE_SUFFIX)))); + } + + private Schema buildAspectRef(final String aspect, final boolean withSystemMetadata) { + final Schema result = new Schema<>(); + if (withSystemMetadata) { + result.set$ref( + String.format(FORMAT_PATH_DEFINITIONS, toUpperFirst(aspect), ASPECT_RESPONSE_SUFFIX)); + } else { + result.set$ref( + String.format(FORMAT_PATH_DEFINITIONS, toUpperFirst(aspect), ASPECT_REQUEST_SUFFIX)); + } + return result; + } + + private PathItem buildSingleEntityAspectPath( + final EntitySpec entity, final String aspect, final String upperFirstAspect) { + final String upperFirstEntity = toUpperFirst(entity.getName()); + + List tags = List.of(aspect + " Aspect"); + // Get Operation + final Parameter getParameter = + new Parameter() + .in(NAME_QUERY) + .name(NAME_SYSTEM_METADATA) + .description("Include systemMetadata with response.") + .schema(new Schema().type(TYPE_BOOLEAN)._default(false)); + final ApiResponse successApiResponse = + new ApiResponse() + .description("Success") + .content( + new Content() + .addMediaType( + "application/json", + new MediaType() + .schema( + new Schema() + .$ref( + String.format( + "#/components/schemas/%s%s", + upperFirstAspect, ASPECT_RESPONSE_SUFFIX))))); + final Operation getOperation = + new Operation() + .summary(String.format("Get %s for %s.", aspect, entity.getName())) + .operationId(String.format("get%s", upperFirstAspect)) + .tags(tags) + .parameters(List.of(getParameter)) + .responses(new ApiResponses().addApiResponse("200", successApiResponse)); + // Head Operation + final ApiResponse successHeadResponse = + new ApiResponse() + .description(String.format("%s on %s exists.", aspect, entity.getName())) + .content(new Content().addMediaType("application/json", new MediaType())); + final ApiResponse notFoundHeadResponse = + new ApiResponse() + .description(String.format("%s on %s does not exist.", aspect, entity.getName())) + .content(new Content().addMediaType("application/json", new MediaType())); + final Operation headOperation = + new Operation() + .summary(String.format("%s on %s existence.", aspect, upperFirstEntity)) + .operationId(String.format("head%s", upperFirstAspect)) + .tags(tags) + .responses( + new ApiResponses() + .addApiResponse("200", successHeadResponse) + .addApiResponse("404", notFoundHeadResponse)); + // Delete Operation + final ApiResponse successDeleteResponse = + new ApiResponse() + .description(String.format("Delete %s on %s entity.", aspect, upperFirstEntity)) + .content(new Content().addMediaType("application/json", new MediaType())); + final Operation deleteOperation = + new Operation() + .summary(String.format("Delete %s on entity %s", aspect, upperFirstEntity)) + .operationId(String.format("delete%s", upperFirstAspect)) + .tags(tags) + .responses(new ApiResponses().addApiResponse("200", successDeleteResponse)); + // Post Operation + final ApiResponse successPostResponse = + new ApiResponse() + .description(String.format("Create aspect %s on %s entity.", aspect, upperFirstEntity)) + .content( + new Content() + .addMediaType( + "application/json", + new MediaType() + .schema( + new Schema() + .$ref( + String.format( + "#/components/schemas/%s%s", + upperFirstAspect, ASPECT_RESPONSE_SUFFIX))))); + final RequestBody requestBody = + new RequestBody() + .description(String.format("Create aspect %s on %s entity.", aspect, upperFirstEntity)) + .required(true) + .content( + new Content() + .addMediaType( + "application/json", + new MediaType() + .schema( + new Schema() + .$ref( + String.format( + "#/components/schemas/%s%s", + upperFirstAspect, ASPECT_REQUEST_SUFFIX))))); + final Operation postOperation = + new Operation() + .summary(String.format("Create aspect %s on %s ", aspect, upperFirstEntity)) + .operationId(String.format("create%s", upperFirstAspect)) + .tags(tags) + .requestBody(requestBody) + .responses(new ApiResponses().addApiResponse("201", successPostResponse)); + return new PathItem() + .parameters( + List.of( + new Parameter() + .in("path") + .name("urn") + .required(true) + .schema(new Schema().type(TYPE_STRING)))) + .get(getOperation) + .head(headOperation) + .delete(deleteOperation) + .post(postOperation); + } + + private static String toUpperFirst(final String s) { + return s.substring(0, 1).toUpperCase() + s.substring(1); + } +} diff --git a/metadata-ingestion/docs/transformer/dataset_transformer.md b/metadata-ingestion/docs/transformer/dataset_transformer.md index 0acc134d4ef00b..5421a932daccee 100644 --- a/metadata-ingestion/docs/transformer/dataset_transformer.md +++ b/metadata-ingestion/docs/transformer/dataset_transformer.md @@ -22,7 +22,7 @@ The below table shows transformer which can transform aspects of entity [Dataset |-----------------------------|----------|---------|---------------|---------------------------------------------| | `tag_pattern` | | str | | Regex to use for tags to match against. Supports Regex to match a pattern which is used to remove content. Rest of string is considered owner ID for creating owner URN. | | `is_user` | | bool | `true` | Whether should be consider a user or not. If `false` then considered a group. | -| `owner_character_mapping` | | dict[str, str] | | A mapping of extracted owner character to datahub owner character. | +| `tag_character_mapping` | | dict[str, str] | | A mapping of tag character to datahub owner character. If provided, `tag_pattern` config should be matched against converted tag as per mapping| | `email_domain` | | str | | If set then this is appended to create owner URN. | | `extract_owner_type_from_tag_pattern` | | str | `false` | Whether to extract an owner type from provided tag pattern first group. If `true`, no need to provide owner_type and owner_type_urn config. For example: if provided tag pattern is `(.*)_owner_email:` and actual tag is `developer_owner_email`, then extracted owner type will be `developer`.| | `owner_type` | | str | `TECHNICAL_OWNER` | Ownership type. | @@ -40,14 +40,14 @@ transformers: ``` So if we have input dataset tag like -- `urn:li:tag:dataset_owner_email:abc@email.com` -- `urn:li:tag:dataset_owner_email:xyz@email.com` +- `urn:li:tag:owner_email:abc@email.com` +- `urn:li:tag:owner_email:xyz@email.com` The portion of the tag after the matched tag pattern will be converted into an owner. Hence users `abc@email.com` and `xyz@email.com` will be added as owners. ### Examples -- Add owners, however owner should be considered as group and also email domain not provided in tag string. For example: from tag urn `urn:li:tag:dataset_owner:abc` extracted owner urn should be `urn:li:corpGroup:abc@email.com` then config would look like this: +- Add owners, however owner should be considered as group and also email domain not provided in tag string. For example: from tag urn `urn:li:tag:owner:abc` extracted owner urn should be `urn:li:corpGroup:abc@email.com` then config would look like this: ```yaml transformers: - type: "extract_ownership_from_tags" @@ -56,7 +56,7 @@ The portion of the tag after the matched tag pattern will be converted into an o is_user: false email_domain: "email.com" ``` -- Add owners, however owner type and owner type urn wanted to provide externally. For example: from tag urn `urn:li:tag:dataset_owner_email:abc@email.com` owner type should be `CUSTOM` and owner type urn as `"urn:li:ownershipType:data_product"` then config would look like this: +- Add owners, however owner type and owner type urn wanted to provide externally. For example: from tag urn `urn:li:tag:owner_email:abc@email.com` owner type should be `CUSTOM` and owner type urn as `"urn:li:ownershipType:data_product"` then config would look like this: ```yaml transformers: - type: "extract_ownership_from_tags" @@ -65,15 +65,17 @@ The portion of the tag after the matched tag pattern will be converted into an o owner_type: "CUSTOM" owner_type_urn: "urn:li:ownershipType:data_product" ``` -- Add owners, however some owner characters needs to replace with some other characters before ingestion. For example: from tag urn `urn:li:tag:dataset_owner_email:abc_xyz-email_com` extracted owner urn should be `urn:li:corpGroup:abc.xyz@email.com` then config would look like this: +- Add owners, however some tag characters needs to replace with some other characters before extracting owner. For example: from tag urn `urn:li:tag:owner__email:abc--xyz-email_com` extracted owner urn should be `urn:li:corpGroup:abc.xyz@email.com` then config would look like this: ```yaml transformers: - type: "extract_ownership_from_tags" config: tag_pattern: "owner_email:" - owner_character_mapping: - "_": ".", - "-": "@", + tag_character_mapping: + "_": "." + "-": "@" + "--": "-" + "__": "_" ``` - Add owners, however owner type also need to extracted from tag pattern. For example: from tag urn `urn:li:tag:data_producer_owner_email:abc@email.com` extracted owner type should be `data_producer` then config would look like this: ```yaml diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 33325b26d4e158..bc70c1d8cee208 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -266,6 +266,9 @@ # Version 2.4.0 includes sqlalchemy dialect, 2.8.0 includes some bug fixes # Version 3.0.0 required SQLAlchemy > 2.0.21 "databricks-sql-connector>=2.8.0,<3.0.0", + # Due to https://github.com/databricks/databricks-sql-python/issues/326 + # databricks-sql-connector<3.0.0 requires pandas<2.2.0 + "pandas<2.2.0", } mysql = sql_common | {"pymysql>=1.0.2"} diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py index e509b4b719166b..27311ff998cbf9 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py @@ -26,7 +26,7 @@ class ExtractOwnersFromTagsConfig(ConfigModel): tag_pattern: str = "" is_user: bool = True - owner_character_mapping: Optional[Dict[str, str]] = None + tag_character_mapping: Optional[Dict[str, str]] = None email_domain: Optional[str] = None extract_owner_type_from_tag_pattern: bool = False owner_type: str = "TECHNICAL_OWNER" @@ -70,18 +70,35 @@ def get_owner_urn(self, owner_str: str) -> str: return owner_str + "@" + self.config.email_domain return owner_str - def convert_owner_as_per_mapping(self, owner: str) -> str: - if self.config.owner_character_mapping: - # Sort the provided mapping by its length. - # Eg: Suppose we have {"_":".", "__":"#"} character mapping. - # In this case "__" character should get replace first compare to "_" character. - for key in sorted( - self.config.owner_character_mapping.keys(), + def convert_tag_as_per_mapping(self, tag: str) -> str: + """ + Function to modify tag as per provided tag character mapping. It also handles the overlappings in the mapping. + Eg: '--':'-' & '-':'@' should not cause incorrect mapping. + """ + if self.config.tag_character_mapping: + # indices list to keep track of the indices where replacements have been made + indices: List[int] = list() + for old_char in sorted( + self.config.tag_character_mapping.keys(), key=len, reverse=True, ): - owner = owner.replace(key, self.config.owner_character_mapping[key]) - return owner + new_char = self.config.tag_character_mapping[old_char] + index = tag.find(old_char) + while index != -1: + if index not in indices: + tag = tag[:index] + new_char + tag[index + len(old_char) :] + # Adjust indices for overlapping replacements + indices = [ + each + (len(new_char) - len(old_char)) + if each > index + else each + for each in indices + ] + indices.append(index) + # Find the next occurrence of old_char, starting from the next index + index = tag.find(old_char, index + len(new_char)) + return tag def handle_end_of_stream( self, @@ -100,10 +117,10 @@ def transform_aspect( for tag_class in tags: tag_str = TagUrn.from_string(tag_class.tag).name + tag_str = self.convert_tag_as_per_mapping(tag_str) re_match = re.search(self.config.tag_pattern, tag_str) if re_match: owner_str = tag_str[re_match.end() :].strip() - owner_str = self.convert_owner_as_per_mapping(owner_str) owner_urn_str = self.get_owner_urn(owner_str) owner_urn = ( str(CorpuserUrn(owner_urn_str)) diff --git a/metadata-ingestion/tests/unit/test_transform_dataset.py b/metadata-ingestion/tests/unit/test_transform_dataset.py index c31ec12abfbd71..3782eb0e275f31 100644 --- a/metadata-ingestion/tests/unit/test_transform_dataset.py +++ b/metadata-ingestion/tests/unit/test_transform_dataset.py @@ -742,20 +742,18 @@ def _test_owner( expected_owner_type_urn="urn:li:ownershipType:ad8557d6-dcb9-4d2a-83fc-b7d0d54f3e0f", ) _test_owner( - tag="data_producer_owner_email:abc_xyz-email_com", + tag="data__producer__owner__email:abc--xyz-email_com", config={ "tag_pattern": "(.*)_owner_email:", - "owner_character_mapping": { + "tag_character_mapping": { "_": ".", "-": "@", "__": "_", "--": "-", - "_-": "#", - "-_": " ", }, "extract_owner_type_from_tag_pattern": True, }, - expected_owner="urn:li:corpuser:abc.xyz@email.com", + expected_owner="urn:li:corpuser:abc-xyz@email.com", expected_owner_type=OwnershipTypeClass.CUSTOM, expected_owner_type_urn="urn:li:ownershipType:data_producer", )