From 333799c338d386db453e579586503e1d6f779612 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Mon, 17 Jun 2024 19:53:54 -0500 Subject: [PATCH] feat(strucutred-properties): structured properties delete and schema change support (#10711) --- .../graphql/resolvers/ResolverUtils.java | 36 +- .../assertion/AssertionRunEventResolver.java | 12 +- .../auth/ListAccessTokensResolver.java | 5 +- .../resolvers/chart/BrowseV2Resolver.java | 4 +- .../ListDataProductAssetsResolver.java | 5 +- .../domain/DomainEntitiesResolver.java | 4 +- .../CreateDynamicFormAssignmentResolver.java | 4 +- .../source/ListIngestionSourcesResolver.java | 5 +- .../load/TimeSeriesAspectResolver.java | 8 +- .../resolvers/mutate/util/FormUtils.java | 9 +- .../ownership/ListOwnershipTypesResolver.java | 5 +- .../policy/ListPoliciesResolver.java | 6 +- .../resolvers/query/ListQueriesResolver.java | 8 +- .../ListRecommendationsResolver.java | 10 +- .../AggregateAcrossEntitiesResolver.java | 4 +- .../search/AutoCompleteResolver.java | 8 +- .../resolvers/search/AutocompleteUtils.java | 7 +- .../search/ScrollAcrossEntitiesResolver.java | 4 +- .../search/ScrollAcrossLineageResolver.java | 5 +- .../search/SearchAcrossEntitiesResolver.java | 5 +- .../search/SearchAcrossLineageResolver.java | 5 +- .../resolvers/search/SearchResolver.java | 5 +- .../resolvers/view/CreateViewResolver.java | 4 +- .../view/ListGlobalViewsResolver.java | 8 +- .../resolvers/view/ListMyViewsResolver.java | 13 +- .../resolvers/view/UpdateViewResolver.java | 3 +- .../graphql/resolvers/view/ViewUtils.java | 25 +- .../graphql/resolvers/ResolverUtilsTest.java | 14 +- .../AssertionRunEventResolverTest.java | 6 +- .../auth/ListAccessTokensResolverTest.java | 2 +- .../browse/BrowseV2ResolverTest.java | 4 +- .../query/ListQueriesResolverTest.java | 2 +- .../graphql/resolvers/view/ViewUtilsTest.java | 4 +- datahub-upgrade/build.gradle | 21 + .../upgrade/config/BuildIndicesConfig.java | 7 +- 
.../upgrade/config/CleanIndicesConfig.java | 7 +- .../system/elasticsearch/BuildIndices.java | 75 +- .../system/elasticsearch/CleanIndices.java | 20 +- .../steps/BuildIndicesPostStep.java | 15 +- .../steps/BuildIndicesPreStep.java | 71 +- .../elasticsearch/steps/BuildIndicesStep.java | 11 +- .../elasticsearch/steps/CleanIndicesStep.java | 11 +- .../system/elasticsearch/util/IndexUtils.java | 21 +- docs/api/tutorials/structured-properties.md | 1024 ++++++++++++++++- docs/deploy/environment-vars.md | 36 +- entity-registry/build.gradle | 4 + .../metadata/aspect/RetrieverContext.java | 4 + .../metadata/aspect/batch/AspectsBatch.java | 10 + .../aspect/plugins/PluginFactory.java | 25 + .../aspect/plugins/hooks/MCPSideEffect.java | 36 + .../metadata/entity/SearchRetriever.java | 24 + .../models/StructuredPropertyUtils.java | 167 ++- .../plugins/hooks/MCPSideEffectTest.java | 8 + .../metadata/aspect/MockAspectRetriever.java | 5 +- .../metadata/aspect/TestEntityRegistry.java | 0 .../test/metadata/aspect/batch/TestMCL.java | 32 + .../test/metadata/aspect/batch/TestMCP.java | 0 .../java/com/linkedin/metadata/Constants.java | 5 + metadata-ingestion/scripts/modeldocgen.py | 1 + metadata-io/build.gradle | 2 + .../entity/ebean/batch/PatchItemImpl.java | 26 + .../aspect/utils/DefaultAspectsUtil.java | 2 +- .../metadata/entity/EntityServiceImpl.java | 82 +- .../graph/dgraph/DgraphGraphService.java | 3 - .../elastic/ElasticSearchGraphService.java | 19 +- .../graph/neo4j/Neo4jGraphService.java | 5 - .../candidatesource/MostPopularSource.java | 4 +- .../candidatesource/RecentlyEditedSource.java | 13 +- .../candidatesource/RecentlyViewedSource.java | 13 +- .../search/SearchServiceSearchRetriever.java | 51 + .../elasticsearch/ElasticSearchService.java | 21 +- .../indexbuilder/ESIndexBuilder.java | 8 +- .../indexbuilder/EntityIndexBuilders.java | 32 +- .../indexbuilder/MappingsBuilder.java | 28 +- .../indexbuilder/ReindexConfig.java | 146 ++- .../elasticsearch/query/ESBrowseDAO.java | 12 
+- .../elasticsearch/query/ESSearchDAO.java | 38 +- .../request/AggregationQueryBuilder.java | 70 +- .../request/AutocompleteRequestHandler.java | 17 +- .../query/request/SearchQueryBuilder.java | 25 +- .../query/request/SearchRequestHandler.java | 19 +- .../SearchDocumentTransformer.java | 63 +- .../metadata/search/utils/ESUtils.java | 194 ++-- .../BusinessAttributeUpdateHookService.java | 2 +- .../service/UpdateIndicesService.java | 32 +- .../metadata/shared/ElasticSearchIndexed.java | 16 +- .../PropertyDefinitionDeleteSideEffect.java | 201 ++++ .../hooks/StructuredPropertiesSoftDelete.java | 2 +- .../PropertyDefinitionValidator.java | 97 +- .../StructuredPropertiesValidator.java | 39 +- .../ElasticSearchSystemMetadataService.java | 20 +- .../ElasticSearchTimeseriesAspectService.java | 50 +- .../TimeseriesAspectIndexBuilders.java | 14 +- .../elastic/query/ESAggregatedStatsDAO.java | 12 +- .../metadata/AspectIngestionUtils.java | 6 +- .../entity/CassandraEntityServiceTest.java | 4 +- .../entity/EbeanEntityServiceTest.java | 4 +- .../metadata/entity/EntityServiceTest.java | 52 +- .../graph/dgraph/DgraphContainer.java | 3 - .../search/SearchGraphServiceTestBase.java | 4 +- .../search/LineageServiceTestBase.java | 2 +- .../search/SearchServiceTestBase.java | 3 +- .../metadata/search/TestEntityTestBase.java | 3 +- .../indexbuilder/IndexBuilderTestBase.java | 9 + .../indexbuilder/MappingsBuilderTest.java | 176 ++- .../request/AggregationQueryBuilderTest.java | 201 +++- .../AutocompleteRequestHandlerTest.java | 23 +- .../request/SearchRequestHandlerTest.java | 36 +- .../SearchDocumentTransformerTest.java | 5 + .../metadata/search/utils/ESUtilsTest.java | 188 ++- ...ropertyDefinitionDeleteSideEffectTest.java | 193 ++++ .../StructuredPropertiesSoftDeleteTest.java | 2 +- .../PropertyDefinitionValidatorTest.java | 187 +-- .../StructuredPropertiesValidatorTest.java | 4 +- .../SystemMetadataServiceTestBase.java | 3 +- .../TimeseriesAspectServiceTestBase.java | 3 +- 
.../SampleDataFixtureConfiguration.java | 4 +- .../SearchLineageFixtureConfiguration.java | 7 +- .../test/search/SearchTestUtils.java | 4 +- .../SearchTestContainerConfiguration.java | 1 + .../kafka/MaeConsumerApplication.java | 1 + .../hook/BusinessAttributeUpdateHookTest.java | 4 +- .../CustomDataQualityRulesMCPSideEffect.java | 8 + .../StructuredPropertyDefinition.pdl | 8 + .../src/main/resources/entity-registry.yml | 27 +- .../metadata/context/OperationContext.java | 10 + .../metadata/context/RetrieverContext.java | 2 + .../context/TestOperationContexts.java | 37 +- .../token/StatefulTokenServiceTest.java | 3 +- .../SystemOperationContextFactory.java | 20 +- .../ElasticSearchIndexBuilderFactory.java | 4 + .../IngestDataPlatformInstancesStep.java | 2 +- .../controller/GenericEntitiesController.java | 18 +- .../resources/entity/AspectResourceTest.java | 2 +- .../mock/MockTimeseriesAspectService.java | 3 - .../metadata/entity/EntityService.java | 10 +- .../metadata/entity/RollbackResult.java | 23 + .../metadata/entity/RollbackRunResult.java | 1 + .../metadata/entity/UpdateAspectResult.java | 33 +- .../linkedin/metadata/graph/GraphService.java | 2 +- .../metadata/search/EntitySearchService.java | 2 +- .../systemmetadata/SystemMetadataService.java | 2 +- .../timeseries/TimeseriesAspectService.java | 2 +- .../gms/servlet/ConfigSearchExport.java | 3 +- .../test_structured_properties.py | 133 ++- 145 files changed, 3826 insertions(+), 928 deletions(-) create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java rename entity-registry/src/{test => testFixtures}/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java (96%) rename entity-registry/src/{test => testFixtures}/java/com/linkedin/test/metadata/aspect/TestEntityRegistry.java (100%) create mode 100644 entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCL.java rename entity-registry/src/{test => 
testFixtures}/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java (100%) create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java rename {entity-registry/src/main/java/com/linkedin/metadata/aspect => metadata-io/src/main/java/com/linkedin/metadata/structuredproperties}/hooks/StructuredPropertiesSoftDelete.java (96%) rename {entity-registry/src/main/java/com/linkedin/metadata/aspect => metadata-io/src/main/java/com/linkedin/metadata/structuredproperties}/validation/PropertyDefinitionValidator.java (68%) rename {entity-registry/src/main/java/com/linkedin/metadata/aspect => metadata-io/src/main/java/com/linkedin/metadata/structuredproperties}/validation/StructuredPropertiesValidator.java (94%) create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java rename {entity-registry/src/test/java/com/linkedin/metadata/aspect => metadata-io/src/test/java/com/linkedin/metadata/structuredproperties}/hooks/StructuredPropertiesSoftDeleteTest.java (98%) rename {entity-registry/src/test/java/com/linkedin/metadata/aspect => metadata-io/src/test/java/com/linkedin/metadata/structuredproperties}/validators/PropertyDefinitionValidatorTest.java (70%) rename {entity-registry/src/test/java/com/linkedin/metadata/aspect => metadata-io/src/test/java/com/linkedin/metadata/structuredproperties}/validators/StructuredPropertiesValidatorTest.java (99%) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java index f2682ad050c86a..542745e0148628 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java @@ -15,6 +15,7 @@ import com.linkedin.datahub.graphql.generated.AndFilterInput; import com.linkedin.datahub.graphql.generated.FacetFilterInput; import com.linkedin.datahub.graphql.resolvers.search.SearchUtils; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -112,10 +113,11 @@ public static Map buildFacetFilters( return facetFilters; } - public static List criterionListFromAndFilter(List andFilters) { + public static List criterionListFromAndFilter( + List andFilters, @Nullable AspectRetriever aspectRetriever) { return andFilters != null && !andFilters.isEmpty() ? andFilters.stream() - .map(filter -> criterionFromFilter(filter)) + .map(filter -> criterionFromFilter(filter, aspectRetriever)) .collect(Collectors.toList()) : Collections.emptyList(); } @@ -124,13 +126,14 @@ public static List criterionListFromAndFilter(List // conjunctive criterion // arrays, rather than just one for the AND case. 
public static ConjunctiveCriterionArray buildConjunctiveCriterionArrayWithOr( - @Nonnull List orFilters) { + @Nonnull List orFilters, @Nullable AspectRetriever aspectRetriever) { return new ConjunctiveCriterionArray( orFilters.stream() .map( orFilter -> { CriterionArray andCriterionForOr = - new CriterionArray(criterionListFromAndFilter(orFilter.getAnd())); + new CriterionArray( + criterionListFromAndFilter(orFilter.getAnd(), aspectRetriever)); return new ConjunctiveCriterion().setAnd(andCriterionForOr); }) .collect(Collectors.toList())); @@ -138,7 +141,9 @@ public static ConjunctiveCriterionArray buildConjunctiveCriterionArrayWithOr( @Nullable public static Filter buildFilter( - @Nullable List andFilters, @Nullable List orFilters) { + @Nullable List andFilters, + @Nullable List orFilters, + @Nullable AspectRetriever aspectRetriever) { if ((andFilters == null || andFilters.isEmpty()) && (orFilters == null || orFilters.isEmpty())) { return null; @@ -147,30 +152,33 @@ public static Filter buildFilter( // Or filters are the new default. We will check them first. 
// If we have OR filters, we need to build a series of CriterionArrays if (orFilters != null && !orFilters.isEmpty()) { - return new Filter().setOr(buildConjunctiveCriterionArrayWithOr(orFilters)); + return new Filter().setOr(buildConjunctiveCriterionArrayWithOr(orFilters, aspectRetriever)); } // If or filters are not set, someone may be using the legacy and filters - final List andCriterions = criterionListFromAndFilter(andFilters); + final List andCriterions = criterionListFromAndFilter(andFilters, aspectRetriever); return new Filter() .setOr( new ConjunctiveCriterionArray( new ConjunctiveCriterion().setAnd(new CriterionArray(andCriterions)))); } - public static Criterion criterionFromFilter(final FacetFilterInput filter) { - return criterionFromFilter(filter, false); + public static Criterion criterionFromFilter( + final FacetFilterInput filter, @Nullable AspectRetriever aspectRetriever) { + return criterionFromFilter(filter, false, aspectRetriever); } // Translates a FacetFilterInput (graphql input class) into Criterion (our internal model) public static Criterion criterionFromFilter( - final FacetFilterInput filter, final Boolean skipKeywordSuffix) { + final FacetFilterInput filter, + final Boolean skipKeywordSuffix, + @Nullable AspectRetriever aspectRetriever) { Criterion result = new Criterion(); if (skipKeywordSuffix) { result.setField(filter.getField()); } else { - result.setField(getFilterField(filter.getField(), skipKeywordSuffix)); + result.setField(getFilterField(filter.getField(), skipKeywordSuffix, aspectRetriever)); } // `value` is deprecated in place of `values`- this is to support old query patterns. 
If values @@ -205,11 +213,13 @@ public static Criterion criterionFromFilter( } private static String getFilterField( - final String originalField, final boolean skipKeywordSuffix) { + final String originalField, + final boolean skipKeywordSuffix, + @Nullable AspectRetriever aspectRetriever) { if (KEYWORD_EXCLUDED_FILTERS.contains(originalField)) { return originalField; } - return ESUtils.toKeywordField(originalField, skipKeywordSuffix); + return ESUtils.toKeywordField(originalField, skipKeywordSuffix, aspectRetriever); } public static Filter buildFilterWithUrns(@Nonnull Set urns, @Nullable Filter inputFilters) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java index 18f8ad85668d8d..0e9d2cea611416 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolver.java @@ -15,6 +15,7 @@ import com.linkedin.datahub.graphql.types.dataset.mappers.AssertionRunEventMapper; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.EnvelopedAspect; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -67,7 +68,10 @@ public CompletableFuture get(DataFetchingEnvironment e maybeStartTimeMillis, maybeEndTimeMillis, maybeLimit, - buildFilter(maybeFilters, maybeStatus)); + buildFilter( + maybeFilters, + maybeStatus, + context.getOperationContext().getAspectRetriever())); // Step 2: Bind profiles into GraphQL strong types. 
List runEvents = @@ -120,7 +124,9 @@ public CompletableFuture get(DataFetchingEnvironment e @Nullable public static Filter buildFilter( - @Nullable FilterInput filtersInput, @Nullable final String status) { + @Nullable FilterInput filtersInput, + @Nullable final String status, + @Nullable AspectRetriever aspectRetriever) { if (filtersInput == null && status == null) { return null; } @@ -141,7 +147,7 @@ public static Filter buildFilter( .setAnd( new CriterionArray( facetFilters.stream() - .map(filter -> criterionFromFilter(filter, true)) + .map(filter -> criterionFromFilter(filter, true, aspectRetriever)) .collect(Collectors.toList()))))); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java index eaac2aedef03a7..dc57ed3c673c16 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolver.java @@ -70,7 +70,10 @@ public CompletableFuture get(DataFetchingEnvironment envi .withSearchFlags(flags -> flags.setFulltext(true)), Constants.ACCESS_TOKEN_ENTITY_NAME, "", - buildFilter(filters, Collections.emptyList()), + buildFilter( + filters, + Collections.emptyList(), + context.getOperationContext().getAspectRetriever()), sortCriterion, start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java index 18ee5f595ce582..b54ca398aef980 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java @@ 
-74,7 +74,9 @@ public CompletableFuture get(DataFetchingEnvironment environmen ? BROWSE_PATH_V2_DELIMITER + String.join(BROWSE_PATH_V2_DELIMITER, input.getPath()) : ""; - final Filter inputFilter = ResolverUtils.buildFilter(null, input.getOrFilters()); + final Filter inputFilter = + ResolverUtils.buildFilter( + null, input.getOrFilters(), context.getOperationContext().getAspectRetriever()); BrowseResultV2 browseResults = _entityClient.browseV2( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java index 041de9f58db231..320d89cdec164a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataproduct/ListDataProductAssetsResolver.java @@ -132,7 +132,10 @@ public CompletableFuture get(DataFetchingEnvironment environment) // add urns from the aspect to our filters final Filter baseFilter = - ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()); + ResolverUtils.buildFilter( + input.getFilters(), + input.getOrFilters(), + context.getOperationContext().getAspectRetriever()); final Filter finalFilter = buildFilterWithUrns(new HashSet<>(assetUrns), baseFilter); final SearchFlags searchFlags; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java index 9ca5de86034daa..75796f637525e5 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java @@ -78,7 +78,9 
@@ public CompletableFuture get(final DataFetchingEnvironment enviro .getFilters() .forEach( filter -> { - criteria.add(criterionFromFilter(filter, true)); + criteria.add( + criterionFromFilter( + filter, true, context.getOperationContext().getAspectRetriever())); }); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/CreateDynamicFormAssignmentResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/CreateDynamicFormAssignmentResolver.java index b9d74f8af660e8..3cf4d9175d45bf 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/CreateDynamicFormAssignmentResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/form/CreateDynamicFormAssignmentResolver.java @@ -33,7 +33,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment) final CreateDynamicFormAssignmentInput input = bindArgument(environment.getArgument("input"), CreateDynamicFormAssignmentInput.class); final Urn formUrn = UrnUtils.getUrn(input.getFormUrn()); - final DynamicFormAssignment formAssignment = FormUtils.mapDynamicFormAssignment(input); + final DynamicFormAssignment formAssignment = + FormUtils.mapDynamicFormAssignment( + input, context.getOperationContext().getAspectRetriever()); return GraphQLConcurrencyUtils.supplyAsync( () -> { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourcesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourcesResolver.java index 8ead47aa65ceb0..1a2806224e4a92 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourcesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/ListIngestionSourcesResolver.java @@ -68,7 +68,10 @@ public CompletableFuture 
get( .withSearchFlags(flags -> flags.setFulltext(true)), Constants.INGESTION_SOURCE_ENTITY_NAME, query, - buildFilter(filters, Collections.emptyList()), + buildFilter( + filters, + Collections.emptyList(), + context.getOperationContext().getAspectRetriever()), null, start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java index 1839fd3cc57055..8fc26e3cec9d06 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/load/TimeSeriesAspectResolver.java @@ -11,6 +11,7 @@ import com.linkedin.datahub.graphql.generated.TimeSeriesAspect; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.EnvelopedAspect; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; @@ -120,7 +121,7 @@ public CompletableFuture> get(DataFetchingEnvironment env maybeStartTimeMillis, maybeEndTimeMillis, maybeLimit, - buildFilters(maybeFilters), + buildFilters(maybeFilters, context.getOperationContext().getAspectRetriever()), maybeSort); // Step 2: Bind profiles into GraphQL strong types. 
@@ -135,7 +136,8 @@ public CompletableFuture> get(DataFetchingEnvironment env "get"); } - private Filter buildFilters(@Nullable FilterInput maybeFilters) { + private Filter buildFilters( + @Nullable FilterInput maybeFilters, @Nullable AspectRetriever aspectRetriever) { if (maybeFilters == null) { return null; } @@ -146,7 +148,7 @@ private Filter buildFilters(@Nullable FilterInput maybeFilters) { .setAnd( new CriterionArray( maybeFilters.getAnd().stream() - .map(filter -> criterionFromFilter(filter, true)) + .map(filter -> criterionFromFilter(filter, true, aspectRetriever)) .collect(Collectors.toList()))))); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java index 9a06682c87f78f..6caa858460c2f6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java @@ -6,6 +6,7 @@ import com.linkedin.datahub.graphql.resolvers.ResolverUtils; import com.linkedin.form.DynamicFormAssignment; import com.linkedin.form.FormInfo; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -15,6 +16,7 @@ import com.linkedin.structured.PrimitivePropertyValueArray; import java.util.Objects; import javax.annotation.Nonnull; +import javax.annotation.Nullable; public class FormUtils { @@ -44,13 +46,16 @@ public static PrimitivePropertyValueArray getStructuredPropertyValuesFromInput( /** Map a GraphQL CreateDynamicFormAssignmentInput to the GMS DynamicFormAssignment aspect */ @Nonnull public static DynamicFormAssignment mapDynamicFormAssignment( - @Nonnull final 
CreateDynamicFormAssignmentInput input) { + @Nonnull final CreateDynamicFormAssignmentInput input, + @Nullable AspectRetriever aspectRetriever) { Objects.requireNonNull(input, "input must not be null"); final DynamicFormAssignment result = new DynamicFormAssignment(); final Filter filter = new Filter() - .setOr(ResolverUtils.buildConjunctiveCriterionArrayWithOr(input.getOrFilters())); + .setOr( + ResolverUtils.buildConjunctiveCriterionArrayWithOr( + input.getOrFilters(), aspectRetriever)); result.setFilter(filter); return result; } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java index 0f0bb299eda837..9f6951e44dd735 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ownership/ListOwnershipTypesResolver.java @@ -63,7 +63,10 @@ public CompletableFuture get(DataFetchingEnvironment e context.getOperationContext().withSearchFlags(flags -> flags.setFulltext(true)), Constants.OWNERSHIP_TYPE_ENTITY_NAME, query, - buildFilter(filters, Collections.emptyList()), + buildFilter( + filters, + Collections.emptyList(), + context.getOperationContext().getAspectRetriever()), DEFAULT_SORT_CRITERION, start, count); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java index 4120401e0150f9..ce11451aa1913f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/policy/ListPoliciesResolver.java @@ -59,7 +59,11 @@ public 
CompletableFuture get(final DataFetchingEnvironment e log.debug( "User {} listing policies with filters {}", context.getActorUrn(), filters.toString()); - final Filter filter = ResolverUtils.buildFilter(facetFilters, Collections.emptyList()); + final Filter filter = + ResolverUtils.buildFilter( + facetFilters, + Collections.emptyList(), + context.getOperationContext().getAspectRetriever()); return _policyFetcher .fetchPolicies(context.getOperationContext(), start, query, count, filter) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java index b0d84942b12572..95be3a68e895c8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolver.java @@ -15,6 +15,7 @@ import com.linkedin.datahub.graphql.generated.ListQueriesResult; import com.linkedin.datahub.graphql.generated.QueryEntity; import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; @@ -72,7 +73,7 @@ public CompletableFuture get(final DataFetchingEnvironment en flags -> flags.setFulltext(true).setSkipHighlighting(true)), QUERY_ENTITY_NAME, query, - buildFilters(input), + buildFilters(input, context.getOperationContext().getAspectRetriever()), sortCriterion, start, count); @@ -109,7 +110,8 @@ private List mapUnresolvedQueries(final List queryUrns) { } @Nullable - private Filter buildFilters(@Nonnull final ListQueriesInput input) { + private Filter buildFilters( + @Nonnull final ListQueriesInput input, @Nullable AspectRetriever aspectRetriever) { final AndFilterInput criteria = new 
AndFilterInput(); List andConditions = new ArrayList<>(); @@ -136,6 +138,6 @@ private Filter buildFilters(@Nonnull final ListQueriesInput input) { } criteria.setAnd(andConditions); - return buildFilter(Collections.emptyList(), ImmutableList.of(criteria)); + return buildFilter(Collections.emptyList(), ImmutableList.of(criteria), aspectRetriever); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java index 202c78a62c9ae5..01818778643905 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java @@ -26,6 +26,7 @@ import com.linkedin.metadata.service.ViewService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; +import io.datahubproject.metadata.context.OperationContext; import io.opentelemetry.extension.annotations.WithSpan; import java.net.URISyntaxException; import java.util.Collections; @@ -33,6 +34,7 @@ import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; +import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -62,7 +64,7 @@ public CompletableFuture get(DataFetchingEnvironment List modules = _recommendationsService.listRecommendations( context.getOperationContext(), - mapRequestContext(input.getRequestContext()), + mapRequestContext(context.getOperationContext(), input.getRequestContext()), viewFilter(context.getOperationContext(), _viewService, input.getViewUrn()), input.getLimit()); return ListRecommendationsResult.builder() @@ -83,7 +85,7 @@ public CompletableFuture 
get(DataFetchingEnvironment } private com.linkedin.metadata.recommendation.RecommendationRequestContext mapRequestContext( - RecommendationRequestContext requestContext) { + @Nonnull OperationContext opContext, RecommendationRequestContext requestContext) { com.linkedin.metadata.recommendation.ScenarioType mappedScenarioType; try { mappedScenarioType = @@ -103,7 +105,9 @@ private com.linkedin.metadata.recommendation.RecommendationRequestContext mapReq searchRequestContext.setFilters( new CriterionArray( requestContext.getSearchRequestContext().getFilters().stream() - .map(facetField -> criterionFromFilter(facetField)) + .map( + facetField -> + criterionFromFilter(facetField, opContext.getAspectRetriever())) .collect(Collectors.toList()))); } mappedRequestContext.setSearchRequestContext(searchRequestContext); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java index 10a09b95bfd6ea..04a72b14eeb021 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolver.java @@ -63,7 +63,9 @@ public CompletableFuture get(DataFetchingEnvironment environme UrnUtils.getUrn(input.getViewUrn())) : null; - final Filter inputFilter = ResolverUtils.buildFilter(null, input.getOrFilters()); + final Filter inputFilter = + ResolverUtils.buildFilter( + null, input.getOrFilters(), context.getOperationContext().getAspectRetriever()); final SearchFlags searchFlags = mapInputFlags(context, input.getSearchFlags()); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteResolver.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteResolver.java index 13861c94ba3368..79792940ef27f7 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteResolver.java @@ -3,6 +3,7 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; import static org.apache.commons.lang3.StringUtils.isBlank; +import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.ValidationException; import com.linkedin.datahub.graphql.generated.AutoCompleteInput; @@ -39,6 +40,7 @@ public AutoCompleteResolver(@Nonnull final List> sear @Override public CompletableFuture get(DataFetchingEnvironment environment) { + final QueryContext context = environment.getContext(); final AutoCompleteInput input = bindArgument(environment.getArgument("input"), AutoCompleteInput.class); @@ -49,7 +51,11 @@ public CompletableFuture get(DataFetchingEnvironment enviro throw new ValidationException("'query' parameter can not be null or empty"); } - final Filter filter = ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()); + final Filter filter = + ResolverUtils.buildFilter( + input.getFilters(), + input.getOrFilters(), + context.getOperationContext().getRetrieverContext().orElseThrow().getAspectRetriever()); final int limit = input.getLimit() != null ? 
input.getLimit() : DEFAULT_LIMIT; return GraphQLConcurrencyUtils.supplyAsync( () -> { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutocompleteUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutocompleteUtils.java index c849e3ad3f68c7..5b5888b89b241b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutocompleteUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/AutocompleteUtils.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.search; +import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.generated.AutoCompleteMultipleInput; import com.linkedin.datahub.graphql.generated.AutoCompleteMultipleResults; @@ -33,6 +34,7 @@ public static CompletableFuture batchGetAutocomplet DataFetchingEnvironment environment, @Nullable DataHubViewInfo view) { final int limit = input.getLimit() != null ? input.getLimit() : DEFAULT_LIMIT; + final QueryContext context = environment.getContext(); final List> autoCompletesFuture = entities.stream() @@ -41,7 +43,10 @@ public static CompletableFuture batchGetAutocomplet GraphQLConcurrencyUtils.supplyAsync( () -> { final Filter filter = - ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()); + ResolverUtils.buildFilter( + input.getFilters(), + input.getOrFilters(), + context.getOperationContext().getAspectRetriever()); final Filter finalFilter = view != null ? 
SearchUtils.combineFilters( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java index 77eef1b9a25c69..8b8b93353bc6e8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossEntitiesResolver.java @@ -72,7 +72,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) UrnUtils.getUrn(input.getViewUrn())) : null; - final Filter baseFilter = ResolverUtils.buildFilter(null, input.getOrFilters()); + final Filter baseFilter = + ResolverUtils.buildFilter( + null, input.getOrFilters(), context.getOperationContext().getAspectRetriever()); final SearchFlags searchFlags; com.linkedin.datahub.graphql.generated.SearchFlags inputFlags = input.getSearchFlags(); if (inputFlags != null) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java index addd217d687241..14b2d3b8f8420c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/ScrollAcrossLineageResolver.java @@ -129,7 +129,10 @@ public CompletableFuture get(DataFetchingEnvironment entityNames, sanitizedQuery, maxHops, - ResolverUtils.buildFilter(facetFilters, input.getOrFilters()), + ResolverUtils.buildFilter( + facetFilters, + input.getOrFilters(), + context.getOperationContext().getAspectRetriever()), null, scrollId, keepAlive, diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java index a9da1c40554345..287e339ddee50c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolver.java @@ -59,7 +59,10 @@ public CompletableFuture get(DataFetchingEnvironment environment) : null; final Filter baseFilter = - ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()); + ResolverUtils.buildFilter( + input.getFilters(), + input.getOrFilters(), + context.getOperationContext().getAspectRetriever()); SearchFlags searchFlags = mapInputFlags(context, input.getSearchFlags()); SortCriterion sortCriterion = diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java index 238f2375ee207d..f342d251acd725 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossLineageResolver.java @@ -138,7 +138,10 @@ public CompletableFuture get(DataFetchingEnvironment count); final Filter filter = - ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()); + ResolverUtils.buildFilter( + input.getFilters(), + input.getOrFilters(), + context.getOperationContext().getAspectRetriever()); final SearchFlags searchFlags; com.linkedin.datahub.graphql.generated.SearchFlags inputFlags = input.getSearchFlags(); if (inputFlags != null) { diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java index 35586ea29571c8..5fb2f8f14b293c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchResolver.java @@ -85,7 +85,10 @@ public CompletableFuture get(DataFetchingEnvironment environment) context.getOperationContext().withSearchFlags(flags -> searchFlags), entityName, sanitizedQuery, - ResolverUtils.buildFilter(input.getFilters(), input.getOrFilters()), + ResolverUtils.buildFilter( + input.getFilters(), + input.getOrFilters(), + context.getOperationContext().getAspectRetriever()), null, start, count)); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolver.java index 49c3467adb58bc..7c3e433dd1ede4 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolver.java @@ -49,7 +49,9 @@ public CompletableFuture get(final DataFetchingEnvironment environm DataHubViewType.valueOf(input.getViewType().toString()), input.getName(), input.getDescription(), - ViewUtils.mapDefinition(input.getDefinition()), + ViewUtils.mapDefinition( + input.getDefinition(), + context.getOperationContext().getAspectRetriever()), System.currentTimeMillis()); return createView(urn, input); } catch (Exception e) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java index 28b8fe50b70d65..952e55ca117f2d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolver.java @@ -16,6 +16,7 @@ import com.linkedin.datahub.graphql.generated.ListViewsResult; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; @@ -30,6 +31,7 @@ import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; /** Resolver used for listing global DataHub Views. 
*/ @@ -71,7 +73,7 @@ public CompletableFuture get(final DataFetchingEnvironment envi context.getOperationContext().withSearchFlags(flags -> flags.setFulltext(true)), Constants.DATAHUB_VIEW_ENTITY_NAME, query, - buildFilters(), + buildFilters(context.getOperationContext().getAspectRetriever()), DEFAULT_SORT_CRITERION, start, count); @@ -107,7 +109,7 @@ private List mapUnresolvedViews(final List entityUrns) { return results; } - private Filter buildFilters() { + private Filter buildFilters(@Nullable AspectRetriever aspectRetriever) { final AndFilterInput globalCriteria = new AndFilterInput(); List andConditions = new ArrayList<>(); andConditions.add( @@ -118,6 +120,6 @@ private Filter buildFilters() { false, FilterOperator.EQUAL)); globalCriteria.setAnd(andConditions); - return buildFilter(Collections.emptyList(), ImmutableList.of(globalCriteria)); + return buildFilter(Collections.emptyList(), ImmutableList.of(globalCriteria), aspectRetriever); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java index 218bedcd0beffb..32eb0e46bb6160 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolver.java @@ -15,6 +15,7 @@ import com.linkedin.datahub.graphql.generated.ListViewsResult; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; @@ -74,7 +75,10 @@ public CompletableFuture get(final DataFetchingEnvironment envi context.getOperationContext().withSearchFlags(flags -> flags.setFulltext(true)), 
Constants.DATAHUB_VIEW_ENTITY_NAME, query, - buildFilters(viewType, context.getActorUrn()), + buildFilters( + viewType, + context.getActorUrn(), + context.getOperationContext().getAspectRetriever()), DEFAULT_SORT_CRITERION, start, count); @@ -110,7 +114,10 @@ private List mapUnresolvedViews(final List entityUrns) { return results; } - private Filter buildFilters(@Nullable final String viewType, final String creatorUrn) { + private Filter buildFilters( + @Nullable final String viewType, + final String creatorUrn, + @Nullable AspectRetriever aspectRetriever) { // And GLOBAL views for the authenticated actor. final AndFilterInput filterCriteria = new AndFilterInput(); final List andConditions = new ArrayList<>(); @@ -125,6 +132,6 @@ private Filter buildFilters(@Nullable final String viewType, final String creato filterCriteria.setAnd(andConditions); // Currently, there is no way to fetch the views belonging to another user. - return buildFilter(Collections.emptyList(), ImmutableList.of(filterCriteria)); + return buildFilter(Collections.emptyList(), ImmutableList.of(filterCriteria), aspectRetriever); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolver.java index 9ab5efe83105a0..11ec1c5705bd31 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolver.java @@ -48,7 +48,8 @@ public CompletableFuture get(final DataFetchingEnvironment environm urn, input.getName(), input.getDescription(), - ViewUtils.mapDefinition(input.getDefinition()), + ViewUtils.mapDefinition( + input.getDefinition(), context.getOperationContext().getAspectRetriever()), System.currentTimeMillis()); log.info(String.format("Successfully updated View %s with urn", urn)); return 
getView(context, urn, context.getAuthentication()); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtils.java index 29ceba2f1b86c3..70a5ced4bfbf10 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtils.java @@ -13,6 +13,7 @@ import com.linkedin.datahub.graphql.generated.LogicalOperator; import com.linkedin.datahub.graphql.resolvers.ResolverUtils; import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; import com.linkedin.metadata.query.filter.CriterionArray; @@ -25,6 +26,7 @@ import java.util.Objects; import java.util.stream.Collectors; import javax.annotation.Nonnull; +import javax.annotation.Nullable; public class ViewUtils { @@ -92,12 +94,12 @@ public static boolean canUpdateView( */ @Nonnull public static DataHubViewDefinition mapDefinition( - @Nonnull final DataHubViewDefinitionInput input) { + @Nonnull final DataHubViewDefinitionInput input, @Nullable AspectRetriever aspectRetriever) { Objects.requireNonNull(input, "input must not be null"); final DataHubViewDefinition result = new DataHubViewDefinition(); if (input.getFilter() != null) { - result.setFilter(mapFilter(input.getFilter()), SetMode.IGNORE_NULL); + result.setFilter(mapFilter(input.getFilter(), aspectRetriever), SetMode.IGNORE_NULL); } result.setEntityTypes( new StringArray( @@ -118,17 +120,19 @@ public static DataHubViewDefinition mapDefinition( * which cannot be rendered in full by the UI. We account for this on the read path by logging a * warning and returning an empty View in such cases. 
*/ - private static Filter mapFilter(@Nonnull DataHubViewFilterInput input) { + private static Filter mapFilter( + @Nonnull DataHubViewFilterInput input, @Nullable AspectRetriever aspectRetriever) { if (LogicalOperator.AND.equals(input.getOperator())) { // AND - return buildAndFilter(input.getFilters()); + return buildAndFilter(input.getFilters(), aspectRetriever); } else { // OR - return buildOrFilter(input.getFilters()); + return buildOrFilter(input.getFilters(), aspectRetriever); } } - private static Filter buildAndFilter(@Nonnull List input) { + private static Filter buildAndFilter( + @Nonnull List input, @Nullable AspectRetriever aspectRetriever) { final Filter result = new Filter(); result.setOr( new ConjunctiveCriterionArray( @@ -137,12 +141,13 @@ private static Filter buildAndFilter(@Nonnull List input) { .setAnd( new CriterionArray( input.stream() - .map(ResolverUtils::criterionFromFilter) + .map(f -> ResolverUtils.criterionFromFilter(f, aspectRetriever)) .collect(Collectors.toList())))))); return result; } - private static Filter buildOrFilter(@Nonnull List input) { + private static Filter buildOrFilter( + @Nonnull List input, @Nullable AspectRetriever aspectRetriever) { final Filter result = new Filter(); result.setOr( new ConjunctiveCriterionArray( @@ -152,7 +157,9 @@ private static Filter buildOrFilter(@Nonnull List input) { new ConjunctiveCriterion() .setAnd( new CriterionArray( - ImmutableList.of(ResolverUtils.criterionFromFilter(filter))))) + ImmutableList.of( + ResolverUtils.criterionFromFilter( + filter, aspectRetriever))))) .collect(Collectors.toList()))); return result; } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java index 57d85e5b204c21..f98284e92ede58 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java +++ 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; +import static org.mockito.Mockito.mock; import static org.testng.AssertJUnit.assertEquals; import com.google.common.collect.ImmutableList; @@ -11,6 +12,7 @@ import com.linkedin.datahub.graphql.TestUtils; import com.linkedin.datahub.graphql.generated.FacetFilterInput; import com.linkedin.datahub.graphql.generated.FilterOperator; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -28,7 +30,7 @@ public class ResolverUtilsTest { @Test public void testCriterionFromFilter() throws Exception { - final DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + final DataFetchingEnvironment mockEnv = mock(DataFetchingEnvironment.class); final QueryContext mockAllowContext = TestUtils.getMockAllowContext(); Mockito.when(mockEnv.getContext()).thenReturn(mockAllowContext); @@ -40,7 +42,8 @@ public void testCriterionFromFilter() throws Exception { null, ImmutableList.of("urn:li:tag:abc", "urn:li:tag:def"), false, - FilterOperator.EQUAL)); + FilterOperator.EQUAL), + mock(AspectRetriever.class)); assertEquals( valuesCriterion, new Criterion() @@ -53,7 +56,8 @@ public void testCriterionFromFilter() throws Exception { // this is the legacy pathway Criterion valueCriterion = criterionFromFilter( - new FacetFilterInput("tags", "urn:li:tag:abc", null, true, FilterOperator.EQUAL)); + new FacetFilterInput("tags", "urn:li:tag:abc", null, true, FilterOperator.EQUAL), + mock(AspectRetriever.class)); assertEquals( valueCriterion, new Criterion() @@ -66,7 +70,9 @@ public void testCriterionFromFilter() throws Exception { // check that both being null 
doesn't cause a NPE. this should never happen except via API // interaction Criterion doubleNullCriterion = - criterionFromFilter(new FacetFilterInput("tags", null, null, true, FilterOperator.EQUAL)); + criterionFromFilter( + new FacetFilterInput("tags", null, null, true, FilterOperator.EQUAL), + mock(AspectRetriever.class)); assertEquals( doubleNullCriterion, new Criterion() diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolverTest.java index 845ce1e6129d8b..f6e7e7267a060e 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/assertion/AssertionRunEventResolverTest.java @@ -20,6 +20,7 @@ import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.SystemMetadata; import graphql.schema.DataFetchingEnvironment; +import io.datahubproject.metadata.context.OperationContext; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -56,7 +57,7 @@ public void testGetSuccess() throws Exception { Mockito.eq(5), Mockito.eq( AssertionRunEventResolver.buildFilter( - null, AssertionRunStatus.COMPLETE.toString())))) + null, AssertionRunStatus.COMPLETE.toString(), null)))) .thenReturn( ImmutableList.of( new EnvelopedAspect() @@ -68,6 +69,9 @@ public void testGetSuccess() throws Exception { // Execute resolver QueryContext mockContext = Mockito.mock(QueryContext.class); Mockito.when(mockContext.getAuthentication()).thenReturn(Mockito.mock(Authentication.class)); + Mockito.when(mockContext.getOperationContext()) + .thenReturn(Mockito.mock(OperationContext.class)); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); Mockito.when(mockEnv.getArgumentOrDefault(Mockito.eq("status"), 
Mockito.eq(null))) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java index ad30e48d8361b8..6c876226a45e60 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/auth/ListAccessTokensResolverTest.java @@ -46,7 +46,7 @@ public void testGetSuccess() throws Exception { any(), Mockito.eq(Constants.ACCESS_TOKEN_ENTITY_NAME), Mockito.eq(""), - Mockito.eq(buildFilter(filters, Collections.emptyList())), + Mockito.eq(buildFilter(filters, Collections.emptyList(), null)), Mockito.any(SortCriterion.class), Mockito.eq(input.getStart()), Mockito.eq(input.getCount()))) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java index 4897d0819b59fb..9cf7e62e65e253 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java @@ -2,6 +2,7 @@ import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; import com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; @@ -17,6 +18,7 @@ import com.linkedin.datahub.graphql.resolvers.ResolverUtils; import com.linkedin.datahub.graphql.resolvers.chart.BrowseV2Resolver; import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.browse.BrowseResultGroupV2; import 
com.linkedin.metadata.browse.BrowseResultGroupV2Array; import com.linkedin.metadata.browse.BrowseResultMetadata; @@ -100,7 +102,7 @@ public static void testBrowseV2SuccessWithQueryAndFilter() throws Exception { facetFilterInput.setValues(ImmutableList.of("urn:li:corpuser:test")); andFilterInput.setAnd(ImmutableList.of(facetFilterInput)); orFilters.add(andFilterInput); - Filter filter = ResolverUtils.buildFilter(null, orFilters); + Filter filter = ResolverUtils.buildFilter(null, orFilters, mock(AspectRetriever.class)); EntityClient mockClient = initMockEntityClient( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java index 9ed1d5001b75c3..70b427a1606f12 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/query/ListQueriesResolverTest.java @@ -169,6 +169,6 @@ private Filter buildFilter(@Nullable QuerySource source, @Nullable String entity FilterOperator.EQUAL)); } criteria.setAnd(andConditions); - return ResolverUtils.buildFilter(Collections.emptyList(), ImmutableList.of(criteria)); + return ResolverUtils.buildFilter(Collections.emptyList(), ImmutableList.of(criteria), null); } } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java index 443050456f3fd9..701ddd84c173e7 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java @@ -2,6 +2,7 @@ import static com.linkedin.datahub.graphql.TestUtils.*; import static 
org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; import static org.testng.Assert.*; import com.google.common.collect.ImmutableList; @@ -17,6 +18,7 @@ import com.linkedin.datahub.graphql.generated.FilterOperator; import com.linkedin.datahub.graphql.generated.LogicalOperator; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -169,7 +171,7 @@ public void testMapDefinition() throws Exception { // the keyword mapping. .setCondition(Condition.CONTAIN)))))))); - assertEquals(ViewUtils.mapDefinition(input), expectedResult); + assertEquals(ViewUtils.mapDefinition(input, mock(AspectRetriever.class)), expectedResult); } private static ViewService initViewService(DataHubViewType viewType) { diff --git a/datahub-upgrade/build.gradle b/datahub-upgrade/build.gradle index 9108f3009b4ba3..4b46996d306852 100644 --- a/datahub-upgrade/build.gradle +++ b/datahub-upgrade/build.gradle @@ -91,6 +91,27 @@ bootJar { archiveFileName = "${project.name}.jar" } +bootRun { + environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml" + environment "ENABLE_STRUCTURED_PROPERTIES_SYSTEM_UPDATE", "true" + environment "ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX", "true" + environment "SERVER_PORT", "8083" + args += ["-u", "SystemUpdate"] +} + +/** + * Runs SystemUpdate on locally running system + */ +task run(type: Exec) { + dependsOn bootJar + group = "Execution" + description = "Run the datahub-upgrade SystemUpdate process locally." 
+ environment "ENTITY_REGISTRY_CONFIG_PATH", "../metadata-models/src/main/resources/entity-registry.yml" + environment "ENABLE_STRUCTURED_PROPERTIES_SYSTEM_UPDATE", "true" + environment "ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX", "true" + commandLine "java", "-jar", "-Dserver.port=8083", bootJar.getArchiveFile().get(), "-u", "SystemUpdate" +} + docker { name "${docker_registry}/${docker_repo}:v${version}" version "v${version}" diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BuildIndicesConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BuildIndicesConfig.java index 3510fa513b3b9c..e0de8a7255d61e 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BuildIndicesConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BuildIndicesConfig.java @@ -6,7 +6,6 @@ import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.graph.GraphService; -import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.systemmetadata.SystemMetadataService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; @@ -26,8 +25,7 @@ public BlockingSystemUpgrade buildIndices( final BaseElasticSearchComponentsFactory.BaseElasticSearchComponents baseElasticSearchComponents, final ConfigurationProvider configurationProvider, - final AspectDao aspectDao, - final EntityRegistry entityRegistry) { + final AspectDao aspectDao) { return new BuildIndices( systemMetadataService, @@ -36,7 +34,6 @@ public BlockingSystemUpgrade buildIndices( graphService, baseElasticSearchComponents, configurationProvider, - aspectDao, - entityRegistry); + aspectDao); } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/CleanIndicesConfig.java 
b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/CleanIndicesConfig.java index 4f54b01459625d..7559aaf3f3cdbc 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/CleanIndicesConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/CleanIndicesConfig.java @@ -4,6 +4,7 @@ import com.linkedin.datahub.upgrade.system.elasticsearch.CleanIndices; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; +import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.systemmetadata.SystemMetadataService; @@ -23,7 +24,8 @@ public NonBlockingSystemUpgrade cleanIndices( final GraphService graphService, final BaseElasticSearchComponentsFactory.BaseElasticSearchComponents baseElasticSearchComponents, - final ConfigurationProvider configurationProvider) { + final ConfigurationProvider configurationProvider, + final AspectDao aspectDao) { return new CleanIndices( systemMetadataService, @@ -31,6 +33,7 @@ public NonBlockingSystemUpgrade cleanIndices( entitySearchService, graphService, baseElasticSearchComponents, - configurationProvider); + configurationProvider, + aspectDao); } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/BuildIndices.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/BuildIndices.java index fea0479876a2e9..a91bba2fa0a976 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/BuildIndices.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/BuildIndices.java @@ -1,5 +1,13 @@ package com.linkedin.datahub.upgrade.system.elasticsearch; +import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; +import static 
com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; + +import com.datahub.util.RecordUtils; +import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.upgrade.UpgradeStep; import com.linkedin.datahub.upgrade.system.BlockingSystemUpgrade; import com.linkedin.datahub.upgrade.system.elasticsearch.steps.BuildIndicesPostStep; @@ -8,14 +16,17 @@ import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityAspect; import com.linkedin.metadata.graph.GraphService; -import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.shared.ElasticSearchIndexed; import com.linkedin.metadata.systemmetadata.SystemMetadataService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; +import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.util.ArrayList; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -31,8 +42,7 @@ public BuildIndices( final BaseElasticSearchComponentsFactory.BaseElasticSearchComponents baseElasticSearchComponents, final ConfigurationProvider configurationProvider, - final AspectDao aspectDao, - final EntityRegistry entityRegistry) { + final AspectDao aspectDao) { List indexedServices = Stream.of(graphService, entitySearchService, systemMetadataService, timeseriesAspectService) @@ -41,12 +51,7 @@ public BuildIndices( .collect(Collectors.toList()); _steps = - buildSteps( - indexedServices, - baseElasticSearchComponents, - configurationProvider, - aspectDao, - entityRegistry); + buildSteps(indexedServices, 
baseElasticSearchComponents, configurationProvider, aspectDao); } @Override @@ -64,8 +69,14 @@ private List buildSteps( final BaseElasticSearchComponentsFactory.BaseElasticSearchComponents baseElasticSearchComponents, final ConfigurationProvider configurationProvider, - final AspectDao aspectDao, - final EntityRegistry entityRegistry) { + final AspectDao aspectDao) { + + final Set> structuredProperties; + if (configurationProvider.getStructuredProperties().isSystemUpdateEnabled()) { + structuredProperties = getActiveStructuredPropertiesDefinitions(aspectDao); + } else { + structuredProperties = Set.of(); + } final List steps = new ArrayList<>(); // Disable ES write mode/change refresh rate and clone indices @@ -74,13 +85,47 @@ private List buildSteps( baseElasticSearchComponents, indexedServices, configurationProvider, - aspectDao, - entityRegistry)); + structuredProperties)); // Configure graphService, entitySearchService, systemMetadataService, timeseriesAspectService - steps.add(new BuildIndicesStep(indexedServices)); + steps.add(new BuildIndicesStep(indexedServices, structuredProperties)); // Reset configuration (and delete clones? Or just do this regularly? Or delete clone in // pre-configure step if it already exists? 
- steps.add(new BuildIndicesPostStep(baseElasticSearchComponents, indexedServices)); + steps.add( + new BuildIndicesPostStep( + baseElasticSearchComponents, indexedServices, structuredProperties)); return steps; } + + static Set> getActiveStructuredPropertiesDefinitions( + AspectDao aspectDao) { + Set removedStructuredPropertyUrns; + try (Stream stream = + aspectDao.streamAspects(STRUCTURED_PROPERTY_ENTITY_NAME, STATUS_ASPECT_NAME)) { + removedStructuredPropertyUrns = + stream + .map( + entityAspect -> + Pair.of( + entityAspect.getUrn(), + RecordUtils.toRecordTemplate(Status.class, entityAspect.getMetadata()))) + .filter(status -> status.getSecond().isRemoved()) + .map(Pair::getFirst) + .collect(Collectors.toSet()); + } + + try (Stream stream = + aspectDao.streamAspects( + STRUCTURED_PROPERTY_ENTITY_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)) { + return stream + .map( + entityAspect -> + Pair.of( + UrnUtils.getUrn(entityAspect.getUrn()), + RecordUtils.toRecordTemplate( + StructuredPropertyDefinition.class, entityAspect.getMetadata()))) + .filter( + definition -> !removedStructuredPropertyUrns.contains(definition.getKey().toString())) + .collect(Collectors.toSet()); + } + } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/CleanIndices.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/CleanIndices.java index e316481e2b07e6..96aea906b021e4 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/CleanIndices.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/CleanIndices.java @@ -1,16 +1,23 @@ package com.linkedin.datahub.upgrade.system.elasticsearch; +import static com.linkedin.datahub.upgrade.system.elasticsearch.BuildIndices.getActiveStructuredPropertiesDefinitions; + +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.upgrade.UpgradeStep; import 
com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; import com.linkedin.datahub.upgrade.system.elasticsearch.steps.CleanIndicesStep; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; +import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.shared.ElasticSearchIndexed; import com.linkedin.metadata.systemmetadata.SystemMetadataService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; +import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; import lombok.extern.slf4j.Slf4j; @@ -26,7 +33,15 @@ public CleanIndices( final GraphService graphService, final BaseElasticSearchComponentsFactory.BaseElasticSearchComponents baseElasticSearchComponents, - final ConfigurationProvider configurationProvider) { + final ConfigurationProvider configurationProvider, + final AspectDao aspectDao) { + + final Set> structuredProperties; + if (configurationProvider.getStructuredProperties().isSystemUpdateEnabled()) { + structuredProperties = getActiveStructuredPropertiesDefinitions(aspectDao); + } else { + structuredProperties = Set.of(); + } List indexedServices = Stream.of(graphService, entitySearchService, systemMetadataService, timeseriesAspectService) @@ -39,7 +54,8 @@ public CleanIndices( new CleanIndicesStep( baseElasticSearchComponents.getSearchClient(), configurationProvider.getElasticSearch(), - indexedServices)); + indexedServices, + structuredProperties)); } @Override diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPostStep.java 
b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPostStep.java index a44f6d6487067d..09f65c84480279 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPostStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPostStep.java @@ -4,6 +4,7 @@ import static com.linkedin.datahub.upgrade.system.elasticsearch.util.IndexUtils.getAllReindexConfigs; import com.google.common.collect.ImmutableMap; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.upgrade.UpgradeContext; import com.linkedin.datahub.upgrade.UpgradeStep; import com.linkedin.datahub.upgrade.UpgradeStepResult; @@ -12,8 +13,11 @@ import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ReindexConfig; import com.linkedin.metadata.shared.ElasticSearchIndexed; +import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; @@ -25,8 +29,9 @@ @Slf4j public class BuildIndicesPostStep implements UpgradeStep { - private final BaseElasticSearchComponentsFactory.BaseElasticSearchComponents _esComponents; - private final List _services; + private final BaseElasticSearchComponentsFactory.BaseElasticSearchComponents esComponents; + private final List services; + private final Set> structuredProperties; @Override public String id() { @@ -44,7 +49,7 @@ public Function executable() { try { List indexConfigs = - getAllReindexConfigs(_services).stream() + getAllReindexConfigs(services, structuredProperties).stream() .filter(ReindexConfig::requiresReindex) .collect(Collectors.toList()); @@ -55,7 +60,7 @@ public Function executable() { request.settings(indexSettings); 
boolean ack = - _esComponents + esComponents .getSearchClient() .indices() .putSettings(request, RequestOptions.DEFAULT) @@ -69,7 +74,7 @@ public Function executable() { if (ack) { ack = IndexUtils.validateWriteBlock( - _esComponents.getSearchClient(), indexConfig.name(), false); + esComponents.getSearchClient(), indexConfig.name(), false); log.info( "Validated index {} with new settings. Settings: {}, Acknowledged: {}", indexConfig.name(), diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java index c3c9981b1dd7e4..983e7f0c97f38b 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java @@ -2,13 +2,9 @@ import static com.linkedin.datahub.upgrade.system.elasticsearch.util.IndexUtils.INDEX_BLOCKS_WRITE_SETTING; import static com.linkedin.datahub.upgrade.system.elasticsearch.util.IndexUtils.getAllReindexConfigs; -import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; -import com.datahub.util.RecordUtils; import com.google.common.collect.ImmutableMap; -import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.upgrade.UpgradeContext; import com.linkedin.datahub.upgrade.UpgradeStep; import com.linkedin.datahub.upgrade.UpgradeStepResult; @@ -16,9 +12,6 @@ import com.linkedin.datahub.upgrade.system.elasticsearch.util.IndexUtils; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; -import 
com.linkedin.metadata.entity.AspectDao; -import com.linkedin.metadata.entity.EntityAspect; -import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ReindexConfig; import com.linkedin.metadata.shared.ElasticSearchIndexed; import com.linkedin.structured.StructuredPropertyDefinition; @@ -29,7 +22,6 @@ import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; -import java.util.stream.Stream; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.opensearch.OpenSearchStatusException; @@ -40,11 +32,10 @@ @RequiredArgsConstructor @Slf4j public class BuildIndicesPreStep implements UpgradeStep { - private final BaseElasticSearchComponentsFactory.BaseElasticSearchComponents _esComponents; - private final List _services; - private final ConfigurationProvider _configurationProvider; - private final AspectDao _aspectDao; - private final EntityRegistry _entityRegistry; + private final BaseElasticSearchComponentsFactory.BaseElasticSearchComponents esComponents; + private final List services; + private final ConfigurationProvider configurationProvider; + private final Set> structuredProperties; @Override public String id() { @@ -60,13 +51,8 @@ public int retryCount() { public Function executable() { return (context) -> { try { - final List reindexConfigs; - if (_configurationProvider.getStructuredProperties().isSystemUpdateEnabled()) { - reindexConfigs = - getAllReindexConfigs(_services, getActiveStructuredPropertiesDefinitions(_aspectDao)); - } else { - reindexConfigs = getAllReindexConfigs(_services); - } + final List reindexConfigs = + getAllReindexConfigs(services, structuredProperties); // Get indices to update List indexConfigs = @@ -76,7 +62,7 @@ public Function executable() { for (ReindexConfig indexConfig : indexConfigs) { String indexName = - IndexUtils.resolveAlias(_esComponents.getSearchClient(), indexConfig.name()); + 
IndexUtils.resolveAlias(esComponents.getSearchClient(), indexConfig.name()); boolean ack = blockWrites(indexName); if (!ack) { @@ -87,11 +73,11 @@ public Function executable() { } // Clone indices - if (_configurationProvider.getElasticSearch().getBuildIndices().isCloneIndices()) { + if (configurationProvider.getElasticSearch().getBuildIndices().isCloneIndices()) { String clonedName = indexConfig.name() + "_clone_" + System.currentTimeMillis(); ResizeRequest resizeRequest = new ResizeRequest(clonedName, indexName); boolean cloneAck = - _esComponents + esComponents .getSearchClient() .indices() .clone(resizeRequest, RequestOptions.DEFAULT) @@ -121,7 +107,7 @@ private boolean blockWrites(String indexName) throws InterruptedException, IOExc boolean ack; try { ack = - _esComponents + esComponents .getSearchClient() .indices() .putSettings(request, RequestOptions.DEFAULT) @@ -145,7 +131,7 @@ private boolean blockWrites(String indexName) throws InterruptedException, IOExc } if (ack) { - ack = IndexUtils.validateWriteBlock(_esComponents.getSearchClient(), indexName, true); + ack = IndexUtils.validateWriteBlock(esComponents.getSearchClient(), indexName, true); log.info( "Validated index {} with new settings. 
Settings: {}, Acknowledged: {}", indexName, @@ -155,37 +141,4 @@ private boolean blockWrites(String indexName) throws InterruptedException, IOExc return ack; } - - private static Set getActiveStructuredPropertiesDefinitions( - AspectDao aspectDao) { - Set removedStructuredPropertyUrns; - try (Stream stream = - aspectDao.streamAspects(STRUCTURED_PROPERTY_ENTITY_NAME, STATUS_ASPECT_NAME)) { - removedStructuredPropertyUrns = - stream - .map( - entityAspect -> - Pair.of( - entityAspect.getUrn(), - RecordUtils.toRecordTemplate(Status.class, entityAspect.getMetadata()))) - .filter(status -> status.getSecond().isRemoved()) - .map(Pair::getFirst) - .collect(Collectors.toSet()); - } - - try (Stream stream = - aspectDao.streamAspects( - STRUCTURED_PROPERTY_ENTITY_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)) { - return stream - .map( - entityAspect -> - Pair.of( - entityAspect.getUrn(), - RecordUtils.toRecordTemplate( - StructuredPropertyDefinition.class, entityAspect.getMetadata()))) - .filter(definition -> !removedStructuredPropertyUrns.contains(definition.getKey())) - .map(Pair::getSecond) - .collect(Collectors.toSet()); - } - } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesStep.java index d37ee173bd9af8..5cf370162a3125 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesStep.java @@ -1,11 +1,15 @@ package com.linkedin.datahub.upgrade.system.elasticsearch.steps; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.upgrade.UpgradeContext; import com.linkedin.datahub.upgrade.UpgradeStep; import com.linkedin.datahub.upgrade.UpgradeStepResult; import com.linkedin.datahub.upgrade.impl.DefaultUpgradeStepResult; import 
com.linkedin.metadata.shared.ElasticSearchIndexed; +import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.util.List; +import java.util.Set; import java.util.function.Function; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -14,7 +18,8 @@ @RequiredArgsConstructor public class BuildIndicesStep implements UpgradeStep { - private final List _services; + private final List services; + private final Set> structuredProperties; @Override public String id() { @@ -30,8 +35,8 @@ public int retryCount() { public Function executable() { return (context) -> { try { - for (ElasticSearchIndexed service : _services) { - service.reindexAll(); + for (ElasticSearchIndexed service : services) { + service.reindexAll(structuredProperties); } } catch (Exception e) { log.error("BuildIndicesStep failed.", e); diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java index c3a4d8ab89c070..fd5592c4ead25e 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.upgrade.system.elasticsearch.steps; +import com.linkedin.common.urn.Urn; import com.linkedin.datahub.upgrade.UpgradeContext; import com.linkedin.datahub.upgrade.UpgradeStep; import com.linkedin.datahub.upgrade.UpgradeStepResult; @@ -8,7 +9,10 @@ import com.linkedin.metadata.config.search.ElasticSearchConfiguration; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; import com.linkedin.metadata.shared.ElasticSearchIndexed; +import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.util.List; 
+import java.util.Set; import java.util.function.Function; import lombok.extern.slf4j.Slf4j; import org.opensearch.client.RestHighLevelClient; @@ -18,14 +22,17 @@ public class CleanIndicesStep implements UpgradeStep { private final RestHighLevelClient searchClient; private final ElasticSearchConfiguration esConfig; private final List indexedServices; + private final Set> structuredProperties; public CleanIndicesStep( final RestHighLevelClient searchClient, final ElasticSearchConfiguration esConfig, - final List indexedServices) { + final List indexedServices, + final Set> structuredProperties) { this.searchClient = searchClient; this.esConfig = esConfig; this.indexedServices = indexedServices; + this.structuredProperties = structuredProperties; } @Override @@ -42,7 +49,7 @@ public int retryCount() { public Function executable() { return (context) -> { try { - IndexUtils.getAllReindexConfigs(indexedServices) + IndexUtils.getAllReindexConfigs(indexedServices, structuredProperties) .forEach( reindexConfig -> ESIndexBuilder.cleanIndex(searchClient, esConfig, reindexConfig)); } catch (Exception e) { diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/util/IndexUtils.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/util/IndexUtils.java index 52b34200991c35..99d72776ff788b 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/util/IndexUtils.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/util/IndexUtils.java @@ -1,8 +1,10 @@ package com.linkedin.datahub.upgrade.system.elasticsearch.util; +import com.linkedin.common.urn.Urn; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ReindexConfig; import com.linkedin.metadata.shared.ElasticSearchIndexed; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.io.IOException; import java.util.ArrayList; import 
java.util.Collection; @@ -27,30 +29,15 @@ private IndexUtils() {} private static List _reindexConfigs = new ArrayList<>(); - public static List getAllReindexConfigs( - List elasticSearchIndexedList) throws IOException { - // Avoid locking & reprocessing - List reindexConfigs = new ArrayList<>(_reindexConfigs); - if (reindexConfigs.isEmpty()) { - for (ElasticSearchIndexed elasticSearchIndexed : elasticSearchIndexedList) { - reindexConfigs.addAll(elasticSearchIndexed.buildReindexConfigs()); - } - _reindexConfigs = new ArrayList<>(reindexConfigs); - } - - return reindexConfigs; - } - public static List getAllReindexConfigs( List elasticSearchIndexedList, - Collection structuredProperties) + Collection> structuredProperties) throws IOException { // Avoid locking & reprocessing List reindexConfigs = new ArrayList<>(_reindexConfigs); if (reindexConfigs.isEmpty()) { for (ElasticSearchIndexed elasticSearchIndexed : elasticSearchIndexedList) { - reindexConfigs.addAll( - elasticSearchIndexed.buildReindexConfigsWithAllStructProps(structuredProperties)); + reindexConfigs.addAll(elasticSearchIndexed.buildReindexConfigs(structuredProperties)); } _reindexConfigs = new ArrayList<>(reindexConfigs); } diff --git a/docs/api/tutorials/structured-properties.md b/docs/api/tutorials/structured-properties.md index b4363141f630bb..4f830f25a8ec3b 100644 --- a/docs/api/tutorials/structured-properties.md +++ b/docs/api/tutorials/structured-properties.md @@ -16,9 +16,11 @@ Learn more about structured properties in the [Structured Properties Feature Gui This guide will show you how to execute the following actions with structured properties. 
- Create structured properties - Read structured properties -- Delete structured properties (soft delete) +- Delete structured properties - Add structured properties to a dataset - Patch structured properties (add / remove / update a single property) +- Update structured property with breaking schema changes +- Search using structured properties ## Prerequisites @@ -87,9 +89,9 @@ datahub properties upsert -f {properties_yaml} If successful, you should see `Created structured property urn:li:structuredProperty:...` - + -```commandline +```shell curl -X 'POST' -v \ 'http://localhost:8080/openapi/v2/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/propertyDefinition' \ -H 'accept: application/json' \ @@ -120,6 +122,83 @@ curl -X 'POST' -v \ ] }' | jq ``` + + + + +```shell +curl -X 'POST' -v \ + 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/propertyDefinition' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "qualifiedName": "io.acryl.privacy.retentionTime", + "valueType": "urn:li:dataType:datahub.number", + "description": "Retention Time is used to figure out how long to retain records in a dataset", + "displayName": "Retention Time", + "cardinality": "MULTIPLE", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ], + "allowedValues": [ + { + "value": {"double": 30}, + "description": "30 days, usually reserved for datasets that are ephemeral and contain pii" + }, + { + "value": {"double": 60}, + "description": "Use this for datasets that drive monthly reporting but contain pii" + }, + { + "value": {"double": 365}, + "description": "Use this for non-sensitive data that can be retained for longer" + } + ] +}' | jq +``` + +Example Response: + +```json +{ + "urn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "propertyDefinition": { + "value": { + 
"allowedValues": [ + { + "description": "30 days, usually reserved for datasets that are ephemeral and contain pii", + "value": { + "double": 30 + } + }, + { + "description": "Use this for datasets that drive monthly reporting but contain pii", + "value": { + "double": 60 + } + }, + { + "description": "Use this for non-sensitive data that can be retained for longer", + "value": { + "double": 365 + } + } + ], + "displayName": "Retention Time", + "qualifiedName": "io.acryl.privacy.retentionTime", + "valueType": "urn:li:dataType:datahub.number", + "description": "Retention Time is used to figure out how long to retain records in a dataset", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ], + "cardinality": "MULTIPLE" + } + } +} +``` + @@ -137,7 +216,7 @@ datahub properties get --urn {urn} For example, you can run `datahub properties get --urn urn:li:structuredProperty:io.acryl.privacy.retentionTime`. If successful, you should see metadata about your properties returned. -```commandline +```json { "urn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", "qualified_name": "io.acryl.privacy.retentionTime", @@ -167,7 +246,8 @@ If successful, you should see metadata about your properties returned. 
``` - + + Example Request: ``` @@ -178,7 +258,7 @@ curl -X 'GET' -v \ Example Response: -```commandline +```json { "value": { "allowedValues": [ @@ -214,6 +294,58 @@ Example Response: } ``` + + + + +Example Request: +``` +curl -X 'GET' -v \ + 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/propertyDefinition' \ + -H 'accept: application/json' | jq +``` + +Example Response: + +```json +{ + "urn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "propertyDefinition": { + "value": { + "allowedValues": [ + { + "description": "30 days, usually reserved for datasets that are ephemeral and contain pii", + "value": { + "double": 30 + } + }, + { + "description": "Use this for datasets that drive monthly reporting but contain pii", + "value": { + "double": 60 + } + }, + { + "description": "Use this for non-sensitive data that can be retained for longer", + "value": { + "double": 365 + } + } + ], + "displayName": "Retention Time", + "qualifiedName": "io.acryl.privacy.retentionTime", + "valueType": "urn:li:dataType:datahub.number", + "description": "Retention Time is used to figure out how long to retain records in a dataset", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ], + "cardinality": "MULTIPLE" + } + } +} +``` + @@ -250,12 +382,13 @@ If successful, you should see `Update succeeded for urn:li:dataset:...` - + + Following command will set structured properties `retentionTime` as `60.0` to a dataset `urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)`. Please note that the structured property and the dataset must exist before executing this command. 
(You can create sample datasets using the `datahub docker ingest-sample-data`) -```commandline +```shell curl -X 'POST' -v \ 'http://localhost:8080/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \ -H 'accept: application/json' \ @@ -272,6 +405,50 @@ curl -X 'POST' -v \ }' | jq ``` + + + + +Following command will set structured properties `retentionTime` as `60.0` to a dataset `urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)`. +Please note that the structured property and the dataset must exist before executing this command. (You can create sample datasets using the `datahub docker ingest-sample-data`) + +```shell +curl -X 'POST' -v \ + 'http://localhost:8080/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "values": [ + {"double": 60.0} + ] + } + ] +}' | jq +``` +Example Response: + +```json +{ + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "structuredProperties": { + "value": { + "properties": [ + { + "values": [ + { + "double": 60 + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime" + } + ] + } + } +} +``` @@ -299,11 +476,11 @@ For this example, we'll extend create a second structured property and apply bot After this your system should include both `io.acryl.privacy.retentionTime` and `io.acryl.privacy.retentionTime02`. - + Let's start by creating the second structured property. 
-``` +```shell curl -X 'POST' -v \ 'http://localhost:8080/openapi/v2/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime02/propertyDefinition' \ -H 'accept: application/json' \ @@ -331,10 +508,10 @@ curl -X 'POST' -v \ ``` This command will attach one of each of the two properties to our test dataset `urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)` -Specically, this will set `io.acryl.privacy.retentionTime` as `60.0` and `io.acryl.privacy.retentionTime02` as `bar2`. +Specifically, this will set `io.acryl.privacy.retentionTime` as `60.0` and `io.acryl.privacy.retentionTime02` as `bar2`. -``` +```shell curl -X 'POST' -v \ 'http://localhost:8080/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \ -H 'accept: application/json' \ @@ -357,6 +534,128 @@ curl -X 'POST' -v \ }' | jq ``` + + + + +Let's start by creating the second structured property. + +```shell +curl -X 'POST' -v \ + 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime02/propertyDefinition' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "qualifiedName": "io.acryl.privacy.retentionTime02", + "displayName": "Retention Time 02", + "valueType": "urn:li:dataType:datahub.string", + "allowedValues": [ + { + "value": {"string": "foo2"}, + "description": "test foo2 value" + }, + { + "value": {"string": "bar2"}, + "description": "test bar2 value" + } + ], + "cardinality": "SINGLE", + "entityTypes": [ + "urn:li:entityType:datahub.dataset" + ] +}' | jq +``` + +Example Response: + +```json +{ + "urn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02", + "propertyDefinition": { + "value": { + "allowedValues": [ + { + "value": { + "string": "foo2" + }, + "description": "test foo2 value" + }, + { + "value": { + "string": "bar2" + }, + "description": "test bar2 value" + 
} + ], + "entityTypes": [ + "urn:li:entityType:datahub.dataset" + ], + "qualifiedName": "io.acryl.privacy.retentionTime02", + "displayName": "Retention Time 02", + "cardinality": "SINGLE", + "valueType": "urn:li:dataType:datahub.string" + } + } +} +``` + +This command will attach one of each of the two properties to our test dataset `urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)` +Specifically, this will set `io.acryl.privacy.retentionTime` as `60.0` and `io.acryl.privacy.retentionTime02` as `bar2`. + + +```shell +curl -X 'POST' -v \ + 'http://localhost:8080/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "values": [ + {"double": 60.0} + ] + }, + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02", + "values": [ + {"string": "bar2"} + ] + } + ] +}' | jq +``` + +Example Response: + +```json +{ + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "structuredProperties": { + "value": { + "properties": [ + { + "values": [ + { + "double": 60 + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime" + }, + { + "values": [ + { + "string": "bar2" + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02" + } + ] + } + } +} +``` + @@ -375,9 +674,9 @@ The expected state of our test dataset include 2 structured properties. We'd like to remove the first one (`io.acryl.privacy.retentionTime`) and preserve the second property. (`io.acryl.privacy.retentionTime02`). 
- + -``` +```shell curl -X 'PATCH' -v \ 'http://localhost:8080/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \ -H 'accept: application/json' \ @@ -398,7 +697,7 @@ curl -X 'PATCH' -v \ ``` The response will show that the expected property has been removed. -``` +```json { "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", "aspects": { @@ -420,6 +719,51 @@ The response will show that the expected property has been removed. } ``` + + + +```shell +curl -X 'PATCH' -v \ + 'http://localhost:8080/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json-patch+json' \ + -d '{ + "patch": [ + { + "op": "remove", + "path": "/properties/urn:li:structuredProperty:io.acryl.privacy.retentionTime" + } + ], + "arrayPrimaryKeys": { + "properties": [ + "propertyUrn" + ] + } + }' | jq +``` +The response will show that the expected property has been removed. + +```json +{ + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "structuredProperties": { + "value": { + "properties": [ + { + "values": [ + { + "string": "bar2" + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02" + } + ] + } + } +} +``` + + #### Expected Outcomes @@ -436,9 +780,9 @@ You can see that the first property has been removed and the second property is In this example, we'll add the property back with a different value, preserving the existing property. 
- + -``` +```shell curl -X 'PATCH' -v \ 'http://localhost:8080/openapi/v2/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \ -H 'accept: application/json' \ @@ -466,8 +810,9 @@ curl -X 'PATCH' -v \ }' | jq ``` -Below is the expected response: -``` +Example Response: + +```json { "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", "aspects": { @@ -497,26 +842,91 @@ Below is the expected response: } ``` -The response shows that the property was re-added with the new value bar instead of the previous value foo. +The response shows that the property was re-added with the new value 365.0 instead of the previous value 60.0. - -#### Expected Outcomes -You can see that the first property has been added back with a new value and the second property is still present. + -

- -

+```shell +curl -X 'PATCH' -v \ + 'http://localhost:8080/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Ahive%2CSampleHiveDataset%2CPROD%29/structuredProperties' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json-patch+json' \ + -d '{ + "patch": [ + { + "op": "add", + "path": "/properties/urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "value": { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "values": [ + { + "double": 365.0 + } + ] + } + } + ], + "arrayPrimaryKeys": { + "properties": [ + "propertyUrn" + ] + } + }' | jq +``` +Example Response: + +```json +{ + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "structuredProperties": { + "value": { + "properties": [ + { + "values": [ + { + "string": "bar2" + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02" + }, + { + "values": [ + { + "double": 365 + } + ], + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime" + } + ] + } + } +} +``` + +The response shows that the property was re-added with the new value 365 instead of the previous value 60. + +
+ + + +#### Expected Outcomes +You can see that the first property has been added back with a new value and the second property is still present. + +

+ +

## Delete Structured Properties -There are two types of deletion present in DataHub: hard and soft delete. As of the current release only the soft delete is supported for Structured Properties. +There are two types of deletion present in DataHub: hard and soft delete. :::note SOFT DELETE -A soft deleted Structured Property does not remove any underlying data on the Structured Property entity or the Structured Property's values written to other entities. The soft delete is 100% reversible with zero data loss. When a Structured Property is soft deleted, a few operations are not available. +A soft deleted Structured Property does not remove any underlying data on the Structured Property entity or the Structured Property's values written to other entities. +The soft delete is 100% reversible with zero data loss. When a Structured Property is soft deleted, a few operations are not available. Structured Property Soft Delete Effects: @@ -526,6 +936,19 @@ Structured Property Soft Delete Effects: - Search filters using a soft deleted Structured Property will be denied ::: +:::note HARD DELETE +A hard deleted Structured Property REMOVES all underlying data for the Structured Property entity and the Structured Property's values written to other entities. +The hard delete is NOT reversible. + +Structured Property Hard Delete Effects: + +- Structured Property entity is removed +- Structured Property values are removed via PATCH MCPs on their respective entities +- Rollback is not possible +- Elasticsearch index mappings will continue to contain references to the hard deleted property until reindex +::: + +### Soft Delete @@ -537,11 +960,11 @@ datahub delete --urn {urn} ``` - + The following command will soft delete the test property by writing to the status aspect. 
-``` +```shell curl -X 'POST' \ 'http://localhost:8080/openapi/v2/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/status?systemMetadata=false' \ -H 'accept: application/json' \ @@ -553,7 +976,7 @@ curl -X 'POST' \ If you want to **remove the soft delete**, you can do so by either hard deleting the status aspect or changing the removed boolean to `false` like below. -``` +```shell curl -X 'POST' \ 'http://localhost:8080/openapi/v2/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/status?systemMetadata=false' \ -H 'accept: application/json' \ @@ -563,5 +986,544 @@ curl -X 'POST' \ }' | jq ``` + + + + +The following command will soft delete the test property by writing to the status aspect. + +```shell +curl -X 'POST' \ + 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/status?systemMetadata=false' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ +"removed": true +}' | jq +``` + +Example Response: + +```json +{ + "urn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "status": { + "value": { + "removed": true + } + } +} +``` + +If you want to **remove the soft delete**, you can do so by either hard deleting the status aspect or changing the removed boolean to `false` like below. + +```shell +curl -X 'POST' \ + 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/status?systemMetadata=false' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ +"removed": false +}' | jq +``` + +Example Response: + +```json +{ + "urn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "status": { + "value": { + "removed": false + } + } +} +``` + + + + + +### Hard Delete + + + + +The following command will hard delete the test property. 
+ +```commandline +datahub delete --urn {urn} --hard +``` + + + + + +The following command will hard delete the test property. + +```shell +curl -v -X 'DELETE' \ + 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime' +``` + +Example Response: + +```text +> DELETE /openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime HTTP/1.1 +> Host: localhost:8080 +> User-Agent: curl/8.4.0 +> Accept: */* +> +< HTTP/1.1 200 OK +< Date: Fri, 14 Jun 2024 17:30:27 GMT +< Content-Length: 0 +< Server: Jetty(11.0.19) +``` + + + + +#### Index Mappings Cleanup + +After the asynchronous deletion of all Structured Property values, triggered by the above +hard delete, has been processed, it is possible to remove the remaining index mappings. Note that if even 1 Structured Property value remains +the mapping will not be removed for a given entity index. + +Run the DataHub system-update job (automatically run with every helm upgrade or install and quickstart) with +the following environment variables enabled. + +This will trigger an ES reindex, which will take time to complete. During the process the entire index is recreated. + +```shell +ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true +ENABLE_STRUCTURED_PROPERTIES_SYSTEM_UPDATE=true +``` + +## Update Structured Property With Breaking Schema Changes + +This section will demonstrate how to make backwards incompatible schema changes. Making backwards incompatible +schema changes will remove previously written data. + +Breaking schema changes are implemented by setting a version string within the Structured Property definition. This +version must be in the following format: `yyyyMMddhhmmss`, e.g. `20240614080000` + +:::IMPORTANT NOTES +Old values will not be retrievable after the new Structured Property definition is applied. + +The old values will be subject to deletion asynchronously (future work). 
+::: + +In the following example, we'll revisit the `retentionTime` structured property and apply a breaking change +by changing the cardinality from `MULTIPLE` to `SINGLE`. Normally this change would be rejected as a +backwards incompatible change since values that were previously written may have multiple values written +which would no longer be valid. + + + + +Edit the previously created definition yaml: Change the cardinality to `SINGLE` and add a `version`. + +```yaml +- id: io.acryl.privacy.retentionTime + # - urn: urn:li:structuredProperty:io.acryl.privacy.retentionTime # optional if id is provided + qualified_name: io.acryl.privacy.retentionTime # required if urn is provided + type: number + cardinality: SINGLE + version: '20240614080000' + display_name: Retention Time + entity_types: + - dataset # or urn:li:entityType:datahub.dataset + - dataFlow + description: "Retention Time is used to figure out how long to retain records in a dataset" + allowed_values: + - value: 30 + description: 30 days, usually reserved for datasets that are ephemeral and contain pii + - value: 90 + description: Use this for datasets that drive monthly reporting but contain pii + - value: 365 + description: Use this for non-sensitive data that can be retained for longer +``` + +Use the CLI to create your properties: +```commandline +datahub properties upsert -f {properties_yaml} +``` + +If successful, you should see `Created structured property urn:li:structuredProperty:...` + + + + +Change the cardinality to `SINGLE` and add a `version`. 
+ +```shell +curl -X 'POST' -v \ + 'http://localhost:8080/openapi/v3/entity/structuredProperty/urn%3Ali%3AstructuredProperty%3Aio.acryl.privacy.retentionTime/propertyDefinition' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "qualifiedName": "io.acryl.privacy.retentionTime", + "valueType": "urn:li:dataType:datahub.number", + "description": "Retention Time is used to figure out how long to retain records in a dataset", + "displayName": "Retention Time", + "cardinality": "SINGLE", + "version": "20240614080000", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ], + "allowedValues": [ + { + "value": {"double": 30}, + "description": "30 days, usually reserved for datasets that are ephemeral and contain pii" + }, + { + "value": {"double": 60}, + "description": "Use this for datasets that drive monthly reporting but contain pii" + }, + { + "value": {"double": 365}, + "description": "Use this for non-sensitive data that can be retained for longer" + } + ] +}' | jq +``` + +Example Response: + +```json +{ + "urn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "propertyDefinition": { + "value": { + "allowedValues": [ + { + "description": "30 days, usually reserved for datasets that are ephemeral and contain pii", + "value": { + "double": 30 + } + }, + { + "description": "Use this for datasets that drive monthly reporting but contain pii", + "value": { + "double": 60 + } + }, + { + "description": "Use this for non-sensitive data that can be retained for longer", + "value": { + "double": 365 + } + } + ], + "displayName": "Retention Time", + "qualifiedName": "io.acryl.privacy.retentionTime", + "valueType": "urn:li:dataType:datahub.number", + "description": "Retention Time is used to figure out how long to retain records in a dataset", + "entityTypes": [ + "urn:li:entityType:datahub.dataset", + "urn:li:entityType:datahub.dataFlow" + ], + "version": "20240614080000", + 
"cardinality": "SINGLE" + } + } +} +``` + + +## Structured Properties & Search + +Currently Structured Properties can be used to filter search results. This currently excludes fulltext search. + +The following examples re-use the two previously defined Structured Properties. + +`io.acryl.privacy.retentionTime` - An example numeric property. + +`io.acryl.privacy.retentionTime02` - An example string property. + + + + +Range Query: + +Document should be returned based on the previously assigned value of 60. + +```graphql +query { + scrollAcrossEntities( + input: { + types: DATASET, + count: 10, + query: "*", + orFilters: { + and: [ + { + field: "structuredProperties.io.acryl.privacy.retentionTime", + condition: GREATER_THAN, + values: [ + "45.0" + ] + } + ] + } + } + ) { + searchResults { + entity { + urn, + type + } + } + } +} +``` + +Exists Query: + +Document should be returned based on the previously assigned value. + +```graphql +query { + scrollAcrossEntities( + input: { + types: DATASET, + count: 10, + query: "*", + orFilters: { + and: [ + { + field: "structuredProperties.io.acryl.privacy.retentionTime", + condition: EXISTS + } + ] + } + } + ) { + searchResults { + entity { + urn, + type + } + } + } +} +``` + +Equality Query: + +Document should be returned based on the previously assigned value of 'bar2'. + +```graphql +query { + scrollAcrossEntities( + input: { + types: DATASET, + count: 10, + query: "*", + orFilters: { + and: [ + { + field: "structuredProperties.io.acryl.privacy.retentionTime02", + condition: EQUAL + values: [ + "bar2" + ] + } + ] + } + } + ) { + searchResults { + entity { + urn, + type + } + } + } +} +``` + + + + + +Unlike GraphQL which has a parsed input object for filtering, OpenAPI only includes a structured query which +relies on the `query_string` syntax. See the Elasticsearch [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-query-string-query.html) for detailed syntax. 
+ +In order to use the `query_string` syntax we'll need to know a bit about the Structured Property's definition such +as whether it is versioned or un-versioned and its type. This information will be added to the `query` URL parameter. + +Un-versioned Example: + +Structured Property URN - `urn:li:structuredProperty:io.acryl.privacy.retentionTime` + +Elasticsearch Field Name - `structuredProperties.io_acryl_privacy_retentionTime` + +Versioned Example: + +Structured Property Version - `20240614080000` + +Structured Property Type - `string` + +Structured Property URN - `urn:li:structuredProperty:io.acryl.privacy.retentionTime02` + +Elasticsearch Field Name - `structuredProperties._versioned.io_acryl_privacy_retentionTime02.20240614080000.string` + +Range Query: + +query - `structuredProperties.io_acryl_privacy_retentionTime:>45` + +```shell +curl -X 'GET' \ + 'http://localhost:9002/openapi/v3/entity/dataset?systemMetadata=false&aspects=datasetKey&aspects=structuredProperties&count=10&sort=urn&sortOrder=ASCENDING&query=structuredProperties.io_acryl_privacy_retentionTime%3A%3E45' \ + -H 'accept: application/json' +``` + +Example Response: + +```json +{ + "entities": [ + { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "datasetKey": { + "value": { + "name": "SampleHiveDataset", + "platform": "urn:li:dataPlatform:hive", + "origin": "PROD" + } + }, + "structuredProperties": { + "value": { + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "values": [ + { + "double": 60 + } + ] + }, + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02", + "values": [ + { + "string": "bar2" + } + ] + } + ] + } + } + } + ] +} +``` + +Exists Query: + +query - `_exists_:structuredProperties.io_acryl_privacy_retentionTime` + +```shell +curl -X 'GET' \ + 
'http://localhost:9002/openapi/v3/entity/dataset?systemMetadata=false&aspects=datasetKey&aspects=structuredProperties&count=10&sort=urn&sortOrder=ASCENDING&query=_exists_%3AstructuredProperties.io_acryl_privacy_retentionTime' \ + -H 'accept: application/json' +``` + +Example Response: + +```json +{ + "entities": [ + { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "datasetKey": { + "value": { + "name": "SampleHiveDataset", + "platform": "urn:li:dataPlatform:hive", + "origin": "PROD" + } + }, + "structuredProperties": { + "value": { + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "values": [ + { + "double": 60 + } + ] + }, + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02", + "values": [ + { + "string": "bar2" + } + ] + } + ] + } + } + } + ] +} +``` + +Equality Query: + +query - `structuredProperties._versioned.io_acryl_privacy_retentionTime02.20240614080000.string` + +```shell +curl -X 'GET' \ + 'http://localhost:9002/openapi/v3/entity/dataset?systemMetadata=false&aspects=datasetKey&aspects=structuredProperties&count=10&sort=urn&sortOrder=ASCENDING&query=structuredProperties._versioned.io_acryl_privacy_retentionTime02.20240614080000.string' \ + -H 'accept: application/json' +``` + +Example Response: + +```json +{ + "entities": [ + { + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + "datasetKey": { + "value": { + "name": "SampleHiveDataset", + "platform": "urn:li:dataPlatform:hive", + "origin": "PROD" + } + }, + "structuredProperties": { + "value": { + "properties": [ + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime", + "values": [ + { + "double": 60 + } + ] + }, + { + "propertyUrn": "urn:li:structuredProperty:io.acryl.privacy.retentionTime02", + "values": [ + { + "string": "bar2" + } + ] + } + ] + } + } + } + ] +} +``` + + + \ No newline at end of file diff --git 
a/docs/deploy/environment-vars.md b/docs/deploy/environment-vars.md index e2354e398ecb97..3314d2db1f467f 100644 --- a/docs/deploy/environment-vars.md +++ b/docs/deploy/environment-vars.md @@ -45,23 +45,25 @@ DataHub works. ## Search -| Variable | Default | Unit/Type | Components | Description | -|-----------------------------------------------------|------------------------|-----------|-----------------------------------------------------------------|--------------------------------------------------------------------------| -| `INDEX_PREFIX` | `` | string | [`GMS`, `MAE Consumer`, `Elasticsearch Setup`, `System Update`] | Prefix Elasticsearch indices with the given string. | -| `ELASTICSEARCH_NUM_SHARDS_PER_INDEX` | 1 | integer | [`System Update`] | Default number of shards per Elasticsearch index. | -| `ELASTICSEARCH_NUM_REPLICAS_PER_INDEX` | 1 | integer | [`System Update`] | Default number of replica per Elasticsearch index. | -| `ELASTICSEARCH_BUILD_INDICES_RETENTION_VALUE` | 60 | integer | [`System Update`] | Number of units for the retention of Elasticsearch clone/backup indices. | -| `ELASTICSEARCH_BUILD_INDICES_RETENTION_UNIT` | DAYS | string | [`System Update`] | Unit for the retention of Elasticsearch clone/backup indices. | -| `ELASTICSEARCH_QUERY_EXACT_MATCH_EXCLUSIVE` | `false` | boolean | [`GMS`] | Only return exact matches when using quotes. | -| `ELASTICSEARCH_QUERY_EXACT_MATCH_WITH_PREFIX` | `true` | boolean | [`GMS`] | Include prefix match in exact match results. | -| `ELASTICSEARCH_QUERY_EXACT_MATCH_FACTOR` | 10.0 | float | [`GMS`] | Multiply by this number on true exact match. | -| `ELASTICSEARCH_QUERY_EXACT_MATCH_PREFIX_FACTOR` | 1.6 | float | [`GMS`] | Multiply by this number when prefix match. | -| `ELASTICSEARCH_QUERY_EXACT_MATCH_CASE_FACTOR` | 0.7 | float | [`GMS`] | Multiply by this number when case insensitive match. 
| -| `ELASTICSEARCH_QUERY_EXACT_MATCH_ENABLE_STRUCTURED` | `true` | boolean | [`GMS`] | When using structured query, also include exact matches. | -| `ELASTICSEARCH_QUERY_PARTIAL_URN_FACTOR` | 0.5 | float | [`GMS`] | Multiply by this number when partial token match on URN) | -| `ELASTICSEARCH_QUERY_PARTIAL_FACTOR` | 0.4 | float | [`GMS`] | Multiply by this number when partial token match on non-URN field. | -| `ELASTICSEARCH_QUERY_CUSTOM_CONFIG_ENABLED` | `false` | boolean | [`GMS`] | Enable search query and ranking customization configuration. | -| `ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE` | `search_config.yml` | string | [`GMS`] | The location of the search customization configuration. | +| Variable | Default | Unit/Type | Components | Description | +|-----------------------------------------------------|---------------------|-----------|-----------------------------------------------------------------|--------------------------------------------------------------------------| +| `INDEX_PREFIX` | `` | string | [`GMS`, `MAE Consumer`, `Elasticsearch Setup`, `System Update`] | Prefix Elasticsearch indices with the given string. | +| `ELASTICSEARCH_NUM_SHARDS_PER_INDEX` | 1 | integer | [`System Update`] | Default number of shards per Elasticsearch index. | +| `ELASTICSEARCH_NUM_REPLICAS_PER_INDEX` | 1 | integer | [`System Update`] | Default number of replica per Elasticsearch index. | +| `ELASTICSEARCH_BUILD_INDICES_RETENTION_VALUE` | 60 | integer | [`System Update`] | Number of units for the retention of Elasticsearch clone/backup indices. | +| `ELASTICSEARCH_BUILD_INDICES_RETENTION_UNIT` | DAYS | string | [`System Update`] | Unit for the retention of Elasticsearch clone/backup indices. | +| `ELASTICSEARCH_QUERY_EXACT_MATCH_EXCLUSIVE` | `false` | boolean | [`GMS`] | Only return exact matches when using quotes. | +| `ELASTICSEARCH_QUERY_EXACT_MATCH_WITH_PREFIX` | `true` | boolean | [`GMS`] | Include prefix match in exact match results. 
| +| `ELASTICSEARCH_QUERY_EXACT_MATCH_FACTOR` | 10.0 | float | [`GMS`] | Multiply by this number on true exact match. | +| `ELASTICSEARCH_QUERY_EXACT_MATCH_PREFIX_FACTOR` | 1.6 | float | [`GMS`] | Multiply by this number when prefix match. | +| `ELASTICSEARCH_QUERY_EXACT_MATCH_CASE_FACTOR` | 0.7 | float | [`GMS`] | Multiply by this number when case insensitive match. | +| `ELASTICSEARCH_QUERY_EXACT_MATCH_ENABLE_STRUCTURED` | `true` | boolean | [`GMS`] | When using structured query, also include exact matches. | +| `ELASTICSEARCH_QUERY_PARTIAL_URN_FACTOR` | 0.5 | float | [`GMS`] | Multiply by this number when partial token match on URN) | +| `ELASTICSEARCH_QUERY_PARTIAL_FACTOR` | 0.4 | float | [`GMS`] | Multiply by this number when partial token match on non-URN field. | +| `ELASTICSEARCH_QUERY_CUSTOM_CONFIG_ENABLED` | `false` | boolean | [`GMS`] | Enable search query and ranking customization configuration. | +| `ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE` | `search_config.yml` | string | [`GMS`] | The location of the search customization configuration. | +| `ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX` | `false` | boolean | [`System Update`] | Enable reindexing on Elasticsearch schema changes. | +| `ENABLE_STRUCTURED_PROPERTIES_SYSTEM_UPDATE` | `false` | boolean | [`System Update`] | Enable reindexing to remove hard deleted structured properties. 
| ## Kafka diff --git a/entity-registry/build.gradle b/entity-registry/build.gradle index 484a1f3271dbb4..2dedea1f16d99c 100644 --- a/entity-registry/build.gradle +++ b/entity-registry/build.gradle @@ -1,6 +1,7 @@ plugins { id 'pegasus' id 'java-library' + id 'java-test-fixtures' } dependencies { @@ -45,5 +46,8 @@ dependencies { exclude group: 'com.fasterxml.jackson.core', module: 'jackson-databind' } + testFixturesImplementation externalDependency.mockito + testFixturesCompileOnly externalDependency.lombok + testFixturesAnnotationProcessor externalDependency.lombok } compileTestJava.dependsOn tasks.getByPath(':entity-registry:custom-test-model:modelDeploy') diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/RetrieverContext.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/RetrieverContext.java index df1b9c6a6259cb..629d2c0aad52d2 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/RetrieverContext.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/RetrieverContext.java @@ -1,7 +1,11 @@ package com.linkedin.metadata.aspect; +import com.linkedin.metadata.entity.SearchRetriever; + public interface RetrieverContext { GraphRetriever getGraphRetriever(); AspectRetriever getAspectRetriever(); + + SearchRetriever getSearchRetriever(); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java index 031625da0477c3..a302632e1936fd 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java @@ -129,6 +129,16 @@ static Stream applyMCPSideEffects( .flatMap(mcpSideEffect -> mcpSideEffect.apply(items, retrieverContext)); } + default Stream applyPostMCPSideEffects(Collection items) { + return applyPostMCPSideEffects(items, getRetrieverContext()); + } + + 
static Stream applyPostMCPSideEffects( + Collection items, @Nonnull RetrieverContext retrieverContext) { + return retrieverContext.getAspectRetriever().getEntityRegistry().getAllMCPSideEffects().stream() + .flatMap(mcpSideEffect -> mcpSideEffect.postApply(items, retrieverContext)); + } + default Stream applyMCLSideEffects(Collection items) { return applyMCLSideEffects(items, getRetrieverContext()); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java index c812aea0c55d78..183b726fe04400 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java @@ -82,9 +82,34 @@ public PluginFactory loadPlugins() { this.mutationHooks = buildMutationHooks(this.pluginConfiguration); this.mclSideEffects = buildMCLSideEffects(this.pluginConfiguration); this.mcpSideEffects = buildMCPSideEffects(this.pluginConfiguration); + logSummary( + Stream.of( + this.aspectPayloadValidators, + this.mutationHooks, + this.mclSideEffects, + this.mcpSideEffects) + .flatMap(List::stream) + .collect(Collectors.toList())); return this; } + private void logSummary(List pluginSpecs) { + if (!pluginSpecs.isEmpty()) { + log.info( + "Enabled {} plugins. {}", + pluginSpecs.size(), + pluginSpecs.stream() + .map( + v -> + String.join( + ", ", + Collections.singletonList( + String.format("%s", v.getConfig().getClassName())))) + .sorted() + .collect(Collectors.toList())); + } + } + /** * Memory intensive operation because of the size of the jars. 
Limit packages, classes scanned, * cache results diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java index 845f967c0a5281..52920d8c6f3966 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java @@ -2,6 +2,8 @@ import com.linkedin.metadata.aspect.RetrieverContext; import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.batch.MCLItem; +import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.plugins.PluginSpec; import java.util.Collection; import java.util.function.BiFunction; @@ -28,6 +30,40 @@ public final Stream apply( retrieverContext); } + /** + * Apply MCP Side Effects after commit. + * + * @param mclItems MCL items generated by MCP commit. + * @param retrieverContext accessors for aspect and graph data + * @return additional MCPs + */ + public final Stream postApply( + Collection mclItems, @Nonnull RetrieverContext retrieverContext) { + return postMCPSideEffect( + mclItems.stream() + .filter(item -> shouldApply(item.getChangeType(), item.getUrn(), item.getAspectSpec())) + .collect(Collectors.toList()), + retrieverContext); + } + + /** + * Generate additional MCPs during the transaction of the given MCPs + * + * @param changeMCPS MCPs being committed + * @param retrieverContext accessors for aspect and graph data + * @return additional MCPs + */ protected abstract Stream applyMCPSideEffect( Collection changeMCPS, @Nonnull RetrieverContext retrieverContext); + + /** + * Generate additional MCPs after the transaction of an MCP. This task will not block the + * production of the MCL for downstream processing. 
+ * + * @param mclItems MCL items generated from committing the MCP + * @param retrieverContext accessors for aspect and graph data + * @return additional MCPs + */ + protected abstract Stream postMCPSideEffect( + Collection mclItems, @Nonnull RetrieverContext retrieverContext); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java new file mode 100644 index 00000000000000..eaa106b8d1f638 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/entity/SearchRetriever.java @@ -0,0 +1,24 @@ +package com.linkedin.metadata.entity; + +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.ScrollResult; +import java.util.List; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +public interface SearchRetriever { + /** + * Returns search results for the given entities, filtered and sorted. + * + * @param entities list of entities to search + * @param filters filters to apply + * @param scrollId pagination token + * @param count size of a page + * @return result of the search + */ + ScrollResult scroll( + @Nonnull List entities, + @Nullable Filter filters, + @Nullable String scrollId, + int count); +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java b/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java index d8fb67c60469bf..0ed492643980f5 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/StructuredPropertyUtils.java @@ -1,33 +1,44 @@ package com.linkedin.metadata.models; import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; import static 
com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_FIELD; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD_PREFIX; import com.google.common.collect.ImmutableSet; import com.linkedin.common.Status; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.GetMode; +import com.linkedin.entity.Aspect; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.structured.PrimitivePropertyValue; import com.linkedin.structured.StructuredProperties; +import com.linkedin.structured.StructuredPropertyDefinition; import com.linkedin.structured.StructuredPropertyValueAssignment; import com.linkedin.structured.StructuredPropertyValueAssignmentArray; import com.linkedin.util.Pair; import java.sql.Date; import java.time.format.DateTimeParseException; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.Map; +import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; +@Slf4j public class StructuredPropertyUtils { private StructuredPropertyUtils() {} @@ -35,26 +46,141 @@ private StructuredPropertyUtils() {} static final Date MIN_DATE = Date.valueOf("1000-01-01"); static final Date MAX_DATE = Date.valueOf("9999-12-31"); + public static LogicalValueType getLogicalValueType( + StructuredPropertyDefinition structuredPropertyDefinition) { + return 
getLogicalValueType(structuredPropertyDefinition.getValueType()); + } + + public static LogicalValueType getLogicalValueType(@Nullable Urn valueType) { + String valueTypeId = getValueTypeId(valueType); + if ("string".equals(valueTypeId)) { + return LogicalValueType.STRING; + } else if ("date".equals(valueTypeId)) { + return LogicalValueType.DATE; + } else if ("number".equals(valueTypeId)) { + return LogicalValueType.NUMBER; + } else if ("urn".equals(valueTypeId)) { + return LogicalValueType.URN; + } else if ("rich_text".equals(valueTypeId)) { + return LogicalValueType.RICH_TEXT; + } + return LogicalValueType.UNKNOWN; + } + + @Nullable + public static String getValueTypeId(@Nullable final Urn valueType) { + if (valueType != null) { + String valueTypeId = valueType.getId(); + if (valueTypeId.startsWith("datahub.")) { + valueTypeId = valueTypeId.split("\\.")[1]; + } + return valueTypeId.toLowerCase(); + } else { + return null; + } + } + + /** + * Lookup structured property definition given the name used for the field in APIs such as a + * search filter or aggregation query facet name. 
+ * + * @param fieldOrFacetName the field name used in a filter or facet name in an aggregation query + * @param aspectRetriever method to look up the definition aspect + * @return the structured property definition if found + */ + public static Optional> + lookupDefinitionFromFilterOrFacetName( + @Nonnull String fieldOrFacetName, @Nullable AspectRetriever aspectRetriever) { + if (fieldOrFacetName.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD + ".")) { + String fqn = + fieldOrFacetName + .substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1) + .replace(".keyword", "") + .replace(".delimited", ""); + Urn urn = toURNFromFQN(fqn); + Map> result = + Objects.requireNonNull(aspectRetriever) + .getLatestAspectObjects( + Collections.singleton(urn), + Collections.singleton(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)); + Optional definition = + Optional.ofNullable( + result + .getOrDefault(urn, Collections.emptyMap()) + .getOrDefault(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, null)); + return definition.map( + definitonAspect -> + Pair.of(urn, new StructuredPropertyDefinition(definitonAspect.data()))); + } + return Optional.empty(); + } + /** - * Sanitizes fully qualified name for use in an ElasticSearch field name Replaces . and " " + * Given the structured property definition extract the Elasticsearch field name with nesting and + * character replacement. + * + *

Sanitizes fully qualified name for use in an ElasticSearch field name Replaces `.` * characters * - * @param fullyQualifiedName The original fully qualified name of the property + * @param definition The structured property definition * @return The sanitized version that can be used as a field name */ - public static String sanitizeStructuredPropertyFQN(@Nonnull String fullyQualifiedName) { - if (fullyQualifiedName.contains(" ")) { + public static String toElasticsearchFieldName( + @Nonnull Urn propertyUrn, @Nullable StructuredPropertyDefinition definition) { + String qualifiedName = definition != null ? definition.getQualifiedName() : propertyUrn.getId(); + + if (qualifiedName.contains(" ")) { throw new IllegalArgumentException( "Fully qualified structured property name cannot contain spaces"); } - return fullyQualifiedName.replace('.', '_'); + if (definition != null && definition.getVersion(GetMode.NULL) != null) { + // includes type suffix + return String.join( + ".", + STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD, + definition.getQualifiedName().replace('.', '_'), + definition.getVersion(), + getLogicalValueType(definition).name().toLowerCase()); + } else { + // un-typed property + return qualifiedName.replace('.', '_'); + } + } + + /** + * Return an elasticsearch type from structured property type + * + * @param fieldName filter or facet field name + * @param aspectRetriever aspect retriever + * @return elasticsearch type + */ + public static Set toElasticsearchFieldType( + @Nonnull String fieldName, @Nullable AspectRetriever aspectRetriever) { + LogicalValueType logicalValueType = + lookupDefinitionFromFilterOrFacetName(fieldName, aspectRetriever) + .map(definition -> getLogicalValueType(definition.getValue())) + .orElse(LogicalValueType.STRING); + + switch (logicalValueType) { + case NUMBER: + return Collections.singleton("double"); + case DATE: + return Collections.singleton("long"); + case RICH_TEXT: + return Collections.singleton("text"); + case UNKNOWN: + 
case STRING: + case URN: + default: + return Collections.singleton("keyword"); + } } public static void validateStructuredPropertyFQN( @Nonnull Collection fullyQualifiedNames, @Nonnull AspectRetriever aspectRetriever) { Set structuredPropertyUrns = fullyQualifiedNames.stream() - .map(StructuredPropertyUtils::toURNFromFieldName) + .map(StructuredPropertyUtils::toURNFromFQN) .collect(Collectors.toSet()); Set removedUrns = getRemovedUrns(structuredPropertyUrns, aspectRetriever); if (!removedUrns.isEmpty()) { @@ -63,13 +189,19 @@ public static void validateStructuredPropertyFQN( } } - public static Urn toURNFromFieldName(@Nonnull String fieldName) { + /** + * Given a Structured Property fqn, calculate the expected URN + * + * @param fqn structured property's fqn + * @return the expected structured property urn + */ + private static Urn toURNFromFQN(@Nonnull String fqn) { return UrnUtils.getUrn( - String.join(":", "urn:li", STRUCTURED_PROPERTY_ENTITY_NAME, fieldName.replace('_', '.'))); + String.join(":", "urn:li", STRUCTURED_PROPERTY_ENTITY_NAME, fqn.replace('_', '.'))); } public static void validateFilter( - @Nullable Filter filter, @Nonnull AspectRetriever aspectRetriever) { + @Nullable Filter filter, @Nullable AspectRetriever aspectRetriever) { if (filter == null) { return; @@ -80,7 +212,7 @@ public static void validateFilter( if (filter.getCriteria() != null) { for (Criterion c : filter.getCriteria()) { if (c.getField().startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) { - fieldNames.add(c.getField().substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1)); + fieldNames.add(stripStructuredPropertyPrefix(c.getField())); } } } @@ -89,15 +221,24 @@ public static void validateFilter( for (ConjunctiveCriterion cc : filter.getOr()) { for (Criterion c : cc.getAnd()) { if (c.getField().startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) { - fieldNames.add(c.getField().substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1)); + 
fieldNames.add(stripStructuredPropertyPrefix(c.getField())); } } } } if (!fieldNames.isEmpty()) { - validateStructuredPropertyFQN(fieldNames, aspectRetriever); + validateStructuredPropertyFQN(fieldNames, Objects.requireNonNull(aspectRetriever)); + } + } + + private static String stripStructuredPropertyPrefix(String s) { + if (s.startsWith(STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD_PREFIX)) { + return s.substring(STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD.length() + 1).split("[.]")[0]; + } else if (s.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) { + return s.substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1).split("[.]")[0]; } + return s; } public static Date toDate(PrimitivePropertyValue value) throws DateTimeParseException { @@ -120,7 +261,7 @@ public static boolean isValidDate(PrimitivePropertyValue value) { return date.compareTo(MIN_DATE) >= 0 && date.compareTo(MAX_DATE) <= 0; } - private static Set getRemovedUrns(Set urns, AspectRetriever aspectRetriever) { + private static Set getRemovedUrns(Set urns, @Nonnull AspectRetriever aspectRetriever) { return aspectRetriever .getLatestAspectObjects(urns, ImmutableSet.of(STATUS_ASPECT_NAME)) .entrySet() diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java index 8e877d1d23aadc..1137c7c55880ff 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java @@ -7,6 +7,8 @@ import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.RetrieverContext; import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.batch.MCLItem; +import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import 
com.linkedin.metadata.models.registry.ConfigEntityRegistry; import java.util.Collection; @@ -72,5 +74,11 @@ protected Stream applyMCPSideEffect( Collection changeMCPS, @Nonnull RetrieverContext retrieverContext) { return changeMCPS.stream(); } + + @Override + protected Stream postMCPSideEffect( + Collection mclItems, @Nonnull RetrieverContext retrieverContext) { + return Stream.of(); + } } } diff --git a/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java similarity index 96% rename from entity-registry/src/test/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java rename to entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java index 9c7cd997a9af0f..62e22efa9da165 100644 --- a/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java +++ b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/MockAspectRetriever.java @@ -1,7 +1,5 @@ package com.linkedin.test.metadata.aspect; -import static org.mockito.Mockito.mock; - import com.linkedin.common.Status; import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; @@ -17,6 +15,7 @@ import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nonnull; +import org.mockito.Mockito; public class MockAspectRetriever implements AspectRetriever { private final Map> data; @@ -64,6 +63,6 @@ public Map> getLatestAspectObjects( @Nonnull @Override public EntityRegistry getEntityRegistry() { - return mock(EntityRegistry.class); + return Mockito.mock(EntityRegistry.class); } } diff --git a/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/TestEntityRegistry.java b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/TestEntityRegistry.java similarity index 100% rename from 
entity-registry/src/test/java/com/linkedin/test/metadata/aspect/TestEntityRegistry.java rename to entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/TestEntityRegistry.java diff --git a/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCL.java b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCL.java new file mode 100644 index 00000000000000..7dd889c48b8747 --- /dev/null +++ b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCL.java @@ -0,0 +1,32 @@ +package com.linkedin.test.metadata.aspect.batch; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.batch.MCLItem; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.mxe.MetadataChangeLog; +import javax.annotation.Nonnull; +import lombok.Builder; +import lombok.Getter; + +@Builder(toBuilder = true) +@Getter +public class TestMCL implements MCLItem { + private Urn urn; + private ChangeType changeType; + private MetadataChangeLog metadataChangeLog; + private RecordTemplate previousRecordTemplate; + private RecordTemplate recordTemplate; + private EntitySpec entitySpec; + private AspectSpec aspectSpec; + private AuditStamp auditStamp; + + @Nonnull + @Override + public String getAspectName() { + return getAspectSpec().getName(); + } +} diff --git a/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java b/entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java similarity index 100% rename from entity-registry/src/test/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java rename to entity-registry/src/testFixtures/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java diff --git 
a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index 752bf44cf43549..9a7b8287e2c6a7 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -21,6 +21,10 @@ public class Constants { public static final String STRUCTURED_PROPERTY_MAPPING_FIELD = "structuredProperties"; public static final String STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX = STRUCTURED_PROPERTY_MAPPING_FIELD + "."; + public static final String STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD = "_versioned"; + public static final String STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD_PREFIX = + String.join( + ".", STRUCTURED_PROPERTY_MAPPING_FIELD, STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD, ""); // !!!!!!! IMPORTANT !!!!!!! // This effectively sets the max aspect size to 16 MB. Used in deserialization of messages. @@ -342,6 +346,7 @@ public class Constants { // Structured Property public static final String STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME = "propertyDefinition"; + public static final String STRUCTURED_PROPERTY_KEY_ASPECT_NAME = "structuredPropertyKey"; // Form public static final String FORM_INFO_ASPECT_NAME = "formInfo"; diff --git a/metadata-ingestion/scripts/modeldocgen.py b/metadata-ingestion/scripts/modeldocgen.py index ef6ce765c23edd..ea7813f0ca85bc 100644 --- a/metadata-ingestion/scripts/modeldocgen.py +++ b/metadata-ingestion/scripts/modeldocgen.py @@ -504,6 +504,7 @@ class AspectPluginConfig: className: str enabled: bool supportedEntityAspectNames: List[EntityAspectName] + packageScan: Optional[List[str]] = None supportedOperations: Optional[List[str]] = None diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index 5bd73c844b3800..6666e335446884 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -92,6 +92,8 @@ dependencies { testImplementation 'ch.qos.logback:logback-classic:1.4.7' 
testImplementation 'net.datafaker:datafaker:1.9.0' + testImplementation(testFixtures(project(":entity-registry"))) + testAnnotationProcessor externalDependency.lombok constraints { diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java index f4473c8db3148c..43a7d00248a224 100644 --- a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java @@ -10,6 +10,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; +import com.linkedin.data.ByteString; import com.linkedin.data.template.RecordTemplate; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.AspectRetriever; @@ -21,7 +22,9 @@ import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.utils.EntityKeyUtils; +import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.metadata.utils.SystemMetadataUtils; +import com.linkedin.mxe.GenericAspect; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.SystemMetadata; import jakarta.json.Json; @@ -78,6 +81,29 @@ public RecordTemplate getRecordTemplate() { return null; } + @Nonnull + public MetadataChangeProposal getMetadataChangeProposal() { + if (metadataChangeProposal != null) { + return metadataChangeProposal; + } else { + GenericAspect genericAspect = new GenericAspect(); + genericAspect.setContentType("application/json"); + genericAspect.setValue(ByteString.copyString(getPatch().toString(), StandardCharsets.UTF_8)); + + final MetadataChangeProposal mcp = new MetadataChangeProposal(); + mcp.setEntityUrn(getUrn()); + 
mcp.setChangeType(getChangeType()); + mcp.setEntityType(getEntitySpec().getName()); + mcp.setAspectName(getAspectName()); + mcp.setAspect(genericAspect); + mcp.setSystemMetadata(getSystemMetadata()); + mcp.setEntityKeyAspect( + GenericRecordUtils.serializeAspect( + EntityKeyUtils.convertUrnToEntityKey(getUrn(), entitySpec.getKeyAspectSpec()))); + return mcp; + } + } + public ChangeItemImpl applyPatch(RecordTemplate recordTemplate, AspectRetriever aspectRetriever) { ChangeItemImpl.ChangeItemImplBuilder builder = ChangeItemImpl.builder() diff --git a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java index 21bac3cbb0e616..6f0cd51af0793b 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java @@ -132,7 +132,7 @@ public static List getAdditionalChanges( getProposalFromAspectForDefault( entry.getKey(), entry.getValue(), entityKeyAspect, templateItem), templateItem.getAuditStamp(), - opContext.getRetrieverContext().get().getAspectRetriever())) + opContext.getAspectRetrieverOpt().get())) .filter(Objects::nonNull); }) .collect(Collectors.toList()); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 80f976f9ae81e8..34c836d760a7d7 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -44,6 +44,7 @@ import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.batch.MCLItem; import com.linkedin.metadata.aspect.batch.MCPItem; import 
com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; import com.linkedin.metadata.aspect.utils.DefaultAspectsUtil; @@ -53,6 +54,7 @@ import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.entity.ebean.batch.DeleteItemImpl; +import com.linkedin.metadata.entity.ebean.batch.MCLItemImpl; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult; import com.linkedin.metadata.entity.retention.BulkApplyRetentionArgs; @@ -100,6 +102,7 @@ import java.util.function.Consumer; import java.util.stream.Collectors; import java.util.stream.Stream; +import java.util.stream.StreamSupport; import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.persistence.EntityNotFoundException; @@ -152,6 +155,7 @@ public class EntityServiceImpl implements EntityService { @Nullable @Getter private SearchIndicesService updateIndicesService; private final PreProcessHooks preProcessHooks; protected static final int MAX_KEYS_PER_QUERY = 500; + protected static final int MCP_SIDE_EFFECT_KAFKA_BATCH_SIZE = 500; private final Integer ebeanMaxTransactionRetry; private final boolean enableBrowseV2; @@ -659,7 +663,7 @@ public List ingestAspects( .recordTemplate(pair.getValue()) .systemMetadata(systemMetadata) .auditStamp(auditStamp) - .build(opContext.getRetrieverContext().get().getAspectRetriever())) + .build(opContext.getAspectRetrieverOpt().get())) .collect(Collectors.toList()); return ingestAspects( opContext, @@ -696,9 +700,50 @@ public List ingestAspects( ingestAspectsToLocalDB(opContext, aspectsBatch, overwrite); List mclResults = emitMCL(opContext, ingestResults, emitMCL); + + processPostCommitMCLSideEffects( + opContext, + mclResults.stream() + .filter(result -> !result.isNoOp()) + .map(UpdateAspectResult::toMCL) + .collect(Collectors.toList())); + return 
mclResults; } + /** + * Process post-commit MCPSideEffects + * + * @param mcls mcls generated + */ + private void processPostCommitMCLSideEffects( + @Nonnull OperationContext opContext, List mcls) { + log.debug("Considering {} MCLs post commit side effects.", mcls.size()); + List batch = + mcls.stream() + .map(mcl -> MCLItemImpl.builder().build(mcl, opContext.getAspectRetrieverOpt().get())) + .collect(Collectors.toList()); + + Iterable> iterable = + () -> + Iterators.partition( + AspectsBatch.applyPostMCPSideEffects(batch, opContext.getRetrieverContext().get()) + .iterator(), + MCP_SIDE_EFFECT_KAFKA_BATCH_SIZE); + StreamSupport.stream(iterable.spliterator(), false) + .forEach( + sideEffects -> { + long count = + ingestProposalAsync( + AspectsBatchImpl.builder() + .items(sideEffects) + .retrieverContext(opContext.getRetrieverContext().get()) + .build()) + .count(); + log.info("Generated {} MCP SideEffects for async processing", count); + }); + } + /** * Checks whether there is an actual update to the aspect by applying the updateLambda If there is * an update, push the new version into the local DB. 
Otherwise, do not push the new version, but @@ -984,7 +1029,7 @@ public RecordTemplate ingestAspectIfNotPresent( .recordTemplate(newValue) .systemMetadata(systemMetadata) .auditStamp(auditStamp) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), opContext.getRetrieverContext().get()) .build(); List ingested = ingestAspects(opContext, aspectsBatch, true, false); @@ -1082,7 +1127,7 @@ private Stream ingestTimeseriesProposal( .recordTemplate( EntityApiUtils.buildKeyAspect( opContext.getEntityRegistry(), item.getUrn())) - .build(opContext.getRetrieverContext().get().getAspectRetriever())) + .build(opContext.getAspectRetrieverOpt().get())) .collect(Collectors.toList()); ingestProposalSync( @@ -1476,7 +1521,7 @@ private RestoreIndicesResult restoreIndices( .auditStamp(auditStamp) .systemMetadata(latestSystemMetadata) .recordTemplate(EntityApiUtils.buildKeyAspect(opContext.getEntityRegistry(), urn)) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); Stream defaultAspectsResult = ingestProposalSync( opContext, @@ -1807,7 +1852,7 @@ private void ingestSnapshotUnion( .recordTemplate(pair.getValue()) .auditStamp(auditStamp) .systemMetadata(systemMetadata) - .build(opContext.getRetrieverContext().get().getAspectRetriever())) + .build(opContext.getAspectRetrieverOpt().get())) .collect(Collectors.toList())) .build(); @@ -1871,6 +1916,7 @@ public RollbackRunResult rollbackWithConditions( Map conditions, boolean hardDelete) { List removedAspects = new ArrayList<>(); + List removedAspectResults = new ArrayList<>(); AtomicInteger rowsDeletedFromEntityDeletion = new AtomicInteger(0); List> futures = @@ -1878,7 +1924,7 @@ public RollbackRunResult rollbackWithConditions( .map( aspectToRemove -> { RollbackResult result = - deleteAspect( + deleteAspectWithoutMCL( opContext, aspectToRemove.getUrn(), aspectToRemove.getAspectName(), @@ -1899,6 +1945,7 
@@ public RollbackRunResult rollbackWithConditions( rowsDeletedFromEntityDeletion.addAndGet(result.additionalRowsAffected); removedAspects.add(aspectToRemove); + removedAspectResults.add(result); return alwaysProduceMCLAsync( opContext, result.getUrn(), @@ -1929,12 +1976,14 @@ public RollbackRunResult rollbackWithConditions( } }); - return new RollbackRunResult(removedAspects, rowsDeletedFromEntityDeletion.get()); + return new RollbackRunResult( + removedAspects, rowsDeletedFromEntityDeletion.get(), removedAspectResults); } @Override public RollbackRunResult deleteUrn(@Nonnull OperationContext opContext, Urn urn) { List removedAspects = new ArrayList<>(); + List removedAspectResults = new ArrayList<>(); Integer rowsDeletedFromEntityDeletion = 0; final EntitySpec spec = @@ -1949,7 +1998,8 @@ public RollbackRunResult deleteUrn(@Nonnull OperationContext opContext, Urn urn) log.warn("Entity to delete does not exist. {}", urn.toString()); } if (latestKey == null || latestKey.getSystemMetadata() == null) { - return new RollbackRunResult(removedAspects, rowsDeletedFromEntityDeletion); + return new RollbackRunResult( + removedAspects, rowsDeletedFromEntityDeletion, removedAspectResults); } SystemMetadata latestKeySystemMetadata = @@ -1957,7 +2007,7 @@ public RollbackRunResult deleteUrn(@Nonnull OperationContext opContext, Urn urn) .map(SystemAspect::getSystemMetadata) .get(); RollbackResult result = - deleteAspect( + deleteAspectWithoutMCL( opContext, urn.toString(), keyAspectName, @@ -1974,6 +2024,7 @@ public RollbackRunResult deleteUrn(@Nonnull OperationContext opContext, Urn urn) rowsDeletedFromEntityDeletion = result.additionalRowsAffected; removedAspects.add(summary); + removedAspectResults.add(result); Future future = alwaysProduceMCLAsync( opContext, @@ -1999,7 +2050,8 @@ public RollbackRunResult deleteUrn(@Nonnull OperationContext opContext, Urn urn) } } - return new RollbackRunResult(removedAspects, rowsDeletedFromEntityDeletion); + return new RollbackRunResult( + 
removedAspects, rowsDeletedFromEntityDeletion, removedAspectResults); } @Override @@ -2052,9 +2104,9 @@ public Set exists( } } + /** Does not emit MCL */ @Nullable - @Override - public RollbackResult deleteAspect( + private RollbackResult deleteAspectWithoutMCL( @Nonnull OperationContext opContext, String urn, String aspectName, @@ -2074,7 +2126,7 @@ public RollbackResult deleteAspect( .urn(entityUrn) .aspectName(aspectName) .auditStamp(auditStamp) - .build(opContext.getRetrieverContext().get().getAspectRetriever()); + .build(opContext.getAspectRetrieverOpt().get()); // Delete validation hooks ValidationExceptionCollection exceptions = @@ -2242,6 +2294,10 @@ public RollbackResult deleteAspect( }, DEFAULT_MAX_TRANSACTION_RETRY); + if (result != null) { + processPostCommitMCLSideEffects(opContext, List.of(result.toMCL(auditStamp))); + } + return result; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java index fe9bcb0daba910..27b603244d3b35 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java @@ -765,9 +765,6 @@ private void removeIncomingEdgesFromNode( _dgraph.executeConsumer(client -> client.newTransaction().doRequest(request)); } - @Override - public void configure() {} - @Override public void clear() { log.debug("dropping Dgraph data"); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java index 37cc6ab6b1eeb3..ada5069d0cabe5 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java @@ -35,6 +35,7 @@ import 
com.linkedin.metadata.shared.ElasticSearchIndexed; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import io.opentelemetry.extension.annotations.WithSpan; import java.io.IOException; import java.util.ArrayList; @@ -274,10 +275,10 @@ public void removeEdgesFromNode( } @Override - public void configure() { + public void reindexAll(Collection> properties) { log.info("Setting up elastic graph index"); try { - for (ReindexConfig config : buildReindexConfigs()) { + for (ReindexConfig config : buildReindexConfigs(properties)) { _indexBuilder.buildIndex(config); } } catch (IOException e) { @@ -286,7 +287,8 @@ public void configure() { } @Override - public List buildReindexConfigs() throws IOException { + public List buildReindexConfigs( + Collection> properties) throws IOException { return List.of( _indexBuilder.buildReindexState( _indexConvention.getIndexName(INDEX_NAME), @@ -294,17 +296,6 @@ public List buildReindexConfigs() throws IOException { Collections.emptyMap())); } - @Override - public List buildReindexConfigsWithAllStructProps( - Collection properties) throws IOException { - return buildReindexConfigs(); - } - - @Override - public void reindexAll() { - configure(); - } - @VisibleForTesting @Override public void clear() { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java index 513672b071c17a..16c0804538dd78 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java @@ -698,11 +698,6 @@ public void removeNodesMatchingLabel(@Nonnull String labelPattern) { runQuery(buildStatement(statement, params)).consume(); } - @Override - public void configure() { - // Do nothing - } - @Override public void clear() { 
removeNodesMatchingLabel(".*"); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java index 399b0aa6e49a64..fea3fafdc845ad 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/MostPopularSource.java @@ -146,7 +146,9 @@ private SearchRequest buildSearchRequest(@Nonnull OperationContext opContext) { // Find the entities with the most views AggregationBuilder aggregation = AggregationBuilders.terms(ENTITY_AGG_NAME) - .field(ESUtils.toKeywordField(DataHubUsageEventConstants.ENTITY_URN, false)) + .field( + ESUtils.toKeywordField( + DataHubUsageEventConstants.ENTITY_URN, false, opContext.getAspectRetriever())) .size(MAX_CONTENT * 2); source.aggregation(aggregation); source.size(0); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyEditedSource.java b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyEditedSource.java index d75470127ded80..afdce0d7145133 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyEditedSource.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyEditedSource.java @@ -5,6 +5,7 @@ import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.datahubusage.DataHubUsageEventConstants; import com.linkedin.metadata.datahubusage.DataHubUsageEventType; import com.linkedin.metadata.entity.EntityService; @@ -104,7 +105,8 @@ public List getRecommendations( @Nonnull RecommendationRequestContext requestContext, @Nullable Filter filter) { 
SearchRequest searchRequest = - buildSearchRequest(opContext.getSessionActorContext().getActorUrn()); + buildSearchRequest( + opContext.getSessionActorContext().getActorUrn(), opContext.getAspectRetriever()); try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "getRecentlyEdited").time()) { final SearchResponse searchResponse = _searchClient.search(searchRequest, RequestOptions.DEFAULT); @@ -128,7 +130,8 @@ public Set getSupportedEntityTypes() { return SUPPORTED_ENTITY_TYPES; } - private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) { + private SearchRequest buildSearchRequest( + @Nonnull Urn userUrn, @Nullable AspectRetriever aspectRetriever) { // TODO: Proactively filter for entity types in the supported set. SearchRequest request = new SearchRequest(); SearchSourceBuilder source = new SearchSourceBuilder(); @@ -136,7 +139,7 @@ private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) { // Filter for the entity edit events of the user requesting recommendation query.must( QueryBuilders.termQuery( - ESUtils.toKeywordField(DataHubUsageEventConstants.ACTOR_URN, false), + ESUtils.toKeywordField(DataHubUsageEventConstants.ACTOR_URN, false, aspectRetriever), userUrn.toString())); // Filter for the entity action events query.must( @@ -148,7 +151,9 @@ private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) { String lastViewed = "last_viewed"; AggregationBuilder aggregation = AggregationBuilders.terms(ENTITY_AGG_NAME) - .field(ESUtils.toKeywordField(DataHubUsageEventConstants.ENTITY_URN, false)) + .field( + ESUtils.toKeywordField( + DataHubUsageEventConstants.ENTITY_URN, false, aspectRetriever)) .size(MAX_CONTENT) .order(BucketOrder.aggregation(lastViewed, false)) .subAggregation( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java index e9613495e8d220..f282470193ae5e 
100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlyViewedSource.java @@ -5,6 +5,7 @@ import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.datahubusage.DataHubUsageEventConstants; import com.linkedin.metadata.datahubusage.DataHubUsageEventType; import com.linkedin.metadata.entity.EntityService; @@ -104,7 +105,8 @@ public List getRecommendations( @Nonnull RecommendationRequestContext requestContext, @Nullable Filter filter) { SearchRequest searchRequest = - buildSearchRequest(opContext.getSessionActorContext().getActorUrn()); + buildSearchRequest( + opContext.getSessionActorContext().getActorUrn(), opContext.getAspectRetriever()); try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "getRecentlyViewed").time()) { final SearchResponse searchResponse = _searchClient.search(searchRequest, RequestOptions.DEFAULT); @@ -128,7 +130,8 @@ public Set getSupportedEntityTypes() { return SUPPORTED_ENTITY_TYPES; } - private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) { + private SearchRequest buildSearchRequest( + @Nonnull Urn userUrn, @Nullable AspectRetriever aspectRetriever) { // TODO: Proactively filter for entity types in the supported set. 
SearchRequest request = new SearchRequest(); SearchSourceBuilder source = new SearchSourceBuilder(); @@ -136,7 +139,7 @@ private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) { // Filter for the entity view events of the user requesting recommendation query.must( QueryBuilders.termQuery( - ESUtils.toKeywordField(DataHubUsageEventConstants.ACTOR_URN, false), + ESUtils.toKeywordField(DataHubUsageEventConstants.ACTOR_URN, false, aspectRetriever), userUrn.toString())); query.must( QueryBuilders.termQuery( @@ -147,7 +150,9 @@ private SearchRequest buildSearchRequest(@Nonnull Urn userUrn) { String lastViewed = "last_viewed"; AggregationBuilder aggregation = AggregationBuilders.terms(ENTITY_AGG_NAME) - .field(ESUtils.toKeywordField(DataHubUsageEventConstants.ENTITY_URN, false)) + .field( + ESUtils.toKeywordField( + DataHubUsageEventConstants.ENTITY_URN, false, aspectRetriever)) .size(MAX_CONTENT) .order(BucketOrder.aggregation(lastViewed, false)) .subAggregation( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java new file mode 100644 index 00000000000000..a5ef1c8fa58b12 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/SearchServiceSearchRetriever.java @@ -0,0 +1,51 @@ +package com.linkedin.metadata.search; + +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.metadata.query.SearchFlags; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.query.filter.SortOrder; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Getter; +import lombok.Setter; + +@Getter +@Builder +public class SearchServiceSearchRetriever implements SearchRetriever { + private static 
final SearchFlags RETRIEVER_SEARCH_FLAGS = + new SearchFlags() + .setFulltext(false) + .setMaxAggValues(20) + .setSkipCache(false) + .setSkipAggregates(true) + .setSkipHighlighting(true) + .setIncludeSoftDeleted(false) + .setIncludeRestricted(false); + + @Setter private OperationContext systemOperationContext; + private final SearchService searchService; + + @Override + public ScrollResult scroll( + @Nonnull List entities, + @Nullable Filter filters, + @Nullable String scrollId, + int count) { + SortCriterion urnSort = new SortCriterion(); + urnSort.setField("urn"); + urnSort.setOrder(SortOrder.ASCENDING); + return searchService.scrollAcrossEntities( + systemOperationContext.withSearchFlags(flags -> RETRIEVER_SEARCH_FLAGS), + entities, + "*", + filters, + urnSort, + scrollId, + null, + count); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java index 19cd1f767f4729..578c34611a75aa 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java @@ -21,6 +21,7 @@ import com.linkedin.metadata.search.utils.SearchUtils; import com.linkedin.metadata.shared.ElasticSearchIndexed; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; import java.io.IOException; import java.util.Collection; @@ -55,24 +56,14 @@ public class ElasticSearchService implements EntitySearchService, ElasticSearchI private final ESWriteDAO esWriteDAO; @Override - public void configure() { - indexBuilders.reindexAll(); + public void reindexAll(Collection> properties) { + indexBuilders.reindexAll(properties); } @Override - public List buildReindexConfigs() { - return indexBuilders.buildReindexConfigs(); - 
} - - @Override - public List buildReindexConfigsWithAllStructProps( - Collection properties) throws IOException { - return indexBuilders.buildReindexConfigsWithAllStructProps(properties); - } - - @Override - public void reindexAll() { - configure(); + public List buildReindexConfigs( + Collection> properties) throws IOException { + return indexBuilders.buildReindexConfigs(properties); } @Override diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java index cc6a0f3e3d6f99..2d04e997740502 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java @@ -81,6 +81,8 @@ public class ESIndexBuilder { @Getter private final boolean enableIndexMappingsReindex; + @Getter private final boolean enableStructuredPropertiesReindex; + @Getter private final ElasticSearchConfiguration elasticSearchConfiguration; @Getter private final GitVersion gitVersion; @@ -101,6 +103,7 @@ public ESIndexBuilder( Map> indexSettingOverrides, boolean enableIndexSettingsReindex, boolean enableIndexMappingsReindex, + boolean enableStructuredPropertiesReindex, ElasticSearchConfiguration elasticSearchConfiguration, GitVersion gitVersion) { this._searchClient = searchClient; @@ -112,6 +115,7 @@ public ESIndexBuilder( this.enableIndexSettingsReindex = enableIndexSettingsReindex; this.enableIndexMappingsReindex = enableIndexMappingsReindex; this.elasticSearchConfiguration = elasticSearchConfiguration; + this.enableStructuredPropertiesReindex = enableStructuredPropertiesReindex; this.gitVersion = gitVersion; RetryConfig config = @@ -143,6 +147,8 @@ public ReindexConfig buildReindexState( .name(indexName) .enableIndexSettingsReindex(enableIndexSettingsReindex) 
.enableIndexMappingsReindex(enableIndexMappingsReindex) + .enableStructuredPropertiesReindex( + enableStructuredPropertiesReindex && !copyStructuredPropertyMappings) .version(gitVersion.getVersion()); Map baseSettings = new HashMap<>(settings); @@ -293,7 +299,7 @@ public void buildIndex(ReindexConfig indexState) throws IOException { * @throws IOException communication issues with ES */ public void applyMappings(ReindexConfig indexState, boolean suppressError) throws IOException { - if (indexState.isPureMappingsAddition() || indexState.isPureStructuredProperty()) { + if (indexState.isPureMappingsAddition() || indexState.isPureStructuredPropertyAddition()) { log.info("Updating index {} mappings in place.", indexState.name()); PutMappingRequest request = new PutMappingRequest(indexState.name()).source(indexState.targetMappings()); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilders.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilders.java index afc831b004ec38..eba4593c9042c5 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilders.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/EntityIndexBuilders.java @@ -1,9 +1,11 @@ package com.linkedin.metadata.search.elasticsearch.indexbuilder; +import com.linkedin.common.urn.Urn; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.shared.ElasticSearchIndexed; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.io.IOException; import java.util.Collection; import java.util.List; @@ -26,8 +28,8 @@ public ESIndexBuilder getIndexBuilder() { } @Override - public void reindexAll() { - for (ReindexConfig config : buildReindexConfigs()) { + public void 
reindexAll(Collection> properties) { + for (ReindexConfig config : buildReindexConfigs(properties)) { try { indexBuilder.buildIndex(config); } catch (IOException e) { @@ -37,26 +39,8 @@ public void reindexAll() { } @Override - public List buildReindexConfigs() { - Map settings = settingsBuilder.getSettings(); - MappingsBuilder.setEntityRegistry(entityRegistry); - return entityRegistry.getEntitySpecs().values().stream() - .map( - entitySpec -> { - try { - Map mappings = MappingsBuilder.getMappings(entitySpec); - return indexBuilder.buildReindexState( - indexConvention.getIndexName(entitySpec), mappings, settings, true); - } catch (IOException e) { - throw new RuntimeException(e); - } - }) - .collect(Collectors.toList()); - } - - @Override - public List buildReindexConfigsWithAllStructProps( - Collection properties) { + public List buildReindexConfigs( + Collection> properties) { Map settings = settingsBuilder.getSettings(); MappingsBuilder.setEntityRegistry(entityRegistry); return entityRegistry.getEntitySpecs().values().stream() @@ -81,7 +65,7 @@ public List buildReindexConfigsWithAllStructProps( * @return index configurations impacted by the new property */ public List buildReindexConfigsWithNewStructProp( - StructuredPropertyDefinition property) { + Urn urn, StructuredPropertyDefinition property) { Map settings = settingsBuilder.getSettings(); MappingsBuilder.setEntityRegistry(entityRegistry); return entityRegistry.getEntitySpecs().values().stream() @@ -89,7 +73,7 @@ public List buildReindexConfigsWithNewStructProp( entitySpec -> { try { Map mappings = - MappingsBuilder.getMappings(entitySpec, List.of(property)); + MappingsBuilder.getMappings(entitySpec, List.of(Pair.of(urn, property))); return indexBuilder.buildReindexState( indexConvention.getIndexName(entitySpec), mappings, settings, true); } catch (IOException e) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java 
b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java index f8d0f165bcddf5..5dc28a8fd598da 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java @@ -2,7 +2,7 @@ import static com.linkedin.metadata.Constants.ENTITY_TYPE_URN_PREFIX; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_FIELD; -import static com.linkedin.metadata.models.StructuredPropertyUtils.sanitizeStructuredPropertyFQN; +import static com.linkedin.metadata.models.StructuredPropertyUtils.toElasticsearchFieldName; import static com.linkedin.metadata.models.annotation.SearchableAnnotation.OBJECT_FIELD_TYPES; import static com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder.*; @@ -17,11 +17,13 @@ import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.utils.ESUtils; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.net.URISyntaxException; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; @@ -52,6 +54,15 @@ public static Map getPartialNgramConfigWithOverrides( public static final String WORD_GRAMS_LENGTH_2 = "wordGrams2"; public static final String WORD_GRAMS_LENGTH_3 = "wordGrams3"; public static final String WORD_GRAMS_LENGTH_4 = "wordGrams4"; + public static final Set SUBFIELDS = + Set.of( + KEYWORD, + DELIMITED, + LENGTH, + NGRAM, + WORD_GRAMS_LENGTH_2, + WORD_GRAMS_LENGTH_3, + WORD_GRAMS_LENGTH_4); // Alias field mappings constants public static final String ALIAS = "alias"; @@ -72,7 +83,7 @@ private MappingsBuilder() {} */ public static Map getMappings( @Nonnull final 
EntitySpec entitySpec, - Collection structuredProperties) { + Collection> structuredProperties) { Map mappings = getMappings(entitySpec); String entityName = entitySpec.getEntityAnnotation().getName(); @@ -80,9 +91,11 @@ public static Map getMappings( getMappingsForStructuredProperty( structuredProperties.stream() .filter( - prop -> { + urnProp -> { try { - return prop.getEntityTypes() + return urnProp + .getSecond() + .getEntityTypes() .contains(Urn.createFromString(ENTITY_TYPE_URN_PREFIX + entityName)); } catch (URISyntaxException e) { return false; @@ -165,10 +178,11 @@ private static Map getMappingsForSystemCreated() { } public static Map getMappingsForStructuredProperty( - Collection properties) { + Collection> properties) { return properties.stream() .map( - property -> { + urnProperty -> { + StructuredPropertyDefinition property = urnProperty.getSecond(); Map mappingForField = new HashMap<>(); String valueType = property.getValueType().getId(); if (valueType.equalsIgnoreCase(LogicalValueType.STRING.name())) { @@ -183,7 +197,7 @@ public static Map getMappingsForStructuredProperty( mappingForField.put(TYPE, ESUtils.DOUBLE_FIELD_TYPE); } return Map.entry( - sanitizeStructuredPropertyFQN(property.getQualifiedName()), mappingForField); + toElasticsearchFieldName(urnProperty.getFirst(), property), mappingForField); }) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ReindexConfig.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ReindexConfig.java index fbb7fcadba8bca..fd8db19ea6a7bc 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ReindexConfig.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ReindexConfig.java @@ -11,6 +11,9 @@ import com.google.common.collect.MapDifference; import com.google.common.collect.Maps; import 
com.linkedin.metadata.search.utils.ESUtils; +import com.linkedin.util.Pair; +import java.util.AbstractMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; @@ -61,6 +64,7 @@ public class ReindexConfig { private final Map targetMappings; private final boolean enableIndexMappingsReindex; private final boolean enableIndexSettingsReindex; + private final boolean enableStructuredPropertiesReindex; private final String version; /* Calculated */ @@ -70,7 +74,8 @@ public class ReindexConfig { private final boolean isPureMappingsAddition; private final boolean isSettingsReindex; private final boolean hasNewStructuredProperty; - private final boolean isPureStructuredProperty; + private final boolean isPureStructuredPropertyAddition; + private final boolean hasRemovedStructuredProperty; public static ReindexConfigBuilder builder() { return new CalculatedBuilder(); @@ -102,7 +107,11 @@ private ReindexConfigBuilder hasNewStructuredProperty(boolean ignored) { return this; } - private ReindexConfigBuilder isPureStructuredProperty(boolean ignored) { + private ReindexConfigBuilder isPureStructuredPropertyAddition(boolean ignored) { + return this; + } + + private ReindexConfigBuilder hasRemovedStructuredProperty(boolean ignored) { return this; } @@ -156,7 +165,7 @@ public ReindexConfig build() { super.requiresApplyMappings = !mappingsDiff.entriesDiffering().isEmpty() || !mappingsDiff.entriesOnlyOnRight().isEmpty(); - super.isPureStructuredProperty = + super.isPureStructuredPropertyAddition = mappingsDiff .entriesDiffering() .keySet() @@ -169,6 +178,22 @@ public ReindexConfig build() { super.requiresApplyMappings && mappingsDiff.entriesDiffering().isEmpty() && !mappingsDiff.entriesOnlyOnRight().isEmpty(); + super.hasNewStructuredProperty = + (mappingsDiff.entriesDiffering().containsKey(STRUCTURED_PROPERTY_MAPPING_FIELD) + || mappingsDiff + .entriesOnlyOnRight() + .containsKey(STRUCTURED_PROPERTY_MAPPING_FIELD)) + && 
structuredPropertiesDiffCount(super.currentMappings, super.targetMappings) + .getSecond() + > 0; + super.hasRemovedStructuredProperty = + (mappingsDiff.entriesDiffering().containsKey(STRUCTURED_PROPERTY_MAPPING_FIELD) + || mappingsDiff + .entriesOnlyOnLeft() + .containsKey(STRUCTURED_PROPERTY_MAPPING_FIELD)) + && structuredPropertiesDiffCount(super.currentMappings, super.targetMappings) + .getFirst() + > 0; if (super.requiresApplyMappings && super.isPureMappingsAddition) { log.info( @@ -181,19 +206,6 @@ public ReindexConfig build() { super.name, mappingsDiff.entriesDiffering()); } - super.hasNewStructuredProperty = - (mappingsDiff.entriesDiffering().containsKey(STRUCTURED_PROPERTY_MAPPING_FIELD) - || mappingsDiff - .entriesOnlyOnRight() - .containsKey(STRUCTURED_PROPERTY_MAPPING_FIELD)) - && getOrDefault( - super.currentMappings, - List.of("properties", STRUCTURED_PROPERTY_MAPPING_FIELD, "properties")) - .size() - < getOrDefault( - super.targetMappings, - List.of("properties", STRUCTURED_PROPERTY_MAPPING_FIELD, "properties")) - .size(); /* Consider analysis and settings changes */ super.requiresApplySettings = !isSettingsEqual() || !isAnalysisEqual(); @@ -208,7 +220,26 @@ < getOrDefault( "Index: {} - There's diff between new mappings, however reindexing is DISABLED.", super.name); } + } else if (super.hasRemovedStructuredProperty) { + if (super.enableIndexMappingsReindex + && super.enableIndexMappingsReindex + && super.enableStructuredPropertiesReindex) { + super.requiresApplyMappings = true; + super.requiresReindex = true; + } else { + if (!super.enableIndexMappingsReindex) { + log.warn( + "Index: {} - There's diff between new mappings, however reindexing is DISABLED.", + super.name); + } + if (!super.enableStructuredPropertiesReindex) { + log.warn( + "Index: {} - There's a removed Structured Property, however Structured Property reindexing is DISABLED.", + super.name); + } + } } + if (super.isSettingsReindex) { try { if (!isAnalysisEqual()) { @@ -255,6 +286,46 @@
private static TreeMap getOrDefault( } } + /** + * Return counts for removed and added structured properties based on the difference between the + * existing mapping configuration and the target configuration + * + * @return count of structured properties to be removed and added to the index mapping + */ + private static Pair structuredPropertiesDiffCount( + Map current, Map target) { + Set currentStructuredProperties = new HashSet<>(); + Set targetStructuredProperties = new HashSet<>(); + + // add non-versioned property ids + currentStructuredProperties.addAll( + getOrDefault( + current, List.of("properties", STRUCTURED_PROPERTY_MAPPING_FIELD, "properties")) + .keySet() + .stream() + .filter(k -> !STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD.equals(k)) + .collect(Collectors.toSet())); + targetStructuredProperties.addAll( + getOrDefault( + target, List.of("properties", STRUCTURED_PROPERTY_MAPPING_FIELD, "properties")) + .keySet() + .stream() + .filter(k -> !STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD.equals(k)) + .collect(Collectors.toSet())); + + // Extract versioned/typed property ids + currentStructuredProperties.addAll(getVersionedStructuredPropertyIds(current)); + targetStructuredProperties.addAll(getVersionedStructuredPropertyIds(target)); + + return Pair.of( + currentStructuredProperties.stream() + .filter(p -> !targetStructuredProperties.contains(p)) + .count(), + targetStructuredProperties.stream() + .filter(p -> !currentStructuredProperties.contains(p)) + .count()); + } + private boolean isAnalysisEqual() { if (super.targetSettings == null || !super.targetSettings.containsKey("index")) { return true; @@ -327,7 +398,7 @@ private static MapDifference calculateMapDifference( .collect(Collectors.toSet()); if (!targetObjectFields.isEmpty()) { - log.info("Object fields filtered from comparison: {}", targetObjectFields); + log.debug("Object fields filtered from comparison: {}", targetObjectFields); Map filteredCurrentMappings = removeKeys(currentMappings, 
targetObjectFields); Map filteredTargetMappings = removeKeys(targetMappings, targetObjectFields); @@ -336,6 +407,29 @@ private static MapDifference calculateMapDifference( return Maps.difference(currentMappings, targetMappings); } + + /** + * Given a mapping, return a unique dot-delimited id for each versioned/typed structured property + * + * @param mappings Elastic mappings + * @return set of unique ids for each versioned/typed structured property + */ + private static Set getVersionedStructuredPropertyIds(Map mappings) { + Map versionedMappings = + getOrDefault( + mappings, + List.of( + "properties", + STRUCTURED_PROPERTY_MAPPING_FIELD, + "properties", + STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD, + "properties")); + + return flattenStructuredPropertyPath( + Map.entry(STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD, versionedMappings), 0) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()); + } } private static Map removeKeys( @@ -373,4 +467,22 @@ private static boolean equalsGroup(Map newSettings, Settings old } return true; } + + /** + * Flatten nested maps into a stream of entries keyed by dot-delimited path, descending only while depth is below 5 + * + * @param entry root entry; its value is descended into when it is a Map + * @return stream of entries whose keys are dot-delimited paths + */ + private static Stream> flattenStructuredPropertyPath( + Map.Entry entry, int depth) { + if (entry.getValue() instanceof Map && depth < 5) { + Map nested = (Map) entry.getValue(); + + return nested.entrySet().stream() + .map(e -> new AbstractMap.SimpleEntry(entry.getKey() + "."
+ e.getKey(), e.getValue())) + .flatMap(e -> flattenStructuredPropertyPath(e, depth + 1)); + } + return Stream.of(entry); + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java index 97ec9f2192e523..b55418d12c7c29 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java @@ -607,11 +607,7 @@ private QueryBuilder buildQueryStringV2( EntitySpec entitySpec = opContext.getEntityRegistry().getEntitySpec(entityName); QueryBuilder query = - SearchRequestHandler.getBuilder( - entitySpec, - searchConfiguration, - customSearchConfiguration, - opContext.getRetrieverContext().get().getAspectRetriever()) + SearchRequestHandler.getBuilder(entitySpec, searchConfiguration, customSearchConfiguration) .getQuery( finalOpContext, input, @@ -647,11 +643,7 @@ private QueryBuilder buildQueryStringBrowseAcrossEntities( final BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery(); QueryBuilder query = - SearchRequestHandler.getBuilder( - entitySpecs, - searchConfiguration, - customSearchConfiguration, - opContext.getRetrieverContext().get().getAspectRetriever()) + SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) .getQuery( finalOpContext, input, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index d8c5c3317a2ec1..b537a396340277 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -115,10 +115,7 @@ private SearchResult executeAndExtract( return 
transformIndexIntoEntityName( opContext.getSearchContext().getIndexConvention(), SearchRequestHandler.getBuilder( - entitySpec, - searchConfiguration, - customSearchConfiguration, - opContext.getRetrieverContext().get().getAspectRetriever()) + entitySpec, searchConfiguration, customSearchConfiguration) .extractResult(opContext, searchResponse, filter, from, size)); } catch (Exception e) { log.error("Search query failed", e); @@ -215,10 +212,7 @@ private ScrollResult executeAndExtract( return transformIndexIntoEntityName( opContext.getSearchContext().getIndexConvention(), SearchRequestHandler.getBuilder( - entitySpecs, - searchConfiguration, - customSearchConfiguration, - opContext.getRetrieverContext().get().getAspectRetriever()) + entitySpecs, searchConfiguration, customSearchConfiguration) .extractScrollResult( opContext, searchResponse, filter, keepAlive, size, supportsPointInTime())); } catch (Exception e) { @@ -261,11 +255,7 @@ public SearchResult search( Filter transformedFilters = transformFilterForEntities(postFilters, indexConvention); // Step 1: construct the query final SearchRequest searchRequest = - SearchRequestHandler.getBuilder( - entitySpecs, - searchConfiguration, - customSearchConfiguration, - opContext.getRetrieverContext().get().getAspectRetriever()) + SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) .getSearchRequest( opContext, finalInput, transformedFilters, sortCriterion, from, size, facets); searchRequest.indices( @@ -298,11 +288,7 @@ public SearchResult filter( EntitySpec entitySpec = opContext.getEntityRegistry().getEntitySpec(entityName); Filter transformedFilters = transformFilterForEntities(filters, indexConvention); final SearchRequest searchRequest = - SearchRequestHandler.getBuilder( - entitySpec, - searchConfiguration, - customSearchConfiguration, - opContext.getRetrieverContext().get().getAspectRetriever()) + SearchRequestHandler.getBuilder(entitySpec, searchConfiguration, 
customSearchConfiguration) .getFilterRequest(opContext, transformedFilters, sortCriterion, from, size); searchRequest.indices(indexConvention.getIndexName(entitySpec)); @@ -335,10 +321,7 @@ public AutoCompleteResult autoComplete( EntitySpec entitySpec = opContext.getEntityRegistry().getEntitySpec(entityName); IndexConvention indexConvention = opContext.getSearchContext().getIndexConvention(); AutocompleteRequestHandler builder = - AutocompleteRequestHandler.getBuilder( - entitySpec, - customSearchConfiguration, - opContext.getRetrieverContext().get().getAspectRetriever()); + AutocompleteRequestHandler.getBuilder(entitySpec, customSearchConfiguration); SearchRequest req = builder.getSearchRequest( opContext, @@ -383,11 +366,7 @@ public Map aggregateByValue( } IndexConvention indexConvention = opContext.getSearchContext().getIndexConvention(); final SearchRequest searchRequest = - SearchRequestHandler.getBuilder( - entitySpecs, - searchConfiguration, - customSearchConfiguration, - opContext.getRetrieverContext().get().getAspectRetriever()) + SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) .getAggregationRequest( opContext, field, @@ -502,10 +481,7 @@ private SearchRequest getScrollRequest( } return SearchRequestHandler.getBuilder( - entitySpecs, - searchConfiguration, - customSearchConfiguration, - opContext.getRetrieverContext().get().getAspectRetriever()) + entitySpecs, searchConfiguration, customSearchConfiguration) .getSearchRequest( opContext, finalInput, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java index 971cd7298639d1..c6abc7c261f3dd 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java @@ -1,10 +1,11 @@ package com.linkedin.metadata.search.elasticsearch.query.request; import static com.linkedin.metadata.Constants.*; -import static com.linkedin.metadata.search.utils.ESUtils.toFacetField; +import static com.linkedin.metadata.search.utils.ESUtils.toParentField; import static com.linkedin.metadata.utils.SearchUtil.*; import com.linkedin.data.template.LongMap; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.StructuredPropertyUtils; @@ -135,15 +136,16 @@ private AggregationBuilder facetToAggregationBuilder( opContext.getSearchContext().getSearchFlags().getMaxAggValues(), configs.getMaxTermBucketSize()); for (int i = facets.size() - 1; i >= 0; i--) { - String facet = facets.get(i); - if (facet.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) { - String structPropFqn = facet.substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1); - StructuredPropertyUtils.validateStructuredPropertyFQN( - Set.of(structPropFqn), opContext.getRetrieverContext().get().getAspectRetriever()); - facet = - STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX - + StructuredPropertyUtils.sanitizeStructuredPropertyFQN(structPropFqn); - } + String facet = + StructuredPropertyUtils.lookupDefinitionFromFilterOrFacetName( + facets.get(i), opContext.getAspectRetriever()) + .map( + urnDefinition -> + STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX + + StructuredPropertyUtils.toElasticsearchFieldName( + urnDefinition.getFirst(), urnDefinition.getSecond())) + .orElse(facets.get(i)); + AggregationBuilder aggBuilder; if (facet.contains(AGGREGATION_SPECIAL_TYPE_DELIMITER)) { List specialTypeFields = List.of(facet.split(AGGREGATION_SPECIAL_TYPE_DELIMITER)); @@ -152,9 +154,11 @@ private AggregationBuilder facetToAggregationBuilder( aggBuilder 
= INDEX_VIRTUAL_FIELD.equalsIgnoreCase(specialTypeFields.get(1)) ? AggregationBuilders.missing(inputFacet) - .field(getAggregationField(ES_INDEX_FIELD)) + .field(getAggregationField(ES_INDEX_FIELD, opContext.getAspectRetriever())) : AggregationBuilders.missing(inputFacet) - .field(getAggregationField(specialTypeFields.get(1))); + .field( + getAggregationField( + specialTypeFields.get(1), opContext.getAspectRetriever())); break; default: throw new UnsupportedOperationException( @@ -164,11 +168,11 @@ private AggregationBuilder facetToAggregationBuilder( aggBuilder = facet.equalsIgnoreCase(INDEX_VIRTUAL_FIELD) ? AggregationBuilders.terms(inputFacet) - .field(getAggregationField(ES_INDEX_FIELD)) + .field(getAggregationField(ES_INDEX_FIELD, opContext.getAspectRetriever())) .size(maxTermBuckets) .minDocCount(0) : AggregationBuilders.terms(inputFacet) - .field(getAggregationField(facet)) + .field(getAggregationField(facet, opContext.getAspectRetriever())) .size(maxTermBuckets); } if (lastAggBuilder != null) { @@ -180,13 +184,14 @@ private AggregationBuilder facetToAggregationBuilder( return lastAggBuilder; } - private String getAggregationField(final String facet) { + private String getAggregationField( + final String facet, @Nullable AspectRetriever aspectRetriever) { if (facet.startsWith("has")) { // Boolean hasX field, not a keyword field. Return the name of the original facet. return facet; } // Otherwise assume that this field is of keyword type. 
- return ESUtils.toKeywordField(facet, false); + return ESUtils.toKeywordField(facet, false, aspectRetriever); } List getDefaultFacetFieldsFromAnnotation(final SearchableAnnotation annotation) { @@ -229,10 +234,12 @@ private String computeDisplayName(String name) { } List extractAggregationMetadata( - @Nonnull SearchResponse searchResponse, @Nullable Filter filter) { + @Nonnull SearchResponse searchResponse, + @Nullable Filter filter, + @Nullable AspectRetriever aspectRetriever) { final List aggregationMetadataList = new ArrayList<>(); if (searchResponse.getAggregations() == null) { - return addFiltersToAggregationMetadata(aggregationMetadataList, filter); + return addFiltersToAggregationMetadata(aggregationMetadataList, filter, aspectRetriever); } for (Map.Entry entry : searchResponse.getAggregations().getAsMap().entrySet()) { @@ -243,7 +250,7 @@ List extractAggregationMetadata( processMissingAggregations(entry, aggregationMetadataList); } } - return addFiltersToAggregationMetadata(aggregationMetadataList, filter); + return addFiltersToAggregationMetadata(aggregationMetadataList, filter, aspectRetriever); } private void processTermAggregations( @@ -343,38 +350,45 @@ private static Map extractTermAggregations( /** Injects the missing conjunctive filters into the aggregations list. 
*/ public List addFiltersToAggregationMetadata( - @Nonnull final List originalMetadata, @Nullable final Filter filter) { + @Nonnull final List originalMetadata, + @Nullable final Filter filter, + @Nullable AspectRetriever aspectRetriever) { if (filter == null) { return originalMetadata; } if (filter.getOr() != null) { - addOrFiltersToAggregationMetadata(filter.getOr(), originalMetadata); + addOrFiltersToAggregationMetadata(filter.getOr(), originalMetadata, aspectRetriever); } else if (filter.getCriteria() != null) { - addCriteriaFiltersToAggregationMetadata(filter.getCriteria(), originalMetadata); + addCriteriaFiltersToAggregationMetadata( + filter.getCriteria(), originalMetadata, aspectRetriever); } return originalMetadata; } void addOrFiltersToAggregationMetadata( @Nonnull final ConjunctiveCriterionArray or, - @Nonnull final List originalMetadata) { + @Nonnull final List originalMetadata, + @Nullable AspectRetriever aspectRetriever) { for (ConjunctiveCriterion conjunction : or) { // For each item in the conjunction, inject an empty aggregation if necessary - addCriteriaFiltersToAggregationMetadata(conjunction.getAnd(), originalMetadata); + addCriteriaFiltersToAggregationMetadata( + conjunction.getAnd(), originalMetadata, aspectRetriever); } } private void addCriteriaFiltersToAggregationMetadata( @Nonnull final CriterionArray criteria, - @Nonnull final List originalMetadata) { + @Nonnull final List originalMetadata, + @Nullable AspectRetriever aspectRetriever) { for (Criterion criterion : criteria) { - addCriterionFiltersToAggregationMetadata(criterion, originalMetadata); + addCriterionFiltersToAggregationMetadata(criterion, originalMetadata, aspectRetriever); } } private void addCriterionFiltersToAggregationMetadata( @Nonnull final Criterion criterion, - @Nonnull final List aggregationMetadata) { + @Nonnull final List aggregationMetadata, + @Nullable AspectRetriever aspectRetriever) { // We should never see duplicate aggregation for the same field in aggregation 
metadata list. final Map aggregationMetadataMap = @@ -382,7 +396,7 @@ private void addCriterionFiltersToAggregationMetadata( .collect(Collectors.toMap(AggregationMetadata::getName, agg -> agg)); // Map a filter criterion to a facet field (e.g. domains.keyword -> domains) - final String finalFacetField = toFacetField(criterion.getField()); + final String finalFacetField = toParentField(criterion.getField(), aspectRetriever); if (finalFacetField == null) { log.warn( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java index 37a7e5adde2dcb..8ee9587ca2ae40 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java @@ -8,7 +8,6 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.StringArray; -import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.custom.AutocompleteConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.config.search.custom.QueryConfiguration; @@ -52,16 +51,13 @@ public class AutocompleteRequestHandler { private static final Map AUTOCOMPLETE_QUERY_BUILDER_BY_ENTITY_NAME = new ConcurrentHashMap<>(); - private final AspectRetriever aspectRetriever; - private final CustomizedQueryHandler customizedQueryHandler; private final EntitySpec entitySpec; public AutocompleteRequestHandler( @Nonnull EntitySpec entitySpec, - @Nullable CustomSearchConfiguration customSearchConfiguration, - @Nonnull AspectRetriever aspectRetriever) { + @Nullable CustomSearchConfiguration customSearchConfiguration) { this.entitySpec = 
entitySpec; List fieldSpecs = entitySpec.getSearchableFieldSpecs(); this.customizedQueryHandler = CustomizedQueryHandler.builder(customSearchConfiguration).build(); @@ -87,17 +83,13 @@ public AutocompleteRequestHandler( set1.addAll(set2); return set1; })); - this.aspectRetriever = aspectRetriever; } public static AutocompleteRequestHandler getBuilder( @Nonnull EntitySpec entitySpec, - @Nullable CustomSearchConfiguration customSearchConfiguration, - @Nonnull AspectRetriever aspectRetriever) { + @Nullable CustomSearchConfiguration customSearchConfiguration) { return AUTOCOMPLETE_QUERY_BUILDER_BY_ENTITY_NAME.computeIfAbsent( - entitySpec, - k -> - new AutocompleteRequestHandler(entitySpec, customSearchConfiguration, aspectRetriever)); + entitySpec, k -> new AutocompleteRequestHandler(entitySpec, customSearchConfiguration)); } public SearchRequest getSearchRequest( @@ -120,7 +112,8 @@ public SearchRequest getSearchRequest( // Initial query with input filters BoolQueryBuilder filterQuery = - ESUtils.buildFilterQuery(filter, false, searchableFieldTypes, aspectRetriever); + ESUtils.buildFilterQuery( + filter, false, searchableFieldTypes, opContext.getAspectRetriever()); baseQuery.filter(filterQuery); // Add autocomplete query diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java index 4835ebe164e1c7..6a1f24a92e4a3a 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java @@ -7,6 +7,7 @@ import static com.linkedin.metadata.search.elasticsearch.query.request.CustomizedQueryHandler.unquote; import com.google.common.annotations.VisibleForTesting; +import com.linkedin.metadata.aspect.AspectRetriever; import 
com.linkedin.metadata.config.search.ExactMatchConfiguration; import com.linkedin.metadata.config.search.PartialConfiguration; import com.linkedin.metadata.config.search.SearchConfiguration; @@ -109,7 +110,11 @@ private QueryBuilder buildInternalQuery( getSimpleQuery(opContext.getEntityRegistry(), customQueryConfig, entitySpecs, sanitizedQuery) .ifPresent(finalQuery::should); getPrefixAndExactMatchQuery( - opContext.getEntityRegistry(), customQueryConfig, entitySpecs, sanitizedQuery) + opContext.getEntityRegistry(), + customQueryConfig, + entitySpecs, + sanitizedQuery, + opContext.getAspectRetriever()) .ifPresent(finalQuery::should); } else { final String withoutQueryPrefix = @@ -121,7 +126,11 @@ private QueryBuilder buildInternalQuery( .ifPresent(finalQuery::should); if (exactMatchConfiguration.isEnableStructured()) { getPrefixAndExactMatchQuery( - opContext.getEntityRegistry(), customQueryConfig, entitySpecs, withoutQueryPrefix) + opContext.getEntityRegistry(), + customQueryConfig, + entitySpecs, + withoutQueryPrefix, + opContext.getAspectRetriever()) .ifPresent(finalQuery::should); } } @@ -369,7 +378,8 @@ private Optional getPrefixAndExactMatchQuery( @Nonnull EntityRegistry entityRegistry, @Nullable QueryConfiguration customQueryConfig, @Nonnull List entitySpecs, - String query) { + String query, + @Nullable AspectRetriever aspectRetriever) { final boolean isPrefixQuery = customQueryConfig == null @@ -408,7 +418,8 @@ private Optional getPrefixAndExactMatchQuery( if (caseSensitivityEnabled) { finalQuery.should( QueryBuilders.termQuery( - ESUtils.toKeywordField(searchFieldConfig.fieldName(), false), + ESUtils.toKeywordField( + searchFieldConfig.fieldName(), false, aspectRetriever), unquotedQuery) .caseInsensitive(false) .boost( @@ -419,7 +430,8 @@ private Optional getPrefixAndExactMatchQuery( // Exact match case-insensitive finalQuery.should( QueryBuilders.termQuery( - ESUtils.toKeywordField(searchFieldConfig.fieldName(), false), + ESUtils.toKeywordField( + 
searchFieldConfig.fieldName(), false, aspectRetriever), unquotedQuery) .caseInsensitive(true) .boost( @@ -432,7 +444,8 @@ private Optional getPrefixAndExactMatchQuery( if (searchFieldConfig.isWordGramSubfield() && isPrefixQuery) { finalQuery.should( QueryBuilders.matchPhraseQuery( - ESUtils.toKeywordField(searchFieldConfig.fieldName(), false), + ESUtils.toKeywordField( + searchFieldConfig.fieldName(), false, aspectRetriever), unquotedQuery) .boost( searchFieldConfig.boost() diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index dfb33897bcf4aa..66ad1e3be363f3 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -8,7 +8,6 @@ import com.google.common.collect.ImmutableMap; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.DoubleMap; -import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.EntitySpec; @@ -117,8 +116,7 @@ private SearchRequestHandler( public static SearchRequestHandler getBuilder( @Nonnull EntitySpec entitySpec, @Nonnull SearchConfiguration configs, - @Nullable CustomSearchConfiguration customSearchConfiguration, - @Nonnull AspectRetriever aspectRetriever) { + @Nullable CustomSearchConfiguration customSearchConfiguration) { return REQUEST_HANDLER_BY_ENTITY_NAME.computeIfAbsent( ImmutableList.of(entitySpec), k -> new SearchRequestHandler(entitySpec, configs, customSearchConfiguration)); @@ -127,8 +125,7 @@ public static SearchRequestHandler getBuilder( public static SearchRequestHandler getBuilder( 
@Nonnull List entitySpecs, @Nonnull SearchConfiguration configs, - @Nullable CustomSearchConfiguration customSearchConfiguration, - @Nonnull AspectRetriever aspectRetriever) { + @Nullable CustomSearchConfiguration customSearchConfiguration) { return REQUEST_HANDLER_BY_ENTITY_NAME.computeIfAbsent( ImmutableList.copyOf(entitySpecs), k -> new SearchRequestHandler(entitySpecs, configs, customSearchConfiguration)); @@ -167,10 +164,7 @@ public static BoolQueryBuilder getFilterQuery( Map> searchableFieldTypes) { BoolQueryBuilder filterQuery = ESUtils.buildFilterQuery( - filter, - false, - searchableFieldTypes, - opContext.getRetrieverContext().get().getAspectRetriever()); + filter, false, searchableFieldTypes, opContext.getAspectRetriever()); return applyDefaultSearchFilters(opContext, filter, filterQuery); } @@ -337,7 +331,9 @@ public SearchRequest getAggregationRequest( searchSourceBuilder.query(filterQuery); searchSourceBuilder.size(0); searchSourceBuilder.aggregation( - AggregationBuilders.terms(field).field(ESUtils.toKeywordField(field, false)).size(limit)); + AggregationBuilders.terms(field) + .field(ESUtils.toKeywordField(field, false, opContext.getAspectRetriever())) + .size(limit)); searchRequest.source(searchSourceBuilder); return searchRequest; @@ -528,7 +524,8 @@ private SearchResultMetadata extractSearchResultMetadata( if (Boolean.FALSE.equals(searchFlags.isSkipAggregates())) { final List aggregationMetadataList = - aggregationQueryBuilder.extractAggregationMetadata(searchResponse, filter); + aggregationQueryBuilder.extractAggregationMetadata( + searchResponse, filter, opContext.getAspectRetriever()); searchResultMetadata.setAggregations(new AggregationMetadataArray(aggregationMetadataList)); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java index a655f90597e20b..dd36f0a9456a74 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java @@ -1,7 +1,7 @@ package com.linkedin.metadata.search.transformer; import static com.linkedin.metadata.Constants.*; -import static com.linkedin.metadata.models.StructuredPropertyUtils.sanitizeStructuredPropertyFQN; +import static com.linkedin.metadata.models.StructuredPropertyUtils.toElasticsearchFieldName; import static com.linkedin.metadata.models.annotation.SearchableAnnotation.OBJECT_FIELD_TYPES; import static com.linkedin.metadata.search.elasticsearch.indexbuilder.MappingsBuilder.SYSTEM_CREATED_FIELD; @@ -19,7 +19,6 @@ import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.AspectRetriever; -import com.linkedin.metadata.aspect.validation.StructuredPropertiesValidator; import com.linkedin.metadata.entity.EntityUtils; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; @@ -27,6 +26,7 @@ import com.linkedin.metadata.models.SearchScoreFieldSpec; import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.SearchableRefFieldSpec; +import com.linkedin.metadata.models.StructuredPropertyUtils; import com.linkedin.metadata.models.annotation.SearchableAnnotation.FieldType; import com.linkedin.metadata.models.extractor.FieldExtractor; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -38,12 +38,15 @@ import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; +import java.util.stream.StreamSupport; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.Setter; import 
lombok.extern.slf4j.Slf4j; @@ -109,6 +112,37 @@ public static ObjectNode withSystemCreated( return searchDocument; } + /** + * Handle object type UPSERTS where the new value to upsert removes a previous key. Only enabling + * for structured properties to start with i.e. + * + *

New => { "structuredProperties.foobar": "value1" } Old => { "structuredProperties.foobar": + * "value1" "structuredProperties.foobar2": "value2" } Expected => { + * "structuredProperties.foobar": "value1" "structuredProperties.foobar2": null } + * + * @param searchDocument new document + * @param previousSearchDocument previous document (if not present, no-op) + * @return searchDocument to upsert + */ + public static ObjectNode handleRemoveFields( + @Nonnull ObjectNode searchDocument, @Nullable ObjectNode previousSearchDocument) { + if (previousSearchDocument != null) { + Set documentFields = objectFieldsFilter(searchDocument.fieldNames()); + objectFieldsFilter(previousSearchDocument.fieldNames()).stream() + .filter(prevFieldName -> !documentFields.contains(prevFieldName)) + .forEach(removeFieldName -> searchDocument.set(removeFieldName, null)); + } + // no-op + return searchDocument; + } + + private static Set objectFieldsFilter(Iterator fieldNames) { + Iterable iterable = () -> fieldNames; + return StreamSupport.stream(iterable.spliterator(), false) + .filter(fieldName -> fieldName.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) + .collect(Collectors.toSet()); + } + public Optional transformAspect( @Nonnull OperationContext opContext, final @Nonnull Urn urn, @@ -388,25 +422,28 @@ private void setStructuredPropertiesSearchValue( .entrySet() .forEach( propertyEntry -> { - StructuredPropertyDefinition definition = - new StructuredPropertyDefinition( - definitions - .get(propertyEntry.getKey()) - .get(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) - .data()); + Optional definition = + Optional.ofNullable( + definitions + .get(propertyEntry.getKey()) + .get(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)) + .map(def -> new StructuredPropertyDefinition(def.data())); + + LogicalValueType logicalValueType = + definition + .map(StructuredPropertyUtils::getLogicalValueType) + .orElse(LogicalValueType.UNKNOWN); String fieldName = String.join( ".", List.of( 
STRUCTURED_PROPERTY_MAPPING_FIELD, - sanitizeStructuredPropertyFQN(definition.getQualifiedName()))); + toElasticsearchFieldName( + propertyEntry.getKey(), definition.orElse(null)))); if (forDelete) { searchDocument.set(fieldName, JsonNodeFactory.instance.nullNode()); } else { - LogicalValueType logicalValueType = - StructuredPropertiesValidator.getLogicalValueType(definition.getValueType()); - ArrayNode arrayNode = JsonNodeFactory.instance.arrayNode(); propertyEntry @@ -487,7 +524,7 @@ private Optional getNodeForRef( final Object fieldValue, final FieldType fieldType) { EntityRegistry entityRegistry = opContext.getEntityRegistry(); - AspectRetriever aspectRetriever = opContext.getRetrieverContext().get().getAspectRetriever(); + AspectRetriever aspectRetriever = opContext.getAspectRetriever(); if (depth == 0) { if (fieldValue.toString().isEmpty()) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index a6381b07c087ce..e299dde62b1841 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -2,6 +2,7 @@ import static com.linkedin.metadata.Constants.*; import static com.linkedin.metadata.models.annotation.SearchableAnnotation.OBJECT_FIELD_TYPES; +import static com.linkedin.metadata.search.elasticsearch.indexbuilder.MappingsBuilder.SUBFIELDS; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig.KEYWORD_FIELDS; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig.PATH_HIERARCHY_FIELDS; import static com.linkedin.metadata.search.utils.SearchUtils.isUrn; @@ -50,7 +51,6 @@ /** TODO: Add more robust unit tests for this critical class. 
*/ @Slf4j public class ESUtils { - private static final String DEFAULT_SEARCH_RESULTS_SORT_BY_FIELD = "urn"; public static final String KEYWORD_ANALYZER = "keyword"; public static final String KEYWORD_SUFFIX = ".keyword"; @@ -140,7 +140,7 @@ public static BoolQueryBuilder buildFilterQuery( @Nullable Filter filter, boolean isTimeseries, final Map> searchableFieldTypes, - @Nonnull AspectRetriever aspectRetriever) { + @Nullable AspectRetriever aspectRetriever) { BoolQueryBuilder finalQueryBuilder = QueryBuilders.boolQuery(); if (filter == null) { return finalQueryBuilder; @@ -155,7 +155,8 @@ public static BoolQueryBuilder buildFilterQuery( .forEach( or -> finalQueryBuilder.should( - ESUtils.buildConjunctiveFilterQuery(or, isTimeseries, searchableFieldTypes))); + ESUtils.buildConjunctiveFilterQuery( + or, isTimeseries, searchableFieldTypes, aspectRetriever))); // The default is not always 1 (ensure consistent default) finalQueryBuilder.minimumShouldMatch(1); } else if (filter.getCriteria() != null) { @@ -170,7 +171,8 @@ public static BoolQueryBuilder buildFilterQuery( || criterion.hasValues() || criterion.getCondition() == Condition.IS_NULL) { andQueryBuilder.must( - getQueryBuilderFromCriterion(criterion, isTimeseries, searchableFieldTypes)); + getQueryBuilderFromCriterion( + criterion, isTimeseries, searchableFieldTypes, aspectRetriever)); } }); finalQueryBuilder.should(andQueryBuilder); @@ -184,7 +186,8 @@ public static BoolQueryBuilder buildFilterQuery( public static BoolQueryBuilder buildConjunctiveFilterQuery( @Nonnull ConjunctiveCriterion conjunctiveCriterion, boolean isTimeseries, - Map> searchableFieldTypes) { + Map> searchableFieldTypes, + @Nullable AspectRetriever aspectRetriever) { final BoolQueryBuilder andQueryBuilder = new BoolQueryBuilder(); conjunctiveCriterion .getAnd() @@ -196,10 +199,12 @@ public static BoolQueryBuilder buildConjunctiveFilterQuery( if (!criterion.isNegated()) { // `filter` instead of `must` (enables caching and bypasses scoring) 
andQueryBuilder.filter( - getQueryBuilderFromCriterion(criterion, isTimeseries, searchableFieldTypes)); + getQueryBuilderFromCriterion( + criterion, isTimeseries, searchableFieldTypes, aspectRetriever)); } else { andQueryBuilder.mustNot( - getQueryBuilderFromCriterion(criterion, isTimeseries, searchableFieldTypes)); + getQueryBuilderFromCriterion( + criterion, isTimeseries, searchableFieldTypes, aspectRetriever)); } } }); @@ -237,11 +242,9 @@ public static BoolQueryBuilder buildConjunctiveFilterQuery( public static QueryBuilder getQueryBuilderFromCriterion( @Nonnull final Criterion criterion, boolean isTimeseries, - final Map> searchableFieldTypes) { - final String fieldName = toFacetField(criterion.getField()); - if (fieldName.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD)) { - criterion.setField(fieldName); - } + final Map> searchableFieldTypes, + @Nullable AspectRetriever aspectRetriever) { + final String fieldName = toParentField(criterion.getField(), aspectRetriever); /* * Check the field-name for a "sibling" field, or one which should ALWAYS @@ -256,11 +259,11 @@ public static QueryBuilder getQueryBuilderFromCriterion( if (maybeFieldToExpand.isPresent()) { return getQueryBuilderFromCriterionForFieldToExpand( - maybeFieldToExpand.get(), criterion, isTimeseries, searchableFieldTypes); + maybeFieldToExpand.get(), criterion, isTimeseries, searchableFieldTypes, aspectRetriever); } return getQueryBuilderFromCriterionForSingleField( - criterion, isTimeseries, searchableFieldTypes); + criterion, isTimeseries, searchableFieldTypes, criterion.getField(), aspectRetriever); } public static String getElasticTypeForFieldType(SearchableAnnotation.FieldType fieldType) { @@ -398,28 +401,64 @@ public static String escapeReservedCharacters(@Nonnull String input) { return input; } + /** + * Resolve structured property field, or normal field, and strip subfields + * + * @param filterField name of the field used in the filter request + * @param aspectRetriever aspect retriever, 
used if structured property + * @return normalized field name without subfields + */ @Nonnull - public static String toFacetField(@Nonnull final String filterField) { - String fieldName = filterField; - if (fieldName.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD + ".")) { - String fqn = fieldName.substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1); - fieldName = - STRUCTURED_PROPERTY_MAPPING_FIELD - + "." - + StructuredPropertyUtils.sanitizeStructuredPropertyFQN(fqn); + public static String toParentField( + @Nonnull final String filterField, @Nullable final AspectRetriever aspectRetriever) { + String fieldName = + StructuredPropertyUtils.lookupDefinitionFromFilterOrFacetName(filterField, aspectRetriever) + .map( + urnDefinition -> + STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX + + StructuredPropertyUtils.toElasticsearchFieldName( + urnDefinition.getFirst(), urnDefinition.getSecond())) + .orElse(filterField); + + for (String subfield : SUBFIELDS) { + String SUFFIX = "." + subfield; + if (filterField.endsWith(SUFFIX)) { + return fieldName.replace(SUFFIX, ""); + } } - return fieldName.replace(ESUtils.KEYWORD_SUFFIX, ""); + + return fieldName; } + /** + * Return resolved structured property field, normal field, or subfield which is of type `keyword` + * + * @param filterField the field name used in the filter + * @param skipKeywordSuffix prevent use of `keyword` subfield, useful when parent field is known + * or always `keyword` + * @param aspectRetriever aspect retriever, used if structured property field + * @return the preferred field to use for `keyword` queries + */ @Nonnull public static String toKeywordField( - @Nonnull final String filterField, final boolean skipKeywordSuffix) { + @Nonnull final String filterField, + final boolean skipKeywordSuffix, + @Nullable final AspectRetriever aspectRetriever) { + String fieldName = + StructuredPropertyUtils.lookupDefinitionFromFilterOrFacetName(filterField, aspectRetriever) + .map( + urnDefinition -> + 
STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX + + StructuredPropertyUtils.toElasticsearchFieldName( + urnDefinition.getFirst(), urnDefinition.getSecond())) + .orElse(filterField); + return skipKeywordSuffix - || KEYWORD_FIELDS.contains(filterField) - || PATH_HIERARCHY_FIELDS.contains(filterField) - || filterField.contains(".") - ? filterField - : filterField + ESUtils.KEYWORD_SUFFIX; + || KEYWORD_FIELDS.contains(fieldName) + || PATH_HIERARCHY_FIELDS.contains(fieldName) + || SUBFIELDS.stream().anyMatch(subfield -> fieldName.endsWith("." + subfield)) + ? fieldName + : fieldName + ESUtils.KEYWORD_SUFFIX; } public static RequestOptions buildReindexTaskRequestOptions( @@ -464,7 +503,8 @@ private static QueryBuilder getQueryBuilderFromCriterionForFieldToExpand( @Nonnull final List fields, @Nonnull final Criterion criterion, final boolean isTimeseries, - final Map> searchableFieldTypes) { + final Map> searchableFieldTypes, + @Nonnull AspectRetriever aspectRetriever) { final BoolQueryBuilder orQueryBuilder = new BoolQueryBuilder(); for (String field : fields) { Criterion criterionToQuery = new Criterion(); @@ -476,10 +516,11 @@ private static QueryBuilder getQueryBuilderFromCriterionForFieldToExpand( if (criterion.hasValue()) { criterionToQuery.setValue(criterion.getValue()); } - criterionToQuery.setField(toKeywordField(field, isTimeseries)); + criterionToQuery.setField(toKeywordField(field, isTimeseries, aspectRetriever)); orQueryBuilder.should( getQueryBuilderFromCriterionForSingleField( - criterionToQuery, isTimeseries, searchableFieldTypes)); + criterionToQuery, isTimeseries, searchableFieldTypes, null, aspectRetriever) + .queryName(field)); } return orQueryBuilder; } @@ -488,40 +529,49 @@ private static QueryBuilder getQueryBuilderFromCriterionForFieldToExpand( private static QueryBuilder getQueryBuilderFromCriterionForSingleField( @Nonnull Criterion criterion, boolean isTimeseries, - final Map> searchableFieldTypes) { + final Map> searchableFieldTypes, + @Nullable String 
queryName, + @Nonnull AspectRetriever aspectRetriever) { final Condition condition = criterion.getCondition(); - final String fieldName = toFacetField(criterion.getField()); + final String fieldName = toParentField(criterion.getField(), aspectRetriever); if (condition == Condition.IS_NULL) { return QueryBuilders.boolQuery() - .mustNot(QueryBuilders.existsQuery(criterion.getField())) - .queryName(fieldName); + .mustNot(QueryBuilders.existsQuery(fieldName)) + .queryName(queryName != null ? queryName : fieldName); } else if (condition == Condition.EXISTS) { return QueryBuilders.boolQuery() - .must(QueryBuilders.existsQuery(criterion.getField())) - .queryName(fieldName); + .must(QueryBuilders.existsQuery(fieldName)) + .queryName(queryName != null ? queryName : fieldName); } else if (criterion.hasValues() || criterion.hasValue()) { if (condition == Condition.EQUAL) { return buildEqualsConditionFromCriterion( - fieldName, criterion, isTimeseries, searchableFieldTypes); + fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever) + .queryName(queryName != null ? queryName : fieldName); } else if (RANGE_QUERY_CONDITIONS.contains(condition)) { return buildRangeQueryFromCriterion( - criterion, fieldName, searchableFieldTypes, condition, isTimeseries); + criterion, + fieldName, + searchableFieldTypes, + condition, + isTimeseries, + aspectRetriever) + .queryName(queryName != null ? queryName : fieldName); } else if (condition == Condition.CONTAIN) { return QueryBuilders.wildcardQuery( - toKeywordField(criterion.getField(), isTimeseries), + toKeywordField(criterion.getField(), isTimeseries, aspectRetriever), "*" + ESUtils.escapeReservedCharacters(criterion.getValue().trim()) + "*") - .queryName(fieldName); + .queryName(queryName != null ? 
queryName : fieldName); } else if (condition == Condition.START_WITH) { return QueryBuilders.wildcardQuery( - toKeywordField(criterion.getField(), isTimeseries), + toKeywordField(criterion.getField(), isTimeseries, aspectRetriever), ESUtils.escapeReservedCharacters(criterion.getValue().trim()) + "*") - .queryName(fieldName); + .queryName(queryName != null ? queryName : fieldName); } else if (condition == Condition.END_WITH) { return QueryBuilders.wildcardQuery( - toKeywordField(criterion.getField(), isTimeseries), + toKeywordField(criterion.getField(), isTimeseries, aspectRetriever), "*" + ESUtils.escapeReservedCharacters(criterion.getValue().trim())) - .queryName(fieldName); + .queryName(queryName != null ? queryName : fieldName); } } throw new UnsupportedOperationException("Unsupported condition: " + condition); @@ -531,20 +581,21 @@ private static QueryBuilder buildEqualsConditionFromCriterion( @Nonnull final String fieldName, @Nonnull final Criterion criterion, final boolean isTimeseries, - final Map> searchableFieldTypes) { + final Map> searchableFieldTypes, + @Nonnull AspectRetriever aspectRetriever) { /* * If the newer 'values' field of Criterion.pdl is set, then we * handle using the following code to allow multi-match. */ if (!criterion.getValues().isEmpty()) { return buildEqualsConditionFromCriterionWithValues( - fieldName, criterion, isTimeseries, searchableFieldTypes); + fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever); } /* * Otherwise, we are likely using the deprecated 'value' field. * We handle using the legacy code path below. 
*/ - return buildEqualsFromCriterionWithValue(fieldName, criterion, isTimeseries); + return buildEqualsFromCriterionWithValue(fieldName, criterion, isTimeseries, aspectRetriever); } /** @@ -555,8 +606,9 @@ private static QueryBuilder buildEqualsConditionFromCriterionWithValues( @Nonnull final String fieldName, @Nonnull final Criterion criterion, final boolean isTimeseries, - final Map> searchableFieldTypes) { - Set fieldTypes = getFieldTypes(searchableFieldTypes, fieldName); + final Map> searchableFieldTypes, + @Nonnull AspectRetriever aspectRetriever) { + Set fieldTypes = getFieldTypes(searchableFieldTypes, fieldName, aspectRetriever); if (fieldTypes.size() > 1) { log.warn( "Multiple field types for field name {}, determining best fit for set: {}", @@ -576,21 +628,32 @@ private static QueryBuilder buildEqualsConditionFromCriterionWithValues( return QueryBuilders.termsQuery(fieldName, doubleValues).queryName(fieldName); } return QueryBuilders.termsQuery( - toKeywordField(criterion.getField(), isTimeseries), criterion.getValues()) + toKeywordField(criterion.getField(), isTimeseries, aspectRetriever), + criterion.getValues()) .queryName(fieldName); } private static Set getFieldTypes( - Map> searchableFields, String fieldName) { - Set fieldTypes = - searchableFields.getOrDefault(fieldName, Collections.emptySet()); - Set finalFieldTypes = - fieldTypes.stream().map(ESUtils::getElasticTypeForFieldType).collect(Collectors.toSet()); - if (fieldTypes.size() > 1) { + Map> searchableFields, + String fieldName, + @Nullable AspectRetriever aspectRetriever) { + + final Set finalFieldTypes; + if (fieldName.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) { + finalFieldTypes = + StructuredPropertyUtils.toElasticsearchFieldType(fieldName, aspectRetriever); + } else { + Set fieldTypes = + searchableFields.getOrDefault(fieldName, Collections.emptySet()); + finalFieldTypes = + fieldTypes.stream().map(ESUtils::getElasticTypeForFieldType).collect(Collectors.toSet()); + } + + if 
(finalFieldTypes.size() > 1) { log.warn( "Multiple field types for field name {}, determining best fit for set: {}", fieldName, - fieldTypes); + finalFieldTypes); } return finalFieldTypes; } @@ -600,8 +663,9 @@ private static RangeQueryBuilder buildRangeQueryFromCriterion( String fieldName, Map> searchableFieldTypes, Condition condition, - boolean isTimeseries) { - Set fieldTypes = getFieldTypes(searchableFieldTypes, fieldName); + boolean isTimeseries, + AspectRetriever aspectRetriever) { + Set fieldTypes = getFieldTypes(searchableFieldTypes, fieldName, aspectRetriever); // Determine criterion value, range query only accepts single value so take first value in // values if multiple @@ -624,7 +688,7 @@ private static RangeQueryBuilder buildRangeQueryFromCriterion( documentFieldName = fieldName; } else { criterionValue = criterionValueString; - documentFieldName = toKeywordField(fieldName, isTimeseries); + documentFieldName = toKeywordField(fieldName, isTimeseries, aspectRetriever); } // Set up QueryBuilder based on condition @@ -653,12 +717,14 @@ private static RangeQueryBuilder buildRangeQueryFromCriterion( private static QueryBuilder buildEqualsFromCriterionWithValue( @Nonnull final String fieldName, @Nonnull final Criterion criterion, - final boolean isTimeseries) { + final boolean isTimeseries, + @Nonnull AspectRetriever aspectRetriever) { // If the value is an URN style value, then we do not attempt to split it by comma (for obvious // reasons) if (isUrn(criterion.getValue())) { return QueryBuilders.matchQuery( - toKeywordField(criterion.getField(), isTimeseries), criterion.getValue().trim()) + toKeywordField(criterion.getField(), isTimeseries, aspectRetriever), + criterion.getValue().trim()) .queryName(fieldName) .analyzer(KEYWORD_ANALYZER); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java index 
29389f2e66558f..ad2825ead3d0da 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java @@ -165,7 +165,7 @@ private Callable processBatch( return () -> { StopWatch stopWatch = new StopWatch(); stopWatch.start(); - AspectRetriever aspectRetriever = opContext.getRetrieverContext().get().getAspectRetriever(); + AspectRetriever aspectRetriever = opContext.getAspectRetrieverOpt().get(); log.info("Batch {} for BA:{} started", batchNumber, entityKey); ExecutionResult executionResult = new ExecutionResult(); executionResult.setBatchNumber(batchNumber); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java index da35c9e0b0784f..dff0a99a142b73 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java @@ -6,6 +6,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; import com.linkedin.common.InputField; @@ -130,8 +131,7 @@ public void handleChangeEvent( @Nonnull OperationContext opContext, @Nonnull final MetadataChangeLog event) { try { MCLItemImpl batch = - MCLItemImpl.builder() - .build(event, opContext.getRetrieverContext().get().getAspectRetriever()); + MCLItemImpl.builder().build(event, opContext.getAspectRetrieverOpt().get()); Stream sideEffects = AspectsBatch.applyMCLSideEffects(List.of(batch), opContext.getRetrieverContext().get()); @@ -187,7 +187,7 @@ private void handleUpdateChangeEvent( } // Step 1. 
Handle StructuredProperties Index Mapping changes - updateIndexMappings(entitySpec, aspectSpec, aspect, previousAspect); + updateIndexMappings(urn, entitySpec, aspectSpec, aspect, previousAspect); // Step 2. For all aspects, attempt to update Search updateSearchService(opContext, event); @@ -206,6 +206,7 @@ private void handleUpdateChangeEvent( } public void updateIndexMappings( + @Nonnull Urn urn, EntitySpec entitySpec, AspectSpec aspectSpec, RecordTemplate newValue, @@ -228,7 +229,7 @@ public void updateIndexMappings( if (newDefinition.getEntityTypes().size() > 0) { _entityIndexBuilders - .buildReindexConfigsWithNewStructProp(newDefinition) + .buildReindexConfigsWithNewStructProp(urn, newDefinition) .forEach( reindexState -> { try { @@ -526,8 +527,8 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev RecordTemplate previousAspect = event.getPreviousRecordTemplate(); String entityName = event.getEntitySpec().getName(); - Optional searchDocument; - Optional previousSearchDocument = Optional.empty(); + Optional searchDocument; + Optional previousSearchDocument = Optional.empty(); try { searchDocument = _searchDocumentTransformer @@ -539,8 +540,7 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev event.getChangeType(), event.getEntitySpec(), aspectSpec, - event.getAuditStamp())) - .map(Objects::toString); + event.getAuditStamp())); } catch (Exception e) { log.error( "Error in getting documents from aspect: {} for aspect {}", e, aspectSpec.getName()); @@ -557,7 +557,6 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev return; } - String searchDocumentValue = searchDocument.get(); if (_searchDiffMode && (systemMetadata == null || systemMetadata.getProperties() == null @@ -565,9 +564,8 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev if (previousAspect != null) { try { previousSearchDocument = - _searchDocumentTransformer - 
.transformAspect(opContext, urn, previousAspect, aspectSpec, false) - .map(Objects::toString); + _searchDocumentTransformer.transformAspect( + opContext, urn, previousAspect, aspectSpec, false); } catch (Exception e) { log.error( "Error in getting documents from previous aspect state: {} for aspect {}, continuing without diffing.", @@ -577,15 +575,19 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev } if (previousSearchDocument.isPresent()) { - String previousSearchDocumentValue = previousSearchDocument.get(); - if (searchDocumentValue.equals(previousSearchDocumentValue)) { + if (searchDocument.get().toString().equals(previousSearchDocument.get().toString())) { // No changes to search document, skip writing no-op update return; } } } - _entitySearchService.upsertDocument(opContext, entityName, searchDocument.get(), docId.get()); + String finalDocument = + SearchDocumentTransformer.handleRemoveFields( + searchDocument.get(), previousSearchDocument.orElse(null)) + .toString(); + + _entitySearchService.upsertDocument(opContext, entityName, finalDocument, docId.get()); } /** Process snapshot and update time-series index */ diff --git a/metadata-io/src/main/java/com/linkedin/metadata/shared/ElasticSearchIndexed.java b/metadata-io/src/main/java/com/linkedin/metadata/shared/ElasticSearchIndexed.java index e894558e3d1afd..51d2cee06730cb 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/shared/ElasticSearchIndexed.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/shared/ElasticSearchIndexed.java @@ -1,31 +1,27 @@ package com.linkedin.metadata.shared; +import com.linkedin.common.urn.Urn; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ReindexConfig; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.io.IOException; import java.util.Collection; import java.util.List; public interface ElasticSearchIndexed { - /** - * The index configurations for the given 
service. - * - * @return List of reindex configurations - */ - List buildReindexConfigs() throws IOException; - /** * The index configurations for the given service with StructuredProperties applied. * * @param properties The structured properties to apply to the index mappings * @return List of reindex configurations */ - List buildReindexConfigsWithAllStructProps( - Collection properties) throws IOException; + List buildReindexConfigs( + Collection> properties) throws IOException; /** * Mirrors the service's functions which are expected to build/reindex as needed based on the * reindex configurations above */ - void reindexAll() throws IOException; + void reindexAll(Collection> properties) + throws IOException; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java new file mode 100644 index 00000000000000..41addbe197f278 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java @@ -0,0 +1,201 @@ +package com.linkedin.metadata.structuredproperties.hooks; + +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_KEY_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.entity.Aspect; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.batch.MCLItem; +import com.linkedin.metadata.aspect.batch.MCPItem; +import com.linkedin.metadata.aspect.patch.GenericJsonPatch; +import 
com.linkedin.metadata.aspect.patch.PatchOperationType; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffect; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.metadata.entity.ebean.batch.PatchItemImpl; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.StructuredPropertyUtils; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.structured.StructuredPropertyDefinition; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Getter +@Setter +@Accessors(chain = true) +public class PropertyDefinitionDeleteSideEffect extends MCPSideEffect { + public static final Integer SEARCH_SCROLL_SIZE = 1000; + @Nonnull private AspectPluginConfig config; + + @Override + protected Stream applyMCPSideEffect( + Collection changeMCPS, @Nonnull RetrieverContext retrieverContext) { + return Stream.of(); + } + + @Override + protected Stream postMCPSideEffect( + Collection mclItems, @Nonnull RetrieverContext retrieverContext) { + return mclItems.stream().flatMap(item -> generatePatchRemove(item, retrieverContext)); 
+ } + + private static Stream generatePatchRemove( + MCLItem mclItem, @Nonnull RetrieverContext retrieverContext) { + + if (STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME.equals(mclItem.getAspectName())) { + return generatePatchMCPs( + mclItem.getUrn(), + mclItem.getPreviousAspect(StructuredPropertyDefinition.class), + mclItem.getAuditStamp(), + retrieverContext); + } else if (STRUCTURED_PROPERTY_KEY_ASPECT_NAME.equals(mclItem.getAspectName())) { + Aspect definitionAspect = + retrieverContext + .getAspectRetriever() + .getLatestAspectObject(mclItem.getUrn(), STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME); + return generatePatchMCPs( + mclItem.getUrn(), + definitionAspect == null + ? null + : new StructuredPropertyDefinition(definitionAspect.data()), + mclItem.getAuditStamp(), + retrieverContext); + } + log.warn( + "Expected either {} or {} aspects but got {}", + STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, + STRUCTURED_PROPERTY_KEY_ASPECT_NAME, + mclItem.getAspectName()); + return Stream.empty(); + } + + private static Stream generatePatchMCPs( + Urn propertyUrn, + @Nullable StructuredPropertyDefinition definition, + @Nullable AuditStamp auditStamp, + @Nonnull RetrieverContext retrieverContext) { + EntityWithPropertyIterator iterator = + EntityWithPropertyIterator.builder() + .propertyUrn(propertyUrn) + .definition(definition) + .searchRetriever(retrieverContext.getSearchRetriever()) + .count(SEARCH_SCROLL_SIZE) + .build(); + return StreamSupport.stream( + Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false) + .flatMap( + scrollResult -> + scrollResult.getEntities().stream() + .map( + entity -> { + GenericJsonPatch.PatchOp patchOp = new GenericJsonPatch.PatchOp(); + patchOp.setOp(PatchOperationType.REMOVE.getValue()); + patchOp.setPath(String.format("/properties/%s", propertyUrn.toString())); + + EntitySpec entitySpec = + retrieverContext + .getAspectRetriever() + .getEntityRegistry() + .getEntitySpec(entity.getEntity().getEntityType()); + return 
PatchItemImpl.builder() + .urn(entity.getEntity()) + .entitySpec(entitySpec) + .aspectName(STRUCTURED_PROPERTIES_ASPECT_NAME) + .aspectSpec( + entitySpec.getAspectSpec(STRUCTURED_PROPERTIES_ASPECT_NAME)) + .patch( + GenericJsonPatch.builder() + .arrayPrimaryKeys( + Map.of("properties", List.of("propertyUrn"))) + .patch(List.of(patchOp)) + .build() + .getJsonPatch()) + .auditStamp(auditStamp) + .build(retrieverContext.getAspectRetriever().getEntityRegistry()); + })); + } + + /** + * Fetches pages of entity urns which have a value for the given structured property definition + */ + @Builder + public static class EntityWithPropertyIterator implements Iterator { + @Nonnull private final Urn propertyUrn; + @Nullable private final StructuredPropertyDefinition definition; + @Nonnull private final SearchRetriever searchRetriever; + private int count; + @Builder.Default private String scrollId = null; + @Builder.Default private boolean started = false; + + private List getEntities() { + if (definition != null && definition.getEntityTypes() != null) { + return definition.getEntityTypes().stream() + .map(StructuredPropertyUtils::getValueTypeId) + .collect(Collectors.toList()); + } else { + return Collections.emptyList(); + } + } + + private Filter getFilter() { + Filter propertyFilter = new Filter(); + final ConjunctiveCriterionArray disjunction = new ConjunctiveCriterionArray(); + final ConjunctiveCriterion conjunction = new ConjunctiveCriterion(); + final CriterionArray andCriterion = new CriterionArray(); + + final Criterion propertyExistsCriterion = new Criterion(); + // Cannot rely on automatic field name since the definition is deleted + propertyExistsCriterion.setField( + STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX + + StructuredPropertyUtils.toElasticsearchFieldName(propertyUrn, definition)); + propertyExistsCriterion.setCondition(Condition.EXISTS); + + andCriterion.add(propertyExistsCriterion); + conjunction.setAnd(andCriterion); + disjunction.add(conjunction); + 
propertyFilter.setOr(disjunction); + + return propertyFilter; + } + + @Override + public boolean hasNext() { + return !started || scrollId != null; + } + + @Override + public ScrollResult next() { + started = true; + ScrollResult result = searchRetriever.scroll(getEntities(), getFilter(), scrollId, count); + scrollId = result.getScrollId(); + return result; + } + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDelete.java b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/StructuredPropertiesSoftDelete.java similarity index 96% rename from entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDelete.java rename to metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/StructuredPropertiesSoftDelete.java index 5efb1e8aebb06a..99b58328376755 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDelete.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/StructuredPropertiesSoftDelete.java @@ -1,4 +1,4 @@ -package com.linkedin.metadata.aspect.hooks; +package com.linkedin.metadata.structuredproperties.hooks; import com.linkedin.common.urn.Urn; import com.linkedin.metadata.aspect.ReadItem; diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/PropertyDefinitionValidator.java b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/PropertyDefinitionValidator.java similarity index 68% rename from entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/PropertyDefinitionValidator.java rename to metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/PropertyDefinitionValidator.java index a4efc38d16082a..ae5472af622ad5 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/PropertyDefinitionValidator.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/PropertyDefinitionValidator.java @@ -1,20 +1,20 @@ -package com.linkedin.metadata.aspect.validation; +package com.linkedin.metadata.structuredproperties.validation; import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; import static com.linkedin.structured.PropertyCardinality.*; import com.google.common.collect.ImmutableSet; import com.linkedin.common.Status; import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.GetMode; import com.linkedin.entity.Aspect; -import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.RetrieverContext; import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; @@ -25,12 +25,13 @@ import java.util.Collection; import java.util.Collections; import java.util.Map; -import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.Getter; import lombok.Setter; import lombok.experimental.Accessors; @@ -52,25 +53,7 @@ public class PropertyDefinitionValidator extends AspectPayloadValidator { protected Stream validateProposedAspects( @Nonnull Collection mcpItems, @Nonnull RetrieverContext retrieverContext) { - final String entityKeyAspect = 
- retrieverContext - .getAspectRetriever() - .getEntityRegistry() - .getEntitySpec(STRUCTURED_PROPERTY_ENTITY_NAME) - .getKeyAspectName(); - - return mcpItems.stream() - .filter(i -> ChangeType.DELETE.equals(i.getChangeType())) - .map( - i -> { - if (ImmutableSet.of(entityKeyAspect, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) - .contains(i.getAspectSpec().getName())) { - return AspectValidationException.forItem( - i, "Hard delete of Structured Property Definitions is not supported."); - } - return null; - }) - .filter(Objects::nonNull); + return Stream.empty(); } @Override @@ -78,10 +61,7 @@ protected Stream validatePreCommitAspects( @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) { return validateDefinitionUpserts( changeMCPs.stream() - .filter( - i -> - ChangeType.UPSERT.equals(i.getChangeType()) - && STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME.equals(i.getAspectName())) + .filter(i -> STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME.equals(i.getAspectName())) .collect(Collectors.toList()), retrieverContext); } @@ -105,19 +85,24 @@ public static Stream validateDefinitionUpserts( "Cannot mutate a soft deleted Structured Property Definition") .ifPresent(exceptions::addException); + final StructuredPropertyDefinition newDefinition = + item.getAspect(StructuredPropertyDefinition.class); + + versionFormatCheck(item, newDefinition.getVersion()).ifPresent(exceptions::addException); + if (item.getPreviousSystemAspect() != null) { StructuredPropertyDefinition previousDefinition = item.getPreviousSystemAspect().getAspect(StructuredPropertyDefinition.class); - StructuredPropertyDefinition newDefinition = - item.getAspect(StructuredPropertyDefinition.class); - if (!newDefinition.getValueType().equals(previousDefinition.getValueType())) { + if (!newDefinition.getValueType().equals(previousDefinition.getValueType()) + && !allowBreakingWithVersion(previousDefinition, newDefinition, item, exceptions)) { exceptions.addException( item, "Value type cannot be 
changed as this is a backwards incompatible change"); } if (newDefinition.getCardinality().equals(SINGLE) - && previousDefinition.getCardinality().equals(MULTIPLE)) { + && previousDefinition.getCardinality().equals(MULTIPLE) + && !allowBreakingWithVersion(previousDefinition, newDefinition, item, exceptions)) { exceptions.addException( item, "Property definition cardinality cannot be changed from MULTI to SINGLE"); } @@ -127,10 +112,12 @@ public static Stream validateDefinitionUpserts( } // Assure new definition has only added allowed values, not removed them if (newDefinition.getAllowedValues() != null) { - if (!previousDefinition.hasAllowedValues() - || previousDefinition.getAllowedValues() == null) { + if ((!previousDefinition.hasAllowedValues() + || previousDefinition.getAllowedValues() == null) + && !allowBreakingWithVersion(previousDefinition, newDefinition, item, exceptions)) { exceptions.addException(item, "Cannot restrict values that were previously allowed"); - } else { + } else if (!allowBreakingWithVersion( + previousDefinition, newDefinition, item, exceptions)) { Set newAllowedValues = newDefinition.getAllowedValues().stream() .map(PropertyValue::getValue) @@ -163,4 +150,46 @@ static Optional softDeleteCheck } return Optional.empty(); } + + /** + * Allow new version if monotonically increasing + * + * @param oldDefinition previous version + * @param newDefinition next version + * @return whether version increase should allow breaking change + */ + private static boolean allowBreakingWithVersion( + @Nonnull StructuredPropertyDefinition oldDefinition, + @Nonnull StructuredPropertyDefinition newDefinition, + @Nonnull ChangeMCP item, + @Nonnull ValidationExceptionCollection exceptions) { + final String oldVersion = oldDefinition.getVersion(GetMode.NULL); + final String newVersion = newDefinition.getVersion(GetMode.NULL); + + if (newVersion != null && newVersion.contains(".")) { + exceptions.addException( + item, + String.format("Invalid version `%s` cannot 
contain the `.` character.", newVersion)); + } + + if (oldVersion == null && newVersion != null) { + return true; + } else if (newVersion != null) { + return newVersion.compareToIgnoreCase(oldVersion) > 0; + } + return false; + } + + private static Pattern VERSION_REGEX = Pattern.compile("[0-9]{14}"); + + private static Optional versionFormatCheck( + MCPItem item, @Nullable String version) { + if (version != null && !VERSION_REGEX.matcher(version).matches()) { + return Optional.of( + AspectValidationException.forItem( + item, + String.format("Invalid version specified. Must match %s", VERSION_REGEX.toString()))); + } + return Optional.empty(); + } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/StructuredPropertiesValidator.java similarity index 94% rename from entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java rename to metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/StructuredPropertiesValidator.java index fcae6ca8cb71a9..cdbe2eb95a15d2 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/StructuredPropertiesValidator.java @@ -1,7 +1,9 @@ -package com.linkedin.metadata.aspect.validation; +package com.linkedin.metadata.structuredproperties.validation; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; -import static com.linkedin.metadata.aspect.validation.PropertyDefinitionValidator.softDeleteCheck; +import static com.linkedin.metadata.models.StructuredPropertyUtils.getLogicalValueType; +import static com.linkedin.metadata.models.StructuredPropertyUtils.getValueTypeId; +import static 
com.linkedin.metadata.structuredproperties.validation.PropertyDefinitionValidator.softDeleteCheck; import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; @@ -66,23 +68,6 @@ public class StructuredPropertiesValidator extends AspectPayloadValidator { LogicalValueType.DATE, LogicalValueType.URN)); - public static LogicalValueType getLogicalValueType(Urn valueType) { - String valueTypeId = getValueTypeId(valueType); - if (valueTypeId.equals("string")) { - return LogicalValueType.STRING; - } else if (valueTypeId.equals("date")) { - return LogicalValueType.DATE; - } else if (valueTypeId.equals("number")) { - return LogicalValueType.NUMBER; - } else if (valueTypeId.equals("urn")) { - return LogicalValueType.URN; - } else if (valueTypeId.equals("rich_text")) { - return LogicalValueType.RICH_TEXT; - } - - return LogicalValueType.UNKNOWN; - } - @Nonnull private AspectPluginConfig config; @Override @@ -132,7 +117,11 @@ public static Stream validateProposedUpserts( StructuredPropertyDefinition structuredPropertyDefinition = lookupPropertyDefinition(propertyUrn, allStructuredPropertiesAspects); if (structuredPropertyDefinition == null) { - exceptions.addException(i, "Unexpected null value found."); + exceptions.addException( + i, + String.format( + "Unexpected null value found for %s Structured Property Definition.", + propertyUrn)); } log.debug( @@ -237,7 +226,7 @@ private static Set validateStructuredPropertyUrns( for (BatchItem i : exceptions.successful(mcpItems)) { StructuredProperties structuredProperties = i.getAspect(StructuredProperties.class); - log.warn("Validator called with {}", structuredProperties); + log.info("Validator called with {}", structuredProperties); Map> structuredPropertiesMap = structuredProperties.getProperties().stream() .collect( @@ -412,14 +401,6 @@ private static Optional validateType( return Optional.empty(); } - private static String getValueTypeId(@Nonnull final Urn valueType) { - String valueTypeId = 
valueType.getId(); - if (valueTypeId.startsWith("datahub.")) { - valueTypeId = valueTypeId.split("\\.")[1]; - } - return valueTypeId; - } - private static Map> fetchPropertyAspects( @Nonnull Collection mcpItems, AspectRetriever aspectRetriever, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java index 36eab7b69e6a12..13fde9e392927f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/ElasticSearchSystemMetadataService.java @@ -4,6 +4,7 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; +import com.linkedin.common.urn.Urn; import com.linkedin.metadata.run.AspectRowSummary; import com.linkedin.metadata.run.IngestionRunSummary; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; @@ -14,6 +15,7 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.mxe.SystemMetadata; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.security.MessageDigest; @@ -227,10 +229,10 @@ public List listRuns( } @Override - public void configure() { + public void reindexAll(Collection> properties) { log.info("Setting up system metadata index"); try { - for (ReindexConfig config : buildReindexConfigs()) { + for (ReindexConfig config : buildReindexConfigs(properties)) { _indexBuilder.buildIndex(config); } } catch (IOException ie) { @@ -239,7 +241,8 @@ public void configure() { } @Override - public List buildReindexConfigs() throws IOException { + public List buildReindexConfigs( + Collection> properties) throws 
IOException { return List.of( _indexBuilder.buildReindexState( _indexConvention.getIndexName(INDEX_NAME), @@ -247,17 +250,6 @@ public List buildReindexConfigs() throws IOException { Collections.emptyMap())); } - @Override - public List buildReindexConfigsWithAllStructProps( - Collection properties) throws IOException { - return buildReindexConfigs(); - } - - @Override - public void reindexAll() { - configure(); - } - @VisibleForTesting @Override public void clear() { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java index 240108b1910831..ce4ff53eba91b9 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java @@ -202,19 +202,9 @@ private static Pair toEnvAspectGener } @Override - public void configure() { - indexBuilders.reindexAll(); - } - - @Override - public List buildReindexConfigs() { - return indexBuilders.buildReindexConfigs(); - } - - @Override - public List buildReindexConfigsWithAllStructProps( - Collection properties) throws IOException { - return indexBuilders.buildReindexConfigsWithAllStructProps(properties); + public List buildReindexConfigs( + Collection> properties) throws IOException { + return indexBuilders.buildReindexConfigs(properties); } public String reindexAsync( @@ -224,8 +214,8 @@ public String reindexAsync( } @Override - public void reindexAll() { - configure(); + public void reindexAll(Collection> properties) { + indexBuilders.reindexAll(properties); } @Override @@ -308,7 +298,7 @@ public long countByFilter( .getEntityRegistry() .getEntitySpec(entityName) .getSearchableFieldTypes(), - opContext.getRetrieverContext().get().getAspectRetriever())); + opContext.getAspectRetriever())); 
CountRequest countRequest = new CountRequest(); countRequest.query(filterQueryBuilder); countRequest.indices(indexName); @@ -338,10 +328,7 @@ public List getAspectValues( QueryBuilders.boolQuery() .must( ESUtils.buildFilterQuery( - filter, - true, - searchableFieldTypes, - opContext.getRetrieverContext().get().getAspectRetriever())); + filter, true, searchableFieldTypes, opContext.getAspectRetriever())); filterQueryBuilder.must(QueryBuilders.matchQuery("urn", urn.toString())); // NOTE: We are interested only in the un-exploded rows as only they carry the `event` payload. filterQueryBuilder.mustNot(QueryBuilders.termQuery(MappingsBuilder.IS_EXPLODED_FIELD, true)); @@ -352,7 +339,8 @@ public List getAspectValues( .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) .setValue(startTimeMillis.toString()); filterQueryBuilder.must( - ESUtils.getQueryBuilderFromCriterion(startTimeCriterion, true, searchableFieldTypes)); + ESUtils.getQueryBuilderFromCriterion( + startTimeCriterion, true, searchableFieldTypes, opContext.getAspectRetriever())); } if (endTimeMillis != null) { Criterion endTimeCriterion = @@ -361,7 +349,8 @@ public List getAspectValues( .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) .setValue(endTimeMillis.toString()); filterQueryBuilder.must( - ESUtils.getQueryBuilderFromCriterion(endTimeCriterion, true, searchableFieldTypes)); + ESUtils.getQueryBuilderFromCriterion( + endTimeCriterion, true, searchableFieldTypes, opContext.getAspectRetriever())); } final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.query(filterQueryBuilder); @@ -446,7 +435,7 @@ public DeleteAspectValuesResult deleteAspectValues( filter, true, opContext.getEntityRegistry().getEntitySpec(entityName).getSearchableFieldTypes(), - opContext.getRetrieverContext().get().getAspectRetriever()); + opContext.getAspectRetriever()); final Optional result = bulkProcessor @@ -482,7 +471,7 @@ public String deleteAspectValuesAsync( filter, true, 
opContext.getEntityRegistry().getEntitySpec(entityName).getSearchableFieldTypes(), - opContext.getRetrieverContext().get().getAspectRetriever()); + opContext.getAspectRetriever()); final int batchSize = options.getBatchSize() > 0 ? options.getBatchSize() : DEFAULT_LIMIT; TimeValue timeout = options.getTimeoutSeconds() > 0 @@ -516,7 +505,7 @@ public String reindexAsync( filter, true, opContext.getEntityRegistry().getEntitySpec(entityName).getSearchableFieldTypes(), - opContext.getRetrieverContext().get().getAspectRetriever()); + opContext.getAspectRetriever()); try { return this.reindexAsync(indexName, filterQueryBuilder, options); } catch (Exception e) { @@ -574,10 +563,7 @@ public TimeseriesScrollResult scrollAspects( QueryBuilders.boolQuery() .filter( ESUtils.buildFilterQuery( - filter, - true, - searchableFieldTypes, - opContext.getRetrieverContext().get().getAspectRetriever())); + filter, true, searchableFieldTypes, opContext.getAspectRetriever())); if (startTimeMillis != null) { Criterion startTimeCriterion = @@ -586,7 +572,8 @@ public TimeseriesScrollResult scrollAspects( .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) .setValue(startTimeMillis.toString()); filterQueryBuilder.filter( - ESUtils.getQueryBuilderFromCriterion(startTimeCriterion, true, searchableFieldTypes)); + ESUtils.getQueryBuilderFromCriterion( + startTimeCriterion, true, searchableFieldTypes, opContext.getAspectRetriever())); } if (endTimeMillis != null) { Criterion endTimeCriterion = @@ -595,7 +582,8 @@ public TimeseriesScrollResult scrollAspects( .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) .setValue(endTimeMillis.toString()); filterQueryBuilder.filter( - ESUtils.getQueryBuilderFromCriterion(endTimeCriterion, true, searchableFieldTypes)); + ESUtils.getQueryBuilderFromCriterion( + endTimeCriterion, true, searchableFieldTypes, opContext.getAspectRetriever())); } SearchResponse response = diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java index b59cd3a647d71c..6b67789c3e2d8c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/indexbuilder/TimeseriesAspectIndexBuilders.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.timeseries.elastic.indexbuilder; +import com.linkedin.common.urn.Urn; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; @@ -29,8 +30,8 @@ public class TimeseriesAspectIndexBuilders implements ElasticSearchIndexed { @Nonnull private final IndexConvention indexConvention; @Override - public void reindexAll() { - for (ReindexConfig config : buildReindexConfigs()) { + public void reindexAll(Collection> properties) { + for (ReindexConfig config : buildReindexConfigs(properties)) { try { indexBuilder.buildIndex(config); } catch (IOException e) { @@ -69,7 +70,8 @@ public String reindexAsync( } @Override - public List buildReindexConfigs() { + public List buildReindexConfigs( + Collection> properties) { return entityRegistry.getEntitySpecs().values().stream() .flatMap( entitySpec -> @@ -94,10 +96,4 @@ public List buildReindexConfigs() { }) .collect(Collectors.toList()); } - - @Override - public List buildReindexConfigsWithAllStructProps( - Collection properties) throws IOException { - return buildReindexConfigs(); - } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java index 95665450a25723..1bf96841e5fe13 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java @@ -5,6 +5,7 @@ import com.linkedin.data.schema.DataSchema; import com.linkedin.data.template.StringArray; import com.linkedin.data.template.StringArrayArray; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.TimeseriesFieldCollectionSpec; @@ -374,12 +375,13 @@ public GenericTable getAggregatedStats( filter, true, opContext.getEntityRegistry().getEntitySpec(entityName).getSearchableFieldTypes(), - opContext.getRetrieverContext().get().getAspectRetriever()); + opContext.getAspectRetriever()); AspectSpec aspectSpec = getTimeseriesAspectSpec(opContext, entityName, aspectName); // Build and attach the grouping aggregations final Pair topAndBottomAggregations = - makeGroupingAggregationBuilder(aspectSpec, null, groupingBuckets); + makeGroupingAggregationBuilder( + aspectSpec, null, groupingBuckets, opContext.getAspectRetriever()); AggregationBuilder rootAggregationBuilder = topAndBottomAggregations.getFirst(); AggregationBuilder mostNested = topAndBottomAggregations.getSecond(); @@ -462,7 +464,8 @@ private void addAggregationBuildersFromAggregationSpec( private Pair makeGroupingAggregationBuilder( AspectSpec aspectSpec, @Nullable AggregationBuilder baseAggregationBuilder, - @Nullable GroupingBucket[] groupingBuckets) { + @Nullable GroupingBucket[] groupingBuckets, + @Nonnull AspectRetriever aspectRetriever) { AggregationBuilder firstAggregationBuilder = baseAggregationBuilder; AggregationBuilder lastAggregationBuilder = baseAggregationBuilder; @@ -481,7 +484,8 @@ private Pair makeGroupingAggregationBuil } else if (curGroupingBucket.getType() == GroupingBucketType.STRING_GROUPING_BUCKET) { // Process the string grouping bucket using 
the 'terms' aggregation. // The field can be Keyword, Numeric, ip, boolean, or binary. - String fieldName = ESUtils.toKeywordField(curGroupingBucket.getKey(), true); + String fieldName = + ESUtils.toKeywordField(curGroupingBucket.getKey(), true, aspectRetriever); DataSchema.Type fieldType = getGroupingBucketKeyType(aspectSpec, curGroupingBucket); curAggregationBuilder = AggregationBuilders.terms(getGroupingBucketAggName(curGroupingBucket)) diff --git a/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java b/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java index 1cffbb6e2cf217..12b12cf105196e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java @@ -46,7 +46,7 @@ public static Map ingestCorpUserKeyAspects( .recordTemplate(aspect) .auditStamp(AspectGenerationUtils.createAuditStamp()) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); } entityService.ingestAspects( opContext, @@ -83,7 +83,7 @@ public static Map ingestCorpUserInfoAspects( .recordTemplate(aspect) .auditStamp(AspectGenerationUtils.createAuditStamp()) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); } entityService.ingestAspects( opContext, @@ -121,7 +121,7 @@ public static Map ingestChartInfoAspects( .recordTemplate(aspect) .auditStamp(AspectGenerationUtils.createAuditStamp()) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); } entityService.ingestAspects( opContext, diff --git 
a/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java index 23513738fbc337..ef6c9e56e132b3 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java @@ -92,11 +92,11 @@ private void configureComponents() { .entityRegistry(_testEntityRegistry) .build()) .graphRetriever(TestOperationContexts.emptyGraphRetriever) + .searchRetriever(TestOperationContexts.emptySearchRetriever) .build(), null, opContext -> - ((EntityServiceAspectRetriever) - opContext.getRetrieverContext().get().getAspectRetriever()) + ((EntityServiceAspectRetriever) opContext.getAspectRetrieverOpt().get()) .setSystemOperationContext(opContext)); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java index 76c14be5f8c13e..b9f5984e576678 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java @@ -92,11 +92,11 @@ public void setupTest() { .entityRegistry(_testEntityRegistry) .build()) .graphRetriever(TestOperationContexts.emptyGraphRetriever) + .searchRetriever(TestOperationContexts.emptySearchRetriever) .build(), null, opContext -> - ((EntityServiceAspectRetriever) - opContext.getRetrieverContext().get().getAspectRetriever()) + ((EntityServiceAspectRetriever) opContext.getAspectRetrieverOpt().get()) .setSystemOperationContext(opContext)); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index 9a8186cc838ab2..91b01c55aac396 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -927,28 +927,28 @@ public void testRollbackAspect() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn2) .aspectName(aspectName) .recordTemplate(writeAspect2) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn3) .aspectName(aspectName) .recordTemplate(writeAspect3) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) .recordTemplate(writeAspect1Overwrite) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1019,21 +1019,21 @@ public void testRollbackKey() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(keyAspectName) .recordTemplate(writeKey1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) 
.recordTemplate(writeAspect1Overwrite) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1112,35 +1112,35 @@ public void testRollbackUrn() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(keyAspectName) .recordTemplate(writeKey1) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn2) .aspectName(aspectName) .recordTemplate(writeAspect2) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn3) .aspectName(aspectName) .recordTemplate(writeAspect3) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn1) .aspectName(aspectName) .recordTemplate(writeAspect1Overwrite) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1188,7 +1188,7 @@ public void testIngestGetLatestAspect() throws AssertionError { .recordTemplate(writeAspect1) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); 
+ .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1244,7 +1244,7 @@ public void testIngestGetLatestAspect() throws AssertionError { .recordTemplate(writeAspect2) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata2) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1298,7 +1298,7 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception { .recordTemplate(writeAspect1) .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1325,7 +1325,7 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception { .recordTemplate(writeAspect2) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1394,7 +1394,7 @@ public void testIngestSameAspect() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1450,7 +1450,7 @@ public void testIngestSameAspect() throws AssertionError { .recordTemplate(writeAspect2) .systemMetadata(metadata2) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1511,42 +1511,42 @@ public void 
testRetention() throws AssertionError { .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect1a) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName) .recordTemplate(writeAspect1b) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2a) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2b) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() @@ -1587,14 +1587,14 @@ public void testRetention() throws AssertionError { .recordTemplate(writeAspect1c) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever()), + .build(opContext.getAspectRetrieverOpt().get()), 
ChangeItemImpl.builder() .urn(entityUrn) .aspectName(aspectName2) .recordTemplate(writeAspect2c) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) - .build(opContext.getRetrieverContext().get().getAspectRetriever())); + .build(opContext.getAspectRetrieverOpt().get())); _entityServiceImpl.ingestAspects( opContext, AspectsBatchImpl.builder() diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphContainer.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphContainer.java index 481db53eafbbe9..9c67c610196ed4 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphContainer.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/dgraph/DgraphContainer.java @@ -224,9 +224,6 @@ public Set getLivenessCheckPortNumbers() { return Stream.of(getHttpPort(), getGrpcPort()).map(this::getMappedPort).collect(toSet()); } - @Override - protected void configure() {} - public int getHttpPort() { return getMappedPort(HTTP_PORT); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java index d1ee1996e5b8a4..b4ad5ce61d8f4e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/search/SearchGraphServiceTestBase.java @@ -72,7 +72,7 @@ public abstract class SearchGraphServiceTestBase extends GraphServiceTestBase { @BeforeClass public void setup() { _client = buildService(_enableMultiPathSearch); - _client.configure(); + _client.reindexAll(Collections.emptySet()); } @BeforeMethod @@ -121,7 +121,7 @@ protected GraphService getGraphService(boolean enableMultiPathSearch) { if (enableMultiPathSearch != _enableMultiPathSearch) { _enableMultiPathSearch = enableMultiPathSearch; _client = buildService(enableMultiPathSearch); - _client.configure(); + 
_client.reindexAll(Collections.emptySet()); } return _client; } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java index 3f03c64c6f9211..3dbbfb2cebc3f3 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java @@ -126,7 +126,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException { .asSession(RequestContext.TEST, Authorizer.EMPTY, TestOperationContexts.TEST_USER_AUTH); settingsBuilder = new SettingsBuilder(null); elasticSearchService = buildEntitySearchService(); - elasticSearchService.configure(); + elasticSearchService.reindexAll(Collections.emptySet()); cacheManager = new ConcurrentMapCacheManager(); graphService = mock(GraphService.class); resetService(true, false); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java index d5a21cfecee277..a610cf95f827ae 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java @@ -39,6 +39,7 @@ import io.datahubproject.metadata.context.RequestContext; import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URISyntaxException; +import java.util.Collections; import javax.annotation.Nonnull; import org.opensearch.client.RestHighLevelClient; import org.springframework.cache.CacheManager; @@ -83,7 +84,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException { settingsBuilder = new SettingsBuilder(null); elasticSearchService = buildEntitySearchService(); - elasticSearchService.configure(); + elasticSearchService.reindexAll(Collections.emptySet()); cacheManager = 
new ConcurrentMapCacheManager(); resetSearchService(); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java index c3dcf3aaee9b73..58574025aeeac3 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/TestEntityTestBase.java @@ -25,6 +25,7 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; +import java.util.Collections; import java.util.List; import javax.annotation.Nonnull; import org.opensearch.client.RestHighLevelClient; @@ -64,7 +65,7 @@ public void setup() { new SnapshotEntityRegistry(new Snapshot()), new IndexConventionImpl("es_service_test")); settingsBuilder = new SettingsBuilder(null); elasticSearchService = buildService(); - elasticSearchService.configure(); + elasticSearchService.reindexAll(Collections.emptySet()); } @BeforeMethod diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/IndexBuilderTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/IndexBuilderTestBase.java index 0858c3dd7eb996..92ca4c5ed8a05e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/IndexBuilderTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/IndexBuilderTestBase.java @@ -57,6 +57,7 @@ public void setup() { Map.of(), false, false, + false, new ElasticSearchConfiguration(), gitVersion); } @@ -103,6 +104,7 @@ public void testESIndexBuilderCreation() throws Exception { Map.of(), false, false, + false, new ElasticSearchConfiguration(), gitVersion); customIndexBuilder.buildIndex(TEST_INDEX_NAME, Map.of(), Map.of()); @@ -126,6 +128,7 @@ public void testMappingReindex() throws Exception { 
Map.of(), false, true, + false, new ElasticSearchConfiguration(), gitVersion); @@ -197,6 +200,7 @@ public void testSettingsNumberOfShardsReindex() throws Exception { Map.of(), true, false, + false, new ElasticSearchConfiguration(), gitVersion); @@ -238,6 +242,7 @@ public void testSettingsNoReindex() throws Exception { Map.of(), true, false, + false, new ElasticSearchConfiguration(), gitVersion), new ESIndexBuilder( @@ -249,6 +254,7 @@ public void testSettingsNoReindex() throws Exception { Map.of(), true, false, + false, new ElasticSearchConfiguration(), gitVersion), new ESIndexBuilder( @@ -260,6 +266,7 @@ public void testSettingsNoReindex() throws Exception { Map.of(), false, false, + false, new ElasticSearchConfiguration(), gitVersion), new ESIndexBuilder( @@ -271,6 +278,7 @@ public void testSettingsNoReindex() throws Exception { Map.of(), false, false, + false, new ElasticSearchConfiguration(), gitVersion)); @@ -314,6 +322,7 @@ public void testCopyStructuredPropertyMappings() throws Exception { Map.of(), false, true, + false, new ElasticSearchConfiguration(), gitVersion); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java index 9185e2e7ee072d..75da2bc62aaad1 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java @@ -7,6 +7,8 @@ import com.google.common.collect.ImmutableMap; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.SetMode; import com.linkedin.metadata.TestEntitySpecBuilder; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.EntitySpecBuilder; @@ -15,6 +17,7 @@ import 
com.linkedin.metadata.search.elasticsearch.indexbuilder.MappingsBuilder; import com.linkedin.metadata.search.elasticsearch.query.request.TestSearchFieldConfig; import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.util.Pair; import java.io.Serializable; import java.net.URISyntaxException; import java.util.List; @@ -178,19 +181,25 @@ public void testGetMappingsWithStructuredProperty() throws URISyntaxException { // Test that a structured property that does not apply to the entity does not alter the mappings StructuredPropertyDefinition structPropNotForThisEntity = new StructuredPropertyDefinition() + .setVersion(null, SetMode.REMOVE_IF_NULL) .setQualifiedName("propNotForThis") .setDisplayName("propNotForThis") .setEntityTypes(new UrnArray(Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "dataset"))) .setValueType(Urn.createFromString("urn:li:logicalType:STRING")); Map resultWithOnlyUnrelatedStructuredProp = MappingsBuilder.getMappings( - TestEntitySpecBuilder.getSpec(), List.of(structPropNotForThisEntity)); + TestEntitySpecBuilder.getSpec(), + List.of( + Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:propNotForThis"), + structPropNotForThisEntity))); assertEquals(resultWithOnlyUnrelatedStructuredProp, resultWithoutStructuredProps); // Test that a structured property that does apply to this entity is included in the mappings String fqnOfRelatedProp = "propForThis"; StructuredPropertyDefinition structPropForThisEntity = new StructuredPropertyDefinition() + .setVersion(null, SetMode.REMOVE_IF_NULL) .setQualifiedName(fqnOfRelatedProp) .setDisplayName("propForThis") .setEntityTypes( @@ -200,7 +209,11 @@ public void testGetMappingsWithStructuredProperty() throws URISyntaxException { .setValueType(Urn.createFromString("urn:li:logicalType:STRING")); Map resultWithOnlyRelatedStructuredProp = MappingsBuilder.getMappings( - TestEntitySpecBuilder.getSpec(), List.of(structPropForThisEntity)); + TestEntitySpecBuilder.getSpec(), + List.of( + 
Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:propForThis"), + structPropForThisEntity))); assertNotEquals(resultWithOnlyRelatedStructuredProp, resultWithoutStructuredProps); Map fieldsBefore = (Map) resultWithoutStructuredProps.get("properties"); @@ -231,7 +244,95 @@ public void testGetMappingsWithStructuredProperty() throws URISyntaxException { Map resultWithBothStructuredProps = MappingsBuilder.getMappings( TestEntitySpecBuilder.getSpec(), - List.of(structPropForThisEntity, structPropNotForThisEntity)); + List.of( + Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:propForThis"), + structPropForThisEntity), + Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:propNotForThis"), + structPropNotForThisEntity))); + assertEquals(resultWithBothStructuredProps, resultWithOnlyRelatedStructuredProp); + } + + @Test + public void testGetMappingsWithStructuredPropertyV1() throws URISyntaxException { + // Baseline comparison: Mappings with no structured props + Map resultWithoutStructuredProps = + MappingsBuilder.getMappings(TestEntitySpecBuilder.getSpec()); + + // Test that a structured property that does not apply to the entity does not alter the mappings + StructuredPropertyDefinition structPropNotForThisEntity = + new StructuredPropertyDefinition() + .setVersion("00000000000001") + .setQualifiedName("propNotForThis") + .setDisplayName("propNotForThis") + .setEntityTypes(new UrnArray(Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "dataset"))) + .setValueType(Urn.createFromString("urn:li:logicalType:STRING")); + Map resultWithOnlyUnrelatedStructuredProp = + MappingsBuilder.getMappings( + TestEntitySpecBuilder.getSpec(), + List.of( + Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:propNotForThis"), + structPropNotForThisEntity))); + assertEquals(resultWithOnlyUnrelatedStructuredProp, resultWithoutStructuredProps); + + // Test that a structured property that does apply to this entity is included in the mappings + String fqnOfRelatedProp = "propForThis"; + 
StructuredPropertyDefinition structPropForThisEntity = + new StructuredPropertyDefinition() + .setVersion("00000000000001") + .setQualifiedName(fqnOfRelatedProp) + .setDisplayName("propForThis") + .setEntityTypes( + new UrnArray( + Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "dataset"), + Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "testEntity"))) + .setValueType(Urn.createFromString("urn:li:logicalType:STRING")); + Map resultWithOnlyRelatedStructuredProp = + MappingsBuilder.getMappings( + TestEntitySpecBuilder.getSpec(), + List.of( + Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:propForThis"), + structPropForThisEntity))); + assertNotEquals(resultWithOnlyRelatedStructuredProp, resultWithoutStructuredProps); + Map fieldsBefore = + (Map) resultWithoutStructuredProps.get("properties"); + Map fieldsAfter = + (Map) resultWithOnlyRelatedStructuredProp.get("properties"); + assertEquals(fieldsAfter.size(), fieldsBefore.size() + 1); + + Map structProps = (Map) fieldsAfter.get("structuredProperties"); + fieldsAfter = (Map) structProps.get("properties"); + + String newField = + fieldsAfter.keySet().stream() + .filter(field -> !fieldsBefore.containsKey(field)) + .findFirst() + .get(); + assertEquals(newField, "_versioned." 
+ fqnOfRelatedProp + ".00000000000001.string"); + assertEquals( + fieldsAfter.get(newField), + Map.of( + "normalizer", + "keyword_normalizer", + "type", + "keyword", + "fields", + Map.of("keyword", Map.of("type", "keyword")))); + + // Test that only structured properties that apply are included + Map resultWithBothStructuredProps = + MappingsBuilder.getMappings( + TestEntitySpecBuilder.getSpec(), + List.of( + Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:propForThis"), + structPropForThisEntity), + Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:propNotForThis"), + structPropNotForThisEntity))); assertEquals(resultWithBothStructuredProps, resultWithOnlyRelatedStructuredProp); } @@ -239,6 +340,7 @@ public void testGetMappingsWithStructuredProperty() throws URISyntaxException { public void testGetMappingsForStructuredProperty() throws URISyntaxException { StructuredPropertyDefinition testStructProp = new StructuredPropertyDefinition() + .setVersion(null, SetMode.REMOVE_IF_NULL) .setQualifiedName("testProp") .setDisplayName("exampleProp") .setEntityTypes( @@ -247,10 +349,14 @@ public void testGetMappingsForStructuredProperty() throws URISyntaxException { Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "testEntity"))) .setValueType(Urn.createFromString("urn:li:logicalType:STRING")); Map structuredPropertyFieldMappings = - MappingsBuilder.getMappingsForStructuredProperty(List.of(testStructProp)); + MappingsBuilder.getMappingsForStructuredProperty( + List.of( + (Pair.of( + UrnUtils.getUrn("urn:li:structuredProperties:testProp"), testStructProp)))); assertEquals(structuredPropertyFieldMappings.size(), 1); String keyInMap = structuredPropertyFieldMappings.keySet().stream().findFirst().get(); assertEquals(keyInMap, "testProp"); + Object mappings = structuredPropertyFieldMappings.get(keyInMap); assertEquals( mappings, @@ -264,6 +370,7 @@ public void testGetMappingsForStructuredProperty() throws URISyntaxException { StructuredPropertyDefinition propWithNumericType 
= new StructuredPropertyDefinition() + .setVersion(null, SetMode.REMOVE_IF_NULL) .setQualifiedName("testPropNumber") .setDisplayName("examplePropNumber") .setEntityTypes( @@ -272,7 +379,11 @@ public void testGetMappingsForStructuredProperty() throws URISyntaxException { Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "testEntity"))) .setValueType(Urn.createFromString("urn:li:logicalType:NUMBER")); Map structuredPropertyFieldMappingsNumber = - MappingsBuilder.getMappingsForStructuredProperty(List.of(propWithNumericType)); + MappingsBuilder.getMappingsForStructuredProperty( + List.of( + Pair.of( + UrnUtils.getUrn("urn:li:structuredProperties:testPropNumber"), + propWithNumericType))); assertEquals(structuredPropertyFieldMappingsNumber.size(), 1); keyInMap = structuredPropertyFieldMappingsNumber.keySet().stream().findFirst().get(); assertEquals("testPropNumber", keyInMap); @@ -280,6 +391,61 @@ public void testGetMappingsForStructuredProperty() throws URISyntaxException { assertEquals(Map.of("type", "double"), mappings); } + @Test + public void testGetMappingsForStructuredPropertyV1() throws URISyntaxException { + StructuredPropertyDefinition testStructProp = + new StructuredPropertyDefinition() + .setVersion("00000000000001") + .setQualifiedName("testProp") + .setDisplayName("exampleProp") + .setEntityTypes( + new UrnArray( + Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "dataset"), + Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "testEntity"))) + .setValueType(Urn.createFromString("urn:li:logicalType:STRING")); + Map structuredPropertyFieldMappings = + MappingsBuilder.getMappingsForStructuredProperty( + List.of( + (Pair.of( + UrnUtils.getUrn("urn:li:structuredProperties:testProp"), testStructProp)))); + assertEquals(structuredPropertyFieldMappings.size(), 1); + String keyInMap = structuredPropertyFieldMappings.keySet().stream().findFirst().get(); + assertEquals(keyInMap, "_versioned.testProp.00000000000001.string"); + + Object mappings = 
structuredPropertyFieldMappings.get(keyInMap); + assertEquals( + mappings, + Map.of( + "type", + "keyword", + "normalizer", + "keyword_normalizer", + "fields", + Map.of("keyword", Map.of("type", "keyword")))); + + StructuredPropertyDefinition propWithNumericType = + new StructuredPropertyDefinition() + .setVersion("00000000000001") + .setQualifiedName("testPropNumber") + .setDisplayName("examplePropNumber") + .setEntityTypes( + new UrnArray( + Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "dataset"), + Urn.createFromString(ENTITY_TYPE_URN_PREFIX + "testEntity"))) + .setValueType(Urn.createFromString("urn:li:logicalType:NUMBER")); + Map structuredPropertyFieldMappingsNumber = + MappingsBuilder.getMappingsForStructuredProperty( + List.of( + Pair.of( + UrnUtils.getUrn("urn:li:structuredProperty:testPropNumber"), + propWithNumericType))); + assertEquals(structuredPropertyFieldMappingsNumber.size(), 1); + keyInMap = structuredPropertyFieldMappingsNumber.keySet().stream().findFirst().get(); + assertEquals(keyInMap, "_versioned.testPropNumber.00000000000001.number"); + mappings = structuredPropertyFieldMappingsNumber.get(keyInMap); + assertEquals(Map.of("type", "double"), mappings); + } + @Test public void testRefMappingsBuilder() { EntityRegistry entityRegistry = getTestEntityRegistry(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java index 43ae6bd7a48b71..0ea2340ae82173 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java @@ -1,21 +1,31 @@ package com.linkedin.metadata.search.query.request; +import static com.linkedin.metadata.Constants.DATA_TYPE_URN_PREFIX; +import static 
com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; import static com.linkedin.metadata.utils.SearchUtil.*; +import static org.mockito.ArgumentMatchers.anySet; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.SetMode; +import com.linkedin.entity.Aspect; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation; import com.linkedin.metadata.search.elasticsearch.query.request.AggregationQueryBuilder; import com.linkedin.r2.RemoteInvocationException; +import com.linkedin.structured.StructuredPropertyDefinition; import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URISyntaxException; import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -28,10 +38,73 @@ public class AggregationQueryBuilderTest { private static AspectRetriever aspectRetriever; + private static AspectRetriever aspectRetrieverV1; @BeforeClass public static void setup() throws RemoteInvocationException, URISyntaxException { - aspectRetriever = TestOperationContexts.emptyAspectRetriever(null); + Urn helloUrn = Urn.createFromString("urn:li:structuredProperty:hello"); + Urn abFghTenUrn = Urn.createFromString("urn:li:structuredProperty:ab.fgh.ten"); + + // legacy + aspectRetriever = mock(AspectRetriever.class); + when(aspectRetriever.getEntityRegistry()) + .thenReturn(TestOperationContexts.defaultEntityRegistry()); + + StructuredPropertyDefinition structPropHelloDefinition = new 
StructuredPropertyDefinition(); + structPropHelloDefinition.setVersion(null, SetMode.REMOVE_IF_NULL); + structPropHelloDefinition.setValueType(Urn.createFromString(DATA_TYPE_URN_PREFIX + "string")); + structPropHelloDefinition.setQualifiedName("hello"); + when(aspectRetriever.getLatestAspectObjects(eq(Set.of(helloUrn)), anySet())) + .thenReturn( + Map.of( + helloUrn, + Map.of( + STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, + new Aspect(structPropHelloDefinition.data())))); + + StructuredPropertyDefinition structPropAbFghTenDefinition = new StructuredPropertyDefinition(); + structPropAbFghTenDefinition.setVersion(null, SetMode.REMOVE_IF_NULL); + structPropAbFghTenDefinition.setValueType( + Urn.createFromString(DATA_TYPE_URN_PREFIX + "string")); + structPropAbFghTenDefinition.setQualifiedName("ab.fgh.ten"); + when(aspectRetriever.getLatestAspectObjects(eq(Set.of(abFghTenUrn)), anySet())) + .thenReturn( + Map.of( + abFghTenUrn, + Map.of( + STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, + new Aspect(structPropAbFghTenDefinition.data())))); + + // V1 + aspectRetrieverV1 = mock(AspectRetriever.class); + when(aspectRetrieverV1.getEntityRegistry()) + .thenReturn(TestOperationContexts.defaultEntityRegistry()); + + StructuredPropertyDefinition structPropHelloDefinitionV1 = new StructuredPropertyDefinition(); + structPropHelloDefinitionV1.setVersion("00000000000001"); + structPropHelloDefinitionV1.setValueType(Urn.createFromString(DATA_TYPE_URN_PREFIX + "string")); + structPropHelloDefinitionV1.setQualifiedName("hello"); + when(aspectRetrieverV1.getLatestAspectObjects(eq(Set.of(helloUrn)), anySet())) + .thenReturn( + Map.of( + helloUrn, + Map.of( + STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, + new Aspect(structPropHelloDefinitionV1.data())))); + + StructuredPropertyDefinition structPropAbFghTenDefinitionV1 = + new StructuredPropertyDefinition(); + structPropAbFghTenDefinitionV1.setVersion("00000000000001"); + structPropAbFghTenDefinitionV1.setValueType( + 
Urn.createFromString(DATA_TYPE_URN_PREFIX + "string")); + structPropAbFghTenDefinitionV1.setQualifiedName("ab.fgh.ten"); + when(aspectRetrieverV1.getLatestAspectObjects(eq(Set.of(abFghTenUrn)), anySet())) + .thenReturn( + Map.of( + abFghTenUrn, + Map.of( + STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, + new Aspect(structPropAbFghTenDefinitionV1.data())))); } @Test @@ -171,26 +244,69 @@ public void testAggregateOverStructuredProperty() { List aggs = builder.getAggregations( - TestOperationContexts.systemContextNoSearchAuthorization(), + TestOperationContexts.systemContextNoSearchAuthorization(aspectRetriever), List.of("structuredProperties.ab.fgh.ten")); Assert.assertEquals(aggs.size(), 1); AggregationBuilder aggBuilder = aggs.get(0); Assert.assertTrue(aggBuilder instanceof TermsAggregationBuilder); TermsAggregationBuilder agg = (TermsAggregationBuilder) aggBuilder; // Check that field name is sanitized to correct field name - Assert.assertEquals(agg.field(), "structuredProperties.ab_fgh_ten"); + Assert.assertEquals( + agg.field(), + "structuredProperties.ab_fgh_ten.keyword", + "Terms aggregate must be on a keyword or subfield keyword"); // Two structured properties aggs = builder.getAggregations( - TestOperationContexts.systemContextNoSearchAuthorization(), + TestOperationContexts.systemContextNoSearchAuthorization(aspectRetriever), List.of("structuredProperties.ab.fgh.ten", "structuredProperties.hello")); Assert.assertEquals(aggs.size(), 2); Assert.assertEquals( aggs.stream() .map(aggr -> ((TermsAggregationBuilder) aggr).field()) .collect(Collectors.toSet()), - Set.of("structuredProperties.ab_fgh_ten", "structuredProperties.hello")); + Set.of("structuredProperties.ab_fgh_ten.keyword", "structuredProperties.hello.keyword")); + } + + @Test + public void testAggregateOverStructuredPropertyV1() { + SearchConfiguration config = new SearchConfiguration(); + config.setMaxTermBucketSize(25); + + AggregationQueryBuilder builder = + new AggregationQueryBuilder( + config, 
ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of())); + + List aggs = + builder.getAggregations( + TestOperationContexts.systemContextNoSearchAuthorization(aspectRetrieverV1), + List.of("structuredProperties.ab.fgh.ten")); + Assert.assertEquals(aggs.size(), 1); + AggregationBuilder aggBuilder = aggs.get(0); + Assert.assertTrue(aggBuilder instanceof TermsAggregationBuilder); + TermsAggregationBuilder agg = (TermsAggregationBuilder) aggBuilder; + // Check that field name is sanitized to correct field name + Assert.assertEquals( + agg.field(), + "structuredProperties._versioned.ab_fgh_ten.00000000000001.string.keyword", + "Terms aggregation must be on a keyword field or subfield."); + + // Two structured properties + aggs = + builder.getAggregations( + TestOperationContexts.systemContextNoSearchAuthorization(aspectRetrieverV1), + List.of( + "structuredProperties.ab.fgh.ten", + "structuredProperties._versioned.hello.00000000000001.string")); + Assert.assertEquals(aggs.size(), 2); + Assert.assertEquals( + aggs.stream() + .map(aggr -> ((TermsAggregationBuilder) aggr).field()) + .collect(Collectors.toSet()), + Set.of( + "structuredProperties._versioned.ab_fgh_ten.00000000000001.string.keyword", + "structuredProperties._versioned.hello.00000000000001.string.keyword")); } @Test @@ -240,7 +356,76 @@ public void testAggregateOverFieldsAndStructProp() { // Aggregate over fields and structured properties List aggs = builder.getAggregations( - TestOperationContexts.systemContextNoSearchAuthorization(), + TestOperationContexts.systemContextNoSearchAuthorization(aspectRetriever), + ImmutableList.of( + "test1", + "test2", + "hasTest1", + "structuredProperties.ab.fgh.ten", + "structuredProperties.hello")); + Assert.assertEquals(aggs.size(), 5); + Set facets = + aggs.stream() + .map(aggB -> ((TermsAggregationBuilder) aggB).field()) + .collect(Collectors.toSet()); + Assert.assertEquals( + facets, + ImmutableSet.of( + "test1.keyword", + "test2.keyword", + "hasTest1", + 
"structuredProperties.ab_fgh_ten.keyword", + "structuredProperties.hello.keyword")); + } + + @Test + public void testAggregateOverFieldsAndStructPropV1() { + SearchableAnnotation annotation1 = + new SearchableAnnotation( + "test1", + SearchableAnnotation.FieldType.KEYWORD, + true, + true, + false, + false, + Optional.empty(), + Optional.of("Has Test"), + 1.0, + Optional.of("hasTest1"), + Optional.empty(), + Collections.emptyMap(), + Collections.emptyList(), + false); + + SearchableAnnotation annotation2 = + new SearchableAnnotation( + "test2", + SearchableAnnotation.FieldType.KEYWORD, + true, + true, + false, + false, + Optional.of("Test Filter"), + Optional.empty(), + 1.0, + Optional.empty(), + Optional.empty(), + Collections.emptyMap(), + Collections.emptyList(), + false); + + SearchConfiguration config = new SearchConfiguration(); + config.setMaxTermBucketSize(25); + + AggregationQueryBuilder builder = + new AggregationQueryBuilder( + config, + ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation1, annotation2))); + + // Aggregate over fields and structured properties + List aggs = + builder.getAggregations( + TestOperationContexts.systemContextNoSearchAuthorization(aspectRetrieverV1), ImmutableList.of( "test1", "test2", @@ -258,8 +443,8 @@ public void testAggregateOverFieldsAndStructProp() { "test1.keyword", "test2.keyword", "hasTest1", - "structuredProperties.ab_fgh_ten", - "structuredProperties.hello")); + "structuredProperties._versioned.ab_fgh_ten.00000000000001.string.keyword", + "structuredProperties._versioned.hello.00000000000001.string.keyword")); } @Test diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java index 2f68f17dae2410..9376552f7abc55 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java +++ 
b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java @@ -5,7 +5,6 @@ import static org.testng.Assert.assertTrue; import com.linkedin.metadata.TestEntitySpecBuilder; -import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.config.search.custom.AutocompleteConfiguration; import com.linkedin.metadata.config.search.custom.BoolQueryConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; @@ -34,9 +33,7 @@ public class AutocompleteRequestHandlerTest { private AutocompleteRequestHandler handler = AutocompleteRequestHandler.getBuilder( - TestEntitySpecBuilder.getSpec(), - CustomSearchConfiguration.builder().build(), - TestOperationContexts.emptyAspectRetriever(null)); + TestEntitySpecBuilder.getSpec(), CustomSearchConfiguration.builder().build()); private OperationContext mockOpContext = TestOperationContexts.systemContextNoSearchAuthorization(mock(EntityRegistry.class)); @@ -173,8 +170,7 @@ public void testCustomConfigWithDefault() { .should(List.of(Map.of("match_all", Map.of()))) .build()) .build())) - .build(), - mock(AspectRetriever.class)); + .build()); SearchRequest autocompleteRequest = withoutDefaultQuery.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -199,8 +195,7 @@ public void testCustomConfigWithDefault() { .should(List.of(Map.of("match_all", Map.of()))) .build()) .build())) - .build(), - mock(AspectRetriever.class)); + .build()); autocompleteRequest = withDefaultQuery.getSearchRequest(mockOpContext, "input", null, null, 10); sourceBuilder = autocompleteRequest.source(); @@ -242,8 +237,7 @@ public void testCustomConfigWithInheritedQueryFunctionScores() { .should(List.of(Map.of("match_all", Map.of()))) .build()) .build())) - .build(), - mock(AspectRetriever.class)); + .build()); SearchRequest autocompleteRequest = withInherit.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -281,8 +275,7 @@ public void 
testCustomConfigWithInheritedQueryFunctionScores() { .should(List.of(Map.of("match_all", Map.of()))) .build()) .build())) - .build(), - mock(AspectRetriever.class)); + .build()); autocompleteRequest = noQueryCustomization.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -344,8 +337,7 @@ public void testCustomConfigWithFunctionScores() { Map.of( "deprecated", Map.of("value", false))))))) .build())) - .build(), - mock(AspectRetriever.class)); + .build()); SearchRequest autocompleteRequest = explicitNoInherit.getSearchRequest(mockOpContext, "input", null, null, 10); @@ -397,8 +389,7 @@ public void testCustomConfigWithFunctionScores() { Map.of( "deprecated", Map.of("value", false))))))) .build())) - .build(), - mock(AspectRetriever.class)); + .build()); autocompleteRequest = explicit.getSearchRequest(mockOpContext, "input", null, null, 10); sourceBuilder = autocompleteRequest.source(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index 1b41ff44bc9698..1cd9a274463d30 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -86,11 +86,7 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests { public void testDatasetFieldsAndHighlights() { EntitySpec entitySpec = operationContext.getEntityRegistry().getEntitySpec("dataset"); SearchRequestHandler datasetHandler = - SearchRequestHandler.getBuilder( - entitySpec, - testQueryConfig, - null, - operationContext.getRetrieverContext().get().getAspectRetriever()); + SearchRequestHandler.getBuilder(entitySpec, testQueryConfig, null); /* Ensure efficient query performance, we do not expect upstream/downstream/fineGrained lineage @@ -109,11 +105,7 @@ public void 
testDatasetFieldsAndHighlights() { @Test public void testSearchRequestHandlerHighlightingTurnedOff() { SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder( - TestEntitySpecBuilder.getSpec(), - testQueryConfig, - null, - operationContext.getRetrieverContext().get().getAspectRetriever()); + SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); SearchRequest searchRequest = requestHandler.getSearchRequest( operationContext.withSearchFlags( @@ -153,11 +145,7 @@ public void testSearchRequestHandlerHighlightingTurnedOff() { @Test public void testSearchRequestHandler() { SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder( - TestEntitySpecBuilder.getSpec(), - testQueryConfig, - null, - operationContext.getRetrieverContext().get().getAspectRetriever()); + SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); SearchRequest searchRequest = requestHandler.getSearchRequest( operationContext.withSearchFlags( @@ -220,11 +208,7 @@ public void testSearchRequestHandler() { @Test public void testAggregationsInSearch() { SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder( - TestEntitySpecBuilder.getSpec(), - testQueryConfig, - null, - operationContext.getRetrieverContext().get().getAspectRetriever()); + SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); final String nestedAggString = String.format("_entityType%stextFieldOverride", AGGREGATION_SEPARATOR_CHAR); SearchRequest searchRequest = @@ -292,11 +276,7 @@ public void testAggregationsInSearch() { public void testFilteredSearch() { final SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder( - TestEntitySpecBuilder.getSpec(), - testQueryConfig, - null, - operationContext.getRetrieverContext().get().getAspectRetriever()); + SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); final BoolQueryBuilder testQuery = 
constructFilterQuery(requestHandler, false); @@ -675,11 +655,7 @@ private BoolQueryBuilder getQuery(final Criterion filterCriterion) { .setAnd(new CriterionArray(ImmutableList.of(filterCriterion))))); final SearchRequestHandler requestHandler = - SearchRequestHandler.getBuilder( - TestEntitySpecBuilder.getSpec(), - testQueryConfig, - null, - operationContext.getRetrieverContext().get().getAspectRetriever()); + SearchRequestHandler.getBuilder(TestEntitySpecBuilder.getSpec(), testQueryConfig, null); return (BoolQueryBuilder) requestHandler diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java index 9953e08efb2d2f..def14f9be7054a 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformerTest.java @@ -21,6 +21,7 @@ import com.linkedin.metadata.TestEntityUtil; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.entity.SearchRetriever; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.SearchableRefFieldSpec; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; @@ -186,6 +187,7 @@ public void testSetSearchableRefValue() throws URISyntaxException, RemoteInvocat RetrieverContext.builder() .aspectRetriever(aspectRetriever) .graphRetriever(mock(GraphRetriever.class)) + .searchRetriever(mock(SearchRetriever.class)) .build()); searchDocumentTransformer.setSearchableRefValue( @@ -241,6 +243,7 @@ public void testSetSearchableRefValue_RuntimeException() RetrieverContext.builder() .aspectRetriever(aspectRetriever) .graphRetriever(mock(GraphRetriever.class)) + .searchRetriever(mock(SearchRetriever.class)) .build()); ObjectNode 
searchDocument = JsonNodeFactory.instance.objectNode(); @@ -277,6 +280,7 @@ public void testSetSearchableRefValue_RuntimeException_URNExist() RetrieverContext.builder() .aspectRetriever(aspectRetriever) .graphRetriever(mock(GraphRetriever.class)) + .searchRetriever(mock(SearchRetriever.class)) .build()); ObjectNode searchDocument = JsonNodeFactory.instance.objectNode(); @@ -309,6 +313,7 @@ void testSetSearchableRefValue_WithInvalidURN() RetrieverContext.builder() .aspectRetriever(aspectRetriever) .graphRetriever(mock(GraphRetriever.class)) + .searchRetriever(mock(SearchRetriever.class)) .build()); ObjectNode searchDocument = JsonNodeFactory.instance.objectNode(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java index 838df98fdce9c6..d56d9b0674884f 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java @@ -1,18 +1,81 @@ package com.linkedin.metadata.search.utils; +import static com.linkedin.metadata.Constants.DATA_TYPE_URN_PREFIX; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; +import static org.mockito.ArgumentMatchers.anySet; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + import com.google.common.collect.ImmutableList; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.SetMode; import com.linkedin.data.template.StringArray; +import com.linkedin.entity.Aspect; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.r2.RemoteInvocationException; +import com.linkedin.structured.StructuredPropertyDefinition; +import 
io.datahubproject.test.metadata.context.TestOperationContexts; +import java.net.URISyntaxException; import java.util.HashMap; +import java.util.Map; +import java.util.Set; import org.opensearch.index.query.QueryBuilder; import org.testng.Assert; +import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; public class ESUtilsTest { private static final String FIELD_TO_EXPAND = "fieldTags"; + private static AspectRetriever aspectRetriever; + private static AspectRetriever aspectRetrieverV1; + + @BeforeClass + public static void setup() throws RemoteInvocationException, URISyntaxException { + Urn abFghTenUrn = Urn.createFromString("urn:li:structuredProperty:ab.fgh.ten"); + + // legacy + aspectRetriever = mock(AspectRetriever.class); + when(aspectRetriever.getEntityRegistry()) + .thenReturn(TestOperationContexts.defaultEntityRegistry()); + + StructuredPropertyDefinition structPropAbFghTenDefinition = new StructuredPropertyDefinition(); + structPropAbFghTenDefinition.setVersion(null, SetMode.REMOVE_IF_NULL); + structPropAbFghTenDefinition.setValueType( + Urn.createFromString(DATA_TYPE_URN_PREFIX + "string")); + structPropAbFghTenDefinition.setQualifiedName("ab.fgh.ten"); + when(aspectRetriever.getLatestAspectObjects(eq(Set.of(abFghTenUrn)), anySet())) + .thenReturn( + Map.of( + abFghTenUrn, + Map.of( + STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, + new Aspect(structPropAbFghTenDefinition.data())))); + + // V1 + aspectRetrieverV1 = mock(AspectRetriever.class); + when(aspectRetrieverV1.getEntityRegistry()) + .thenReturn(TestOperationContexts.defaultEntityRegistry()); + + StructuredPropertyDefinition structPropAbFghTenDefinitionV1 = + new StructuredPropertyDefinition(); + structPropAbFghTenDefinitionV1.setVersion("00000000000001"); + structPropAbFghTenDefinitionV1.setValueType( + Urn.createFromString(DATA_TYPE_URN_PREFIX + "string")); + structPropAbFghTenDefinitionV1.setQualifiedName("ab.fgh.ten"); + 
when(aspectRetrieverV1.getLatestAspectObjects(eq(Set.of(abFghTenUrn)), anySet())) + .thenReturn( + Map.of( + abFghTenUrn, + Map.of( + STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME, + new Aspect(structPropAbFghTenDefinitionV1.data())))); + } + @Test public void testGetQueryBuilderFromCriterionEqualsValues() { @@ -23,7 +86,8 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { .setValues(new StringArray(ImmutableList.of("value1"))); QueryBuilder result = - ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); + ESUtils.getQueryBuilderFromCriterion( + singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); String expected = "{\n" + " \"terms\" : {\n" @@ -42,7 +106,9 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { .setCondition(Condition.EQUAL) .setValues(new StringArray(ImmutableList.of("value1", "value2"))); - result = ESUtils.getQueryBuilderFromCriterion(multiValueCriterion, false, new HashMap<>()); + result = + ESUtils.getQueryBuilderFromCriterion( + multiValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); expected = "{\n" + " \"terms\" : {\n" @@ -62,7 +128,9 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { .setCondition(Condition.EQUAL) .setValues(new StringArray(ImmutableList.of("value1", "value2"))); - result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); + result = + ESUtils.getQueryBuilderFromCriterion( + timeseriesField, true, new HashMap<>(), mock(AspectRetriever.class)); expected = "{\n" + " \"terms\" : {\n" @@ -83,7 +151,8 @@ public void testGetQueryBuilderFromCriterionExists() { new Criterion().setField("myTestField").setCondition(Condition.EXISTS); QueryBuilder result = - ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); + ESUtils.getQueryBuilderFromCriterion( + singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); String expected = "{\n" + " \"bool\" : {\n" @@ 
-106,7 +175,9 @@ public void testGetQueryBuilderFromCriterionExists() { final Criterion timeseriesField = new Criterion().setField("myTestField").setCondition(Condition.EXISTS); - result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); + result = + ESUtils.getQueryBuilderFromCriterion( + timeseriesField, true, new HashMap<>(), mock(AspectRetriever.class)); expected = "{\n" + " \"bool\" : {\n" @@ -132,7 +203,8 @@ public void testGetQueryBuilderFromCriterionIsNull() { new Criterion().setField("myTestField").setCondition(Condition.IS_NULL); QueryBuilder result = - ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); + ESUtils.getQueryBuilderFromCriterion( + singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); String expected = "{\n" + " \"bool\" : {\n" @@ -155,7 +227,9 @@ public void testGetQueryBuilderFromCriterionIsNull() { final Criterion timeseriesField = new Criterion().setField("myTestField").setCondition(Condition.IS_NULL); - result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); + result = + ESUtils.getQueryBuilderFromCriterion( + timeseriesField, true, new HashMap<>(), mock(AspectRetriever.class)); expected = "{\n" + " \"bool\" : {\n" @@ -187,7 +261,8 @@ public void testGetQueryBuilderFromCriterionFieldToExpand() { // Ensure that the query is expanded! QueryBuilder result = - ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); + ESUtils.getQueryBuilderFromCriterion( + singleValueCriterion, false, new HashMap<>(), mock(AspectRetriever.class)); String expected = "{\n" + " \"bool\" : {\n" @@ -225,7 +300,9 @@ public void testGetQueryBuilderFromCriterionFieldToExpand() { .setValues(new StringArray(ImmutableList.of("value1", "value2"))); // Ensure that the query is expanded without keyword. 
- result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); + result = + ESUtils.getQueryBuilderFromCriterion( + timeseriesField, true, new HashMap<>(), mock(AspectRetriever.class)); expected = "{\n" + " \"bool\" : {\n" @@ -268,15 +345,41 @@ public void testGetQueryBuilderFromStructPropEqualsValue() { .setValues(new StringArray(ImmutableList.of("value1"))); QueryBuilder result = - ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); + ESUtils.getQueryBuilderFromCriterion( + singleValueCriterion, false, new HashMap<>(), aspectRetriever); + String expected = + "{\n" + + " \"terms\" : {\n" + + " \"structuredProperties.ab_fgh_ten.keyword\" : [\n" + + " \"value1\"\n" + + " ],\n" + + " \"boost\" : 1.0,\n" + + " \"_name\" : \"structuredProperties.ab.fgh.ten\"\n" + + " }\n" + + "}"; + Assert.assertEquals(result.toString(), expected); + } + + @Test + public void testGetQueryBuilderFromStructPropEqualsValueV1() { + + final Criterion singleValueCriterion = + new Criterion() + .setField("structuredProperties.ab.fgh.ten") + .setCondition(Condition.EQUAL) + .setValues(new StringArray(ImmutableList.of("value1"))); + + QueryBuilder result = + ESUtils.getQueryBuilderFromCriterion( + singleValueCriterion, false, new HashMap<>(), aspectRetrieverV1); String expected = "{\n" + " \"terms\" : {\n" - + " \"structuredProperties.ab_fgh_ten\" : [\n" + + " \"structuredProperties._versioned.ab_fgh_ten.00000000000001.string.keyword\" : [\n" + " \"value1\"\n" + " ],\n" + " \"boost\" : 1.0,\n" - + " \"_name\" : \"structuredProperties.ab_fgh_ten\"\n" + + " \"_name\" : \"structuredProperties.ab.fgh.ten\"\n" + " }\n" + "}"; Assert.assertEquals(result.toString(), expected); @@ -288,7 +391,8 @@ public void testGetQueryBuilderFromStructPropExists() { new Criterion().setField("structuredProperties.ab.fgh.ten").setCondition(Condition.EXISTS); QueryBuilder result = - ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new 
HashMap<>()); + ESUtils.getQueryBuilderFromCriterion( + singleValueCriterion, false, new HashMap<>(), aspectRetriever); String expected = "{\n" + " \"bool\" : {\n" @@ -302,7 +406,59 @@ public void testGetQueryBuilderFromStructPropExists() { + " ],\n" + " \"adjust_pure_negative\" : true,\n" + " \"boost\" : 1.0,\n" - + " \"_name\" : \"structuredProperties.ab_fgh_ten\"\n" + + " \"_name\" : \"structuredProperties.ab.fgh.ten\"\n" + + " }\n" + + "}"; + Assert.assertEquals(result.toString(), expected); + + // No diff in the timeseries field case for this condition. + final Criterion timeseriesField = + new Criterion().setField("myTestField").setCondition(Condition.EXISTS); + + result = + ESUtils.getQueryBuilderFromCriterion( + timeseriesField, true, new HashMap<>(), aspectRetriever); + expected = + "{\n" + + " \"bool\" : {\n" + + " \"must\" : [\n" + + " {\n" + + " \"exists\" : {\n" + + " \"field\" : \"myTestField\",\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + " ],\n" + + " \"adjust_pure_negative\" : true,\n" + + " \"boost\" : 1.0,\n" + + " \"_name\" : \"myTestField\"\n" + + " }\n" + + "}"; + Assert.assertEquals(result.toString(), expected); + } + + @Test + public void testGetQueryBuilderFromStructPropExistsV1() { + final Criterion singleValueCriterion = + new Criterion().setField("structuredProperties.ab.fgh.ten").setCondition(Condition.EXISTS); + + QueryBuilder result = + ESUtils.getQueryBuilderFromCriterion( + singleValueCriterion, false, new HashMap<>(), aspectRetrieverV1); + String expected = + "{\n" + + " \"bool\" : {\n" + + " \"must\" : [\n" + + " {\n" + + " \"exists\" : {\n" + + " \"field\" : \"structuredProperties._versioned.ab_fgh_ten.00000000000001.string\",\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + " ],\n" + + " \"adjust_pure_negative\" : true,\n" + + " \"boost\" : 1.0,\n" + + " \"_name\" : \"structuredProperties.ab.fgh.ten\"\n" + " }\n" + "}"; Assert.assertEquals(result.toString(), expected); @@ -311,7 +467,9 @@ public void 
testGetQueryBuilderFromStructPropExists() { final Criterion timeseriesField = new Criterion().setField("myTestField").setCondition(Condition.EXISTS); - result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); + result = + ESUtils.getQueryBuilderFromCriterion( + timeseriesField, true, new HashMap<>(), aspectRetrieverV1); expected = "{\n" + " \"bool\" : {\n" diff --git a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java new file mode 100644 index 00000000000000..ab205d0463c4c4 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java @@ -0,0 +1,193 @@ +package com.linkedin.metadata.structuredproperties.hooks; + +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_KEY_ASPECT_NAME; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.ArgumentMatchers.nullable; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; + +import com.linkedin.common.UrnArray; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.entity.Aspect; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.batch.MCPItem; +import com.linkedin.metadata.aspect.batch.PatchMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.entity.SearchRetriever; 
+import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.metadata.search.SearchEntity; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.structured.StructuredPropertyDefinition; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.test.metadata.aspect.batch.TestMCL; +import io.datahubproject.metadata.context.RetrieverContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import jakarta.json.Json; +import jakarta.json.JsonPatch; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class PropertyDefinitionDeleteSideEffectTest { + private static final EntityRegistry TEST_REGISTRY = new TestEntityRegistry(); + private static final AspectPluginConfig TEST_PLUGIN_CONFIG = + AspectPluginConfig.builder() + .className(PropertyDefinitionDeleteSideEffect.class.getName()) + .enabled(true) + .supportedOperations(List.of("DELETE")) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName("structuredProperty") + .aspectName(STRUCTURED_PROPERTY_KEY_ASPECT_NAME) + .build(), + AspectPluginConfig.EntityAspectName.builder() + .entityName("structuredProperty") + .aspectName(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) + .build())) + .build(); + + private static final Urn TEST_PROPERTY_URN = + UrnUtils.getUrn("urn:li:structuredProperty:io.acryl.privacy.retentionTime"); + private static final 
StructuredPropertyDefinition TEST_PROPERTY_DEFINITION = + new StructuredPropertyDefinition() + .setValueType(UrnUtils.getUrn("urn:li:type:datahub.string")) + .setVersion("00000000000001") + .setEntityTypes( + new UrnArray(List.of(UrnUtils.getUrn("urn:li:entityType:datahub.dataset")))) + .setQualifiedName("io.acryl.privacy.retentionTime"); + private static final Urn TEST_DATASET_URN = + UrnUtils.getUrn( + "urn:li:dataset:(urn:li:dataPlatform:postgres,calm-pagoda-323403.jaffle_shop.customers,PROD)"); + private AspectRetriever mockAspectRetriever; + private SearchRetriever mockSearchRetriever; + private RetrieverContext retrieverContext; + + @BeforeMethod + public void setup() { + mockAspectRetriever = mock(AspectRetriever.class); + when(mockAspectRetriever.getEntityRegistry()).thenReturn(TEST_REGISTRY); + when(mockAspectRetriever.getLatestAspectObject( + eq(TEST_PROPERTY_URN), eq(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME))) + .thenReturn(new Aspect(TEST_PROPERTY_DEFINITION.data())); + + mockSearchRetriever = mock(SearchRetriever.class); + ScrollResult scrollResult = new ScrollResult(); + scrollResult.setPageSize(1); + scrollResult.setNumEntities(1); + scrollResult.setEntities( + new SearchEntityArray(List.of(new SearchEntity().setEntity(TEST_DATASET_URN)))); + when(mockSearchRetriever.scroll( + eq(List.of("dataset")), eq(expectedFilter()), nullable(String.class), anyInt())) + .thenReturn(scrollResult); + + retrieverContext = + RetrieverContext.builder() + .searchRetriever(mockSearchRetriever) + .aspectRetriever(mockAspectRetriever) + .graphRetriever(TestOperationContexts.emptyGraphRetriever) + .build(); + } + + @Test + public void testDeletePropertyKey() { + PropertyDefinitionDeleteSideEffect test = new PropertyDefinitionDeleteSideEffect(); + test.setConfig(TEST_PLUGIN_CONFIG); + + List result = + test.postMCPSideEffect( + Set.of( + TestMCL.builder() + .changeType(ChangeType.DELETE) + .urn(TEST_PROPERTY_URN) + 
.entitySpec(TEST_REGISTRY.getEntitySpec("structuredProperty")) + .aspectSpec( + TEST_REGISTRY + .getEntitySpec("structuredProperty") + .getAspectSpec(STRUCTURED_PROPERTY_KEY_ASPECT_NAME)) + .build()), + retrieverContext) + .collect(Collectors.toList()); + + assertEquals(1, result.size()); + + verify(mockAspectRetriever, times(1)) + .getLatestAspectObject( + eq(TEST_PROPERTY_URN), eq(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)); + verify(mockSearchRetriever, times(1)) + .scroll(eq(List.of("dataset")), eq(expectedFilter()), nullable(String.class), anyInt()); + + JsonPatch expectedPatch = + Json.createPatchBuilder().remove("/properties/" + TEST_PROPERTY_URN).build(); + assertEquals(((PatchMCP) result.get(0)).getPatch(), expectedPatch); + } + + @Test + public void testDeletePropertyDefinition() { + PropertyDefinitionDeleteSideEffect test = new PropertyDefinitionDeleteSideEffect(); + test.setConfig(TEST_PLUGIN_CONFIG); + + List result = + test.postMCPSideEffect( + Set.of( + TestMCL.builder() + .changeType(ChangeType.DELETE) + .urn(TEST_PROPERTY_URN) + .entitySpec(TEST_REGISTRY.getEntitySpec("structuredProperty")) + .aspectSpec( + TEST_REGISTRY + .getEntitySpec("structuredProperty") + .getAspectSpec(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)) + .previousRecordTemplate(TEST_PROPERTY_DEFINITION) + .build()), + retrieverContext) + .collect(Collectors.toList()); + + assertEquals(1, result.size()); + + verify(mockAspectRetriever, times(0)).getLatestAspectObject(any(), any()); + verify(mockAspectRetriever, times(0)).getLatestAspectObjects(any(), any()); + verify(mockSearchRetriever, times(1)) + .scroll(eq(List.of("dataset")), eq(expectedFilter()), nullable(String.class), anyInt()); + + JsonPatch expectedPatch = + Json.createPatchBuilder().remove("/properties/" + TEST_PROPERTY_URN).build(); + assertEquals(((PatchMCP) result.get(0)).getPatch(), expectedPatch); + } + + private static Filter expectedFilter() { + Filter propertyFilter = new Filter(); + final 
ConjunctiveCriterionArray disjunction = new ConjunctiveCriterionArray(); + final ConjunctiveCriterion conjunction = new ConjunctiveCriterion(); + final CriterionArray andCriterion = new CriterionArray(); + + final Criterion propertyExistsCriterion = new Criterion(); + propertyExistsCriterion.setField( + "structuredProperties._versioned.io_acryl_privacy_retentionTime.00000000000001.string"); + propertyExistsCriterion.setCondition(Condition.EXISTS); + + andCriterion.add(propertyExistsCriterion); + conjunction.setAnd(andCriterion); + disjunction.add(conjunction); + propertyFilter.setOr(disjunction); + + return propertyFilter; + } +} diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDeleteTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/StructuredPropertiesSoftDeleteTest.java similarity index 98% rename from entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDeleteTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/StructuredPropertiesSoftDeleteTest.java index e1e84f57285406..9109eeb7f96a5b 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDeleteTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/StructuredPropertiesSoftDeleteTest.java @@ -1,4 +1,4 @@ -package com.linkedin.metadata.aspect.hooks; +package com.linkedin.metadata.structuredproperties.hooks; import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; import static org.mockito.Mockito.mock; diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/PropertyDefinitionValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/PropertyDefinitionValidatorTest.java similarity index 70% rename from 
entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/PropertyDefinitionValidatorTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/PropertyDefinitionValidatorTest.java index 841cbf5a77becb..22224f16f2210b 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/PropertyDefinitionValidatorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/PropertyDefinitionValidatorTest.java @@ -1,34 +1,27 @@ -package com.linkedin.metadata.aspect.validators; +package com.linkedin.metadata.structuredproperties.validators; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -import static org.testng.Assert.*; +import static org.testng.AssertJUnit.assertEquals; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.Constants; +import com.linkedin.data.template.SetMode; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.RetrieverContext; -import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; -import com.linkedin.metadata.aspect.validation.PropertyDefinitionValidator; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.structuredproperties.validation.PropertyDefinitionValidator; import com.linkedin.structured.PrimitivePropertyValue; import com.linkedin.structured.PropertyCardinality; import com.linkedin.structured.PropertyValue; import com.linkedin.structured.PropertyValueArray; import 
com.linkedin.structured.StructuredPropertyDefinition; -import com.linkedin.structured.StructuredPropertyKey; import com.linkedin.test.metadata.aspect.TestEntityRegistry; import com.linkedin.test.metadata.aspect.batch.TestMCP; import java.net.URISyntaxException; -import java.util.List; -import java.util.Set; import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; @@ -109,6 +102,7 @@ public void testCannotChangeMultipleToSingle() oldProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); StructuredPropertyDefinition newProperty = oldProperty.copy(); newProperty.setCardinality(PropertyCardinality.SINGLE); + newProperty.setVersion(null, SetMode.REMOVE_IF_NULL); assertEquals( PropertyDefinitionValidator.validateDefinitionUpserts( TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), @@ -117,6 +111,30 @@ public void testCannotChangeMultipleToSingle() 1); } + @Test + public void testCanChangeMultipleToSingleWithNewVersion() + throws URISyntaxException, CloneNotSupportedException { + StructuredPropertyDefinition oldProperty = new StructuredPropertyDefinition(); + oldProperty.setEntityTypes( + new UrnArray( + Urn.createFromString("urn:li:logicalEntity:dataset"), + Urn.createFromString("urn:li:logicalEntity:chart"), + Urn.createFromString("urn:li:logicalEntity:glossaryTerm"))); + oldProperty.setDisplayName("oldProp"); + oldProperty.setQualifiedName("prop3"); + oldProperty.setCardinality(PropertyCardinality.MULTIPLE); + oldProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); + StructuredPropertyDefinition newProperty = oldProperty.copy(); + newProperty.setCardinality(PropertyCardinality.SINGLE); + newProperty.setVersion("00000000000001"); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockRetrieverContext) + .count(), + 0); + } + @Test public void testCannotChangeValueType() throws 
URISyntaxException, CloneNotSupportedException { StructuredPropertyDefinition oldProperty = new StructuredPropertyDefinition(); @@ -131,6 +149,7 @@ public void testCannotChangeValueType() throws URISyntaxException, CloneNotSuppo oldProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); StructuredPropertyDefinition newProperty = oldProperty.copy(); newProperty.setValueType(Urn.createFromString("urn:li:logicalType:NUMBER")); + newProperty.setVersion(null, SetMode.REMOVE_IF_NULL); assertEquals( PropertyDefinitionValidator.validateDefinitionUpserts( TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), @@ -139,6 +158,30 @@ public void testCannotChangeValueType() throws URISyntaxException, CloneNotSuppo 1); } + @Test + public void testCanChangeValueTypeWithNewVersion() + throws URISyntaxException, CloneNotSupportedException { + StructuredPropertyDefinition oldProperty = new StructuredPropertyDefinition(); + oldProperty.setEntityTypes( + new UrnArray( + Urn.createFromString("urn:li:logicalEntity:dataset"), + Urn.createFromString("urn:li:logicalEntity:chart"), + Urn.createFromString("urn:li:logicalEntity:glossaryTerm"))); + oldProperty.setDisplayName("oldProp"); + oldProperty.setQualifiedName("prop3"); + oldProperty.setCardinality(PropertyCardinality.MULTIPLE); + oldProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); + StructuredPropertyDefinition newProperty = oldProperty.copy(); + newProperty.setValueType(Urn.createFromString("urn:li:logicalType:NUMBER")); + newProperty.setVersion("00000000000001"); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockRetrieverContext) + .count(), + 0); + } + @Test public void testCanChangeDisplayName() throws URISyntaxException, CloneNotSupportedException, AspectValidationException { @@ -185,6 +228,30 @@ public void testCannotChangeFullyQualifiedName() 1); } + @Test + 
public void testCannotChangeFullyQualifiedNameWithVersionChange() + throws URISyntaxException, CloneNotSupportedException { + StructuredPropertyDefinition oldProperty = new StructuredPropertyDefinition(); + oldProperty.setEntityTypes( + new UrnArray( + Urn.createFromString("urn:li:logicalEntity:dataset"), + Urn.createFromString("urn:li:logicalEntity:chart"), + Urn.createFromString("urn:li:logicalEntity:glossaryTerm"))); + oldProperty.setDisplayName("oldProp"); + oldProperty.setQualifiedName("prop3"); + oldProperty.setCardinality(PropertyCardinality.MULTIPLE); + oldProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); + StructuredPropertyDefinition newProperty = oldProperty.copy(); + newProperty.setQualifiedName("newProp"); + newProperty.setVersion("00000000000001"); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockRetrieverContext) + .count(), + 1); + } + @Test public void testCannotChangeRestrictAllowedValues() throws URISyntaxException, CloneNotSupportedException { @@ -203,6 +270,7 @@ public void testCannotChangeRestrictAllowedValues() PropertyValue allowedValue = new PropertyValue().setValue(PrimitivePropertyValue.create(1.0)).setDescription("hello"); newProperty.setAllowedValues(new PropertyValueArray(allowedValue)); + newProperty.setVersion(null, SetMode.REMOVE_IF_NULL); assertEquals( PropertyDefinitionValidator.validateDefinitionUpserts( TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), @@ -222,6 +290,46 @@ public void testCannotChangeRestrictAllowedValues() 1); } + @Test + public void testCanChangeRestrictAllowedValuesWithVersionChange() + throws URISyntaxException, CloneNotSupportedException { + // No constraint -> constraint case + StructuredPropertyDefinition oldProperty = new StructuredPropertyDefinition(); + oldProperty.setEntityTypes( + new UrnArray( + 
Urn.createFromString("urn:li:logicalEntity:dataset"), + Urn.createFromString("urn:li:logicalEntity:chart"), + Urn.createFromString("urn:li:logicalEntity:glossaryTerm"))); + oldProperty.setDisplayName("oldProp"); + oldProperty.setQualifiedName("prop3"); + oldProperty.setCardinality(PropertyCardinality.MULTIPLE); + oldProperty.setValueType(Urn.createFromString("urn:li:logicalType:STRING")); + + StructuredPropertyDefinition newProperty = oldProperty.copy(); + newProperty.setVersion("00000000000001"); + PropertyValue allowedValue = + new PropertyValue().setValue(PrimitivePropertyValue.create(1.0)).setDescription("hello"); + newProperty.setAllowedValues(new PropertyValueArray(allowedValue)); + + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockRetrieverContext) + .count(), + 0); + + // Remove allowed values from constraint case + PropertyValue oldAllowedValue = + new PropertyValue().setValue(PrimitivePropertyValue.create(3.0)).setDescription("hello"); + oldProperty.setAllowedValues((new PropertyValueArray(allowedValue, oldAllowedValue))); + assertEquals( + PropertyDefinitionValidator.validateDefinitionUpserts( + TestMCP.ofOneMCP(testPropertyUrn, oldProperty, newProperty, entityRegistry), + mockRetrieverContext) + .count(), + 0); + } + @Test public void testCanExpandAllowedValues() throws URISyntaxException, CloneNotSupportedException, AspectValidationException { @@ -289,61 +397,4 @@ public void testCanChangeAllowedValueDescriptions() .count(), 0); } - - @Test - public void testHardDeleteBlock() { - PropertyDefinitionValidator test = - new PropertyDefinitionValidator() - .setConfig( - AspectPluginConfig.builder() - .enabled(true) - .className(PropertyDefinitionValidator.class.getName()) - .supportedOperations(List.of("DELETE")) - .supportedEntityAspectNames( - List.of( - AspectPluginConfig.EntityAspectName.builder() - .entityName(STRUCTURED_PROPERTY_ENTITY_NAME) - 
.aspectName(Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) - .build(), - AspectPluginConfig.EntityAspectName.builder() - .entityName(STRUCTURED_PROPERTY_ENTITY_NAME) - .aspectName("structuredPropertyKey") - .build())) - .build()); - - assertEquals( - test.validateProposed( - Set.of( - TestMCP.builder() - .changeType(ChangeType.DELETE) - .urn(UrnUtils.getUrn("urn:li:structuredProperty:foo.bar")) - .entitySpec(entityRegistry.getEntitySpec("structuredProperty")) - .aspectSpec( - entityRegistry - .getEntitySpec(STRUCTURED_PROPERTY_ENTITY_NAME) - .getKeyAspectSpec()) - .recordTemplate(new StructuredPropertyKey()) - .build()), - mockRetrieverContext) - .count(), - 1); - - assertEquals( - test.validateProposed( - Set.of( - TestMCP.builder() - .changeType(ChangeType.DELETE) - .urn(UrnUtils.getUrn("urn:li:structuredProperty:foo.bar")) - .entitySpec(entityRegistry.getEntitySpec("structuredProperty")) - .aspectSpec( - entityRegistry - .getEntitySpec(STRUCTURED_PROPERTY_ENTITY_NAME) - .getAspectSpecMap() - .get(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)) - .recordTemplate(new StructuredPropertyDefinition()) - .build()), - mockRetrieverContext) - .count(), - 1); - } } diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/StructuredPropertiesValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/StructuredPropertiesValidatorTest.java similarity index 99% rename from entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/StructuredPropertiesValidatorTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/StructuredPropertiesValidatorTest.java index 77cf453f517beb..7a2b8fd69f3686 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/StructuredPropertiesValidatorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/StructuredPropertiesValidatorTest.java @@ -1,4 +1,4 @@ 
-package com.linkedin.metadata.aspect.validators; +package com.linkedin.metadata.structuredproperties.validators; import static org.testng.Assert.assertEquals; @@ -7,8 +7,8 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; -import com.linkedin.metadata.aspect.validation.StructuredPropertiesValidator; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.structuredproperties.validation.StructuredPropertiesValidator; import com.linkedin.structured.PrimitivePropertyValue; import com.linkedin.structured.PrimitivePropertyValueArray; import com.linkedin.structured.PropertyValue; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/SystemMetadataServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/SystemMetadataServiceTestBase.java index 8643855162fa74..7067dd3a6763e7 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/SystemMetadataServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/systemmetadata/SystemMetadataServiceTestBase.java @@ -11,6 +11,7 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; import com.linkedin.mxe.SystemMetadata; +import java.util.Collections; import java.util.List; import javax.annotation.Nonnull; import org.opensearch.client.RestHighLevelClient; @@ -38,7 +39,7 @@ public abstract class SystemMetadataServiceTestBase extends AbstractTestNGSpring @BeforeClass public void setup() { _client = buildService(); - _client.configure(); + _client.reindexAll(Collections.emptySet()); } @BeforeMethod diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java 
b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java index 3ccd085cab9c00..b44f01d90dae40 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java @@ -58,6 +58,7 @@ import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URISyntaxException; import java.util.Calendar; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.function.Function; @@ -128,7 +129,7 @@ public void setup() throws RemoteInvocationException, URISyntaxException { entityRegistry, new IndexConventionImpl("es_timeseries_aspect_service_test")); elasticSearchTimeseriesAspectService = buildService(); - elasticSearchTimeseriesAspectService.configure(); + elasticSearchTimeseriesAspectService.reindexAll(Collections.emptySet()); EntitySpec entitySpec = entityRegistry.getEntitySpec(ENTITY_NAME); aspectSpec = entitySpec.getAspectSpec(ASPECT_NAME); } diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java index 5da970b46afc78..28a4a2b00cd6f1 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java @@ -40,6 +40,7 @@ import io.datahubproject.test.metadata.context.TestOperationContexts; import io.datahubproject.test.search.config.SearchCommonTestConfiguration; import java.io.IOException; +import java.util.Collections; import java.util.Map; import java.util.Optional; import java.util.Set; @@ -149,6 +150,7 @@ protected EntityIndexBuilders entityIndexBuildersHelper(OperationContext opConte Map.of(), true, false, 
+ false, new ElasticSearchConfiguration(), gitVersion); SettingsBuilder settingsBuilder = new SettingsBuilder(null); @@ -252,7 +254,7 @@ public SearchService searchServiceHelper( ranker); // Build indices & write fixture data - indexBuilders.reindexAll(); + indexBuilders.reindexAll(Collections.emptySet()); FixtureReader.builder() .bulkProcessor(_bulkProcessor) diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java index 34598821f43fdc..4cd818db34bf4b 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java @@ -39,6 +39,7 @@ import io.datahubproject.test.search.config.SearchCommonTestConfiguration; import io.datahubproject.test.search.config.SearchTestContainerConfiguration; import java.io.IOException; +import java.util.Collections; import java.util.Map; import java.util.Optional; import javax.annotation.Nonnull; @@ -100,6 +101,7 @@ protected EntityIndexBuilders entityIndexBuilders( Map.of(), true, false, + false, new ElasticSearchConfiguration(), gitVersion); SettingsBuilder settingsBuilder = new SettingsBuilder(null); @@ -151,6 +153,7 @@ protected ESIndexBuilder esIndexBuilder() { Map.of(), true, true, + false, new ElasticSearchConfiguration(), gitVersion); } @@ -174,7 +177,7 @@ protected ElasticSearchGraphService graphService( indexConvention, GraphQueryConfiguration.testDefaults), indexBuilder); - graphService.configure(); + graphService.reindexAll(Collections.emptySet()); return graphService; } @@ -225,7 +228,7 @@ protected SearchService searchService( ranker); // Build indices - indexBuilders.reindexAll(); + indexBuilders.reindexAll(Collections.emptySet()); return service; } diff --git 
a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java index f1fd371fdf7bd2..a71c40b70f2b41 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java +++ b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java @@ -2,6 +2,7 @@ import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.AUTO_COMPLETE_ENTITY_TYPES; import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.SEARCHABLE_ENTITY_TYPES; +import static org.mockito.Mockito.mock; import com.datahub.authentication.Authentication; import com.datahub.plugins.auth.authorization.Authorizer; @@ -13,6 +14,7 @@ import com.linkedin.datahub.graphql.resolvers.ResolverUtils; import com.linkedin.datahub.graphql.types.SearchableEntityType; import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.graph.LineageDirection; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.LineageSearchResult; @@ -183,7 +185,7 @@ public static LineageSearchResult lineage( .collect(Collectors.toList()), "*", hops, - ResolverUtils.buildFilter(filters, List.of()), + ResolverUtils.buildFilter(filters, List.of(), mock(AspectRetriever.class)), null, 0, 100); diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchTestContainerConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchTestContainerConfiguration.java index 0ddfd77399325e..ab6644ce6ff6de 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchTestContainerConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/search/config/SearchTestContainerConfiguration.java @@ -93,6 +93,7 @@ protected ESIndexBuilder getIndexBuilder( Map.of(), false, false, + false, new 
ElasticSearchConfiguration(), gitVersion); } diff --git a/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java b/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java index 35693ec96b3de3..489c424f565dd5 100644 --- a/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java +++ b/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java @@ -23,6 +23,7 @@ "com.linkedin.metadata.dao.producer", "com.linkedin.gms.factory.config", "com.linkedin.gms.factory.entity.update.indices", + "com.linkedin.gms.factory.search", "com.linkedin.gms.factory.entityclient", "com.linkedin.gms.factory.form", "com.linkedin.gms.factory.incident", diff --git a/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java b/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java index b3f81551c830a8..47740b02d6166c 100644 --- a/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java +++ b/metadata-jobs/pe-consumer/src/test/java/com/datahub/event/hook/BusinessAttributeUpdateHookTest.java @@ -153,7 +153,7 @@ private void testMCLOnInvalidCategory() throws Exception { // verify Mockito.verifyNoInteractions(opContext.getRetrieverContext().get().getGraphRetriever()); - Mockito.verifyNoInteractions(opContext.getRetrieverContext().get().getAspectRetriever()); + Mockito.verifyNoInteractions(opContext.getAspectRetrieverOpt().get()); Mockito.verifyNoInteractions(mockUpdateIndicesService); } @@ -232,7 +232,7 @@ private OperationContext mockOperationContextWithGraph(List graph TestOperationContexts.systemContextNoSearchAuthorization(mockRetrieverContext); // reset mock for test - reset(opContext.getRetrieverContext().get().getAspectRetriever()); + reset(opContext.getAspectRetrieverOpt().get()); if 
(!graphEdges.isEmpty()) { diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java index de9d3419c216e0..af70b56f8ec111 100644 --- a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java @@ -4,6 +4,8 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.metadata.aspect.RetrieverContext; import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.batch.MCLItem; +import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import java.util.Collection; @@ -33,6 +35,12 @@ protected Stream applyMCPSideEffect( }); } + @Override + protected Stream postMCPSideEffect( + Collection collection, @Nonnull RetrieverContext retrieverContext) { + return Stream.empty(); + } + @Nonnull @Override public AspectPluginConfig getConfig() { diff --git a/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl b/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl index 178d7b3cf4376a..bf0bf65099b2e8 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl @@ -78,5 +78,13 @@ record StructuredPropertyDefinition { "fieldType": "BOOLEAN" } immutable: boolean = false + + /** + * Definition version - Allows breaking schema changes. String is compared case-insensitive and new + * versions must be monotonically increasing. 
Cannot use periods/dots. + * Suggestions: v1, v2 + * 20240610, 20240611 + */ + version: optional string } diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 1e8044e3b5f86e..9c05c3d4851fc5 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -581,17 +581,22 @@ entities: events: plugins: aspectPayloadValidators: - - className: 'com.linkedin.metadata.aspect.validation.PropertyDefinitionValidator' + - className: 'com.linkedin.metadata.structuredproperties.validation.PropertyDefinitionValidator' + packageScan: + - 'com.linkedin.metadata.structuredproperties.validation' enabled: true supportedOperations: + - CREATE + - CREATE_ENTITY - UPSERT - - DELETE supportedEntityAspectNames: - entityName: structuredProperty aspectName: propertyDefinition - entityName: structuredProperty aspectName: structuredPropertyKey - - className: 'com.linkedin.metadata.aspect.validation.StructuredPropertiesValidator' + - className: 'com.linkedin.metadata.structuredproperties.validation.StructuredPropertiesValidator' + packageScan: + - 'com.linkedin.metadata.structuredproperties.validation' enabled: true supportedOperations: - CREATE @@ -608,8 +613,22 @@ plugins: supportedEntityAspectNames: - entityName: '*' aspectName: '*' + mcpSideEffects: + - className: 'com.linkedin.metadata.structuredproperties.hooks.PropertyDefinitionDeleteSideEffect' + packageScan: + - 'com.linkedin.metadata.structuredproperties.hooks' + enabled: true + supportedOperations: + - DELETE + supportedEntityAspectNames: + - entityName: structuredProperty + aspectName: propertyDefinition + - entityName: structuredProperty + aspectName: structuredPropertyKey mutationHooks: - - className: 'com.linkedin.metadata.aspect.hooks.StructuredPropertiesSoftDelete' + - className: 'com.linkedin.metadata.structuredproperties.hooks.StructuredPropertiesSoftDelete' + packageScan: + - 
'com.linkedin.metadata.structuredproperties.hooks' enabled: true supportedEntityAspectNames: - entityName: '*' diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java index 56247d61337e8a..9928318268a3ea 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java @@ -7,6 +7,7 @@ import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; +import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.LineageFlags; import com.linkedin.metadata.query.SearchFlags; @@ -285,6 +286,15 @@ public Optional getRetrieverContext() { return Optional.ofNullable(retrieverContext); } + @Nullable + public AspectRetriever getAspectRetriever() { + return getAspectRetrieverOpt().orElse(null); + } + + public Optional getAspectRetrieverOpt() { + return getRetrieverContext().map(RetrieverContext::getAspectRetriever); + } + /** * Return a unique id for this context. Typically useful for building cache keys. 
We combine the * different context components to create a single string representation of the hashcode across diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java index 0d7e6359b771cf..9337fbfe3bb003 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/RetrieverContext.java @@ -2,6 +2,7 @@ import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.entity.SearchRetriever; import java.util.Optional; import javax.annotation.Nonnull; import lombok.Builder; @@ -14,6 +15,7 @@ public class RetrieverContext @Nonnull private final GraphRetriever graphRetriever; @Nonnull private final AspectRetriever aspectRetriever; + @Nonnull private final SearchRetriever searchRetriever; @Override public Optional getCacheKeyComponent() { diff --git a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java index 4c28ba037d3a14..0e8c165468a4b4 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/test/metadata/context/TestOperationContexts.java @@ -11,6 +11,7 @@ import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; +import com.linkedin.metadata.entity.SearchRetriever; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; import 
com.linkedin.metadata.models.registry.EntityRegistryException; @@ -19,6 +20,8 @@ import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.RelationshipFilter; import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.metadata.search.SearchEntityArray; import com.linkedin.metadata.snapshot.Snapshot; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; @@ -55,7 +58,7 @@ public class TestOperationContexts { private static EntityRegistry defaultEntityRegistryInstance; - private static EntityRegistry defaultEntityRegistry() { + public static EntityRegistry defaultEntityRegistry() { if (defaultEntityRegistryInstance == null) { PathSpecBasedSchemaAnnotationVisitor.class .getClassLoader() @@ -84,6 +87,7 @@ public static AspectRetriever emptyAspectRetriever( } public static GraphRetriever emptyGraphRetriever = new EmptyGraphRetriever(); + public static SearchRetriever emptySearchRetriever = new EmptySearchRetriever(); public static RetrieverContext emptyRetrieverContext( @Nullable Supplier entityRegistrySupplier) { @@ -91,6 +95,7 @@ public static RetrieverContext emptyRetrieverContext( return RetrieverContext.builder() .aspectRetriever(emptyAspectRetriever(entityRegistrySupplier)) .graphRetriever(emptyGraphRetriever) + .searchRetriever(emptySearchRetriever) .build(); } @@ -121,6 +126,20 @@ public static OperationContext systemContextNoSearchAuthorization( null); } + public static OperationContext systemContextNoSearchAuthorization( + @Nullable AspectRetriever aspectRetriever) { + RetrieverContext retrieverContext = + RetrieverContext.builder() + .aspectRetriever(aspectRetriever) + .graphRetriever(emptyGraphRetriever) + .searchRetriever(emptySearchRetriever) + .build(); + return systemContextNoSearchAuthorization( + () -> retrieverContext.getAspectRetriever().getEntityRegistry(), + () -> 
retrieverContext, + null); + } + public static OperationContext systemContextNoSearchAuthorization( @Nullable Supplier retrieverContextSupplier, @Nullable IndexConvention indexConvention) { @@ -277,5 +296,21 @@ public RelatedEntitiesScrollResult scrollRelatedEntities( } } + public static class EmptySearchRetriever implements SearchRetriever { + + @Override + public ScrollResult scroll( + @Nonnull List entities, + @Nullable Filter filters, + @Nullable String scrollId, + int count) { + ScrollResult empty = new ScrollResult(); + empty.setEntities(new SearchEntityArray()); + empty.setNumEntities(0); + empty.setPageSize(0); + return empty; + } + } + private TestOperationContexts() {} } diff --git a/metadata-service/auth-impl/src/test/java/com/datahub/authentication/token/StatefulTokenServiceTest.java b/metadata-service/auth-impl/src/test/java/com/datahub/authentication/token/StatefulTokenServiceTest.java index bcea100ec733fa..d8b7c231535379 100644 --- a/metadata-service/auth-impl/src/test/java/com/datahub/authentication/token/StatefulTokenServiceTest.java +++ b/metadata-service/auth-impl/src/test/java/com/datahub/authentication/token/StatefulTokenServiceTest.java @@ -203,7 +203,8 @@ public void generateRevokeToken() throws TokenException { Mockito.when(mockService.exists(any(OperationContext.class), any(Urn.class), eq(true))) .thenReturn(true); - final RollbackRunResult result = new RollbackRunResult(ImmutableList.of(), 0); + final RollbackRunResult result = + new RollbackRunResult(ImmutableList.of(), 0, ImmutableList.of()); Mockito.when(mockService.deleteUrn(any(OperationContext.class), any(Urn.class))) .thenReturn(result); diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java index bb59949aa44bbe..6870d10c870016 100644 --- 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/context/SystemOperationContextFactory.java @@ -9,6 +9,8 @@ import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityServiceAspectRetriever; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.search.SearchService; +import com.linkedin.metadata.search.SearchServiceSearchRetriever; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.OperationContextConfig; import io.datahubproject.metadata.context.RetrieverContext; @@ -42,7 +44,8 @@ protected OperationContext javaSystemOperationContext( @Nonnull final EntityRegistry entityRegistry, @Nonnull final EntityService entityService, @Nonnull final RestrictedService restrictedService, - @Nonnull final GraphRetriever graphRetriever) { + @Nonnull final GraphRetriever graphRetriever, + @Nonnull final SearchService searchService) { EntityServiceAspectRetriever entityServiceAspectRetriever = EntityServiceAspectRetriever.builder() @@ -50,6 +53,9 @@ protected OperationContext javaSystemOperationContext( .entityService(entityService) .build(); + SearchServiceSearchRetriever searchServiceSearchRetriever = + SearchServiceSearchRetriever.builder().searchService(searchService).build(); + OperationContext systemOperationContext = OperationContext.asSystem( operationContextConfig, @@ -60,15 +66,17 @@ protected OperationContext javaSystemOperationContext( RetrieverContext.builder() .aspectRetriever(entityServiceAspectRetriever) .graphRetriever(graphRetriever) + .searchRetriever(searchServiceSearchRetriever) .build()); entityServiceAspectRetriever.setSystemOperationContext(systemOperationContext); + searchServiceSearchRetriever.setSystemOperationContext(systemOperationContext); return systemOperationContext; } /** - * Used outside of GMS + 
* Used outside GMS * *

Entity Client and Aspect Retriever implemented by Restli call to GMS Entity Client and * Aspect Retriever client-side caching enabled @@ -82,11 +90,15 @@ protected OperationContext restliSystemOperationContext( @Nonnull @Qualifier("systemAuthentication") final Authentication systemAuthentication, @Nonnull final OperationContextConfig operationContextConfig, @Nonnull final RestrictedService restrictedService, - @Nonnull final GraphRetriever graphRetriever) { + @Nonnull final GraphRetriever graphRetriever, + @Nonnull final SearchService searchService) { EntityClientAspectRetriever entityServiceAspectRetriever = EntityClientAspectRetriever.builder().entityClient(systemEntityClient).build(); + SearchServiceSearchRetriever searchServiceSearchRetriever = + SearchServiceSearchRetriever.builder().searchService(searchService).build(); + OperationContext systemOperationContext = OperationContext.asSystem( operationContextConfig, @@ -97,9 +109,11 @@ protected OperationContext restliSystemOperationContext( RetrieverContext.builder() .aspectRetriever(entityServiceAspectRetriever) .graphRetriever(graphRetriever) + .searchRetriever(searchServiceSearchRetriever) .build()); entityServiceAspectRetriever.setSystemOperationContext(systemOperationContext); + searchServiceSearchRetriever.setSystemOperationContext(systemOperationContext); return systemOperationContext; } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java index ef7e8a8c6a8ac8..4513d9f065dcbb 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchIndexBuilderFactory.java @@ -57,6 +57,9 @@ public class ElasticSearchIndexBuilderFactory { @Value("#{new 
Boolean('${elasticsearch.index.enableMappingsReindex}')}") private boolean enableMappingsReindex; + @Value("#{new Boolean('${structuredProperties.systemUpdateEnabled}')}") + private boolean enableStructuredPropertiesReindex; + @Bean(name = "elasticSearchIndexSettingsOverrides") @Nonnull protected Map> getIndexSettingsOverrides( @@ -85,6 +88,7 @@ protected ESIndexBuilder getInstance( overrides, enableSettingsReindex, enableMappingsReindex, + enableStructuredPropertiesReindex, configurationProvider.getElasticSearch(), gitVersion); } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java index f242106e3fe4b4..638d1a6fd0c8ac 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java @@ -84,7 +84,7 @@ public void execute(@Nonnull OperationContext systemOperationContext) throws Exc .aspectName(DATA_PLATFORM_INSTANCE_ASPECT_NAME) .recordTemplate(dataPlatformInstance.get()) .auditStamp(aspectAuditStamp) - .build(systemOperationContext.getRetrieverContext().get().getAspectRetriever())); + .build(systemOperationContext.getAspectRetrieverOpt().get())); } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java index 648fd0f5853191..bfd45f0a52df21 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java @@ -14,6 +14,7 @@ import 
com.datahub.util.RecordUtils; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.data.ByteString; import com.linkedin.data.template.RecordTemplate; @@ -133,7 +134,8 @@ protected abstract AspectsBatch toMCPBatch( @Operation(summary = "Scroll entities") public ResponseEntity getEntities( @PathVariable("entityName") String entityName, - @RequestParam(value = "aspectNames", defaultValue = "") Set aspectNames, + @RequestParam(value = "aspectNames", defaultValue = "") Set aspects1, + @RequestParam(value = "aspects", defaultValue = "") Set aspects2, @RequestParam(value = "count", defaultValue = "10") Integer count, @RequestParam(value = "query", defaultValue = "*") String query, @RequestParam(value = "scrollId", required = false) String scrollId, @@ -187,7 +189,7 @@ public ResponseEntity getEntities( buildScrollResult( opContext, result.getEntities(), - aspectNames, + ImmutableSet.builder().addAll(aspects1).addAll(aspects2).build(), withSystemMetadata, result.getScrollId())); } @@ -199,7 +201,8 @@ public ResponseEntity getEntities( public ResponseEntity getEntity( @PathVariable("entityName") String entityName, @PathVariable("entityUrn") String entityUrn, - @RequestParam(value = "aspectNames", defaultValue = "") Set aspectNames, + @RequestParam(value = "aspectNames", defaultValue = "") Set aspects1, + @RequestParam(value = "aspects", defaultValue = "") Set aspects2, @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") Boolean withSystemMetadata) throws URISyntaxException { @@ -219,7 +222,12 @@ public ResponseEntity getEntity( authentication, true); - return buildEntityList(opContext, List.of(urn), aspectNames, withSystemMetadata).stream() + return buildEntityList( + opContext, + List.of(urn), + ImmutableSet.builder().addAll(aspects1).addAll(aspects2).build(), + withSystemMetadata) + 
.stream() .findFirst() .map(ResponseEntity::ok) .orElse(ResponseEntity.notFound().header(NOT_FOUND_HEADER, "ENTITY").build()); @@ -352,7 +360,7 @@ public void deleteEntity( authentication, true); - entityService.deleteAspect(opContext, entityUrn, entitySpec.getKeyAspectName(), Map.of(), true); + entityService.deleteUrn(opContext, urn); } @Tag(name = "Generic Entities") diff --git a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java index ed0e8c08585269..9872f45648d7b4 100644 --- a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java +++ b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java @@ -97,7 +97,7 @@ public void testAsyncDefaultAspects() throws URISyntaxException { .recordTemplate(mcp.getAspect()) .auditStamp(new AuditStamp()) .metadataChangeProposal(mcp) - .build(opContext.getRetrieverContext().get().getAspectRetriever()); + .build(opContext.getAspectRetrieverOpt().get()); when(aspectDao.runInTransactionWithRetry(any(), any(), anyInt())) .thenReturn( List.of(List.of( diff --git a/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java b/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java index d220883d24132f..aaf90d279e0bda 100644 --- a/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java +++ b/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java @@ -40,9 +40,6 @@ public MockTimeseriesAspectService(long count, long filteredCount, String taskId this._taskId = taskId; } - @Override - public void configure() {} - @Override public long countByFilter( @Nonnull OperationContext operationContext, diff --git 
a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java index 27358c4c0e2790..ac4c6895b757b6 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java @@ -443,12 +443,18 @@ void ingestEntity( void setRetentionService(RetentionService retentionService); - RollbackResult deleteAspect( + default RollbackResult deleteAspect( @Nonnull OperationContext opContext, String urn, String aspectName, @Nonnull Map conditions, - boolean hardDelete); + boolean hardDelete) { + AspectRowSummary aspectRowSummary = + new AspectRowSummary().setUrn(urn).setAspectName(aspectName); + return rollbackWithConditions(opContext, List.of(aspectRowSummary), conditions, hardDelete) + .getRollbackResults() + .get(0); + } RollbackRunResult deleteUrn(@Nonnull OperationContext opContext, Urn urn); diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackResult.java index 9955a58c653395..143d6102f25233 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackResult.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackResult.java @@ -1,8 +1,13 @@ package com.linkedin.metadata.entity; +import static com.linkedin.metadata.utils.PegasusUtils.constructMCL; +import static com.linkedin.metadata.utils.PegasusUtils.urnToEntityName; + +import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; import com.linkedin.events.metadata.ChangeType; +import com.linkedin.mxe.MetadataChangeLog; import com.linkedin.mxe.SystemMetadata; import lombok.Value; @@ -18,4 +23,22 @@ public class RollbackResult { public 
ChangeType changeType; public Boolean keyAffected; public Integer additionalRowsAffected; + + public boolean isNoOp() { + return oldValue == newValue; + } + + public MetadataChangeLog toMCL(AuditStamp auditStamp) { + return constructMCL( + null, + urnToEntityName(urn), + urn, + changeType, + aspectName, + auditStamp, + newValue, + newSystemMetadata, + oldValue, + oldSystemMetadata); + } } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackRunResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackRunResult.java index a8c558df77e57f..68ee66c2b50edf 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackRunResult.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackRunResult.java @@ -8,4 +8,5 @@ public class RollbackRunResult { public List rowsRolledBack; public Integer rowsDeletedFromEntityDeletion; + public List rollbackResults; } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java index e85e0567f963ba..608a55674d8592 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java @@ -1,12 +1,19 @@ package com.linkedin.metadata.entity; +import static com.linkedin.metadata.utils.PegasusUtils.constructMCL; +import static com.linkedin.metadata.utils.PegasusUtils.urnToEntityName; + import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.mxe.MetadataAuditOperation; +import com.linkedin.mxe.MetadataChangeLog; +import com.linkedin.mxe.MetadataChangeProposal; 
import com.linkedin.mxe.SystemMetadata; import java.util.concurrent.Future; +import javax.annotation.Nullable; import lombok.Builder; import lombok.Value; @@ -15,13 +22,35 @@ public class UpdateAspectResult { Urn urn; ChangeMCP request; - RecordTemplate oldValue; + @Nullable RecordTemplate oldValue; RecordTemplate newValue; - SystemMetadata oldSystemMetadata; + @Nullable SystemMetadata oldSystemMetadata; SystemMetadata newSystemMetadata; MetadataAuditOperation operation; AuditStamp auditStamp; long maxVersion; + @Nullable MetadataChangeProposal mcp; + /* + Whether the MCL was written to Elasticsearch prior to emitting the MCL + */ boolean processedMCL; Future mclFuture; + + public boolean isNoOp() { + return oldValue == newValue; + } + + public MetadataChangeLog toMCL() { + return constructMCL( + request.getMetadataChangeProposal(), + urnToEntityName(urn), + urn, + isNoOp() ? ChangeType.RESTATE : ChangeType.UPSERT, + request.getAspectName(), + auditStamp, + newValue, + newSystemMetadata, + oldValue, + oldSystemMetadata); + } } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/graph/GraphService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/graph/GraphService.java index ad8fdbbb068952..16305d6ec4bcf2 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/graph/GraphService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/graph/GraphService.java @@ -314,7 +314,7 @@ void removeEdgesFromNode( @Nonnull final List relationshipTypes, @Nonnull final RelationshipFilter relationshipFilter); - void configure(); + default void configure() {} /** Removes all edges and nodes from the graph. 
*/ void clear(); diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java index a253f9ffc25311..a3db4b029b68bb 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java @@ -15,7 +15,7 @@ public interface EntitySearchService { - void configure(); + default void configure() {} /** Clear all data within the service */ void clear(@Nonnull OperationContext opContext); diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java index f06671ac3c314c..3880ad1d8da119 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java @@ -40,7 +40,7 @@ List findByRegistry( List listRuns( Integer pageOffset, Integer pageSize, boolean includeSoftDeleted); - void configure(); + default void configure() {} void clear(); } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java index accc0c8ceeb3b9..6b1f484ac0a518 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java @@ -18,7 +18,7 @@ public interface TimeseriesAspectService { /** Configure the Time-Series aspect service one time at boot-up. 
*/ - void configure(); + default void configure() {} /** * Count the number of entries using a filter diff --git a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java index ffa0d600a2351d..07f91bb52fe109 100644 --- a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java +++ b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java @@ -85,8 +85,7 @@ private void writeSearchCsv(WebApplicationContext ctx, PrintWriter pw) { entitySpecOpt -> { EntitySpec entitySpec = entitySpecOpt.get(); SearchRequest searchRequest = - SearchRequestHandler.getBuilder( - entitySpec, searchConfiguration, null, aspectRetriever) + SearchRequestHandler.getBuilder(entitySpec, searchConfiguration, null) .getSearchRequest( getOperationContext(ctx) .withSearchFlags( diff --git a/smoke-test/tests/structured_properties/test_structured_properties.py b/smoke-test/tests/structured_properties/test_structured_properties.py index 44beab930091de..bf1b5b1292750f 100644 --- a/smoke-test/tests/structured_properties/test_structured_properties.py +++ b/smoke-test/tests/structured_properties/test_structured_properties.py @@ -176,6 +176,15 @@ def get_property_from_entity( return None +def to_es_name(property_name=None, namespace=default_namespace, qualified_name=None): + if property_name: + namespace_field = namespace.replace(".", "_") + return f"structuredProperties.{namespace_field}_{property_name}" + else: + escaped_qualified_name = qualified_name.replace(".", "_") + return f"structuredProperties.{escaped_qualified_name}" + + # @tenacity.retry( # stop=tenacity.stop_after_attempt(sleep_times), # wait=tenacity.wait_fixed(sleep_sec), @@ -406,10 +415,6 @@ def test_dataset_yaml_loader(ingest_cleanup_data, graph): def test_structured_property_search(ingest_cleanup_data, graph: DataHubGraph, caplog): - def 
to_es_name(property_name, namespace=default_namespace): - namespace_field = namespace.replace(".", "_") - return f"structuredProperties.{namespace_field}_{property_name}" - # Attach structured property to entity and to field field_property_name = f"deprecationDate{randint(10, 10000)}" @@ -573,28 +578,6 @@ def patch_one(prop_name, prop_value): assert actual_property_values == [property_value_other] -def test_dataset_structured_property_hard_delete(ingest_cleanup_data, graph, caplog): - property_name = f"hardDeleteTest{randint(10, 10000)}Property" - value_type = "string" - property_urn = f"urn:li:structuredProperty:{default_namespace}.{property_name}" - - create_property_definition( - property_name=property_name, graph=graph, value_type=value_type - ) - - test_property = StructuredProperties.from_datahub(graph=graph, urn=property_urn) - assert test_property is not None - - try: - graph.hard_delete_entity(urn=property_urn) - raise AssertionError("Should not be able to HARD delete structured property") - except Exception as e: - if "Hard delete of Structured Property Definitions is not supported" in str(e): - pass - else: - raise e - - def test_dataset_structured_property_soft_delete_validation( ingest_cleanup_data, graph, caplog ): @@ -685,10 +668,6 @@ def test_dataset_structured_property_soft_delete_read_mutation( def test_dataset_structured_property_soft_delete_search_filter_validation( ingest_cleanup_data, graph, caplog ): - def to_es_name(property_name, namespace=default_namespace): - namespace_field = namespace.replace(".", "_") - return f"structuredProperties.{namespace_field}_{property_name}" - # Create a test structured property dataset_property_name = f"softDeleteSearchFilter{randint(10, 10000)}" property_value = 30 @@ -744,3 +723,97 @@ def to_es_name(property_name, namespace=default_namespace): pass else: raise e + + +def test_dataset_structured_property_delete(ingest_cleanup_data, graph, caplog): + # Create property, assign value to target dataset urn + def 
create_property(target_dataset, prop_value): + property_name = f"hardDeleteTest{randint(10, 10000)}Property" + value_type = "string" + property_urn = f"urn:li:structuredProperty:{default_namespace}.{property_name}" + + create_property_definition( + property_name=property_name, + graph=graph, + value_type=value_type, + cardinality="SINGLE", + ) + + test_property = StructuredProperties.from_datahub(graph=graph, urn=property_urn) + assert test_property is not None + + # assign + dataset_patcher: DatasetPatchBuilder = DatasetPatchBuilder(urn=target_dataset) + dataset_patcher.set_structured_property( + StructuredPropertyUrn.make_structured_property_urn(property_urn), + prop_value, + ) + for mcp in dataset_patcher.build(): + graph.emit(mcp) + + return test_property + + # create and assign 2 structured properties with values + property1 = create_property(dataset_urns[0], "foo") + property2 = create_property(dataset_urns[0], "bar") + wait_for_writes_to_sync() + + # validate #1 & #2 properties assigned + assert get_property_from_entity( + dataset_urns[0], + property1.qualified_name, + graph=graph, + ) == ["foo"] + assert get_property_from_entity( + dataset_urns[0], + property2.qualified_name, + graph=graph, + ) == ["bar"] + + def validate_search(qualified_name, expected): + entity_urns = list( + graph.get_urns_by_filter( + extraFilters=[ + { + "field": to_es_name(qualified_name=qualified_name), + "negated": "false", + "condition": "EXISTS", + } + ] + ) + ) + assert entity_urns == expected + + # Validate search works for property #1 & #2 + validate_search(property1.qualified_name, expected=[dataset_urns[0]]) + validate_search(property2.qualified_name, expected=[dataset_urns[0]]) + + # delete the structured property #1 + graph.hard_delete_entity(urn=property1.urn) + wait_for_writes_to_sync() + + # validate property #1 deleted and property #2 remains + assert ( + get_property_from_entity( + dataset_urns[0], + property1.qualified_name, + graph=graph, + ) + is None + ) + assert 
get_property_from_entity( + dataset_urns[0], + property2.qualified_name, + graph=graph, + ) == ["bar"] + + # assert property 1 definition was removed + property1_definition = graph.get_aspect( + property1.urn, StructuredPropertyDefinitionClass + ) + assert property1_definition is None + + wait_for_writes_to_sync() + # Validate search works for property #1 & #2 + validate_search(property1.qualified_name, expected=[]) + validate_search(property2.qualified_name, expected=[dataset_urns[0]])