From 02416b7faf367a9a0bd36157326a9e0dcf3c6f08 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Thu, 12 Dec 2024 09:41:20 -0600 Subject: [PATCH 01/10] feat(openapi-v3): add minimal timeseries aspect support (#12096) --- .../java/com/linkedin/metadata/Constants.java | 2 + .../metadata/entity/EntityServiceImpl.java | 13 +- .../ElasticSearchTimeseriesAspectService.java | 84 +++++++++- .../TimeseriesAspectServiceTestBase.java | 4 +- .../TimeseriesAspectServiceUnitTest.java | 4 +- .../config/DataHubAppConfiguration.java | 3 + .../config/ExecutorServiceConfig.java | 16 ++ .../config/TimeseriesAspectServiceConfig.java | 14 ++ .../src/main/resources/application.yaml | 6 + ...cSearchTimeseriesAspectServiceFactory.java | 7 +- .../controller/GenericEntitiesController.java | 98 ++++++++--- .../v2/controller/EntityController.java | 5 +- .../openapi/v3/OpenAPIV3Generator.java | 61 ++++--- .../v3/controller/EntityController.java | 77 +++++++-- .../v3/controller/EntityControllerTest.java | 76 +++++++++ .../mock/MockTimeseriesAspectService.java | 12 ++ .../timeseries/TimeseriesAspectService.java | 18 ++ smoke-test/tests/openapi/test_openapi.py | 4 + smoke-test/tests/openapi/v3/timeseries.json | 156 ++++++++++++++++++ 19 files changed, 596 insertions(+), 64 deletions(-) create mode 100644 metadata-service/configuration/src/main/java/com/linkedin/metadata/config/ExecutorServiceConfig.java create mode 100644 metadata-service/configuration/src/main/java/com/linkedin/metadata/config/TimeseriesAspectServiceConfig.java create mode 100644 smoke-test/tests/openapi/v3/timeseries.json diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index 797055d5fb6a93..ff6a79108600a3 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -108,6 +108,8 @@ public class Constants { // 
Common public static final String OWNERSHIP_ASPECT_NAME = "ownership"; + public static final String TIMESTAMP_MILLIS = "timestampMillis"; + public static final String INSTITUTIONAL_MEMORY_ASPECT_NAME = "institutionalMemory"; public static final String DATA_PLATFORM_INSTANCE_ASPECT_NAME = "dataPlatformInstance"; public static final String BROWSE_PATHS_ASPECT_NAME = "browsePaths"; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index d14990f93d22d9..9a05f54cf04c29 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -1265,6 +1265,7 @@ private Stream ingestTimeseriesProposal( return timeseriesResults.stream() .map( result -> { + MCPItem item = result.getFirst(); Optional, Boolean>> emissionStatus = result.getSecond(); emissionStatus.ifPresent( @@ -1276,10 +1277,16 @@ private Stream ingestTimeseriesProposal( } }); - MCPItem request = result.getFirst(); return IngestResult.builder() - .urn(request.getUrn()) - .request(request) + .urn(item.getUrn()) + .request(item) + .result( + UpdateAspectResult.builder() + .urn(item.getUrn()) + .newValue(item.getRecordTemplate()) + .auditStamp(item.getAuditStamp()) + .newSystemMetadata(item.getSystemMetadata()) + .build()) .publishedMCL( emissionStatus.map(status -> status.getFirst() != null).orElse(false)) .processedMCL(emissionStatus.map(Pair::getSecond).orElse(false)) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java index 67518121edae4e..4d940c229dc9af 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java @@ -13,6 +13,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.data.ByteString; import com.linkedin.metadata.aspect.EnvelopedAspect; +import com.linkedin.metadata.config.TimeseriesAspectServiceConfig; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation; @@ -53,8 +54,15 @@ import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -103,18 +111,29 @@ public class ElasticSearchTimeseriesAspectService private final RestHighLevelClient searchClient; private final ESAggregatedStatsDAO esAggregatedStatsDAO; private final QueryFilterRewriteChain queryFilterRewriteChain; + private final ExecutorService queryPool; public ElasticSearchTimeseriesAspectService( @Nonnull RestHighLevelClient searchClient, @Nonnull TimeseriesAspectIndexBuilders indexBuilders, @Nonnull ESBulkProcessor bulkProcessor, int numRetries, - @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { + @Nonnull QueryFilterRewriteChain queryFilterRewriteChain, + @Nonnull TimeseriesAspectServiceConfig timeseriesAspectServiceConfig) { this.indexBuilders = indexBuilders; this.searchClient = searchClient; this.bulkProcessor = bulkProcessor; this.numRetries = numRetries; this.queryFilterRewriteChain = queryFilterRewriteChain; + this.queryPool = + new ThreadPoolExecutor( + 
timeseriesAspectServiceConfig.getQuery().getConcurrency(), // core threads + timeseriesAspectServiceConfig.getQuery().getConcurrency(), // max threads + timeseriesAspectServiceConfig.getQuery().getKeepAlive(), + TimeUnit.SECONDS, // thread keep-alive time + new ArrayBlockingQueue<>( + timeseriesAspectServiceConfig.getQuery().getQueueSize()), // fixed size queue + new ThreadPoolExecutor.CallerRunsPolicy()); esAggregatedStatsDAO = new ESAggregatedStatsDAO(searchClient, queryFilterRewriteChain); } @@ -400,6 +419,69 @@ public List getAspectValues( .collect(Collectors.toList()); } + @Nonnull + @Override + public Map> getLatestTimeseriesAspectValues( + @Nonnull OperationContext opContext, + @Nonnull Set urns, + @Nonnull Set aspectNames, + @Nullable Map endTimeMillis) { + Map>>> futures = + urns.stream() + .map( + urn -> { + List>> aspectFutures = + aspectNames.stream() + .map( + aspectName -> + queryPool.submit( + () -> { + List oneResultList = + getAspectValues( + opContext, + urn, + urn.getEntityType(), + aspectName, + null, + endTimeMillis == null + ? null + : endTimeMillis.get(aspectName), + 1, + null, + null); + return !oneResultList.isEmpty() + ? 
Pair.of(aspectName, oneResultList.get(0)) + : null; + })) + .collect(Collectors.toList()); + + return Map.entry(urn, aspectFutures); + }) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + return futures.entrySet().stream() + .map( + e -> + Map.entry( + e.getKey(), + e.getValue().stream() + .map( + f -> { + try { + return f.get(); + } catch (InterruptedException | ExecutionException ex) { + throw new RuntimeException(ex); + } + }) + .filter(Objects::nonNull) + .collect(Collectors.toList()))) + .collect( + Collectors.toMap( + Map.Entry::getKey, + e -> + e.getValue().stream().collect(Collectors.toMap(Pair::getKey, Pair::getValue)))); + } + @Override @Nonnull public GenericTable getAggregatedStats( diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java index faf616b0fb3cff..e8420e92e5a5fe 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java @@ -26,6 +26,7 @@ import com.linkedin.data.template.StringMap; import com.linkedin.data.template.StringMapArray; import com.linkedin.metadata.aspect.EnvelopedAspect; +import com.linkedin.metadata.config.TimeseriesAspectServiceConfig; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.DataSchemaFactory; import com.linkedin.metadata.models.EntitySpec; @@ -151,7 +152,8 @@ private ElasticSearchTimeseriesAspectService buildService() { opContext.getSearchContext().getIndexConvention()), getBulkProcessor(), 1, - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + TimeseriesAspectServiceConfig.builder().build()); } /* diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java 
b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java index db9d8b450ef7a6..9a21c5337db864 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceUnitTest.java @@ -5,6 +5,7 @@ import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.NumericNode; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.linkedin.metadata.config.TimeseriesAspectServiceConfig; import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.timeseries.TimeseriesAspectService; @@ -44,7 +45,8 @@ public class TimeseriesAspectServiceUnitTest { _timeseriesAspectIndexBuilders, _bulkProcessor, 0, - QueryFilterRewriteChain.EMPTY); + QueryFilterRewriteChain.EMPTY, + TimeseriesAspectServiceConfig.builder().build()); private final OperationContext opContext = TestOperationContexts.systemContextNoSearchAuthorization(_indexConvention); diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/DataHubAppConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/DataHubAppConfiguration.java index cc96429c65e76b..fb7a5103952afe 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/DataHubAppConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/DataHubAppConfiguration.java @@ -58,4 +58,7 @@ public class DataHubAppConfiguration { /** MCP throttling configuration */ private MetadataChangeProposalConfig metadataChangeProposal; + + /** Timeseries Aspect Service configuration */ + private TimeseriesAspectServiceConfig timeseriesAspectService; } diff --git 
a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/ExecutorServiceConfig.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/ExecutorServiceConfig.java new file mode 100644 index 00000000000000..d319ce213e303d --- /dev/null +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/ExecutorServiceConfig.java @@ -0,0 +1,16 @@ +package com.linkedin.metadata.config; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder(toBuilder = true) +@AllArgsConstructor +@NoArgsConstructor +public class ExecutorServiceConfig { + @Builder.Default private int concurrency = 2; + @Builder.Default private int queueSize = 100; + @Builder.Default private int keepAlive = 60; +} diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/TimeseriesAspectServiceConfig.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/TimeseriesAspectServiceConfig.java new file mode 100644 index 00000000000000..3708e05d5cb67f --- /dev/null +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/TimeseriesAspectServiceConfig.java @@ -0,0 +1,14 @@ +package com.linkedin.metadata.config; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder(toBuilder = true) +@AllArgsConstructor +@NoArgsConstructor +public class TimeseriesAspectServiceConfig { + @Builder.Default private ExecutorServiceConfig query = ExecutorServiceConfig.builder().build(); +} diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index e97120ec751185..9348416606d0a9 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -118,6 +118,12 @@ 
searchService: pageSize: ${SEARCH_SERVICE_FILTER_DOMAIN_EXPANSION_PAGE_SIZE:100} limit: ${SEARCH_SERVICE_FILTER_DOMAIN_EXPANSION_LIMIT:100} +timeseriesAspectService: + query: + concurrency: ${TIMESERIES_ASPECT_SERVICE_QUERY_CONCURRENCY:10} # parallel threads + queueSize: ${TIMESERIES_ASPECT_SERVICE_QUERY_QUEUE_SIZE:500} + keepAlive: ${TIMESERIES_ASPECT_SERVICE_QUERY_THREAD_KEEP_ALIVE:60} + configEntityRegistry: path: ${ENTITY_REGISTRY_CONFIG_PATH:../../metadata-models/src/main/resources/entity-registry.yml} # Priority is given to the `path` setting above (outside jar) diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java index e26de0e7301951..85c58533df85f5 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/timeseries/ElasticSearchTimeseriesAspectServiceFactory.java @@ -1,5 +1,6 @@ package com.linkedin.gms.factory.timeseries; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -27,13 +28,15 @@ public class ElasticSearchTimeseriesAspectServiceFactory { @Bean(name = "elasticSearchTimeseriesAspectService") @Nonnull protected ElasticSearchTimeseriesAspectService getInstance( - final QueryFilterRewriteChain queryFilterRewriteChain) { + final QueryFilterRewriteChain queryFilterRewriteChain, + final ConfigurationProvider configurationProvider) { return new ElasticSearchTimeseriesAspectService( components.getSearchClient(), new TimeseriesAspectIndexBuilders( 
components.getIndexBuilder(), entityRegistry, components.getIndexConvention()), components.getBulkProcessor(), components.getNumRetries(), - queryFilterRewriteChain); + queryFilterRewriteChain, + configurationProvider.getTimeseriesAspectService()); } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java index 32252e80330646..579a62c084999a 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/controller/GenericEntitiesController.java @@ -1,5 +1,6 @@ package io.datahubproject.openapi.controller; +import static com.linkedin.metadata.Constants.TIMESTAMP_MILLIS; import static com.linkedin.metadata.authorization.ApiOperation.CREATE; import static com.linkedin.metadata.authorization.ApiOperation.DELETE; import static com.linkedin.metadata.authorization.ApiOperation.EXISTS; @@ -32,14 +33,20 @@ import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.SearchFlags; +import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.query.filter.SortOrder; import com.linkedin.metadata.search.ScrollResult; import com.linkedin.metadata.search.SearchEntityArray; import com.linkedin.metadata.search.SearchService; +import com.linkedin.metadata.search.utils.QueryUtils; +import com.linkedin.metadata.timeseries.TimeseriesAspectService; import com.linkedin.metadata.utils.AuditStampUtils; +import com.linkedin.metadata.utils.CriterionUtils; +import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.metadata.utils.SearchUtil; import com.linkedin.mxe.SystemMetadata; +import 
com.linkedin.timeseries.TimeseriesAspectBase; import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RequestContext; @@ -84,6 +91,7 @@ public abstract class GenericEntitiesController< @Autowired protected EntityRegistry entityRegistry; @Autowired protected SearchService searchService; @Autowired protected EntityService entityService; + @Autowired protected TimeseriesAspectService timeseriesAspectService; @Autowired protected AuthorizerChain authorizationChain; @Autowired protected ObjectMapper objectMapper; @@ -119,8 +127,8 @@ protected List buildEntityList( boolean expandEmpty) throws URISyntaxException { - LinkedHashMap> versionMap = - resolveAspectNames( + LinkedHashMap> aspectSpecMap = + resolveAspectSpecs( urns.stream() .map( urn -> @@ -141,7 +149,7 @@ protected List buildEntityList( expandEmpty); return buildEntityVersionedAspectList( - opContext, urns, versionMap, withSystemMetadata, expandEmpty); + opContext, urns, aspectSpecMap, withSystemMetadata, expandEmpty); } /** @@ -158,7 +166,7 @@ protected List buildEntityList( protected abstract List buildEntityVersionedAspectList( @Nonnull OperationContext opContext, Collection requestedUrns, - LinkedHashMap> fetchUrnAspectVersions, + LinkedHashMap> fetchUrnAspectVersions, boolean withSystemMetadata, boolean expandEmpty) throws URISyntaxException; @@ -390,7 +398,8 @@ public ResponseEntity getAspect( buildEntityVersionedAspectList( opContext, List.of(urn), - new LinkedHashMap<>(Map.of(urn, Map.of(aspectName, version))), + resolveAspectSpecs( + new LinkedHashMap<>(Map.of(urn, Map.of(aspectName, version))), 0L, true), withSystemMetadata, true); } @@ -561,9 +570,30 @@ public void deleteAspect( lookupAspectSpec(urn, aspectName) .ifPresent( - aspectSpec -> + aspectSpec -> { + if (aspectSpec.isTimeseries()) { + Map> latestMap = + timeseriesAspectService.getLatestTimeseriesAspectValues( + opContext, Set.of(urn), Set.of(aspectSpec.getName()), 
null); + com.linkedin.metadata.aspect.EnvelopedAspect latestAspect = + latestMap.getOrDefault(urn, Map.of()).get(aspectSpec.getName()); + if (latestAspect != null) { + Long latestTs = + new TimeseriesAspectBase(toRecordTemplate(aspectSpec, latestAspect).data()) + .getTimestampMillis(); + timeseriesAspectService.deleteAspectValues( + opContext, + urn.getEntityType(), + aspectSpec.getName(), + QueryUtils.newFilter( + CriterionUtils.buildCriterion( + TIMESTAMP_MILLIS, Condition.EQUAL, String.valueOf(latestTs)))); + } + } else { entityService.deleteAspect( - opContext, entityUrn, aspectSpec.getName(), Map.of(), true)); + opContext, entityUrn, aspectSpec.getName(), Map.of(), true); + } + }); } @Tag(name = "Generic Aspects") @@ -627,18 +657,19 @@ public ResponseEntity createAspect( if (!async) { return ResponseEntity.of( results.stream() - .filter(item -> aspectName.equals(item.getRequest().getAspectName())) + .filter(item -> aspectSpec.getName().equals(item.getRequest().getAspectName())) .findFirst() .map( result -> - buildGenericEntity(aspectName, result.getResult(), withSystemMetadata))); + buildGenericEntity( + aspectSpec.getName(), result.getResult(), withSystemMetadata))); } else { return results.stream() - .filter(item -> aspectName.equals(item.getRequest().getAspectName())) + .filter(item -> aspectSpec.getName().equals(item.getRequest().getAspectName())) .map( result -> ResponseEntity.accepted() - .body(buildGenericEntity(aspectName, result, withSystemMetadata))) + .body(buildGenericEntity(aspectSpec.getName(), result, withSystemMetadata))) .findFirst() .orElse(ResponseEntity.accepted().build()); } @@ -739,7 +770,7 @@ protected Boolean exists( * @param expandEmpty whether to expand empty aspect names to all aspect names * @return updated map */ - protected LinkedHashMap> resolveAspectNames( + protected LinkedHashMap> resolveAspectSpecs( LinkedHashMap> requestedAspectNames, @Nonnull T defaultValue, boolean expandEmpty) { @@ -749,10 +780,9 @@ protected 
LinkedHashMap> resolveAspectNames( final Urn urn = entry.getKey(); if (expandEmpty && (entry.getValue().isEmpty() || entry.getValue().containsKey(""))) { // All aspects specified - Set allNames = - entityRegistry.getEntitySpec(urn.getEntityType()).getAspectSpecs().stream() - .map(AspectSpec::getName) - .collect(Collectors.toSet()); + Set allNames = + new HashSet<>( + entityRegistry.getEntitySpec(urn.getEntityType()).getAspectSpecs()); return Map.entry( urn, allNames.stream() @@ -762,15 +792,14 @@ protected LinkedHashMap> resolveAspectNames( aspectName, entry.getValue().getOrDefault("", defaultValue))) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); } else if (!entry.getValue().keySet().isEmpty()) { - final Map normalizedNames = + final Map normalizedNames = entry.getValue().keySet().stream() .map( requestAspectName -> Map.entry( requestAspectName, lookupAspectSpec(urn, requestAspectName))) .filter(aspectSpecEntry -> aspectSpecEntry.getValue().isPresent()) - .collect( - Collectors.toMap(Map.Entry::getKey, e -> e.getValue().get().getName())); + .collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().get())); return Map.entry( urn, entry.getValue().entrySet().stream() @@ -781,7 +810,7 @@ requestAspectName, lookupAspectSpec(urn, requestAspectName))) normalizedNames.get(reqEntry.getKey()), reqEntry.getValue())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); } else { - return (Map.Entry>) null; + return (Map.Entry>) null; } }) .filter(Objects::nonNull) @@ -795,6 +824,27 @@ requestAspectName, lookupAspectSpec(urn, requestAspectName))) LinkedHashMap::new)); } + protected static LinkedHashMap> aspectSpecsToAspectNames( + LinkedHashMap> urnAspectSpecsMap, boolean timeseries) { + return urnAspectSpecsMap.entrySet().stream() + .map( + e -> + Map.entry( + e.getKey(), + e.getValue().entrySet().stream() + .filter(a -> timeseries == a.getKey().isTimeseries()) + .map(a -> Map.entry(a.getKey().getName(), a.getValue())) + 
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)))) + .collect( + Collectors.toMap( + Map.Entry::getKey, + Map.Entry::getValue, + (a, b) -> { + throw new IllegalStateException("Duplicate key"); + }, + LinkedHashMap::new)); + } + protected Map> toAspectMap( Urn urn, List aspects, boolean withSystemMetadata) { return aspects.stream() @@ -818,6 +868,14 @@ protected RecordTemplate toRecordTemplate( aspectSpec.getDataTemplateClass(), envelopedAspect.getValue().data()); } + protected RecordTemplate toRecordTemplate( + AspectSpec aspectSpec, com.linkedin.metadata.aspect.EnvelopedAspect envelopedAspect) { + return GenericRecordUtils.deserializeAspect( + envelopedAspect.getAspect().getValue(), + envelopedAspect.getAspect().getContentType(), + aspectSpec); + } + protected abstract ChangeMCP toUpsertItem( @Nonnull AspectRetriever aspectRetriever, Urn entityUrn, diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java index 05c5b8ee025ddb..56a7955b9fe871 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -219,13 +219,14 @@ protected AspectsBatch toMCPBatch( protected List buildEntityVersionedAspectList( @Nonnull OperationContext opContext, Collection requestedUrns, - LinkedHashMap> urnAspectVersions, + LinkedHashMap> urnAspectVersions, boolean withSystemMetadata, boolean expandEmpty) throws URISyntaxException { + Map> aspects = entityService.getEnvelopedVersionedAspects( - opContext, resolveAspectNames(urnAspectVersions, 0L, true), true); + opContext, aspectSpecsToAspectNames(urnAspectVersions, false), true); return urnAspectVersions.keySet().stream() .map( diff --git 
a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java index 3c35a5c1984c1d..c6b8d579d879e0 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java @@ -200,8 +200,7 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { String.format( "/v3/entity/%s/{urn}/%s", e.getName().toLowerCase(), a.getName().toLowerCase()), - buildSingleEntityAspectPath( - e, a.getName(), a.getPegasusSchema().getName()))); + buildSingleEntityAspectPath(e, a))); }); return new OpenAPI().openapi("3.0.1").info(info).paths(paths).components(components); } @@ -998,10 +997,10 @@ private static Schema buildAspectPatchSchema() { } private static PathItem buildSingleEntityAspectPath( - final EntitySpec entity, final String aspect, final String upperFirstAspect) { + final EntitySpec entity, final AspectSpec aspectSpec) { final String upperFirstEntity = toUpperFirst(entity.getName()); - List tags = List.of(aspect + " Aspect"); + List tags = List.of(aspectSpec.getName() + " Aspect"); // Get Operation final Parameter getParameter = new Parameter() @@ -1013,7 +1012,10 @@ private static PathItem buildSingleEntityAspectPath( new Parameter() .in(NAME_QUERY) .name(NAME_VERSION) - .description("Return a specific aspect version.") + .description( + aspectSpec.isTimeseries() + ? "This aspect is a `timeseries` aspect, version=0 indicates the most recent aspect should be returned. Otherwise return the most recent aspect <= version as epoch milliseconds." 
+ : "Return a specific aspect version of the aspect.") .schema(new Schema().type(TYPE_INTEGER)._default(0).minimum(BigDecimal.ZERO)); final ApiResponse successApiResponse = new ApiResponse() @@ -1028,25 +1030,27 @@ private static PathItem buildSingleEntityAspectPath( .$ref( String.format( "#/components/schemas/%s%s", - upperFirstAspect, ASPECT_RESPONSE_SUFFIX))))); + aspectSpec.getPegasusSchema().getName(), + ASPECT_RESPONSE_SUFFIX))))); final Operation getOperation = new Operation() - .summary(String.format("Get %s for %s.", aspect, entity.getName())) + .summary(String.format("Get %s for %s.", aspectSpec.getName(), entity.getName())) .tags(tags) .parameters(List.of(getParameter, versionParameter)) .responses(new ApiResponses().addApiResponse("200", successApiResponse)); // Head Operation final ApiResponse successHeadResponse = new ApiResponse() - .description(String.format("%s on %s exists.", aspect, entity.getName())) + .description(String.format("%s on %s exists.", aspectSpec.getName(), entity.getName())) .content(new Content().addMediaType("application/json", new MediaType())); final ApiResponse notFoundHeadResponse = new ApiResponse() - .description(String.format("%s on %s does not exist.", aspect, entity.getName())) + .description( + String.format("%s on %s does not exist.", aspectSpec.getName(), entity.getName())) .content(new Content().addMediaType("application/json", new MediaType())); final Operation headOperation = new Operation() - .summary(String.format("%s on %s existence.", aspect, upperFirstEntity)) + .summary(String.format("%s on %s existence.", aspectSpec.getName(), upperFirstEntity)) .tags(tags) .parameters( List.of( @@ -1062,17 +1066,21 @@ private static PathItem buildSingleEntityAspectPath( // Delete Operation final ApiResponse successDeleteResponse = new ApiResponse() - .description(String.format("Delete %s on %s entity.", aspect, upperFirstEntity)) + .description( + String.format("Delete %s on %s entity.", aspectSpec.getName(), 
upperFirstEntity)) .content(new Content().addMediaType("application/json", new MediaType())); final Operation deleteOperation = new Operation() - .summary(String.format("Delete %s on entity %s", aspect, upperFirstEntity)) + .summary( + String.format("Delete %s on entity %s", aspectSpec.getName(), upperFirstEntity)) .tags(tags) .responses(new ApiResponses().addApiResponse("200", successDeleteResponse)); // Post Operation final ApiResponse successPostResponse = new ApiResponse() - .description(String.format("Create aspect %s on %s entity.", aspect, upperFirstEntity)) + .description( + String.format( + "Create aspect %s on %s entity.", aspectSpec.getName(), upperFirstEntity)) .content( new Content() .addMediaType( @@ -1083,10 +1091,13 @@ private static PathItem buildSingleEntityAspectPath( .$ref( String.format( "#/components/schemas/%s%s", - upperFirstAspect, ASPECT_RESPONSE_SUFFIX))))); + aspectSpec.getPegasusSchema().getName(), + ASPECT_RESPONSE_SUFFIX))))); final RequestBody requestBody = new RequestBody() - .description(String.format("Create aspect %s on %s entity.", aspect, upperFirstEntity)) + .description( + String.format( + "Create aspect %s on %s entity.", aspectSpec.getName(), upperFirstEntity)) .required(true) .content( new Content() @@ -1098,10 +1109,12 @@ private static PathItem buildSingleEntityAspectPath( .$ref( String.format( "#/components/schemas/%s%s", - upperFirstAspect, ASPECT_REQUEST_SUFFIX))))); + aspectSpec.getPegasusSchema().getName(), + ASPECT_REQUEST_SUFFIX))))); final Operation postOperation = new Operation() - .summary(String.format("Create aspect %s on %s ", aspect, upperFirstEntity)) + .summary( + String.format("Create aspect %s on %s ", aspectSpec.getName(), upperFirstEntity)) .tags(tags) .parameters( List.of( @@ -1130,7 +1143,9 @@ private static PathItem buildSingleEntityAspectPath( // Patch Operation final ApiResponse successPatchResponse = new ApiResponse() - .description(String.format("Patch aspect %s on %s entity.", aspect, 
upperFirstEntity)) + .description( + String.format( + "Patch aspect %s on %s entity.", aspectSpec.getName(), upperFirstEntity)) .content( new Content() .addMediaType( @@ -1141,10 +1156,13 @@ private static PathItem buildSingleEntityAspectPath( .$ref( String.format( "#/components/schemas/%s%s", - upperFirstAspect, ASPECT_RESPONSE_SUFFIX))))); + aspectSpec.getPegasusSchema().getName(), + ASPECT_RESPONSE_SUFFIX))))); final RequestBody patchRequestBody = new RequestBody() - .description(String.format("Patch aspect %s on %s entity.", aspect, upperFirstEntity)) + .description( + String.format( + "Patch aspect %s on %s entity.", aspectSpec.getName(), upperFirstEntity)) .required(true) .content( new Content() @@ -1161,7 +1179,8 @@ private static PathItem buildSingleEntityAspectPath( .name(NAME_SYSTEM_METADATA) .description("Include systemMetadata with response.") .schema(new Schema().type(TYPE_BOOLEAN)._default(false)))) - .summary(String.format("Patch aspect %s on %s ", aspect, upperFirstEntity)) + .summary( + String.format("Patch aspect %s on %s ", aspectSpec.getName(), upperFirstEntity)) .tags(tags) .requestBody(patchRequestBody) .responses(new ApiResponses().addApiResponse("200", successPatchResponse)); diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java index 5544fb845b2687..ce7fd73f99b9e5 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java @@ -99,7 +99,7 @@ public ResponseEntity> getEntityBatch( @RequestBody @Nonnull String jsonEntityList) throws URISyntaxException, JsonProcessingException { - LinkedHashMap> requestMap = toEntityVersionRequest(jsonEntityList); + LinkedHashMap> requestMap = 
toEntityVersionRequest(jsonEntityList); Authentication authentication = AuthenticationContext.getAuthentication(); OperationContext opContext = @@ -243,7 +243,7 @@ public GenericEntityScrollResultV3 buildScrollResult( protected List buildEntityVersionedAspectList( @Nonnull OperationContext opContext, Collection requestedUrns, - LinkedHashMap> urnAspectVersions, + LinkedHashMap> urnAspectVersions, boolean withSystemMetadata, boolean expandEmpty) throws URISyntaxException { @@ -251,15 +251,48 @@ protected List buildEntityVersionedAspectList( if (!urnAspectVersions.isEmpty()) { Map> aspects = entityService.getEnvelopedVersionedAspects( - opContext, resolveAspectNames(urnAspectVersions, 0L, expandEmpty), false); + opContext, aspectSpecsToAspectNames(urnAspectVersions, false), false); + + Map> timeseriesAspects = + aspectSpecsToAspectNames(urnAspectVersions, true).entrySet().stream() + .map( + e -> { + // 0 is considered latest due to overlap with versioned and timeseries + Map endTimeMilliMap = + e.getValue().entrySet().stream() + .filter(endTEntry -> endTEntry.getValue() != 0L) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + return Map.entry( + e.getKey(), + timeseriesAspectService + .getLatestTimeseriesAspectValues( + opContext, + Set.of(e.getKey()), + e.getValue().keySet(), + endTimeMilliMap) + .getOrDefault(e.getKey(), Map.of())); + }) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); return urnAspectVersions.keySet().stream() - .filter(urn -> aspects.containsKey(urn) && !aspects.get(urn).isEmpty()) + .filter( + urn -> + (aspects.containsKey(urn) && !aspects.get(urn).isEmpty()) + || (timeseriesAspects.containsKey(urn) + && !timeseriesAspects.get(urn).isEmpty())) .map( - u -> - GenericEntityV3.builder() - .build( - objectMapper, u, toAspectItemMap(u, aspects.get(u), withSystemMetadata))) + u -> { + Map aspectItemMap = new HashMap<>(); + if (aspects.containsKey(u)) { + aspectItemMap.putAll(toAspectItemMap(u, aspects.get(u), 
withSystemMetadata)); + } + if (timeseriesAspects.containsKey(u)) { + aspectItemMap.putAll( + toTimeseriesAspectItemMap(u, timeseriesAspects.get(u), withSystemMetadata)); + } + + return GenericEntityV3.builder().build(objectMapper, u, aspectItemMap); + }) .collect(Collectors.toList()); } else if (!expandEmpty) { return requestedUrns.stream() @@ -285,6 +318,24 @@ private Map toAspectItemMap( .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } + private Map toTimeseriesAspectItemMap( + Urn urn, + Map aspects, + boolean withSystemMetadata) { + return aspects.entrySet().stream() + .map( + e -> + Map.entry( + e.getKey(), + AspectItem.builder() + .aspect( + toRecordTemplate(lookupAspectSpec(urn, e.getKey()).get(), e.getValue())) + .systemMetadata( + withSystemMetadata ? e.getValue().getSystemMetadata() : null) + .build())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } + @Override protected List buildEntityList( OperationContext opContext, @@ -369,11 +420,11 @@ private List toRecordTemplates( expandEmpty); } - private LinkedHashMap> toEntityVersionRequest( + private LinkedHashMap> toEntityVersionRequest( @Nonnull String entityArrayList) throws JsonProcessingException, InvalidUrnException { JsonNode entities = objectMapper.readTree(entityArrayList); - LinkedHashMap> items = new LinkedHashMap<>(); + LinkedHashMap> items = new LinkedHashMap<>(); if (entities.isArray()) { Iterator entityItr = entities.iterator(); while (entityItr.hasNext()) { @@ -404,10 +455,10 @@ private LinkedHashMap> toEntityVersionRequest( items .get(entityUrn) .put( - aspectSpec.getName(), + aspectSpec, Long.parseLong(headers.getOrDefault(HTTP_HEADER_IF_VERSION_MATCH, "0"))); } else { - items.get(entityUrn).put(aspectSpec.getName(), 0L); + items.get(entityUrn).put(aspectSpec, 0L); } } } @@ -416,7 +467,7 @@ private LinkedHashMap> toEntityVersionRequest( if (items.get(entityUrn).isEmpty()) { for (AspectSpec aspectSpec : 
entityRegistry.getEntitySpec(entityUrn.getEntityType()).getAspectSpecs()) { - items.get(entityUrn).put(aspectSpec.getName(), 0L); + items.get(entityUrn).put(aspectSpec, 0L); } } } diff --git a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java index 821c517c67f6c6..952dc31c5ba386 100644 --- a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java +++ b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java @@ -1,6 +1,7 @@ package io.datahubproject.openapi.v3.controller; import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; +import static com.linkedin.metadata.Constants.DATASET_PROFILE_ASPECT_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; import static com.linkedin.metadata.utils.GenericRecordUtils.JSON; @@ -32,6 +33,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.RecordTemplate; +import com.linkedin.dataset.DatasetProfile; import com.linkedin.entity.Aspect; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.metadata.aspect.batch.AspectsBatch; @@ -46,6 +48,7 @@ import com.linkedin.metadata.search.SearchEntity; import com.linkedin.metadata.search.SearchEntityArray; import com.linkedin.metadata.search.SearchService; +import com.linkedin.metadata.timeseries.TimeseriesAspectService; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.metadata.utils.SearchUtil; import com.linkedin.mxe.GenericAspect; @@ -57,6 +60,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; import 
org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc; @@ -85,6 +89,7 @@ public class EntityControllerTest extends AbstractTestNGSpringContextTests { @Autowired private MockMvc mockMvc; @Autowired private SearchService mockSearchService; @Autowired private EntityService mockEntityService; + @Autowired private TimeseriesAspectService mockTimeseriesAspectService; @Autowired private EntityRegistry entityRegistry; @Autowired private OperationContext opContext; @@ -314,10 +319,76 @@ public void testAlternativeMCPValidation() throws InvalidUrnException, JsonProce propertyDefinition.data().get("entityTypes"), List.of("urn:li:entityType:datahub.dataset")); } + @Test + public void testTimeseriesAspect() throws Exception { + Urn TEST_URN = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)"); + DatasetProfile firstDatasetProfile = + new DatasetProfile() + .setRowCount(1) + .setColumnCount(10) + .setMessageId("testOld") + .setTimestampMillis(100); + DatasetProfile secondDatasetProfile = + new DatasetProfile() + .setRowCount(10) + .setColumnCount(100) + .setMessageId("testLatest") + .setTimestampMillis(200); + + // Mock expected timeseries service response + when(mockTimeseriesAspectService.getLatestTimeseriesAspectValues( + any(OperationContext.class), + eq(Set.of(TEST_URN)), + eq(Set.of(DATASET_PROFILE_ASPECT_NAME)), + eq(Map.of(DATASET_PROFILE_ASPECT_NAME, 150L)))) + .thenReturn( + Map.of( + TEST_URN, + Map.of( + DATASET_PROFILE_ASPECT_NAME, + new com.linkedin.metadata.aspect.EnvelopedAspect() + .setAspect(GenericRecordUtils.serializeAspect(firstDatasetProfile))))); + + when(mockTimeseriesAspectService.getLatestTimeseriesAspectValues( + any(OperationContext.class), + eq(Set.of(TEST_URN)), + eq(Set.of(DATASET_PROFILE_ASPECT_NAME)), + eq(Map.of()))) + .thenReturn( + Map.of( + TEST_URN, + Map.of( + 
DATASET_PROFILE_ASPECT_NAME, + new com.linkedin.metadata.aspect.EnvelopedAspect() + .setAspect(GenericRecordUtils.serializeAspect(secondDatasetProfile))))); + + // test timeseries latest aspect + mockMvc + .perform( + MockMvcRequestBuilders.get("/v3/entity/dataset/{urn}/datasetprofile", TEST_URN) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is2xxSuccessful()) + .andExpect(MockMvcResultMatchers.jsonPath("$.value.rowCount").value(10)) + .andExpect(MockMvcResultMatchers.jsonPath("$.value.columnCount").value(100)) + .andExpect(MockMvcResultMatchers.jsonPath("$.value.messageId").value("testLatest")); + + // test old aspect + mockMvc + .perform( + MockMvcRequestBuilders.get("/v3/entity/dataset/{urn}/datasetprofile", TEST_URN) + .param("version", "150") + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is2xxSuccessful()) + .andExpect(MockMvcResultMatchers.jsonPath("$.value.rowCount").value(1)) + .andExpect(MockMvcResultMatchers.jsonPath("$.value.columnCount").value(10)) + .andExpect(MockMvcResultMatchers.jsonPath("$.value.messageId").value("testOld")); + } + @TestConfiguration public static class EntityControllerTestConfig { @MockBean public EntityServiceImpl entityService; @MockBean public SearchService searchService; + @MockBean public TimeseriesAspectService timeseriesAspectService; @Bean public ObjectMapper objectMapper() { @@ -354,5 +425,10 @@ public AuthorizerChain authorizerChain() { return authorizerChain; } + + @Bean + public TimeseriesAspectService timeseriesAspectService() { + return timeseriesAspectService; + } } } diff --git a/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java b/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java index 7ed183e975f3b9..1aea837818fe82 100644 --- a/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java +++ 
b/metadata-service/restli-servlet-impl/src/test/java/mock/MockTimeseriesAspectService.java @@ -15,6 +15,8 @@ import com.linkedin.timeseries.TimeseriesIndexSizeResult; import io.datahubproject.metadata.context.OperationContext; import java.util.List; +import java.util.Map; +import java.util.Set; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -67,6 +69,16 @@ public List getAspectValues( return List.of(); } + @Nonnull + @Override + public Map> getLatestTimeseriesAspectValues( + @Nonnull OperationContext opContext, + @Nonnull Set urns, + @Nonnull Set aspectNames, + @Nullable Map beforeTimeMillis) { + return Map.of(); + } + @Nonnull @Override public GenericTable getAggregatedStats( diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java index 68c82f0ef2e0da..b10debb7a61fc9 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java @@ -12,6 +12,8 @@ import com.linkedin.timeseries.TimeseriesIndexSizeResult; import io.datahubproject.metadata.context.OperationContext; import java.util.List; +import java.util.Map; +import java.util.Set; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -116,6 +118,22 @@ List getAspectValues( @Nullable final Filter filter, @Nullable final SortCriterion sort); + /** + * Returns the latest value for the given URNs and aspects + * + * @param opContext operation context + * @param urns the urns + * @param aspectNames the aspects + * @param endTimeMillis fetch latest aspect before this time in milliseconds for each aspect + * @return Map of the urns + */ + @Nonnull + Map> getLatestTimeseriesAspectValues( + @Nonnull OperationContext opContext, + @Nonnull final Set urns, + @Nonnull 
final Set aspectNames, + @Nullable final Map endTimeMillis); + /** * Perform a arbitrary aggregation query over a set of Time-Series aspects. This is used to answer * arbitrary questions about the Time-Series aspects that we have. diff --git a/smoke-test/tests/openapi/test_openapi.py b/smoke-test/tests/openapi/test_openapi.py index 0217b185570be1..dbb28fb9a2e319 100644 --- a/smoke-test/tests/openapi/test_openapi.py +++ b/smoke-test/tests/openapi/test_openapi.py @@ -2,6 +2,7 @@ import glob import json import logging +import time from deepdiff import DeepDiff @@ -32,6 +33,9 @@ def evaluate_test(auth_session, test_name, test_data): description = req_resp["request"].pop("description") else: description = None + if "wait" in req_resp["request"]: + time.sleep(int(req_resp["request"]["wait"])) + continue url = req_resp["request"]["url"] actual_resp = execute_request(auth_session, req_resp["request"]) try: diff --git a/smoke-test/tests/openapi/v3/timeseries.json b/smoke-test/tests/openapi/v3/timeseries.json new file mode 100644 index 00000000000000..7f4c46de259604 --- /dev/null +++ b/smoke-test/tests/openapi/v3/timeseries.json @@ -0,0 +1,156 @@ +[ + { + "request": { + "url": "/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetTimeseriesV3%2CPROD%29", + "description": "Remove test dataset", + "method": "delete" + } + }, + { + "request": { + "url": "/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetTimeseriesV3%2CPROD%29/datasetProfile", + "description": "Create timeseries old", + "params": { + "createIfNotExists": "false", + "async": "false" + }, + "json": { + "value": { + "rowCount": 100, + "messageId": "old profile", + "timestampMillis": 100 + } + } + }, + "response": { + "json": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimeseriesV3,PROD)", + "datasetProfile": { + "value": { + "messageId": "old profile", + "timestampMillis": 100, + "rowCount": 100 + } + } + } + } + }, + { + 
"request": { + "url": "/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetTimeseriesV3%2CPROD%29/datasetProfile", + "description": "Create timeseries new", + "params": { + "createIfNotExists": "false", + "async": "false" + }, + "json": { + "value": { + "rowCount": 200, + "messageId": "new profile", + "timestampMillis": 200 + } + } + }, + "response": { + "json": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:test,datasetTimeseriesV3,PROD)", + "datasetProfile": { + "value": { + "messageId": "new profile", + "timestampMillis": 200, + "rowCount": 200 + } + } + } + } + }, + { + "request": { + "url": "/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetTimeseriesV3%2CPROD%29/datasetProfile", + "method": "get", + "description": "Get latest profile" + }, + "response": { + "json": { + "value": { + "messageId": "new profile", + "rowCount": 200, + "timestampMillis": 200 + } + } + } + }, + { + "request": { + "url": "/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetTimeseriesV3%2CPROD%29/datasetProfile", + "method": "get", + "description": "Get older profile", + "params": { + "version": "150" + } + }, + "response": { + "json": { + "value": { + "messageId": "old profile", + "rowCount": 100, + "timestampMillis": 100 + } + } + } + }, + { + "request": { + "url": "/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetTimeseriesV3%2CPROD%29/datasetProfile", + "description": "Remove test profile", + "method": "delete" + } + }, + { + "request": { + "description": "Elasticsearch refresh interval", + "wait": 2 + } + }, + { + "request": { + "url": "/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetTimeseriesV3%2CPROD%29/datasetProfile", + "method": "get", + "description": "Get older profile after delete of new profile" + }, + "response": { + "json": { + "value": { + "messageId": "old 
profile", + "rowCount": 100, + "timestampMillis": 100 + } + } + } + }, + { + "request": { + "url": "/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetTimeseriesV3%2CPROD%29/datasetProfile", + "description": "Remove test profile", + "method": "delete" + } + }, + { + "request": { + "description": "Elasticsearch refresh interval", + "wait": 2 + } + }, + { + "request": { + "url": "/openapi/v3/entity/dataset/urn%3Ali%3Adataset%3A%28urn%3Ali%3AdataPlatform%3Atest%2CdatasetTimeseriesV3%2CPROD%29/datasetProfile", + "method": "get", + "description": "Expected no remaining values after timeseries aspect removals" + }, + "response": { + "status_codes": [ + 404 + ] + } + } +] \ No newline at end of file From a7e03a3663c7de864a78acc4cf3ca0998b846c72 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Thu, 12 Dec 2024 12:32:11 -0500 Subject: [PATCH 02/10] feat(forms) Clean up form prompts on structured property deletion (#12053) --- .../entity/DeleteEntityUtilsTest.java | 73 ++++++ .../pegasus/com/linkedin/form/FormPrompt.pdl | 4 + .../factories/BootstrapManagerFactory.java | 6 +- .../steps/RestoreFormInfoIndicesStep.java | 129 ++++++++++ .../steps/RestoreFormInfoIndicesStepTest.java | 242 ++++++++++++++++++ .../metadata/entity/DeleteEntityService.java | 154 +++++------ .../metadata/entity/DeleteEntityUtils.java | 166 ++++++++++++ 7 files changed, 678 insertions(+), 96 deletions(-) create mode 100644 metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreFormInfoIndicesStep.java create mode 100644 metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/RestoreFormInfoIndicesStepTest.java diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityUtilsTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityUtilsTest.java index 943ad2967de429..f210ae94d0d70e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityUtilsTest.java +++ 
b/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityUtilsTest.java @@ -1,14 +1,31 @@ package com.linkedin.metadata.entity; import com.datahub.util.RecordUtils; +import com.google.common.collect.ImmutableList; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.DataList; import com.linkedin.data.DataMap; import com.linkedin.data.schema.DataSchema; import com.linkedin.data.schema.PathSpec; import com.linkedin.data.schema.grammar.PdlSchemaParser; import com.linkedin.data.schema.resolver.DefaultDataSchemaResolver; +import com.linkedin.data.template.StringArray; import com.linkedin.entity.Aspect; +import com.linkedin.form.FormInfo; +import com.linkedin.form.FormPrompt; +import com.linkedin.form.FormPromptArray; +import com.linkedin.form.StructuredPropertyParams; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; import com.linkedin.schema.SchemaMetadata; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; import junit.framework.TestCase; import org.testng.annotations.Test; @@ -359,4 +376,60 @@ public void testSchemaMetadataDelete() { .get("tags")) .size()); } + + @Test + public void testRemovePromptsFromFormInfo() { + Urn deletedPropertyUrn = UrnUtils.getUrn("urn:li:structuredProperty:1"); + Urn existingPropertyUrn = UrnUtils.getUrn("urn:li:structuredProperty:2"); + List prompts = new ArrayList<>(); + prompts.add( + new FormPrompt() + .setId("1") + .setStructuredPropertyParams( + new StructuredPropertyParams().setUrn(deletedPropertyUrn))); + prompts.add( + new FormPrompt() + .setId("2") + .setStructuredPropertyParams( + new 
StructuredPropertyParams().setUrn(existingPropertyUrn))); + FormInfo formInfo = new FormInfo().setPrompts(new FormPromptArray(prompts)); + + FormInfo updatedFormInfo = + DeleteEntityUtils.removePromptsFromFormInfoAspect(formInfo, deletedPropertyUrn); + + assertEquals(updatedFormInfo.getPrompts().size(), 1); + assertEquals( + updatedFormInfo.getPrompts(), + formInfo.getPrompts().stream() + .filter(prompt -> !prompt.getId().equals("1")) + .collect(Collectors.toList())); + } + + @Test + public void testFilterForStructuredPropDeletion() { + Urn deletedPropertyUrn = UrnUtils.getUrn("urn:li:structuredProperty:1"); + + final CriterionArray criterionArray = new CriterionArray(); + criterionArray.add( + new Criterion() + .setField("structuredPropertyPromptUrns") + .setValues(new StringArray(deletedPropertyUrn.toString())) + .setNegated(false) + .setValue("") + .setCondition(Condition.EQUAL)); + Filter expectedFilter = + new Filter() + .setOr( + new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(criterionArray))); + + assertEquals( + DeleteEntityUtils.getFilterForStructuredPropertyDeletion(deletedPropertyUrn), + expectedFilter); + } + + @Test + public void testEntityNamesForStructuredPropDeletion() { + assertEquals( + DeleteEntityUtils.getEntityNamesForStructuredPropertyDeletion(), ImmutableList.of("form")); + } } diff --git a/metadata-models/src/main/pegasus/com/linkedin/form/FormPrompt.pdl b/metadata-models/src/main/pegasus/com/linkedin/form/FormPrompt.pdl index f84a8a719f07a4..55e1a2a8fc1b72 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/form/FormPrompt.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/form/FormPrompt.pdl @@ -43,6 +43,10 @@ record FormPrompt { /** * The structured property that is required on this entity */ + @Searchable = { + "fieldType": "URN", + "fieldName": "structuredPropertyPromptUrns", + } urn: Urn } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java 
b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java index ffc739e905cd68..3f8d24b9b61a93 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/factories/BootstrapManagerFactory.java @@ -18,6 +18,7 @@ import com.linkedin.metadata.boot.steps.RemoveClientIdAspectStep; import com.linkedin.metadata.boot.steps.RestoreColumnLineageIndices; import com.linkedin.metadata.boot.steps.RestoreDbtSiblingsIndices; +import com.linkedin.metadata.boot.steps.RestoreFormInfoIndicesStep; import com.linkedin.metadata.boot.steps.RestoreGlossaryIndices; import com.linkedin.metadata.boot.steps.WaitForSystemUpdateStep; import com.linkedin.metadata.entity.AspectMigrationsDao; @@ -110,6 +111,8 @@ protected BootstrapManager createInstance( final WaitForSystemUpdateStep waitForSystemUpdateStep = new WaitForSystemUpdateStep(_dataHubUpgradeKafkaListener, _configurationProvider); final IngestEntityTypesStep ingestEntityTypesStep = new IngestEntityTypesStep(_entityService); + final RestoreFormInfoIndicesStep restoreFormInfoIndicesStep = + new RestoreFormInfoIndicesStep(_entityService); final List finalSteps = new ArrayList<>( @@ -124,7 +127,8 @@ protected BootstrapManager createInstance( restoreDbtSiblingsIndices, indexDataPlatformsStep, restoreColumnLineageIndices, - ingestEntityTypesStep)); + ingestEntityTypesStep, + restoreFormInfoIndicesStep)); return new BootstrapManager(finalSteps); } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreFormInfoIndicesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreFormInfoIndicesStep.java new file mode 100644 index 00000000000000..c5dbea5965a66a --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreFormInfoIndicesStep.java @@ -0,0 
+1,129 @@ +package com.linkedin.metadata.boot.steps; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.form.FormInfo; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.boot.UpgradeStep; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.ListResult; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.query.ExtraInfo; +import io.datahubproject.metadata.context.OperationContext; +import java.util.LinkedList; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class RestoreFormInfoIndicesStep extends UpgradeStep { + private static final String VERSION = "1"; + private static final String UPGRADE_ID = "restore-form-info-indices"; + private static final Integer BATCH_SIZE = 1000; + + public RestoreFormInfoIndicesStep(@Nonnull final EntityService entityService) { + super(entityService, VERSION, UPGRADE_ID); + } + + @Override + public void upgrade(@Nonnull OperationContext systemOperationContext) throws Exception { + final AuditStamp auditStamp = + new AuditStamp() + .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)) + .setTime(System.currentTimeMillis()); + + final int totalFormCount = getAndRestoreFormInfoIndices(systemOperationContext, 0, auditStamp); + int formCount = BATCH_SIZE; + while (formCount < totalFormCount) { + getAndRestoreFormInfoIndices(systemOperationContext, formCount, auditStamp); + formCount += BATCH_SIZE; + } + } + + @Nonnull + @Override + public ExecutionMode getExecutionMode() { + return ExecutionMode.ASYNC; + } + + private int getAndRestoreFormInfoIndices( + @Nonnull OperationContext systemOperationContext, int start, AuditStamp auditStamp) { + 
final AspectSpec formInfoAspectSpec = + systemOperationContext + .getEntityRegistry() + .getEntitySpec(Constants.FORM_ENTITY_NAME) + .getAspectSpec(Constants.FORM_INFO_ASPECT_NAME); + + final ListResult latestAspects = + entityService.listLatestAspects( + systemOperationContext, + Constants.FORM_ENTITY_NAME, + Constants.FORM_INFO_ASPECT_NAME, + start, + BATCH_SIZE); + + if (latestAspects.getTotalCount() == 0 + || latestAspects.getValues() == null + || latestAspects.getMetadata() == null) { + log.debug("Found 0 formInfo aspects for forms. Skipping migration."); + return 0; + } + + if (latestAspects.getValues().size() != latestAspects.getMetadata().getExtraInfos().size()) { + // Bad result -- we should log that we cannot migrate this batch of formInfos. + log.warn( + "Failed to match formInfo aspects with corresponding urns. Found mismatched length between aspects ({})" + + "and metadata ({}) for metadata {}", + latestAspects.getValues().size(), + latestAspects.getMetadata().getExtraInfos().size(), + latestAspects.getMetadata()); + return latestAspects.getTotalCount(); + } + + List> futures = new LinkedList<>(); + for (int i = 0; i < latestAspects.getValues().size(); i++) { + ExtraInfo info = latestAspects.getMetadata().getExtraInfos().get(i); + RecordTemplate formInfoRecord = latestAspects.getValues().get(i); + Urn urn = info.getUrn(); + FormInfo formInfo = (FormInfo) formInfoRecord; + if (formInfo == null) { + log.warn("Received null formInfo for urn {}", urn); + continue; + } + + futures.add( + entityService + .alwaysProduceMCLAsync( + systemOperationContext, + urn, + Constants.FORM_ENTITY_NAME, + Constants.FORM_INFO_ASPECT_NAME, + formInfoAspectSpec, + null, + formInfo, + null, + null, + auditStamp, + ChangeType.RESTATE) + .getFirst()); + } + + futures.stream() + .filter(Objects::nonNull) + .forEach( + f -> { + try { + f.get(); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + }); + + return 
latestAspects.getTotalCount(); + } +} diff --git a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/RestoreFormInfoIndicesStepTest.java b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/RestoreFormInfoIndicesStepTest.java new file mode 100644 index 00000000000000..9705ecbfcb739a --- /dev/null +++ b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/RestoreFormInfoIndicesStepTest.java @@ -0,0 +1,242 @@ +package com.linkedin.metadata.boot.steps; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.google.common.collect.ImmutableList; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.form.FormInfo; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.ListResult; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.query.ExtraInfo; +import com.linkedin.metadata.query.ExtraInfoArray; +import com.linkedin.metadata.query.ListResultMetadata; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.util.Pair; +import io.datahubproject.metadata.context.OperationContext; +import jakarta.annotation.Nonnull; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Future; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +public class RestoreFormInfoIndicesStepTest { + + private static final 
String VERSION_1 = "1"; + private static final String VERSION_2 = "2"; + private static final String FORM_INFO_UPGRADE_URN = + String.format( + "urn:li:%s:%s", Constants.DATA_HUB_UPGRADE_ENTITY_NAME, "restore-form-info-indices"); + private final Urn formUrn = UrnUtils.getUrn("urn:li:form:test"); + + @Test + public void testExecuteFirstTime() throws Exception { + final EntityService mockService = Mockito.mock(EntityService.class); + final EntityRegistry mockRegistry = Mockito.mock(EntityRegistry.class); + final OperationContext mockContext = mock(OperationContext.class); + when(mockContext.getEntityRegistry()).thenReturn(mockRegistry); + + mockGetUpgradeStep(mockContext, false, VERSION_1, mockService); + mockGetFormInfo(mockContext, formUrn, mockService); + + final AspectSpec aspectSpec = mockAspectSpecs(mockRegistry); + + final RestoreFormInfoIndicesStep restoreIndicesStep = + new RestoreFormInfoIndicesStep(mockService); + restoreIndicesStep.execute(mockContext); + + Mockito.verify(mockRegistry, Mockito.times(1)).getEntitySpec(Constants.FORM_ENTITY_NAME); + // creates upgradeRequest and upgradeResult aspects + Mockito.verify(mockService, Mockito.times(2)) + .ingestProposal( + any(OperationContext.class), + any(MetadataChangeProposal.class), + any(AuditStamp.class), + Mockito.eq(false)); + Mockito.verify(mockService, Mockito.times(1)) + .alwaysProduceMCLAsync( + any(OperationContext.class), + Mockito.eq(formUrn), + Mockito.eq(Constants.FORM_ENTITY_NAME), + Mockito.eq(Constants.FORM_INFO_ASPECT_NAME), + Mockito.eq(aspectSpec), + Mockito.eq(null), + any(), + Mockito.eq(null), + Mockito.eq(null), + any(), + Mockito.eq(ChangeType.RESTATE)); + } + + @Test + public void testExecuteWithNewVersion() throws Exception { + final EntityService mockService = Mockito.mock(EntityService.class); + final EntityRegistry mockRegistry = Mockito.mock(EntityRegistry.class); + final OperationContext mockContext = mock(OperationContext.class); + 
when(mockContext.getEntityRegistry()).thenReturn(mockRegistry); + + mockGetUpgradeStep(mockContext, true, VERSION_2, mockService); + mockGetFormInfo(mockContext, formUrn, mockService); + + final AspectSpec aspectSpec = mockAspectSpecs(mockRegistry); + + final RestoreFormInfoIndicesStep restoreIndicesStep = + new RestoreFormInfoIndicesStep(mockService); + restoreIndicesStep.execute(mockContext); + + Mockito.verify(mockRegistry, Mockito.times(1)).getEntitySpec(Constants.FORM_ENTITY_NAME); + // creates upgradeRequest and upgradeResult aspects + Mockito.verify(mockService, Mockito.times(2)) + .ingestProposal( + any(OperationContext.class), + any(MetadataChangeProposal.class), + any(AuditStamp.class), + Mockito.eq(false)); + Mockito.verify(mockService, Mockito.times(1)) + .alwaysProduceMCLAsync( + any(OperationContext.class), + Mockito.eq(formUrn), + Mockito.eq(Constants.FORM_ENTITY_NAME), + Mockito.eq(Constants.FORM_INFO_ASPECT_NAME), + Mockito.eq(aspectSpec), + Mockito.eq(null), + any(), + Mockito.eq(null), + Mockito.eq(null), + any(), + Mockito.eq(ChangeType.RESTATE)); + } + + @Test + public void testDoesNotExecuteWithSameVersion() throws Exception { + final EntityService mockService = Mockito.mock(EntityService.class); + final EntityRegistry mockRegistry = Mockito.mock(EntityRegistry.class); + final OperationContext mockContext = mock(OperationContext.class); + when(mockContext.getEntityRegistry()).thenReturn(mockRegistry); + + mockGetUpgradeStep(mockContext, true, VERSION_1, mockService); + mockGetFormInfo(mockContext, formUrn, mockService); + + final AspectSpec aspectSpec = mockAspectSpecs(mockRegistry); + + final RestoreFormInfoIndicesStep restoreIndicesStep = + new RestoreFormInfoIndicesStep(mockService); + restoreIndicesStep.execute(mockContext); + + Mockito.verify(mockRegistry, Mockito.times(0)).getEntitySpec(Constants.FORM_ENTITY_NAME); + // creates upgradeRequest and upgradeResult aspects + Mockito.verify(mockService, Mockito.times(0)) + .ingestProposal( + 
any(OperationContext.class), + any(MetadataChangeProposal.class), + any(AuditStamp.class), + Mockito.eq(false)); + Mockito.verify(mockService, Mockito.times(0)) + .alwaysProduceMCLAsync( + any(OperationContext.class), + Mockito.eq(formUrn), + Mockito.eq(Constants.FORM_ENTITY_NAME), + Mockito.eq(Constants.FORM_INFO_ASPECT_NAME), + Mockito.eq(aspectSpec), + Mockito.eq(null), + any(), + Mockito.eq(null), + Mockito.eq(null), + any(), + Mockito.eq(ChangeType.RESTATE)); + } + + private void mockGetFormInfo( + @Nonnull OperationContext mockContext, + @Nonnull Urn formUrn, + @Nonnull EntityService mockService) { + final List extraInfos = + ImmutableList.of( + new ExtraInfo() + .setUrn(formUrn) + .setVersion(0L) + .setAudit( + new AuditStamp() + .setActor(UrnUtils.getUrn("urn:li:corpuser:test")) + .setTime(0L))); + + when(mockService.alwaysProduceMCLAsync( + any(OperationContext.class), + any(Urn.class), + Mockito.anyString(), + Mockito.anyString(), + any(AspectSpec.class), + Mockito.eq(null), + any(), + any(), + any(), + any(), + any(ChangeType.class))) + .thenReturn(Pair.of(Mockito.mock(Future.class), false)); + + when(mockService.listLatestAspects( + any(OperationContext.class), + Mockito.eq(Constants.FORM_ENTITY_NAME), + Mockito.eq(Constants.FORM_INFO_ASPECT_NAME), + Mockito.eq(0), + Mockito.eq(1000))) + .thenReturn( + new ListResult<>( + ImmutableList.of(new FormInfo()), + new ListResultMetadata().setExtraInfos(new ExtraInfoArray(extraInfos)), + 1, + false, + 1, + 1, + 1)); + } + + private AspectSpec mockAspectSpecs(@Nonnull EntityRegistry mockRegistry) { + final EntitySpec entitySpec = Mockito.mock(EntitySpec.class); + final AspectSpec aspectSpec = Mockito.mock(AspectSpec.class); + // Mock for formInfo + when(mockRegistry.getEntitySpec(Constants.FORM_ENTITY_NAME)).thenReturn(entitySpec); + when(entitySpec.getAspectSpec(Constants.FORM_INFO_ASPECT_NAME)).thenReturn(aspectSpec); + + return aspectSpec; + } + + private void mockGetUpgradeStep( + @Nonnull OperationContext 
mockContext, + boolean shouldReturnResponse, + @Nonnull String version, + @Nonnull EntityService mockService) + throws Exception { + + final Urn upgradeEntityUrn = UrnUtils.getUrn(FORM_INFO_UPGRADE_URN); + final com.linkedin.upgrade.DataHubUpgradeRequest upgradeRequest = + new com.linkedin.upgrade.DataHubUpgradeRequest().setVersion(version); + final Map upgradeRequestAspects = new HashMap<>(); + upgradeRequestAspects.put( + Constants.DATA_HUB_UPGRADE_REQUEST_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(upgradeRequest.data()))); + final EntityResponse response = + new EntityResponse().setAspects(new EnvelopedAspectMap(upgradeRequestAspects)); + when(mockService.getEntityV2( + mockContext, + Constants.DATA_HUB_UPGRADE_ENTITY_NAME, + upgradeEntityUrn, + Collections.singleton(Constants.DATA_HUB_UPGRADE_REQUEST_ASPECT_NAME))) + .thenReturn(shouldReturnResponse ? response : null); + } +} diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java index a3c57a19eddd55..2259b9365ff640 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java @@ -1,16 +1,11 @@ package com.linkedin.metadata.entity; import static com.linkedin.metadata.search.utils.QueryUtils.*; -import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.datahub.util.RecordUtils; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.linkedin.common.AuditStamp; -import com.linkedin.common.FormAssociation; -import com.linkedin.common.FormAssociationArray; -import com.linkedin.common.FormVerificationAssociation; -import com.linkedin.common.FormVerificationAssociationArray; import com.linkedin.common.Forms; import 
com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; @@ -20,6 +15,7 @@ import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.events.metadata.ChangeType; +import com.linkedin.form.FormInfo; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.models.graph.RelatedEntity; import com.linkedin.metadata.graph.GraphService; @@ -28,10 +24,6 @@ import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.RelationshipFieldSpec; import com.linkedin.metadata.models.extractor.FieldExtractor; -import com.linkedin.metadata.query.filter.Condition; -import com.linkedin.metadata.query.filter.ConjunctiveCriterion; -import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.RelationshipDirection; import com.linkedin.metadata.run.DeleteReferencesResponse; @@ -568,65 +560,44 @@ private AssetScrollResult getAssetsReferencingUrn( result.assets = new ArrayList<>(); if (deletedUrn.getEntityType().equals("form")) { - // first, get all entities with this form assigned on it - final CriterionArray incompleteFormsArray = new CriterionArray(); - incompleteFormsArray.add( - buildCriterion("incompleteForms", Condition.EQUAL, deletedUrn.toString())); - final CriterionArray completedFormsArray = new CriterionArray(); - completedFormsArray.add( - buildCriterion("completedForms", Condition.EQUAL, deletedUrn.toString())); - // next, get all metadata tests created for this form - final CriterionArray metadataTestSourceArray = new CriterionArray(); - metadataTestSourceArray.add( - buildCriterion("sourceEntity", Condition.EQUAL, deletedUrn.toString())); - metadataTestSourceArray.add(buildCriterion("sourceType", Condition.EQUAL, "FORMS")); - Filter filter = - new Filter() - .setOr( - new ConjunctiveCriterionArray( - new 
ConjunctiveCriterion().setAnd(incompleteFormsArray), - new ConjunctiveCriterion().setAnd(completedFormsArray), - new ConjunctiveCriterion().setAnd(metadataTestSourceArray))); - ScrollResult scrollResult = - _searchService.structuredScroll( - opContext, - ImmutableList.of( - "dataset", - "dataJob", - "dataFlow", - "chart", - "dashboard", - "corpuser", - "corpGroup", - "domain", - "container", - "glossaryTerm", - "glossaryNode", - "mlModel", - "mlModelGroup", - "mlFeatureTable", - "mlFeature", - "mlPrimaryKey", - "schemaField", - "dataProduct", - "test"), - "*", - filter, - null, - scrollId, - "5m", - dryRun ? 1 : BATCH_SIZE); // need to pass in 1 for count otherwise get index error - if (scrollResult.getNumEntities() == 0 || scrollResult.getEntities().size() == 0) { - return result; - } - result.scrollId = scrollResult.getScrollId(); - result.totalAssetCount = scrollResult.getNumEntities(); - result.assets = - scrollResult.getEntities().stream() - .map(SearchEntity::getEntity) - .collect(Collectors.toList()); + Filter filter = DeleteEntityUtils.getFilterForFormDeletion(deletedUrn); + List entityNames = DeleteEntityUtils.getEntityNamesForFormDeletion(); + return scrollForAssets(opContext, result, filter, entityNames, scrollId, dryRun); + } + if (deletedUrn.getEntityType().equals("structuredProperty")) { + Filter filter = DeleteEntityUtils.getFilterForStructuredPropertyDeletion(deletedUrn); + List entityNames = DeleteEntityUtils.getEntityNamesForStructuredPropertyDeletion(); + return scrollForAssets(opContext, result, filter, entityNames, scrollId, dryRun); + } + return result; + } + + private AssetScrollResult scrollForAssets( + @Nonnull OperationContext opContext, + AssetScrollResult result, + final @Nullable Filter filter, + final @Nonnull List entityNames, + @Nullable String scrollId, + final boolean dryRun) { + ScrollResult scrollResult = + _searchService.structuredScroll( + opContext, + entityNames, + "*", + filter, + null, + scrollId, + "5m", + dryRun ? 
1 : BATCH_SIZE); // need to pass in 1 for count otherwise get index error + if (scrollResult.getNumEntities() == 0 || scrollResult.getEntities().size() == 0) { return result; } + result.scrollId = scrollResult.getScrollId(); + result.totalAssetCount = scrollResult.getNumEntities(); + result.assets = + scrollResult.getEntities().stream() + .map(SearchEntity::getEntity) + .collect(Collectors.toList()); return result; } @@ -641,7 +612,7 @@ private List deleteSearchReferencesForAsset( } List mcps = new ArrayList<>(); - List aspectsToUpdate = getAspectsToUpdate(deletedUrn); + List aspectsToUpdate = getAspectsToUpdate(deletedUrn, assetUrn); aspectsToUpdate.forEach( aspectName -> { try { @@ -667,10 +638,15 @@ private List deleteSearchReferencesForAsset( *

TODO: extend this to support other types of deletes and be more dynamic depending on aspects * that the asset has */ - private List getAspectsToUpdate(@Nonnull final Urn deletedUrn) { + private List getAspectsToUpdate( + @Nonnull final Urn deletedUrn, @Nonnull final Urn assetUrn) { if (deletedUrn.getEntityType().equals("form")) { return ImmutableList.of("forms"); } + if (deletedUrn.getEntityType().equals("structuredProperty") + && assetUrn.getEntityType().equals("form")) { + return ImmutableList.of("formInfo"); + } return new ArrayList<>(); } @@ -697,6 +673,9 @@ private MetadataChangeProposal updateAspectForSearchReference( if (aspectName.equals("forms")) { return updateFormsAspect(opContext, assetUrn, deletedUrn); } + if (aspectName.equals("formInfo") && deletedUrn.getEntityType().equals("structuredProperty")) { + return updateFormInfoAspect(opContext, assetUrn, deletedUrn); + } return null; } @@ -708,35 +687,20 @@ private MetadataChangeProposal updateFormsAspect( RecordTemplate record = _entityService.getLatestAspect(opContext, assetUrn, "forms"); if (record == null) return null; - Forms formsAspect = new Forms(record.data()); - final AtomicReference updatedAspect; - try { - updatedAspect = new AtomicReference<>(formsAspect.copy()); - } catch (Exception e) { - throw new RuntimeException("Failed to copy the forms aspect for updating", e); - } - - List incompleteForms = - formsAspect.getIncompleteForms().stream() - .filter(incompleteForm -> !incompleteForm.getUrn().equals(deletedUrn)) - .collect(Collectors.toList()); - List completedForms = - formsAspect.getCompletedForms().stream() - .filter(completedForm -> !completedForm.getUrn().equals(deletedUrn)) - .collect(Collectors.toList()); - final List verifications = - formsAspect.getVerifications().stream() - .filter(verification -> !verification.getForm().equals(deletedUrn)) - .collect(Collectors.toList()); + return DeleteEntityUtils.removeFormFromFormsAspect( + new Forms(record.data()), assetUrn, deletedUrn); + } - 
updatedAspect.get().setIncompleteForms(new FormAssociationArray(incompleteForms)); - updatedAspect.get().setCompletedForms(new FormAssociationArray(completedForms)); - updatedAspect.get().setVerifications(new FormVerificationAssociationArray(verifications)); + @Nullable + private MetadataChangeProposal updateFormInfoAspect( + @Nonnull OperationContext opContext, + @Nonnull final Urn assetUrn, + @Nonnull final Urn deletedUrn) { + RecordTemplate record = _entityService.getLatestAspect(opContext, assetUrn, "formInfo"); + if (record == null) return null; - if (!formsAspect.equals(updatedAspect.get())) { - return AspectUtils.buildMetadataChangeProposal(assetUrn, "forms", updatedAspect.get()); - } - return null; + return DeleteEntityUtils.createFormInfoUpdateProposal( + new FormInfo(record.data()), assetUrn, deletedUrn); } private AuditStamp createAuditStamp() { diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityUtils.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityUtils.java index 0a8b5880e5bce9..20dc104e1b436e 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityUtils.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityUtils.java @@ -1,5 +1,14 @@ package com.linkedin.metadata.entity; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + +import com.google.common.collect.ImmutableList; +import com.linkedin.common.FormAssociation; +import com.linkedin.common.FormAssociationArray; +import com.linkedin.common.FormVerificationAssociation; +import com.linkedin.common.FormVerificationAssociationArray; +import com.linkedin.common.Forms; +import com.linkedin.common.urn.Urn; import com.linkedin.data.DataComplex; import com.linkedin.data.DataList; import com.linkedin.data.DataMap; @@ -9,8 +18,22 @@ import com.linkedin.data.schema.RecordDataSchema; import com.linkedin.data.template.RecordTemplate; 
import com.linkedin.entity.Aspect; +import com.linkedin.form.FormInfo; +import com.linkedin.form.FormPrompt; +import com.linkedin.form.FormPromptArray; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.utils.CriterionUtils; +import com.linkedin.mxe.MetadataChangeProposal; import java.util.List; import java.util.ListIterator; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; /** @@ -207,4 +230,147 @@ private static DataComplex removeValueFromArray( } return aspectList; } + + /* + * Form Deletion Section + */ + + // We need to update assets that have this form on them in one way or another + public static Filter getFilterForFormDeletion(@Nonnull final Urn deletedUrn) { + // first, get all entities with this form assigned on it + final CriterionArray incompleteFormsArray = new CriterionArray(); + incompleteFormsArray.add( + buildCriterion("incompleteForms", Condition.EQUAL, deletedUrn.toString())); + final CriterionArray completedFormsArray = new CriterionArray(); + completedFormsArray.add( + buildCriterion("completedForms", Condition.EQUAL, deletedUrn.toString())); + // next, get all metadata tests created for this form + final CriterionArray metadataTestSourceArray = new CriterionArray(); + metadataTestSourceArray.add( + buildCriterion("sourceEntity", Condition.EQUAL, deletedUrn.toString())); + metadataTestSourceArray.add(buildCriterion("sourceType", Condition.EQUAL, "FORMS")); + return new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion().setAnd(incompleteFormsArray), + new 
ConjunctiveCriterion().setAnd(completedFormsArray), + new ConjunctiveCriterion().setAnd(metadataTestSourceArray))); + } + + @Nullable + public static MetadataChangeProposal removeFormFromFormsAspect( + @Nonnull Forms formsAspect, @Nonnull final Urn assetUrn, @Nonnull final Urn deletedUrn) { + final AtomicReference updatedAspect; + try { + updatedAspect = new AtomicReference<>(formsAspect.copy()); + } catch (Exception e) { + throw new RuntimeException("Failed to copy the forms aspect for updating", e); + } + + List incompleteForms = + formsAspect.getIncompleteForms().stream() + .filter(incompleteForm -> !incompleteForm.getUrn().equals(deletedUrn)) + .collect(Collectors.toList()); + List completedForms = + formsAspect.getCompletedForms().stream() + .filter(completedForm -> !completedForm.getUrn().equals(deletedUrn)) + .collect(Collectors.toList()); + final List verifications = + formsAspect.getVerifications().stream() + .filter(verification -> !verification.getForm().equals(deletedUrn)) + .collect(Collectors.toList()); + + updatedAspect.get().setIncompleteForms(new FormAssociationArray(incompleteForms)); + updatedAspect.get().setCompletedForms(new FormAssociationArray(completedForms)); + updatedAspect.get().setVerifications(new FormVerificationAssociationArray(verifications)); + + if (!formsAspect.equals(updatedAspect.get())) { + return AspectUtils.buildMetadataChangeProposal(assetUrn, "forms", updatedAspect.get()); + } + return null; + } + + // all assets that could have a form associated with them + public static List getEntityNamesForFormDeletion() { + return ImmutableList.of( + "dataset", + "dataJob", + "dataFlow", + "chart", + "dashboard", + "corpuser", + "corpGroup", + "domain", + "container", + "glossaryTerm", + "glossaryNode", + "mlModel", + "mlModelGroup", + "mlFeatureTable", + "mlFeature", + "mlPrimaryKey", + "schemaField", + "dataProduct", + "test"); + } + + /* + * Structured Property Deletion Section + */ + + // get forms that have this structured 
property referenced in a prompt + public static Filter getFilterForStructuredPropertyDeletion(@Nonnull final Urn deletedUrn) { + final CriterionArray criterionArray = new CriterionArray(); + criterionArray.add( + CriterionUtils.buildCriterion( + "structuredPropertyPromptUrns", Condition.EQUAL, deletedUrn.toString())); + return new Filter() + .setOr(new ConjunctiveCriterionArray(new ConjunctiveCriterion().setAnd(criterionArray))); + } + + // only need to update forms manually when deleting structured props + public static List getEntityNamesForStructuredPropertyDeletion() { + return ImmutableList.of("form"); + } + + @Nullable + public static MetadataChangeProposal createFormInfoUpdateProposal( + @Nonnull FormInfo formsAspect, @Nonnull final Urn assetUrn, @Nonnull final Urn deletedUrn) { + final FormInfo updatedFormInfo = removePromptsFromFormInfoAspect(formsAspect, deletedUrn); + + if (!formsAspect.equals(updatedFormInfo)) { + return AspectUtils.buildMetadataChangeProposal(assetUrn, "formInfo", updatedFormInfo); + } + + return null; + } + + // remove any prompts referencing the deleted structured property urn + @Nonnull + public static FormInfo removePromptsFromFormInfoAspect( + @Nonnull FormInfo formsAspect, @Nonnull final Urn deletedUrn) { + final AtomicReference updatedAspect; + try { + updatedAspect = new AtomicReference<>(formsAspect.copy()); + } catch (Exception e) { + throw new RuntimeException("Failed to copy the formInfo aspect for updating", e); + } + + // filter out any prompt that has this structured property referenced on it + List filteredPrompts = + formsAspect.getPrompts().stream() + .filter( + prompt -> { + if (prompt.getStructuredPropertyParams() != null + && prompt.getStructuredPropertyParams().getUrn() != null) { + return !prompt.getStructuredPropertyParams().getUrn().equals(deletedUrn); + } + return true; + }) + .collect(Collectors.toList()); + + updatedAspect.get().setPrompts(new FormPromptArray(filteredPrompts)); + + return 
updatedAspect.get(); + } } From 613b433d18cf95f3b47d97c0fdfab557a9c82a3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Thu, 12 Dec 2024 19:11:54 +0100 Subject: [PATCH 03/10] fix(datahub-client): adds missing archiveAppendix to artifactid when publishing (#12112) --- metadata-integration/java/datahub-client/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-integration/java/datahub-client/build.gradle b/metadata-integration/java/datahub-client/build.gradle index 7dad21a6417fc4..3e940b0f32248f 100644 --- a/metadata-integration/java/datahub-client/build.gradle +++ b/metadata-integration/java/datahub-client/build.gradle @@ -196,7 +196,7 @@ publishing { pom { name = 'Datahub Client' group = 'io.acryl' - artifactId = 'datahub-client' + artifactId = 'datahub-client' + (project.hasProperty('archiveAppendix') ? "-${archiveAppendix}" : '') description = 'DataHub Java client for metadata integration' url = 'https://datahubproject.io' artifacts = [shadowJar, javadocJar, sourcesJar] From 3afc0782f019558a706d65af0838b112b8396015 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Dec 2024 12:12:51 -0600 Subject: [PATCH 04/10] chore(deps): bump nanoid from 3.3.6 to 3.3.8 in /datahub-web-react (#12086) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- datahub-web-react/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datahub-web-react/yarn.lock b/datahub-web-react/yarn.lock index 13b5bbf638c15e..dc7260efd183fd 100644 --- a/datahub-web-react/yarn.lock +++ b/datahub-web-react/yarn.lock @@ -8921,9 +8921,9 @@ nanoevents@^5.1.13: integrity sha512-JFAeG9fp0QZnRoESHjkbVFbZ9BkOXkkagUVwZVo/pkSX+Fq1VKlY+5og/8X9CYc6C7vje/CV+bwJ5M2X0+IY9Q== nanoid@^3.3.6: - version "3.3.6" - resolved 
"https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.6.tgz#443380c856d6e9f9824267d960b4236ad583ea4c" - integrity sha512-BGcqMMJuToF7i1rt+2PWSNVnWIkGCU78jBG3RxO/bZlnZPK2Cmi2QaffxGO/2RvWi9sL+FAiRiXMgsyxQ1DIDA== + version "3.3.8" + resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.8.tgz#b1be3030bee36aaff18bacb375e5cce521684baf" + integrity sha512-WNLf5Sd8oZxOm+TzppcYk8gVOgP+l58xNy58D0nbUnOxOWRWvlcCV4kUF7ltmI6PsrLl/BgKEyS4mqsGChFN0w== natural-compare-lite@^1.4.0: version "1.4.0" From 7f846dcbdbc2325ca6b99871678c6d3681cc3d64 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Dec 2024 13:03:19 -0600 Subject: [PATCH 05/10] chore(deps): bump nanoid from 3.3.7 to 3.3.8 in /docs-website (#12114) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- docs-website/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs-website/yarn.lock b/docs-website/yarn.lock index 9c82b27c3b61f3..bad57c8c6c9001 100644 --- a/docs-website/yarn.lock +++ b/docs-website/yarn.lock @@ -8262,9 +8262,9 @@ multicast-dns@^7.2.5: thunky "^1.0.2" nanoid@^3.3.7: - version "3.3.7" - resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.7.tgz#d0c301a691bc8d54efa0a2226ccf3fe2fd656bd8" - integrity sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g== + version "3.3.8" + resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.8.tgz#b1be3030bee36aaff18bacb375e5cce521684baf" + integrity sha512-WNLf5Sd8oZxOm+TzppcYk8gVOgP+l58xNy58D0nbUnOxOWRWvlcCV4kUF7ltmI6PsrLl/BgKEyS4mqsGChFN0w== napi-build-utils@^1.0.1: version "1.0.2" From d2eaf0c83c1ecd62030e1f6f4c8df5c770d33da2 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Thu, 12 Dec 2024 14:27:37 -0500 Subject: [PATCH 06/10] feat(structuredProperties): add hide property and show as badge validators (#12099) Co-authored-by: RyanHolstien --- 
.../entity/GenericScrollIterator.java | 35 ++++ .../PropertyDefinitionDeleteSideEffect.java | 71 +------- .../util/EntityWithPropertyIterator.java | 76 +++++++++ .../validation/HidePropertyValidator.java | 63 +++++++ .../ShowPropertyAsBadgeValidator.java | 144 ++++++++++++++++ .../validators/HidePropertyValidatorTest.java | 55 ++++++ .../ShowPropertyAsBadgeValidatorTest.java | 160 ++++++++++++++++++ .../SpringStandardPluginConfiguration.java | 40 +++++ 8 files changed, 574 insertions(+), 70 deletions(-) create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/entity/GenericScrollIterator.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/util/EntityWithPropertyIterator.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/HidePropertyValidator.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/ShowPropertyAsBadgeValidator.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/HidePropertyValidatorTest.java create mode 100644 metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/ShowPropertyAsBadgeValidatorTest.java diff --git a/entity-registry/src/main/java/com/linkedin/metadata/entity/GenericScrollIterator.java b/entity-registry/src/main/java/com/linkedin/metadata/entity/GenericScrollIterator.java new file mode 100644 index 00000000000000..3f393e0b894aaa --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/entity/GenericScrollIterator.java @@ -0,0 +1,35 @@ +package com.linkedin.metadata.entity; + +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.ScrollResult; +import java.util.Iterator; +import java.util.List; +import javax.annotation.Nonnull; +import lombok.Builder; + +/** + * Fetches pages of structured properties which have been applied to an entity urn with a specified + * 
filter + */ +@Builder +public class GenericScrollIterator implements Iterator { + @Nonnull private final Filter filter; + @Nonnull private final List entities; + @Nonnull private final SearchRetriever searchRetriever; + private int count; + @Builder.Default private String scrollId = null; + @Builder.Default private boolean started = false; + + @Override + public boolean hasNext() { + return !started || scrollId != null; + } + + @Override + public ScrollResult next() { + started = true; + ScrollResult result = searchRetriever.scroll(entities, filter, scrollId, count); + scrollId = result.getScrollId(); + return result; + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java index 134c65d2b5fae3..5bbc25cfc8ddcc 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java @@ -3,8 +3,6 @@ import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTIES_ASPECT_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_KEY_ASPECT_NAME; -import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX; -import static com.linkedin.metadata.utils.CriterionUtils.buildExistsCriterion; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; @@ -17,30 +15,19 @@ import com.linkedin.metadata.aspect.patch.PatchOperationType; import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffect; -import com.linkedin.metadata.entity.SearchRetriever; import 
com.linkedin.metadata.entity.ebean.batch.PatchItemImpl; import com.linkedin.metadata.models.EntitySpec; -import com.linkedin.metadata.models.StructuredPropertyUtils; -import com.linkedin.metadata.query.filter.ConjunctiveCriterion; -import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; -import com.linkedin.metadata.query.filter.CriterionArray; -import com.linkedin.metadata.query.filter.Filter; -import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.metadata.structuredproperties.util.EntityWithPropertyIterator; import com.linkedin.structured.StructuredPropertyDefinition; import java.util.Collection; -import java.util.Collections; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Spliterator; import java.util.Spliterators; -import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.stream.StreamSupport; import javax.annotation.Nonnull; import javax.annotation.Nullable; -import lombok.Builder; import lombok.Getter; import lombok.Setter; import lombok.experimental.Accessors; @@ -141,60 +128,4 @@ private static Stream generatePatchMCPs( .build(retrieverContext.getAspectRetriever().getEntityRegistry()); })); } - - /** - * Fetches pages of entity urns which have a value for the given structured property definition - */ - @Builder - public static class EntityWithPropertyIterator implements Iterator { - @Nonnull private final Urn propertyUrn; - @Nullable private final StructuredPropertyDefinition definition; - @Nonnull private final SearchRetriever searchRetriever; - private int count; - @Builder.Default private String scrollId = null; - @Builder.Default private boolean started = false; - - private List getEntities() { - if (definition != null && definition.getEntityTypes() != null) { - return definition.getEntityTypes().stream() - .map(StructuredPropertyUtils::getValueTypeId) - .collect(Collectors.toList()); - } else { - 
return Collections.emptyList(); - } - } - - private Filter getFilter() { - Filter propertyFilter = new Filter(); - final ConjunctiveCriterionArray disjunction = new ConjunctiveCriterionArray(); - final ConjunctiveCriterion conjunction = new ConjunctiveCriterion(); - final CriterionArray andCriterion = new CriterionArray(); - - // Cannot rely on automatic field name since the definition is deleted - final Criterion propertyExistsCriterion = - buildExistsCriterion( - STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX - + StructuredPropertyUtils.toElasticsearchFieldName(propertyUrn, definition)); - - andCriterion.add(propertyExistsCriterion); - conjunction.setAnd(andCriterion); - disjunction.add(conjunction); - propertyFilter.setOr(disjunction); - - return propertyFilter; - } - - @Override - public boolean hasNext() { - return !started || scrollId != null; - } - - @Override - public ScrollResult next() { - started = true; - ScrollResult result = searchRetriever.scroll(getEntities(), getFilter(), scrollId, count); - scrollId = result.getScrollId(); - return result; - } - } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/util/EntityWithPropertyIterator.java b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/util/EntityWithPropertyIterator.java new file mode 100644 index 00000000000000..ba501f9b43d6b2 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/util/EntityWithPropertyIterator.java @@ -0,0 +1,76 @@ +package com.linkedin.metadata.structuredproperties.util; + +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX; +import static com.linkedin.metadata.utils.CriterionUtils.buildExistsCriterion; + +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.metadata.models.StructuredPropertyUtils; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import 
com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.structured.StructuredPropertyDefinition; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; + +/** Fetches pages of entity urns which have a value for the given structured property definition */ +@Builder +public class EntityWithPropertyIterator implements Iterator { + @Nonnull private final Urn propertyUrn; + @Nullable private final StructuredPropertyDefinition definition; + @Nonnull private final SearchRetriever searchRetriever; + private int count; + @Builder.Default private String scrollId = null; + @Builder.Default private boolean started = false; + + private List getEntities() { + if (definition != null && definition.getEntityTypes() != null) { + return definition.getEntityTypes().stream() + .map(StructuredPropertyUtils::getValueTypeId) + .collect(Collectors.toList()); + } else { + return Collections.emptyList(); + } + } + + private Filter getFilter() { + Filter propertyFilter = new Filter(); + final ConjunctiveCriterionArray disjunction = new ConjunctiveCriterionArray(); + final ConjunctiveCriterion conjunction = new ConjunctiveCriterion(); + final CriterionArray andCriterion = new CriterionArray(); + + // Cannot rely on automatic field name since the definition is deleted + final Criterion propertyExistsCriterion = + buildExistsCriterion( + STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX + + StructuredPropertyUtils.toElasticsearchFieldName(propertyUrn, definition)); + + andCriterion.add(propertyExistsCriterion); + conjunction.setAnd(andCriterion); + disjunction.add(conjunction); + 
propertyFilter.setOr(disjunction); + + return propertyFilter; + } + + @Override + public boolean hasNext() { + return !started || scrollId != null; + } + + @Override + public ScrollResult next() { + started = true; + ScrollResult result = searchRetriever.scroll(getEntities(), getFilter(), scrollId, count); + scrollId = result.getScrollId(); + return result; + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/HidePropertyValidator.java b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/HidePropertyValidator.java new file mode 100644 index 00000000000000..9a238d7df77505 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/HidePropertyValidator.java @@ -0,0 +1,63 @@ +package com.linkedin.metadata.structuredproperties.validation; + +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_SETTINGS_ASPECT_NAME; + +import com.google.common.annotations.VisibleForTesting; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; +import com.linkedin.metadata.models.StructuredPropertyUtils; +import com.linkedin.structured.StructuredPropertySettings; +import java.util.Collection; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; + +@Setter +@Getter +@Slf4j +@Accessors(chain = true) +public class HidePropertyValidator extends AspectPayloadValidator { + + 
@Nonnull private AspectPluginConfig config; + + @Override + protected Stream validateProposedAspects( + @Nonnull Collection mcpItems, + @Nonnull RetrieverContext retrieverContext) { + return validateSettingsUpserts( + mcpItems.stream() + .filter(i -> STRUCTURED_PROPERTY_SETTINGS_ASPECT_NAME.equals(i.getAspectName())) + .collect(Collectors.toList())); + } + + @Override + protected Stream validatePreCommitAspects( + @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) { + return Stream.empty(); + } + + @VisibleForTesting + public static Stream validateSettingsUpserts( + @Nonnull Collection mcpItems) { + ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection(); + for (BatchItem mcpItem : mcpItems) { + StructuredPropertySettings structuredPropertySettings = + mcpItem.getAspect(StructuredPropertySettings.class); + boolean isValid = + StructuredPropertyUtils.validatePropertySettings(structuredPropertySettings, false); + if (!isValid) { + exceptions.addException(mcpItem, StructuredPropertyUtils.INVALID_SETTINGS_MESSAGE); + } + } + return exceptions.streamAllExceptions(); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/ShowPropertyAsBadgeValidator.java b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/ShowPropertyAsBadgeValidator.java new file mode 100644 index 00000000000000..31f04e0b9c75c1 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/validation/ShowPropertyAsBadgeValidator.java @@ -0,0 +1,144 @@ +package com.linkedin.metadata.structuredproperties.validation; + +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_SETTINGS_ASPECT_NAME; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + +import com.datahub.util.RecordUtils; +import 
com.google.common.annotations.VisibleForTesting; +import com.linkedin.entity.Aspect; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; +import com.linkedin.metadata.entity.GenericScrollIterator; +import com.linkedin.metadata.models.StructuredPropertyUtils; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.metadata.search.SearchEntity; +import com.linkedin.structured.StructuredPropertySettings; +import java.util.Collection; +import java.util.Collections; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.collections.CollectionUtils; + +@Setter +@Getter +@Slf4j +@Accessors(chain = true) +public class ShowPropertyAsBadgeValidator extends AspectPayloadValidator { + + @Nonnull private AspectPluginConfig config; + + private static final String SHOW_ASSET_AS_BADGE_FIELD = "showAsAssetBadge"; + + @Override + protected Stream validateProposedAspects( + @Nonnull Collection mcpItems, + @Nonnull 
RetrieverContext retrieverContext) { + return validateSettingsUpserts( + mcpItems.stream() + .filter(i -> STRUCTURED_PROPERTY_SETTINGS_ASPECT_NAME.equals(i.getAspectName())) + .collect(Collectors.toList()), + retrieverContext); + } + + @Override + protected Stream validatePreCommitAspects( + @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) { + return Stream.empty(); + } + + @VisibleForTesting + public static Stream validateSettingsUpserts( + @Nonnull Collection mcpItems, + @Nonnull RetrieverContext retrieverContext) { + ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection(); + for (BatchItem mcpItem : mcpItems) { + StructuredPropertySettings structuredPropertySettings = + mcpItem.getAspect(StructuredPropertySettings.class); + if (structuredPropertySettings.isShowAsAssetBadge()) { + // Search for any structured properties that have showAsAssetBadge set, should only ever be + // one at most. + GenericScrollIterator scrollIterator = + GenericScrollIterator.builder() + .searchRetriever(retrieverContext.getSearchRetriever()) + .count(10) // Get first 10, should only ever be one, but this gives us more info if + // we're in a bad state + .filter(getFilter()) + .entities(Collections.singletonList(STRUCTURED_PROPERTY_ENTITY_NAME)) + .build(); + // Only need to get first set, if there are more then will have to resolve bad state + ScrollResult scrollResult = scrollIterator.next(); + if (CollectionUtils.isNotEmpty(scrollResult.getEntities())) { + if (scrollResult.getEntities().size() > 1) { + // If it's greater than one, don't bother querying DB since we for sure are in a bad + // state + exceptions.addException( + mcpItem, + StructuredPropertyUtils.ONLY_ONE_BADGE + + scrollResult.getEntities().stream() + .map(SearchEntity::getEntity) + .collect(Collectors.toList())); + } else { + // If there is just one, verify against DB to make sure we're not hitting a timing issue + // with eventual consistency + 
AspectRetriever aspectRetriever = retrieverContext.getAspectRetriever(); + Optional propertySettings = + Optional.ofNullable( + aspectRetriever.getLatestAspectObject( + scrollResult.getEntities().get(0).getEntity(), + STRUCTURED_PROPERTY_SETTINGS_ASPECT_NAME)); + if (propertySettings.isPresent()) { + StructuredPropertySettings dbBadgeSettings = + RecordUtils.toRecordTemplate( + StructuredPropertySettings.class, propertySettings.get().data()); + if (dbBadgeSettings.isShowAsAssetBadge()) { + exceptions.addException( + mcpItem, + StructuredPropertyUtils.ONLY_ONE_BADGE + + scrollResult.getEntities().stream() + .map(SearchEntity::getEntity) + .collect(Collectors.toList())); + } + } + } + } + } + } + return exceptions.streamAllExceptions(); + } + + private static Filter getFilter() { + Filter propertyFilter = new Filter(); + final ConjunctiveCriterionArray disjunction = new ConjunctiveCriterionArray(); + final ConjunctiveCriterion conjunction = new ConjunctiveCriterion(); + final CriterionArray andCriterion = new CriterionArray(); + + final Criterion propertyExistsCriterion = + buildCriterion(SHOW_ASSET_AS_BADGE_FIELD, Condition.EQUAL, "true"); + + andCriterion.add(propertyExistsCriterion); + conjunction.setAnd(andCriterion); + disjunction.add(conjunction); + propertyFilter.setOr(disjunction); + + return propertyFilter; + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/HidePropertyValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/HidePropertyValidatorTest.java new file mode 100644 index 00000000000000..3f664da0dde8a4 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/HidePropertyValidatorTest.java @@ -0,0 +1,55 @@ +package com.linkedin.metadata.structuredproperties.validators; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.metadata.models.registry.EntityRegistry; 
+import com.linkedin.metadata.structuredproperties.validation.HidePropertyValidator; +import com.linkedin.structured.StructuredPropertySettings; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.test.metadata.aspect.batch.TestMCP; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class HidePropertyValidatorTest { + + private static final EntityRegistry TEST_REGISTRY = new TestEntityRegistry(); + + private static final Urn TEST_PROPERTY_URN = + UrnUtils.getUrn("urn:li:structuredProperty:io.acryl.privacy.retentionTime"); + + @Test + public void testValidUpsert() { + + StructuredPropertySettings propertySettings = + new StructuredPropertySettings() + .setIsHidden(false) + .setShowAsAssetBadge(true) + .setShowInAssetSummary(true) + .setShowInSearchFilters(true); + + boolean isValid = + HidePropertyValidator.validateSettingsUpserts( + TestMCP.ofOneUpsertItem(TEST_PROPERTY_URN, propertySettings, TEST_REGISTRY)) + .findAny() + .isEmpty(); + Assert.assertTrue(isValid); + } + + @Test + public void testInvalidUpsert() { + + StructuredPropertySettings propertySettings = + new StructuredPropertySettings() + .setIsHidden(true) + .setShowAsAssetBadge(true) + .setShowInAssetSummary(true) + .setShowInSearchFilters(true); + + boolean isValid = + HidePropertyValidator.validateSettingsUpserts( + TestMCP.ofOneUpsertItem(TEST_PROPERTY_URN, propertySettings, TEST_REGISTRY)) + .findAny() + .isEmpty(); + Assert.assertFalse(isValid); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/ShowPropertyAsBadgeValidatorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/ShowPropertyAsBadgeValidatorTest.java new file mode 100644 index 00000000000000..2503faa00f6e71 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/validators/ShowPropertyAsBadgeValidatorTest.java @@ -0,0 +1,160 @@ +package 
com.linkedin.metadata.structuredproperties.validators; + +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.entity.SearchRetriever; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.ScrollResult; +import com.linkedin.metadata.search.SearchEntity; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.metadata.structuredproperties.validation.ShowPropertyAsBadgeValidator; +import com.linkedin.structured.StructuredPropertySettings; +import com.linkedin.test.metadata.aspect.MockAspectRetriever; +import com.linkedin.test.metadata.aspect.TestEntityRegistry; +import com.linkedin.test.metadata.aspect.batch.TestMCP; +import java.util.Collections; +import java.util.stream.Stream; +import org.mockito.Mockito; +import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; +import org.testng.Assert; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class ShowPropertyAsBadgeValidatorTest { + + private static final EntityRegistry TEST_REGISTRY = new TestEntityRegistry(); + private static final Urn TEST_PROPERTY_URN = + UrnUtils.getUrn("urn:li:structuredProperty:io.acryl.privacy.retentionTime"); + private static final Urn EXISTING_BADGE_URN = + UrnUtils.getUrn("urn:li:structuredProperty:io.acryl.privacy.existingBadge"); + + private SearchRetriever mockSearchRetriever; + private MockAspectRetriever mockAspectRetriever; + private GraphRetriever mockGraphRetriever; + private RetrieverContext retrieverContext; + + @BeforeMethod + public void setup() { + mockSearchRetriever = 
Mockito.mock(SearchRetriever.class); + + StructuredPropertySettings propertySettings = + new StructuredPropertySettings() + .setShowAsAssetBadge(true) + .setShowInAssetSummary(true) + .setShowInSearchFilters(true); + mockAspectRetriever = + new MockAspectRetriever( + ImmutableMap.of( + TEST_PROPERTY_URN, + Collections.singletonList(propertySettings), + EXISTING_BADGE_URN, + Collections.singletonList(propertySettings))); + mockGraphRetriever = Mockito.mock(GraphRetriever.class); + retrieverContext = + io.datahubproject.metadata.context.RetrieverContext.builder() + .aspectRetriever(mockAspectRetriever) + .searchRetriever(mockSearchRetriever) + .graphRetriever(mockGraphRetriever) + .build(); + } + + @Test + public void testValidUpsert() { + + // Create settings with showAsAssetBadge = true + StructuredPropertySettings propertySettings = + new StructuredPropertySettings() + .setShowAsAssetBadge(true) + .setShowInAssetSummary(true) + .setShowInSearchFilters(true); + + Mockito.when( + mockSearchRetriever.scroll( + Mockito.eq(Collections.singletonList(STRUCTURED_PROPERTY_ENTITY_NAME)), + Mockito.any(Filter.class), + Mockito.eq(null), + Mockito.eq(10))) + .thenReturn(new ScrollResult().setEntities(new SearchEntityArray())); + + // Test validation + Stream validationResult = + ShowPropertyAsBadgeValidator.validateSettingsUpserts( + TestMCP.ofOneUpsertItem(TEST_PROPERTY_URN, propertySettings, TEST_REGISTRY), + retrieverContext); + + // Assert no validation exceptions + Assert.assertTrue(validationResult.findAny().isEmpty()); + } + + @Test + public void testInvalidUpsertWithExistingBadge() { + + // Create settings with showAsAssetBadge = true + StructuredPropertySettings propertySettings = + new StructuredPropertySettings() + .setShowAsAssetBadge(true) + .setShowInAssetSummary(true) + .setShowInSearchFilters(true); + + // Mock search results with an existing badge + SearchEntity existingBadge = new SearchEntity(); + existingBadge.setEntity(EXISTING_BADGE_URN); + ScrollResult 
mockResult = new ScrollResult(); + mockResult.setEntities(new SearchEntityArray(Collections.singletonList(existingBadge))); + Mockito.when( + mockSearchRetriever.scroll( + Mockito.eq(Collections.singletonList(STRUCTURED_PROPERTY_ENTITY_NAME)), + Mockito.any(Filter.class), + Mockito.eq(null), + Mockito.eq(10))) + .thenReturn(mockResult); + + // Test validation + Stream validationResult = + ShowPropertyAsBadgeValidator.validateSettingsUpserts( + TestMCP.ofOneUpsertItem(TEST_PROPERTY_URN, propertySettings, TEST_REGISTRY), + retrieverContext); + + // Assert validation exception exists + Assert.assertFalse(validationResult.findAny().isEmpty()); + } + + @Test + public void testValidUpsertWithShowAsAssetBadgeFalse() { + + // Create settings with showAsAssetBadge = false + StructuredPropertySettings propertySettings = + new StructuredPropertySettings() + .setShowAsAssetBadge(false) + .setShowInAssetSummary(true) + .setShowInSearchFilters(true); + + // Mock search results with an existing badge (shouldn't matter since we're setting false) + SearchEntity existingBadge = new SearchEntity(); + existingBadge.setEntity(EXISTING_BADGE_URN); + ScrollResult mockResult = new ScrollResult(); + mockResult.setEntities(new SearchEntityArray(Collections.singletonList(existingBadge))); + Mockito.when( + mockSearchRetriever.scroll( + Mockito.eq(Collections.singletonList(STRUCTURED_PROPERTY_ENTITY_NAME)), + Mockito.any(Filter.class), + Mockito.eq(null), + Mockito.eq(10))) + .thenReturn(mockResult); + + // Test validation + Stream validationResult = + ShowPropertyAsBadgeValidator.validateSettingsUpserts( + TestMCP.ofOneUpsertItem(TEST_PROPERTY_URN, propertySettings, TEST_REGISTRY), + retrieverContext); + + // Assert no validation exceptions + Assert.assertTrue(validationResult.findAny().isEmpty()); + } +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java index 26e0da8e6fb990..2349dbd169f1d9 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/plugins/SpringStandardPluginConfiguration.java @@ -4,6 +4,8 @@ import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_ENTITY_NAME; import static com.linkedin.metadata.Constants.EXECUTION_REQUEST_RESULT_ASPECT_NAME; import static com.linkedin.metadata.Constants.SCHEMA_METADATA_ASPECT_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME; +import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_SETTINGS_ASPECT_NAME; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.hooks.IgnoreUnknownMutator; @@ -15,6 +17,8 @@ import com.linkedin.metadata.aspect.validation.FieldPathValidator; import com.linkedin.metadata.dataproducts.sideeffects.DataProductUnsetSideEffect; import com.linkedin.metadata.schemafields.sideeffects.SchemaFieldSideEffect; +import com.linkedin.metadata.structuredproperties.validation.HidePropertyValidator; +import com.linkedin.metadata.structuredproperties.validation.ShowPropertyAsBadgeValidator; import com.linkedin.metadata.timeline.eventgenerator.EntityChangeEventGeneratorRegistry; import com.linkedin.metadata.timeline.eventgenerator.SchemaMetadataChangeEventGenerator; import java.util.List; @@ -149,4 +153,40 @@ public AspectPayloadValidator dataHubExecutionRequestResultValidator() { .build())) .build()); } + + @Bean + public AspectPayloadValidator hidePropertyValidator() { + return new HidePropertyValidator() + .setConfig( + AspectPluginConfig.builder() + .className(HidePropertyValidator.class.getName()) + .enabled(true) + .supportedOperations( + List.of("UPSERT", "UPDATE", "CREATE", "CREATE_ENTITY", "RESTATE", "PATCH")) + 
.supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(STRUCTURED_PROPERTY_ENTITY_NAME) + .aspectName(STRUCTURED_PROPERTY_SETTINGS_ASPECT_NAME) + .build())) + .build()); + } + + @Bean + public AspectPayloadValidator showPropertyAsAssetBadgeValidator() { + return new ShowPropertyAsBadgeValidator() + .setConfig( + AspectPluginConfig.builder() + .className(ShowPropertyAsBadgeValidator.class.getName()) + .enabled(true) + .supportedOperations( + List.of("UPSERT", "UPDATE", "CREATE", "CREATE_ENTITY", "RESTATE", "PATCH")) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(STRUCTURED_PROPERTY_ENTITY_NAME) + .aspectName(STRUCTURED_PROPERTY_SETTINGS_ASPECT_NAME) + .build())) + .build()); + } } From 4683bc73a3448cddff99641ccd81bbce05b2b127 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 12 Dec 2024 16:51:08 -0500 Subject: [PATCH 07/10] fix(ingest/snowflake): further improve dot handling (#12110) --- .../src/datahub/ingestion/source/snowflake/snowflake_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py index d8c3075bd921b9..8e0c97aa135e84 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py @@ -119,7 +119,6 @@ def is_dataset_pattern_allowed( ) -> bool: if not dataset_type or not dataset_name: return True - dataset_params = dataset_name.split(".") if dataset_type.lower() not in ( SnowflakeObjectDomain.TABLE, SnowflakeObjectDomain.EXTERNAL_TABLE, @@ -131,6 +130,7 @@ def is_dataset_pattern_allowed( if _is_sys_table(dataset_name): return False + dataset_params = _split_qualified_name(dataset_name) if len(dataset_params) != 3: self.structured_reporter.info( title="Unexpected 
dataset pattern", From e730afdb6837f43e6abbf43ae27ccb55288103aa Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 12 Dec 2024 16:51:18 -0500 Subject: [PATCH 08/10] feat(ingest): improve query fingerprinting (#12104) --- .../sql_parsing/sql_parsing_aggregator.py | 3 +-- .../src/datahub/sql_parsing/sqlglot_utils.py | 10 +++++++-- .../aggregator_goldens/test_table_rename.json | 14 ++++++------ .../aggregator_goldens/test_table_swap.json | 22 +++++++++---------- .../test_table_swap_with_temp.json | 12 +++++----- .../unit/sql_parsing/test_sqlglot_utils.py | 21 ++++++++++++++++++ 6 files changed, 54 insertions(+), 28 deletions(-) diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py index 44f0d7be7aad9a..79ea98d1c7f54e 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py @@ -1383,8 +1383,7 @@ def _query_urn(cls, query_id: QueryId) -> str: return QueryUrn(query_id).urn() @classmethod - def _composite_query_id(cls, composed_of_queries: Iterable[QueryId]) -> str: - composed_of_queries = list(composed_of_queries) + def _composite_query_id(cls, composed_of_queries: List[QueryId]) -> str: combined = json.dumps(composed_of_queries) return f"composite_{generate_hash(combined)}" diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py index bd98557e08aacc..57a5cc3c9a6574 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py @@ -121,7 +121,7 @@ def _expression_to_string( # Remove /* */ comments. re.compile(r"/\*.*?\*/", re.DOTALL): "", # Remove -- comments. - re.compile(r"--.*$"): "", + re.compile(r"--.*$", re.MULTILINE): "", # Replace all runs of whitespace with a single space. 
re.compile(r"\s+"): " ", # Remove leading and trailing whitespace and trailing semicolons. @@ -131,10 +131,16 @@ def _expression_to_string( # Replace anything that looks like a string with a placeholder. re.compile(r"'[^']*'"): "?", # Replace sequences of IN/VALUES with a single placeholder. - re.compile(r"\b(IN|VALUES)\s*\(\?(?:, \?)*\)", re.IGNORECASE): r"\1 (?)", + # The r" ?" makes it more robust to uneven spacing. + re.compile(r"\b(IN|VALUES)\s*\( ?\?(?:, ?\?)* ?\)", re.IGNORECASE): r"\1 (?)", # Normalize parenthesis spacing. re.compile(r"\( "): "(", re.compile(r" \)"): ")", + # Fix up spaces before commas in column lists. + # e.g. "col1 , col2" -> "col1, col2" + # e.g. "col1,col2" -> "col1, col2" + re.compile(r"\b ,"): ",", + re.compile(r"\b,\b"): ", ", } _TABLE_NAME_NORMALIZATION_RULES = { # Replace UUID-like strings with a placeholder (both - and _ variants). diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json index 750b2c4a92fd0b..2d32e1328fbb4f 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_rename.json @@ -133,7 +133,7 @@ }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo_staging,PROD)", "type": "TRANSFORMED", - "query": "urn:li:query:a30d42497a737321ece461fa17344c3ba3588fdee736016acb59a00cec955a0c" + "query": "urn:li:query:88d742bcc0216d6ccb50c7430d1d97494d5dfcfa90160ffa123108844ad261e4" } ], "fineGrainedLineages": [ @@ -147,7 +147,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),a)" ], "confidenceScore": 1.0, - "query": "urn:li:query:a30d42497a737321ece461fa17344c3ba3588fdee736016acb59a00cec955a0c" + "query": "urn:li:query:88d742bcc0216d6ccb50c7430d1d97494d5dfcfa90160ffa123108844ad261e4" }, { "upstreamType": "FIELD_SET", @@ -159,7 
+159,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),b)" ], "confidenceScore": 1.0, - "query": "urn:li:query:a30d42497a737321ece461fa17344c3ba3588fdee736016acb59a00cec955a0c" + "query": "urn:li:query:88d742bcc0216d6ccb50c7430d1d97494d5dfcfa90160ffa123108844ad261e4" }, { "upstreamType": "FIELD_SET", @@ -171,7 +171,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.foo,PROD),c)" ], "confidenceScore": 1.0, - "query": "urn:li:query:a30d42497a737321ece461fa17344c3ba3588fdee736016acb59a00cec955a0c" + "query": "urn:li:query:88d742bcc0216d6ccb50c7430d1d97494d5dfcfa90160ffa123108844ad261e4" } ] } @@ -179,7 +179,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:a30d42497a737321ece461fa17344c3ba3588fdee736016acb59a00cec955a0c", + "entityUrn": "urn:li:query:88d742bcc0216d6ccb50c7430d1d97494d5dfcfa90160ffa123108844ad261e4", "changeType": "UPSERT", "aspectName": "queryProperties", "aspect": { @@ -202,7 +202,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:a30d42497a737321ece461fa17344c3ba3588fdee736016acb59a00cec955a0c", + "entityUrn": "urn:li:query:88d742bcc0216d6ccb50c7430d1d97494d5dfcfa90160ffa123108844ad261e4", "changeType": "UPSERT", "aspectName": "querySubjects", "aspect": { @@ -229,7 +229,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:a30d42497a737321ece461fa17344c3ba3588fdee736016acb59a00cec955a0c", + "entityUrn": "urn:li:query:88d742bcc0216d6ccb50c7430d1d97494d5dfcfa90160ffa123108844ad261e4", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json index 171a1bd3753e24..af0fca485777ff 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json +++ b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap.json 
@@ -133,7 +133,7 @@ }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_swap,PROD)", "type": "TRANSFORMED", - "query": "urn:li:query:3865108263e5f0670e6506f5747392f8315a72039cbfde1c4be4dd9a71bdd500" + "query": "urn:li:query:b256c8cc8f386b209ef8da55485d46c3fbd471b942f804d370e24350b3087405" } ], "fineGrainedLineages": [ @@ -147,7 +147,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD),a)" ], "confidenceScore": 1.0, - "query": "urn:li:query:3865108263e5f0670e6506f5747392f8315a72039cbfde1c4be4dd9a71bdd500" + "query": "urn:li:query:b256c8cc8f386b209ef8da55485d46c3fbd471b942f804d370e24350b3087405" }, { "upstreamType": "FIELD_SET", @@ -159,7 +159,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD),b)" ], "confidenceScore": 1.0, - "query": "urn:li:query:3865108263e5f0670e6506f5747392f8315a72039cbfde1c4be4dd9a71bdd500" + "query": "urn:li:query:b256c8cc8f386b209ef8da55485d46c3fbd471b942f804d370e24350b3087405" }, { "upstreamType": "FIELD_SET", @@ -171,7 +171,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD),c)" ], "confidenceScore": 1.0, - "query": "urn:li:query:3865108263e5f0670e6506f5747392f8315a72039cbfde1c4be4dd9a71bdd500" + "query": "urn:li:query:b256c8cc8f386b209ef8da55485d46c3fbd471b942f804d370e24350b3087405" } ] } @@ -179,7 +179,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:3865108263e5f0670e6506f5747392f8315a72039cbfde1c4be4dd9a71bdd500", + "entityUrn": "urn:li:query:b256c8cc8f386b209ef8da55485d46c3fbd471b942f804d370e24350b3087405", "changeType": "UPSERT", "aspectName": "queryProperties", "aspect": { @@ -202,7 +202,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:3865108263e5f0670e6506f5747392f8315a72039cbfde1c4be4dd9a71bdd500", + "entityUrn": "urn:li:query:b256c8cc8f386b209ef8da55485d46c3fbd471b942f804d370e24350b3087405", "changeType": "UPSERT", "aspectName": 
"querySubjects", "aspect": { @@ -229,7 +229,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:3865108263e5f0670e6506f5747392f8315a72039cbfde1c4be4dd9a71bdd500", + "entityUrn": "urn:li:query:b256c8cc8f386b209ef8da55485d46c3fbd471b942f804d370e24350b3087405", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -411,7 +411,7 @@ }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD)", "type": "TRANSFORMED", - "query": "urn:li:query:d29a1c8ed6d4d77efb290260234e5eee56f98311a5631d0a12213798077d1a68" + "query": "urn:li:query:3886d427c84692923797048da6d3991693e89ce44e10d1917c12e8b6fd493904" }, { "auditStamp": { @@ -432,7 +432,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:d29a1c8ed6d4d77efb290260234e5eee56f98311a5631d0a12213798077d1a68", + "entityUrn": "urn:li:query:3886d427c84692923797048da6d3991693e89ce44e10d1917c12e8b6fd493904", "changeType": "UPSERT", "aspectName": "queryProperties", "aspect": { @@ -455,7 +455,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:d29a1c8ed6d4d77efb290260234e5eee56f98311a5631d0a12213798077d1a68", + "entityUrn": "urn:li:query:3886d427c84692923797048da6d3991693e89ce44e10d1917c12e8b6fd493904", "changeType": "UPSERT", "aspectName": "querySubjects", "aspect": { @@ -473,7 +473,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:d29a1c8ed6d4d77efb290260234e5eee56f98311a5631d0a12213798077d1a68", + "entityUrn": "urn:li:query:3886d427c84692923797048da6d3991693e89ce44e10d1917c12e8b6fd493904", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { diff --git a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json index ba83917ca5c1a1..ceaaf8f6887c7c 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json +++ 
b/metadata-ingestion/tests/unit/sql_parsing/aggregator_goldens/test_table_swap_with_temp.json @@ -133,7 +133,7 @@ }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD)", "type": "TRANSFORMED", - "query": "urn:li:query:composite_5f9c1232994672c5fb7621f8384f6600b6d4ed5acfccc4eb396fb446b3fb1bce" + "query": "urn:li:query:composite_9e36ef19163461d35b618fd1eea2a3f6a5d10a23a979a6d5ef688b31f277abb3" }, { "auditStamp": { @@ -146,7 +146,7 @@ }, "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info_dep,PROD)", "type": "TRANSFORMED", - "query": "urn:li:query:composite_5f9c1232994672c5fb7621f8384f6600b6d4ed5acfccc4eb396fb446b3fb1bce" + "query": "urn:li:query:composite_9e36ef19163461d35b618fd1eea2a3f6a5d10a23a979a6d5ef688b31f277abb3" } ], "fineGrainedLineages": [ @@ -161,7 +161,7 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,dev.public.person_info,PROD),a)" ], "confidenceScore": 1.0, - "query": "urn:li:query:composite_5f9c1232994672c5fb7621f8384f6600b6d4ed5acfccc4eb396fb446b3fb1bce" + "query": "urn:li:query:composite_9e36ef19163461d35b618fd1eea2a3f6a5d10a23a979a6d5ef688b31f277abb3" } ] } @@ -169,7 +169,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:composite_5f9c1232994672c5fb7621f8384f6600b6d4ed5acfccc4eb396fb446b3fb1bce", + "entityUrn": "urn:li:query:composite_9e36ef19163461d35b618fd1eea2a3f6a5d10a23a979a6d5ef688b31f277abb3", "changeType": "UPSERT", "aspectName": "queryProperties", "aspect": { @@ -192,7 +192,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:composite_5f9c1232994672c5fb7621f8384f6600b6d4ed5acfccc4eb396fb446b3fb1bce", + "entityUrn": "urn:li:query:composite_9e36ef19163461d35b618fd1eea2a3f6a5d10a23a979a6d5ef688b31f277abb3", "changeType": "UPSERT", "aspectName": "querySubjects", "aspect": { @@ -219,7 +219,7 @@ }, { "entityType": "query", - "entityUrn": "urn:li:query:composite_5f9c1232994672c5fb7621f8384f6600b6d4ed5acfccc4eb396fb446b3fb1bce", + 
"entityUrn": "urn:li:query:composite_9e36ef19163461d35b618fd1eea2a3f6a5d10a23a979a6d5ef688b31f277abb3", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_utils.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_utils.py index 4e8ba8aa6b7770..dbe24ade6944f6 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_utils.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_utils.py @@ -73,6 +73,12 @@ class QueryGeneralizationTestMode(Enum): "SELECT * FROM foo", QueryGeneralizationTestMode.BOTH, ), + ( + "SELECT a\n -- comment--\n,b --another comment\n FROM books", + "redshift", + "SELECT a, b FROM books", + QueryGeneralizationTestMode.BOTH, + ), # Parameter normalization. ( "UPDATE \"books\" SET page_count = page_count + 1, author_count = author_count + 1 WHERE book_title = 'My New Book'", @@ -105,6 +111,21 @@ class QueryGeneralizationTestMode(Enum): "INSERT INTO MyTable (Column1, Column2, Column3) VALUES (?)", QueryGeneralizationTestMode.BOTH, ), + ( + # Uneven spacing within the IN clause. + "SELECT * FROM books WHERE zip_code IN (123,345, 423 )", + "redshift", + "SELECT * FROM books WHERE zip_code IN (?)", + QueryGeneralizationTestMode.BOTH, + ), + # Uneven spacing in the column list. + # This isn't perfect e.g. we still have issues with function calls inside selects. 
+ ( + "SELECT a\n ,b FROM books", + "redshift", + "SELECT a, b FROM books", + QueryGeneralizationTestMode.BOTH, + ), ( textwrap.dedent( """\ From bc4c7c633e04a36d64118d48910b313e0e2a889e Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 12 Dec 2024 16:51:24 -0500 Subject: [PATCH 09/10] docs(ingest): add docs on the SQL parser (#12103) --- docs-website/generateDocsDir.ts | 8 +++- docs-website/sidebars.js | 1 + docs/lineage/airflow.md | 2 +- docs/lineage/sql_parsing.md | 63 ++++++++++++++++++++++++++++ metadata-ingestion/scripts/docgen.py | 4 +- 5 files changed, 73 insertions(+), 5 deletions(-) create mode 100644 docs/lineage/sql_parsing.md diff --git a/docs-website/generateDocsDir.ts b/docs-website/generateDocsDir.ts index 032e912c7190e1..0f7e347da64eba 100644 --- a/docs-website/generateDocsDir.ts +++ b/docs-website/generateDocsDir.ts @@ -284,6 +284,10 @@ function markdown_add_slug( // ); // } +function trim_anchor_link(url: string): string { + return url.replace(/#.+$/, ""); +} + function new_url(original: string, filepath: string): string { if (original.toLowerCase().startsWith(HOSTED_SITE_URL)) { // For absolute links to the hosted docs site, we transform them into local ones. @@ -313,7 +317,7 @@ function new_url(original: string, filepath: string): string { } // Now we assume this is a local reference. - const suffix = path.extname(original); + const suffix = path.extname(trim_anchor_link(original)); if ( suffix == "" || [ @@ -335,7 +339,7 @@ function new_url(original: string, filepath: string): string { // A reference to a file or directory in the Github repo. 
const relation = path.dirname(filepath); const updated_path = path.normalize(`${relation}/${original}`); - const check_path = updated_path.replace(/#.+$/, ""); + const check_path = trim_anchor_link(updated_path); if ( !fs.existsSync(`../${check_path}`) && actually_in_sidebar(filepath) && diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 2f1ac047720978..c18d8671318f64 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -541,6 +541,7 @@ module.exports = { }, "docs/platform-instances", + "docs/lineage/sql_parsing", "metadata-ingestion/docs/dev_guides/stateful", "metadata-ingestion/docs/dev_guides/classification", "metadata-ingestion/docs/dev_guides/add_stateful_ingestion_to_source", diff --git a/docs/lineage/airflow.md b/docs/lineage/airflow.md index 2bd58334933fb7..72b5cbf57592d3 100644 --- a/docs/lineage/airflow.md +++ b/docs/lineage/airflow.md @@ -164,7 +164,7 @@ Only the v2 plugin supports automatic lineage extraction. If you're using the v1 To automatically extract lineage information, the v2 plugin builds on top of Airflow's built-in [OpenLineage extractors](https://openlineage.io/docs/integrations/airflow/default-extractors). As such, we support a superset of the default operators that Airflow/OpenLineage supports. -The SQL-related extractors have been updated to use [DataHub's SQL lineage parser](https://blog.datahubproject.io/extracting-column-level-lineage-from-sql-779b8ce17567), which is more robust than the built-in one and uses DataHub's metadata information to generate column-level lineage. +The SQL-related extractors have been updated to use [DataHub's SQL lineage parser](./sql_parsing.md), which is more robust than the built-in one and uses DataHub's metadata information to generate column-level lineage. 
Supported operators: diff --git a/docs/lineage/sql_parsing.md b/docs/lineage/sql_parsing.md new file mode 100644 index 00000000000000..76161de0040f4c --- /dev/null +++ b/docs/lineage/sql_parsing.md @@ -0,0 +1,63 @@ +--- +title: SQL Parsing +--- + +# The DataHub SQL Parser + +Many data platforms are built on top of SQL, which means deeply understanding SQL queries is critical for understanding column-level lineage, usage, and more. + +DataHub's SQL parser is built on top of [sqlglot](https://github.com/tobymao/sqlglot) and adds a number of additional features to improve the accuracy of SQL parsing. + +In our benchmarks, the DataHub SQL parser generates lineage with 97-99% accuracy and outperforms other SQL parsers by a wide margin. + +We've published a blog post on some of the technical details of the parser: [Extracting Column Lineage from SQL Queries](https://blog.datahubproject.io/extracting-column-level-lineage-from-sql-779b8ce17567). + +## Built-in SQL Parsing Support + +If you're using a tool that DataHub already [integrates with](https://datahubproject.io/integrations), check the documentation for that specific integration. +Most of our integrations, including Snowflake, BigQuery, Redshift, dbt, Looker, PowerBI, Airflow, etc, use the SQL parser to generate column-level lineage and usage statistics. + +If you’re using a different database system for which we don’t support column-level lineage out of the box, but you do have a database query log available, the [SQL queries](../generated/ingestion/sources/sql-queries.md) connector can generate column-level lineage and table/column usage statistics from the query log. + +## SDK Support + +Our SDK provides a [`DataHubGraph.parse_sql_lineage()`](../../python-sdk/clients.md#datahub.ingestion.graph.client.DataHubGraph.parse_sql_lineage) method for programmatically parsing SQL queries. 
+ +The resulting object contains a `sql_parsing_result.debug_info.confidence_score` field, which is a 0-1 value indicating the confidence of the parser. + +There are also a number of utilities in the `datahub.sql_parsing` module. The `SqlParsingAggregator` is particularly useful, as it can also resolve lineage across temp tables and table renames/swaps. +Note that these utilities are not officially part of the DataHub SDK and hence do not have the same level of stability and support as the rest of the SDK. + +## Capabilities + +### Supported + +- Table-level lineage for `SELECT`, `CREATE`, `INSERT`, `UPDATE`, `DELETE`, and `MERGE` statements +- Column-level lineage for `SELECT` (including `SELECT INTO`), `CREATE VIEW`, `CREATE TABLE AS SELECT` (CTAS), `INSERT`, and `UPDATE` statements +- Subqueries +- CTEs +- `UNION ALL` constructs - will merge lineage across the clauses of the `UNION` +- `SELECT *` and similar expressions will automatically be expanded with the table schemas registered in DataHub. This includes support for platform instances. +- Automatic handling for systems where table and column names are case insensitive. Generally requires that `convert_urns_to_lowercase` is enabled when the corresponding table schemas were ingested into DataHub. + - Specifically, we'll do fuzzy matching against the table names and schemas to resolve the correct URNs. We do not support having multiple tables/columns that only differ in casing. +- For BigQuery, sharded table suffixes will automatically be normalized. For example, `proj.dataset.table_20230616` will be normalized to `proj.dataset.table_yyyymmdd`. This matches the behavior of our BigQuery ingestion connector, and hence will result in lineage linking up correctly. + +### Not supported + +- Scalar `UDFs` - We will generate lineage pointing at the columns that are inputs to the UDF, but will not be able to understand the UDF itself. 
+- Tabular `UDFs` +- `json_extract` and similar functions +- `UNNEST` - We will do a best-effort job, but cannot reliably generate column-level lineage in the presence of `UNNEST` constructs. +- Structs - We will do a best-effort attempt to resolve struct subfields, but it is not guaranteed. This will only impact column-level lineage. +- Snowflake's multi-table inserts +- Multi-statement SQL / SQL scripting + +### Limitations + +- We only support the 20+ SQL dialects supported by the underlying [sqlglot](https://github.com/tobymao/sqlglot) library. +- There's a few SQL syntaxes that we don't support yet, but intend to support in the future. + - `INSERT INTO (col1_new, col2_new) SELECT col1_old, col2_old FROM ...`. We only support `INSERT INTO` statements that either (1) don't specify a column list, or (2) specify a column list that matches the columns in the `SELECT` clause. + - `MERGE INTO` statements - We don't generate column-level lineage for these. +- In cases where the table schema information in DataHub is outdated or otherwise incorrect, we may not be able to generate accurate column-level lineage. +- We trip over BigQuery queries that use the `_partitiontime` and `_partitiondate` pseudo-columns with a table name prefix e.g. `my_table._partitiontime` fails. However, unqualified references like `_partitiontime` and `_partitiondate` will be fine. +- We do not consider columns referenced in `WHERE`, `GROUP BY`, `ORDER BY`, etc. clauses to be part of lineage. For example, `SELECT col1, col2 FROM upstream_table WHERE col3 = 3` will not generate any lineage related to `col3`. diff --git a/metadata-ingestion/scripts/docgen.py b/metadata-ingestion/scripts/docgen.py index 797a2f698c2f40..402cd8a8141990 100644 --- a/metadata-ingestion/scripts/docgen.py +++ b/metadata-ingestion/scripts/docgen.py @@ -918,7 +918,7 @@ def generate(

-By default, The UI shows the latest version of the lineage. The time picker can be used to filter out edges within the latest version to exclude those that were last updated outside of the time window. Selecting time windows in the patch will not show you historical lineages. It will only filter the view of the latest version of the lineage. +By default, the UI shows the latest version of the lineage. The time picker can be used to filter out edges within the latest version to exclude those that were last updated outside of the time window. Selecting time windows in the patch will not show you historical lineages. It will only filter the view of the latest version of the lineage.

@@ -969,7 +969,7 @@ def generate( ## Lineage Support DataHub supports **[automatic table- and column-level lineage detection](#automatic-lineage-extraction-support)** from BigQuery, Snowflake, dbt, Looker, PowerBI, and 20+ modern data tools. -For data tools with limited native lineage tracking, **DataHub's SQL Parser** detects lineage with 97–99% accuracy, ensuring teams will have high quality lineage graphs across all corners of their data stack. +For data tools with limited native lineage tracking, [**DataHub's SQL Parser**](../../lineage/sql_parsing.md) detects lineage with 97-99% accuracy, ensuring teams will have high quality lineage graphs across all corners of their data stack. ### Types of Lineage Connections From eee49b3cb889c11d34d1f0090269390c20ce3900 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 13 Dec 2024 11:47:49 +0530 Subject: [PATCH 10/10] fix(ui): dereference issues (#12109) --- .../profile/schema/components/StructuredPropValues.tsx | 4 ++-- .../containers/profile/header/StructuredPropertyBadge.tsx | 8 ++++---- .../SidebarStructuredPropsSection.tsx | 2 +- .../src/app/entity/shared/entityForm/Form.tsx | 2 +- .../tabs/Properties/Edit/EditStructuredPropertyModal.tsx | 2 +- .../govern/structuredProperties/StructuredProperties.tsx | 2 +- .../govern/structuredProperties/ViewAdvancedOptions.tsx | 2 +- .../structuredProperties/ViewStructuredPropsDrawer.tsx | 4 ++-- .../source/executions/ExecutionRequestDetailsModal.tsx | 2 +- 9 files changed, 14 insertions(+), 14 deletions(-) diff --git a/datahub-web-react/src/app/entity/dataset/profile/schema/components/StructuredPropValues.tsx b/datahub-web-react/src/app/entity/dataset/profile/schema/components/StructuredPropValues.tsx index 4cba36b9375dbc..91379ce97fe22f 100644 --- a/datahub-web-react/src/app/entity/dataset/profile/schema/components/StructuredPropValues.tsx +++ b/datahub-web-react/src/app/entity/dataset/profile/schema/components/StructuredPropValues.tsx @@ -24,11 +24,11 @@ const 
StructuredPropValues = ({ schemaFieldEntity, propColumn }: Props) => { const entityRegistry = useEntityRegistry(); const property = schemaFieldEntity.structuredProperties?.properties?.find( - (prop) => prop.structuredProperty.urn === propColumn?.entity.urn, + (prop) => prop.structuredProperty.urn === propColumn?.entity?.urn, ); const propRow = property ? mapStructuredPropertyToPropertyRow(property) : undefined; const values = propRow?.values; - const isRichText = propRow?.dataType?.info.type === StdDataType.RichText; + const isRichText = propRow?.dataType?.info?.type === StdDataType.RichText; const hasMoreValues = values && values.length > 2; const displayedValues = hasMoreValues ? values.slice(0, 1) : values; diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/header/StructuredPropertyBadge.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/header/StructuredPropertyBadge.tsx index 64953928990ee5..fc7892f6ba6cca 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/header/StructuredPropertyBadge.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/header/StructuredPropertyBadge.tsx @@ -41,8 +41,8 @@ const StructuredPropertyBadge = ({ structuredProperties }: Props) => { if (!badgeStructuredProperty) return null; - const propertyValue = propRow?.values[0].value; - const relatedDescription = propRow?.structuredProperty.definition.allowedValues?.find( + const propertyValue = propRow?.values[0]?.value; + const relatedDescription = propRow?.structuredProperty?.definition?.allowedValues?.find( (v) => getStructuredPropertyValue(v.value) === propertyValue, )?.description; @@ -56,7 +56,7 @@ const StructuredPropertyBadge = ({ structuredProperties }: Props) => { Value - {propRow?.values[0].value} + {propRow?.values[0]?.value} {relatedDescription && ( @@ -79,7 +79,7 @@ const StructuredPropertyBadge = ({ structuredProperties }: Props) => { > { property, currentProperties, ); - const isRichText = 
propertyRow?.dataType?.info.type === StdDataType.RichText; + const isRichText = propertyRow?.dataType?.info?.type === StdDataType.RichText; const values = propertyRow?.values; const hasMultipleValues = values && values.length > 1; const propertyName = getDisplayName(property.entity as StructuredPropertyEntity); diff --git a/datahub-web-react/src/app/entity/shared/entityForm/Form.tsx b/datahub-web-react/src/app/entity/shared/entityForm/Form.tsx index 05fd91d4680ac7..b5f3cd770078a0 100644 --- a/datahub-web-react/src/app/entity/shared/entityForm/Form.tsx +++ b/datahub-web-react/src/app/entity/shared/entityForm/Form.tsx @@ -57,7 +57,7 @@ function Form({ formUrn }: Props) { const title = formAssociation?.form?.info?.name; const associatedUrn = formAssociation?.associatedUrn; const description = formAssociation?.form?.info?.description; - const owners = formAssociation?.form.ownership?.owners; + const owners = formAssociation?.form?.ownership?.owners; return ( diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditStructuredPropertyModal.tsx b/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditStructuredPropertyModal.tsx index 1714f0d1872e37..92691b1ad2239a 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditStructuredPropertyModal.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditStructuredPropertyModal.tsx @@ -99,7 +99,7 @@ export default function EditStructuredPropertyModal({ return ( { const searchAcrossEntities = data?.searchAcrossEntities; const noOfProperties = searchAcrossEntities?.searchResults?.length; - const badgeProperty = searchAcrossEntities?.searchResults.find( + const badgeProperty = searchAcrossEntities?.searchResults?.find( (prop) => (prop.entity as StructuredPropertyEntity).settings?.showAsAssetBadge, )?.entity; diff --git a/datahub-web-react/src/app/govern/structuredProperties/ViewAdvancedOptions.tsx 
b/datahub-web-react/src/app/govern/structuredProperties/ViewAdvancedOptions.tsx index 1f08995e237ec5..25f1d672390426 100644 --- a/datahub-web-react/src/app/govern/structuredProperties/ViewAdvancedOptions.tsx +++ b/datahub-web-react/src/app/govern/structuredProperties/ViewAdvancedOptions.tsx @@ -32,7 +32,7 @@ const ViewAdvancedOptions = ({ propEntity }: Props) => { {propEntity && ( Qualified Name - {propEntity?.definition.qualifiedName} + {propEntity?.definition?.qualifiedName} )} diff --git a/datahub-web-react/src/app/govern/structuredProperties/ViewStructuredPropsDrawer.tsx b/datahub-web-react/src/app/govern/structuredProperties/ViewStructuredPropsDrawer.tsx index 2fd36a29c8e760..bc91a90989d2c8 100644 --- a/datahub-web-react/src/app/govern/structuredProperties/ViewStructuredPropsDrawer.tsx +++ b/datahub-web-react/src/app/govern/structuredProperties/ViewStructuredPropsDrawer.tsx @@ -40,9 +40,9 @@ const ViewStructuredPropsDrawer = ({ const selectedPropEntity = selectedProperty && (selectedProperty?.entity as StructuredPropertyEntity); - const allowedValues = selectedPropEntity?.definition.allowedValues; + const allowedValues = selectedPropEntity?.definition?.allowedValues; - const allowedTypes = selectedPropEntity?.definition.typeQualifier?.allowedTypes; + const allowedTypes = selectedPropEntity?.definition?.typeQualifier?.allowedTypes; const propType = getValueTypeLabel( selectedPropEntity.definition.valueType.urn, diff --git a/datahub-web-react/src/app/ingest/source/executions/ExecutionRequestDetailsModal.tsx b/datahub-web-react/src/app/ingest/source/executions/ExecutionRequestDetailsModal.tsx index f624d7e6ca796f..a7e6f516bb7943 100644 --- a/datahub-web-react/src/app/ingest/source/executions/ExecutionRequestDetailsModal.tsx +++ b/datahub-web-react/src/app/ingest/source/executions/ExecutionRequestDetailsModal.tsx @@ -156,7 +156,7 @@ export const ExecutionDetailsModal = ({ urn, open, onClose }: Props) => { (status && {getExecutionRequestSummaryText(status)}) || 
undefined; - const recipeJson = data?.executionRequest?.input.arguments?.find((arg) => arg.key === 'recipe')?.value; + const recipeJson = data?.executionRequest?.input?.arguments?.find((arg) => arg.key === 'recipe')?.value; let recipeYaml: string; try { recipeYaml = recipeJson && YAML.stringify(JSON.parse(recipeJson), 8, 2).trim();