From 41f7fc9949967d0fc871adf875ba35c2def09856 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Mon, 25 Nov 2024 09:49:16 -0500 Subject: [PATCH 1/6] fix(ui/graphql) Handle groups in institutionalMemory aspect (#11934) --- .../datahub/graphql/GmsGraphQLEngine.java | 25 +++++++++++---- .../resolvers/type/ResolvedActorResolver.java | 25 +++++++++++++++ .../InstitutionalMemoryMetadataMapper.java | 1 + .../common/mappers/ResolvedActorMapper.java | 31 +++++++++++++++++++ .../src/main/resources/entity.graphql | 10 +++++- datahub-web-react/src/Mocks.tsx | 6 ++++ .../containers/profile/sidebar/LinkButton.tsx | 2 +- .../Documentation/components/LinkList.tsx | 8 ++--- .../src/graphql-mock/mutationHelper.ts | 1 + datahub-web-react/src/graphql/domain.graphql | 5 ++- .../src/graphql/fragments.graphql | 16 ++++++++-- 11 files changed, 111 insertions(+), 19 deletions(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/type/ResolvedActorResolver.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/ResolvedActorMapper.java diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 5b265b6714452..d1da55268a50d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -63,6 +63,7 @@ import com.linkedin.datahub.graphql.generated.Domain; import com.linkedin.datahub.graphql.generated.ERModelRelationship; import com.linkedin.datahub.graphql.generated.ERModelRelationshipProperties; +import com.linkedin.datahub.graphql.generated.Entity; import com.linkedin.datahub.graphql.generated.EntityPath; import com.linkedin.datahub.graphql.generated.EntityRelationship; import com.linkedin.datahub.graphql.generated.EntityRelationshipLegacy; @@ -312,6 +313,7 @@ import com.linkedin.datahub.graphql.resolvers.type.HyperParameterValueTypeResolver; import com.linkedin.datahub.graphql.resolvers.type.PlatformSchemaUnionTypeResolver; import com.linkedin.datahub.graphql.resolvers.type.PropertyValueResolver; +import com.linkedin.datahub.graphql.resolvers.type.ResolvedActorResolver; import com.linkedin.datahub.graphql.resolvers.type.ResultsTypeResolver; import com.linkedin.datahub.graphql.resolvers.type.TimeSeriesAspectInterfaceTypeResolver; import com.linkedin.datahub.graphql.resolvers.user.CreateNativeUserResetTokenResolver; @@ -1730,12 +1732,22 @@ private void configureDatasetResolvers(final RuntimeWiring.Builder builder) { .type( "InstitutionalMemoryMetadata", typeWiring -> - typeWiring.dataFetcher( - "author", - new LoadableTypeResolver<>( - corpUserType, - (env) -> - ((InstitutionalMemoryMetadata) env.getSource()).getAuthor().getUrn()))) + typeWiring + .dataFetcher( + "author", + new LoadableTypeResolver<>( + corpUserType, + (env) -> + ((InstitutionalMemoryMetadata) env.getSource()) + .getAuthor() + .getUrn())) + .dataFetcher( + "actor", + new EntityTypeResolver( + this.entityTypes, + (env) -> + (Entity) + ((InstitutionalMemoryMetadata) env.getSource()).getActor()))) .type( "DatasetStatsSummary", typeWiring -> @@ -2242,6 +2254,7 @@ private void configureTypeResolvers(final RuntimeWiring.Builder builder) { "HyperParameterValueType", typeWiring -> typeWiring.typeResolver(new HyperParameterValueTypeResolver())) .type("PropertyValue", typeWiring -> typeWiring.typeResolver(new PropertyValueResolver())) + .type("ResolvedActor", typeWiring -> typeWiring.typeResolver(new ResolvedActorResolver())) .type("Aspect", typeWiring -> typeWiring.typeResolver(new AspectInterfaceTypeResolver())) .type( "TimeSeriesAspect", diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/type/ResolvedActorResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/type/ResolvedActorResolver.java new file mode 100644 index 0000000000000..7ae719a23b00a --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/type/ResolvedActorResolver.java @@ -0,0 +1,25 @@ +package com.linkedin.datahub.graphql.resolvers.type; + +import com.linkedin.datahub.graphql.generated.CorpGroup; +import com.linkedin.datahub.graphql.generated.CorpUser; +import graphql.TypeResolutionEnvironment; +import graphql.schema.GraphQLObjectType; +import graphql.schema.TypeResolver; + +public class ResolvedActorResolver implements TypeResolver { + + public static final String CORP_USER = "CorpUser"; + public static final String CORP_GROUP = "CorpGroup"; + + @Override + public GraphQLObjectType getType(TypeResolutionEnvironment env) { + if (env.getObject() instanceof CorpUser) { + return env.getSchema().getObjectType(CORP_USER); + } else if (env.getObject() instanceof CorpGroup) { + return env.getSchema().getObjectType(CORP_GROUP); + } else { + throw new RuntimeException( + "Unrecognized object type provided to type resolver, Type:" + env.getObject().toString()); + } + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/InstitutionalMemoryMetadataMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/InstitutionalMemoryMetadataMapper.java index 7c6de02ecc876..9781643c414c8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/InstitutionalMemoryMetadataMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/InstitutionalMemoryMetadataMapper.java @@ -28,6 +28,7 @@ public InstitutionalMemoryMetadata apply( result.setDescription(input.getDescription()); // deprecated field result.setLabel(input.getDescription()); result.setAuthor(getAuthor(input.getCreateStamp().getActor().toString())); + result.setActor(ResolvedActorMapper.map(input.getCreateStamp().getActor())); result.setCreated(AuditStampMapper.map(context, input.getCreateStamp())); result.setAssociatedUrn(entityUrn.toString()); return result; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/ResolvedActorMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/ResolvedActorMapper.java new file mode 100644 index 0000000000000..c00ffd0b828b1 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/ResolvedActorMapper.java @@ -0,0 +1,31 @@ +package com.linkedin.datahub.graphql.types.common.mappers; + +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.generated.CorpGroup; +import com.linkedin.datahub.graphql.generated.CorpUser; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.ResolvedActor; +import com.linkedin.metadata.Constants; +import javax.annotation.Nonnull; + +public class ResolvedActorMapper { + + public static final ResolvedActorMapper INSTANCE = new ResolvedActorMapper(); + + public static ResolvedActor map(@Nonnull final Urn actorUrn) { + return INSTANCE.apply(actorUrn); + } + + public ResolvedActor apply(@Nonnull final Urn actorUrn) { + if (actorUrn.getEntityType().equals(Constants.CORP_GROUP_ENTITY_NAME)) { + CorpGroup partialGroup = new CorpGroup(); + partialGroup.setUrn(actorUrn.toString()); + partialGroup.setType(EntityType.CORP_GROUP); + return partialGroup; + } + CorpUser partialUser = new CorpUser(); + partialUser.setUrn(actorUrn.toString()); + partialUser.setType(EntityType.CORP_USER); + return (ResolvedActor) partialUser; + } +} diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 732a782139b61..049527e5d77e3 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -3005,8 +3005,14 @@ type InstitutionalMemoryMetadata { """ The author of this metadata + Deprecated! Use actor instead for users or groups. """ - author: CorpUser! + author: CorpUser! @deprecated(reason: "Use `actor`") + + """ + The author of this metadata + """ + actor: ResolvedActor! """ An AuditStamp corresponding to the creation of this resource @@ -3834,6 +3840,8 @@ enum CorpUserStatus { ACTIVE } +union ResolvedActor = CorpUser | CorpGroup + """ A DataHub User entity, which represents a Person on the Metadata Entity Graph """ diff --git a/datahub-web-react/src/Mocks.tsx b/datahub-web-react/src/Mocks.tsx index aed672a34e7ca..329d6250e576a 100644 --- a/datahub-web-react/src/Mocks.tsx +++ b/datahub-web-react/src/Mocks.tsx @@ -566,6 +566,12 @@ export const dataset3 = { username: 'datahub', type: EntityType.CorpUser, }, + actor: { + __typename: 'CorpUser', + urn: 'urn:li:corpuser:datahub', + username: 'datahub', + type: EntityType.CorpUser, + }, description: 'This only points to Google', label: 'This only points to Google', created: { diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/LinkButton.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/LinkButton.tsx index 0ce3c9641d559..c3896baedace7 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/LinkButton.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/LinkButton.tsx @@ -29,7 +29,7 @@ export default function LinkButton({ link }: Props) { href={link.url} target="_blank" rel="noreferrer" - key={`${link.label}-${link.url}-${link.author}`} + key={`${link.label}-${link.url}-${link.actor.urn}`} > {link.description || link.label} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx index 7212198bbf61c..6eb680785599e 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx @@ -3,7 +3,7 @@ import { Link } from 'react-router-dom'; import styled from 'styled-components/macro'; import { message, Button, List, Typography, Modal, Form, Input } from 'antd'; import { LinkOutlined, DeleteOutlined, EditOutlined } from '@ant-design/icons'; -import { EntityType, InstitutionalMemoryMetadata } from '../../../../../../types.generated'; +import { InstitutionalMemoryMetadata } from '../../../../../../types.generated'; import { useEntityData, useMutationUrn } from '../../../EntityContext'; import { useEntityRegistry } from '../../../../../useEntityRegistry'; import { ANTD_GRAY } from '../../../constants'; @@ -182,10 +182,8 @@ export const LinkList = ({ refetch }: LinkListProps) => { description={ <> Added {formatDateString(link.created.time)} by{' '} - - {link.author.username} + + {entityRegistry.getDisplayName(link.actor.type, link.actor)} } diff --git a/datahub-web-react/src/graphql-mock/mutationHelper.ts b/datahub-web-react/src/graphql-mock/mutationHelper.ts index a97b41b53bc65..0cf4f5f87f29c 100644 --- a/datahub-web-react/src/graphql-mock/mutationHelper.ts +++ b/datahub-web-react/src/graphql-mock/mutationHelper.ts @@ -99,6 +99,7 @@ export const updateEntityLink = ({ entity, institutionalMemory }: UpdateEntityLi description: e.description as string, label: e.description as string, author: { urn: e.author, username: '', type: EntityType.CorpUser }, + actor: { urn: e.author, username: '', type: EntityType.CorpUser }, created: { time: Date.now(), actor: getActor(), __typename: 'AuditStamp' }, associatedUrn: dataEntity.urn, }; diff --git a/datahub-web-react/src/graphql/domain.graphql b/datahub-web-react/src/graphql/domain.graphql index 3897a2ced85b8..2e96a78b0f44b 100644 --- a/datahub-web-react/src/graphql/domain.graphql +++ b/datahub-web-react/src/graphql/domain.graphql @@ -19,9 +19,8 @@ query getDomain($urn: String!) { institutionalMemory { elements { url - author { - urn - username + actor { + ...resolvedActorFields } description created { diff --git a/datahub-web-react/src/graphql/fragments.graphql b/datahub-web-react/src/graphql/fragments.graphql index 7ce4082c42f61..67dbdbbb22f30 100644 --- a/datahub-web-react/src/graphql/fragments.graphql +++ b/datahub-web-react/src/graphql/fragments.graphql @@ -202,12 +202,22 @@ fragment embedFields on Embed { renderUrl } +fragment resolvedActorFields on ResolvedActor { + ... on CorpUser { + urn + ...entityDisplayNameFields + } + ... on CorpGroup { + urn + ...entityDisplayNameFields + } +} + fragment institutionalMemoryFields on InstitutionalMemory { elements { url - author { - urn - username + actor { + ...resolvedActorFields } description created { From 5da39044e3350cc2da1258219b9a653b88174654 Mon Sep 17 00:00:00 2001 From: Pinaki Bhattacharjee Date: Mon, 25 Nov 2024 23:43:47 +0530 Subject: [PATCH 2/6] fix(browseDAO): Handle null browse path from ES in BrowseDAO (#11875) --- .../metadata/search/elasticsearch/query/ESBrowseDAO.java | 6 +++++- .../linkedin/metadata/search/query/BrowseDAOTest.java | 9 +++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java index 61bba11098fae..35f133cc794f2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java @@ -36,6 +36,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -425,7 +426,10 @@ public List getBrowsePaths( if (!sourceMap.containsKey(BROWSE_PATH)) { return Collections.emptyList(); } - return (List) sourceMap.get(BROWSE_PATH); + List browsePaths = + ((List) sourceMap.get(BROWSE_PATH)) + .stream().filter(Objects::nonNull).collect(Collectors.toList()); + return browsePaths; } public BrowseResultV2 browseV2( diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java index e71865921678b..2ee96dd101da5 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/BrowseDAOTest.java @@ -102,5 +102,14 @@ public void testGetBrowsePath() throws Exception { List browsePaths = browseDAO.getBrowsePaths(opContext, "dataset", dummyUrn); assertEquals(browsePaths.size(), 1); assertEquals(browsePaths.get(0), "foo"); + + // Test the case of null browsePaths field + sourceMap.put("browsePaths", Collections.singletonList(null)); + when(mockSearchHit.getSourceAsMap()).thenReturn(sourceMap); + when(mockSearchHits.getHits()).thenReturn(new SearchHit[] {mockSearchHit}); + when(mockSearchResponse.getHits()).thenReturn(mockSearchHits); + when(mockClient.search(any(), eq(RequestOptions.DEFAULT))).thenReturn(mockSearchResponse); + List nullBrowsePaths = browseDAO.getBrowsePaths(opContext, "dataset", dummyUrn); + assertEquals(nullBrowsePaths.size(), 0); } } From 418259b8c4d48c9607efd4a3671fa08bde955816 Mon Sep 17 00:00:00 2001 From: JaeMoo Han Date: Tue, 26 Nov 2024 05:13:26 +0900 Subject: [PATCH 3/6] fix(airflow): remove trino to presto mapping (#11925) Co-authored-by: Harshal Sheth --- .../src/datahub_airflow_plugin/_datahub_ol_adapter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py index 7d35791bf1db4..69de61aced0a5 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_ol_adapter.py @@ -8,7 +8,6 @@ OL_SCHEME_TWEAKS = { "sqlserver": "mssql", - "trino": "presto", "awsathena": "athena", } From 90fe14aadbd12e457229e1927e8e213e8cc80959 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Tue, 26 Nov 2024 01:44:10 +0530 Subject: [PATCH 4/6] docs(ingest/bigquery): add partition support capability (#11940) --- .../src/datahub/ingestion/source/bigquery_v2/bigquery.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 76c2fbf48ccab..16a5268a2dea7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -95,6 +95,10 @@ def cleanup(config: BigQueryV2Config) -> None: "Optionally enabled via `classification.enabled`", supported=True, ) +@capability( + SourceCapability.PARTITION_SUPPORT, + "Enabled by default, partition keys and clustering keys are supported.", +) class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource): def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): super().__init__(config, ctx) From 32ef3894400ac3aaac6f9d5b780aa58a9f6fb48c Mon Sep 17 00:00:00 2001 From: RyanHolstien Date: Mon, 25 Nov 2024 21:20:02 -0600 Subject: [PATCH 5/6] fix(dataProduct): reduce write fan-out for unset side effect (#11951) --- .../DataProductUnsetSideEffect.java | 61 +++++----- .../DataProductUnsetSideEffectTest.java | 107 ++++++++++++++++++ 2 files changed, 141 insertions(+), 27 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffect.java b/metadata-io/src/main/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffect.java index 544040d14f8b7..9c4bb52f014fc 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffect.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffect.java @@ -27,6 +27,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; @@ -70,6 +71,7 @@ private static Stream generatePatchRemove( log.error("Unable to process data product properties for urn: {}", mclItem.getUrn()); return Stream.empty(); } + Map> patchOpMap = new HashMap<>(); for (DataProductAssociation dataProductAssociation : Optional.ofNullable(dataProductProperties.getAssets()) .orElse(new DataProductAssociationArray())) { @@ -93,40 +95,45 @@ private static Stream generatePatchRemove( if (!result.getEntities().isEmpty()) { for (RelatedEntities entity : result.getEntities()) { if (!mclItem.getUrn().equals(UrnUtils.getUrn(entity.getSourceUrn()))) { - EntitySpec entitySpec = - retrieverContext - .getAspectRetriever() - .getEntityRegistry() - .getEntitySpec(DATA_PRODUCT_ENTITY_NAME); GenericJsonPatch.PatchOp patchOp = new GenericJsonPatch.PatchOp(); patchOp.setOp(PatchOperationType.REMOVE.getValue()); patchOp.setPath(String.format("/assets/%s", entity.getDestinationUrn())); - mcpItems.add( - PatchItemImpl.builder() - .urn(UrnUtils.getUrn(entity.getSourceUrn())) - .entitySpec( - retrieverContext - .getAspectRetriever() - .getEntityRegistry() - .getEntitySpec(DATA_PRODUCT_ENTITY_NAME)) - .aspectName(DATA_PRODUCT_PROPERTIES_ASPECT_NAME) - .aspectSpec(entitySpec.getAspectSpec(DATA_PRODUCT_PROPERTIES_ASPECT_NAME)) - .patch( - GenericJsonPatch.builder() - .arrayPrimaryKeys( - Map.of( - DataProductPropertiesTemplate.ASSETS_FIELD_NAME, - List.of(DataProductPropertiesTemplate.KEY_FIELD_NAME))) - .patch(List.of(patchOp)) - .build() - .getJsonPatch()) - .auditStamp(mclItem.getAuditStamp()) - .systemMetadata(mclItem.getSystemMetadata()) - .build(retrieverContext.getAspectRetriever().getEntityRegistry())); + patchOpMap + .computeIfAbsent(entity.getSourceUrn(), urn -> new ArrayList<>()) + .add(patchOp); } } } } + for (String urn : patchOpMap.keySet()) { + EntitySpec entitySpec = + retrieverContext + .getAspectRetriever() + .getEntityRegistry() + .getEntitySpec(DATA_PRODUCT_ENTITY_NAME); + mcpItems.add( + PatchItemImpl.builder() + .urn(UrnUtils.getUrn(urn)) + .entitySpec( + retrieverContext + .getAspectRetriever() + .getEntityRegistry() + .getEntitySpec(DATA_PRODUCT_ENTITY_NAME)) + .aspectName(DATA_PRODUCT_PROPERTIES_ASPECT_NAME) + .aspectSpec(entitySpec.getAspectSpec(DATA_PRODUCT_PROPERTIES_ASPECT_NAME)) + .patch( + GenericJsonPatch.builder() + .arrayPrimaryKeys( + Map.of( + DataProductPropertiesTemplate.ASSETS_FIELD_NAME, + List.of(DataProductPropertiesTemplate.KEY_FIELD_NAME))) + .patch(patchOpMap.get(urn)) + .build() + .getJsonPatch()) + .auditStamp(mclItem.getAuditStamp()) + .systemMetadata(mclItem.getSystemMetadata()) + .build(retrieverContext.getAspectRetriever().getEntityRegistry())); + } return mcpItems.stream(); } return Stream.empty(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java b/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java index 1151014bf1162..12dd57f94da23 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/dataproducts/sideeffects/DataProductUnsetSideEffectTest.java @@ -34,6 +34,8 @@ import com.linkedin.metadata.utils.AuditStampUtils; import com.linkedin.test.metadata.aspect.TestEntityRegistry; import io.datahubproject.metadata.context.RetrieverContext; +import jakarta.json.JsonArray; +import jakarta.json.JsonObject; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -251,6 +253,111 @@ public void testDPRemoveOld() { .build(mockAspectRetriever.getEntityRegistry()))); } + @Test + public void testBulkAssetMove() { + DataProductUnsetSideEffect test = new DataProductUnsetSideEffect(); + test.setConfig(TEST_PLUGIN_CONFIG); + + // Create 100 dataset URNs and set up their existing relationships + List datasetUrns = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + Urn datasetUrn = + UrnUtils.getUrn( + String.format("urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_%d,PROD)", i)); + datasetUrns.add(datasetUrn); + + // Mock the existing relationship for each dataset with the old data product + RelatedEntities relatedEntities = + new RelatedEntities( + "DataProductContains", + TEST_PRODUCT_URN_2.toString(), // Old data product + datasetUrn.toString(), + RelationshipDirection.INCOMING, + null); + + List relatedEntitiesList = new ArrayList<>(); + relatedEntitiesList.add(relatedEntities); + RelatedEntitiesScrollResult relatedEntitiesScrollResult = + new RelatedEntitiesScrollResult(1, 10, null, relatedEntitiesList); + + when(retrieverContext + .getGraphRetriever() + .scrollRelatedEntities( + eq(null), + eq(QueryUtils.newFilter("urn", datasetUrn.toString())), + eq(null), + eq(EMPTY_FILTER), + eq(ImmutableList.of("DataProductContains")), + eq( + QueryUtils.newRelationshipFilter( + EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(Collections.emptyList()), + eq(null), + eq(10), + eq(null), + eq(null))) + .thenReturn(relatedEntitiesScrollResult); + } + + // Create data product properties with all 100 assets + DataProductProperties dataProductProperties = new DataProductProperties(); + DataProductAssociationArray dataProductAssociations = new DataProductAssociationArray(); + for (Urn datasetUrn : datasetUrns) { + DataProductAssociation association = new DataProductAssociation(); + association.setDestinationUrn(datasetUrn); + dataProductAssociations.add(association); + } + dataProductProperties.setAssets(dataProductAssociations); + + // Run test + ChangeItemImpl dataProductPropertiesChangeItem = + ChangeItemImpl.builder() + .urn(TEST_PRODUCT_URN) // New data product + .aspectName(DATA_PRODUCT_PROPERTIES_ASPECT_NAME) + .changeType(ChangeType.UPSERT) + .entitySpec(TEST_REGISTRY.getEntitySpec(DATA_PRODUCT_ENTITY_NAME)) + .aspectSpec( + TEST_REGISTRY + .getEntitySpec(DATA_PRODUCT_ENTITY_NAME) + .getAspectSpec(DATA_PRODUCT_PROPERTIES_ASPECT_NAME)) + .recordTemplate(dataProductProperties) + .auditStamp(AuditStampUtils.createDefaultAuditStamp()) + .build(mockAspectRetriever); + + List testOutput = + test.postMCPSideEffect( + List.of( + MCLItemImpl.builder() + .build( + dataProductPropertiesChangeItem, + null, + null, + retrieverContext.getAspectRetriever())), + retrieverContext) + .toList(); + + // Verify test + assertEquals(testOutput.size(), 1, "Expected one patch to remove assets from old data product"); + + MCPItem patchItem = testOutput.get(0); + assertEquals( + patchItem.getUrn(), TEST_PRODUCT_URN_2, "Patch should target the old data product"); + assertEquals(patchItem.getAspectName(), DATA_PRODUCT_PROPERTIES_ASPECT_NAME); + + // Verify the patch contains remove operations for all 100 assets + JsonArray patchArray = ((PatchItemImpl) patchItem).getPatch().toJsonArray(); + assertEquals(patchArray.size(), 100, "Should have 100 remove operations"); + + // Verify each remove operation + for (int i = 0; i < 100; i++) { + JsonObject op = patchArray.getJsonObject(i); + assertEquals(op.getString("op"), PatchOperationType.REMOVE.getValue()); + assertEquals( + op.getString("path"), + String.format("/assets/urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_%d,PROD)", i)); + } + } + private static DataProductProperties getTestDataProductProperties(Urn destinationUrn) { DataProductProperties dataProductProperties = new DataProductProperties(); DataProductAssociationArray dataProductAssociations = new DataProductAssociationArray(); From 9f9a8b1006afbf92c01f558db28503728005dc29 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 26 Nov 2024 01:00:56 -0500 Subject: [PATCH 6/6] fix(ingest/tableau): handle none database field (#11950) --- .../src/datahub/ingestion/source/tableau/tableau_common.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py index 8d6746b6433a4..ac917c5f128ed 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py @@ -643,8 +643,11 @@ def create( cls, d: dict, default_schema_map: Optional[Dict[str, str]] = None ) -> "TableauUpstreamReference": # Values directly from `table` object from Tableau - database = t_database = d.get(c.DATABASE, {}).get(c.NAME) - database_id = d.get(c.DATABASE, {}).get(c.ID) + database_dict = ( + d.get(c.DATABASE) or {} + ) # this sometimes is None, so we need the `or {}` + database = t_database = database_dict.get(c.NAME) + database_id = database_dict.get(c.ID) schema = t_schema = d.get(c.SCHEMA) table = t_table = d.get(c.NAME) or "" t_full_name = d.get(c.FULL_NAME)