diff --git a/.github/scripts/check_python_package.py b/.github/scripts/check_python_package.py
new file mode 100644
index 00000000000000..f1f30056917006
--- /dev/null
+++ b/.github/scripts/check_python_package.py
@@ -0,0 +1,18 @@
+import setuptools
+
+folders = ["./smoke-test/tests"]
+
+for folder in folders:
+    print(f"Checking folder {folder}")
+    a = [i for i in setuptools.find_packages(folder) if "cypress" not in i]
+    b = [i for i in setuptools.find_namespace_packages(folder) if "cypress" not in i]
+
+    in_a_not_b = set(a) - set(b)
+    in_b_not_a = set(b) - set(a)
+
+    assert (
+        len(in_a_not_b) == 0
+    ), f"Found packages in {folder} that are not in namespace packages: {in_a_not_b}"
+    assert (
+        len(in_b_not_a) == 0
+    ), f"Found namespace packages in {folder} that are not in packages: {in_b_not_a}"
diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml
index 58645cbd0fd0fc..c38b97d5f44bf8 100644
--- a/.github/workflows/docker-unified.yml
+++ b/.github/workflows/docker-unified.yml
@@ -43,8 +43,29 @@ env:
 #### IMPORTANT ####

 jobs:
+  check_lint:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out the repo
+        uses: hsheth2/sane-checkout-action@v1
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+          cache: "pip"
+      - name: Set up JDK 17
+        uses: actions/setup-java@v3
+        with:
+          distribution: "zulu"
+          java-version: 17
+      - name: Ensure packages are correct
+        run: |
+          python ./.github/scripts/check_python_package.py
+      - name: Run lint on smoke test
+        run: |
+          ./gradlew :smoke-test:lint
   setup:
     runs-on: ubuntu-latest
+    needs: check_lint
     outputs:
       tag: ${{ steps.tag.outputs.tag }}
       slim_tag: ${{ steps.tag.outputs.slim_tag }}
diff --git a/datahub-frontend/build.gradle b/datahub-frontend/build.gradle
index 1174c5c5cfd5d1..ab4ce405a55411 100644
--- a/datahub-frontend/build.gradle
+++ b/datahub-frontend/build.gradle
@@ -87,7 +87,7 @@ docker {
   }
 }

-task unversionZip(type: Copy, dependsOn: [':datahub-web-react:build', dist]) {
+task unversionZip(type: Copy, dependsOn: [':datahub-web-react:distZip', dist]) {
   from ("${buildDir}/distributions")
   include "datahub-frontend-${version}.zip"
   into "${buildDir}/docker/"
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java
index 41f48e0a7dc3e6..8d9b9a5ad82c84 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java
@@ -92,6 +92,7 @@
 import com.linkedin.datahub.graphql.generated.QuerySubject;
 import com.linkedin.datahub.graphql.generated.QuickFilter;
 import com.linkedin.datahub.graphql.generated.RecommendationContent;
+import com.linkedin.datahub.graphql.generated.ResolvedAuditStamp;
 import com.linkedin.datahub.graphql.generated.SchemaField;
 import com.linkedin.datahub.graphql.generated.SchemaFieldEntity;
 import com.linkedin.datahub.graphql.generated.SearchAcrossLineageResult;
@@ -1642,7 +1643,7 @@ private void configureResolvedAuditStampResolvers(final RuntimeWiring.Builder bu
         typeWiring.dataFetcher(
             "actor",
             new LoadableTypeResolver<>(
-                corpUserType, (env) -> ((CorpUser) env.getSource()).getUrn())));
+                corpUserType, (env) -> ((ResolvedAuditStamp) env.getSource()).getActor().getUrn())));
   }

   /**
diff --git a/datahub-graphql-core/src/main/resources/forms.graphql b/datahub-graphql-core/src/main/resources/forms.graphql
index 0ff55cfa9f1733..a0f84f8e3bb1a6 100644
--- a/datahub-graphql-core/src/main/resources/forms.graphql
+++ b/datahub-graphql-core/src/main/resources/forms.graphql
@@ -275,20 +275,6 @@ input SubmitFormPromptInput {
   structuredPropertyParams: StructuredPropertyInputParams
 }

-"""
-Input for responding to a singular prompt in a form for a batch of entities
-"""
-input BatchSubmitFormPromptInput {
-  """
-  The urns of the entities this prompt submission is for
-  """
-  assetUrns: [String!]!
-
-  """
-  Input for responding to a specific prompt on a form
-  """
-  input: SubmitFormPromptInput
-}

 """
 Input for collecting structured property values to apply to entities
@@ -390,18 +376,3 @@ input VerifyFormInput {
   """
   entityUrn: String!
 }
-
-"""
-Input for verifying a batch of entities for a give form
-"""
-input BatchVerifyFormInput {
-  """
-  The urns of the entities getting verified for this form
-  """
-  assetUrns: [String!]!
-
-  """
-  The urn of the form being verified on the given entities
-  """
-  formUrn: String!
-}
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/DataMigrationStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/DataMigrationStep.java
index ac56e5e91c72be..9f41daf02d2093 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/DataMigrationStep.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/DataMigrationStep.java
@@ -10,6 +10,7 @@
 import com.linkedin.datahub.upgrade.UpgradeStepResult;
 import com.linkedin.datahub.upgrade.impl.DefaultUpgradeStepResult;
 import com.linkedin.metadata.Constants;
+import com.linkedin.metadata.aspect.utils.DefaultAspectsUtil;
 import com.linkedin.metadata.entity.EntityService;
 import com.linkedin.metadata.entity.ebean.EbeanAspectV1;
 import com.linkedin.metadata.entity.ebean.EbeanAspectV2;
@@ -170,7 +171,7 @@ public Function executable() {
           // Emit a browse path aspect.
final BrowsePaths browsePaths; try { - browsePaths = _entityService.buildDefaultBrowsePath(urn); + browsePaths = DefaultAspectsUtil.buildDefaultBrowsePath(urn, _entityService); final AuditStamp browsePathsStamp = new AuditStamp(); browsePathsStamp.setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)); diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java index 9a426369cfb026..601ce4d25493c1 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java @@ -15,6 +15,7 @@ import com.linkedin.datahub.upgrade.impl.DefaultUpgradeStepResult; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.utils.DefaultAspectsUtil; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Condition; @@ -181,7 +182,8 @@ private Filter backfillDefaultBrowsePathsV2Filter() { } private void ingestBrowsePathsV2(Urn urn, AuditStamp auditStamp) throws Exception { - BrowsePathsV2 browsePathsV2 = _entityService.buildDefaultBrowsePathV2(urn, true); + BrowsePathsV2 browsePathsV2 = + DefaultAspectsUtil.buildDefaultBrowsePathV2(urn, true, _entityService); log.debug(String.format("Adding browse path v2 for urn %s with value %s", urn, browsePathsV2)); MetadataChangeProposal proposal = new MetadataChangeProposal(); proposal.setEntityUrn(urn); diff --git a/datahub-web-react/package.json b/datahub-web-react/package.json index 97830cec4e164e..337b0dc87ec1c1 100644 --- a/datahub-web-react/package.json +++ b/datahub-web-react/package.json @@ -65,6 +65,7 @@ "react-helmet-async": "^1.3.0", "react-highlighter": "^0.4.3", "react-icons": "4.3.1", + "react-intersection-observer": "^9.5.3", "react-js-cron": "^2.1.0", "react-router": "^5.3", "react-router-dom": "^5.3", diff --git a/datahub-web-react/src/Mocks.tsx b/datahub-web-react/src/Mocks.tsx index 9f339bb7db548a..f533e8d50385b1 100644 --- a/datahub-web-react/src/Mocks.tsx +++ b/datahub-web-react/src/Mocks.tsx @@ -298,6 +298,7 @@ export const dataset1 = { browsePathV2: { path: [{ name: 'test', entity: null }], __typename: 'BrowsePathV2' }, autoRenderAspects: [], structuredProperties: null, + forms: null, }; export const dataset2 = { @@ -395,6 +396,7 @@ export const dataset2 = { browsePathV2: { path: [{ name: 'test', entity: null }], __typename: 'BrowsePathV2' }, autoRenderAspects: [], structuredProperties: null, + forms: null, }; export const dataset3 = { @@ -629,6 +631,7 @@ export const dataset3 = { lastProfile: null, lastOperation: null, structuredProperties: null, + forms: null, } as Dataset; export const dataset3WithSchema = { diff --git a/datahub-web-react/src/app/entity/Entity.tsx b/datahub-web-react/src/app/entity/Entity.tsx index 5920919a9cdab2..3277051661bf1b 100644 --- a/datahub-web-react/src/app/entity/Entity.tsx +++ b/datahub-web-react/src/app/entity/Entity.tsx @@ -1,6 +1,6 @@ import { EntityType, SearchResult } from '../../types.generated'; import { FetchedEntity } from '../lineage/types'; -import { GenericEntityProperties } from './shared/types'; +import { EntitySidebarSection, GenericEntityProperties } from './shared/types'; export enum PreviewType { /** @@ -176,4 
+176,9 @@ export interface Entity { * Returns the profile component to be displayed in our Chrome extension */ renderEmbeddedProfile?: (urn: string) => JSX.Element; + + /** + * Returns the entity profile sidebar sections for an entity type. Only implemented on Datasets for now. + */ + getSidebarSections?: () => EntitySidebarSection[]; } diff --git a/datahub-web-react/src/app/entity/EntityRegistry.tsx b/datahub-web-react/src/app/entity/EntityRegistry.tsx index 6642c2c7b0467c..4a2e0e386b7686 100644 --- a/datahub-web-react/src/app/entity/EntityRegistry.tsx +++ b/datahub-web-react/src/app/entity/EntityRegistry.tsx @@ -4,7 +4,7 @@ import { FetchedEntity } from '../lineage/types'; import { SearchResultProvider } from '../search/context/SearchResultContext'; import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from './Entity'; import { GLOSSARY_ENTITY_TYPES } from './shared/constants'; -import { GenericEntityProperties } from './shared/types'; +import { EntitySidebarSection, GenericEntityProperties } from './shared/types'; import { dictToQueryStringParams, getFineGrainedLineageWithSiblings, urlEncodeUrn } from './shared/utils'; function validatedGet(key: K, map: Map): V { @@ -194,6 +194,11 @@ export default class EntityRegistry { return entity.displayName(data); } + getSidebarSections(type: EntityType): EntitySidebarSection[] { + const entity = validatedGet(type, this.entityTypeToEntity); + return entity.getSidebarSections ? entity.getSidebarSections() : []; + } + getGenericEntityProperties(type: EntityType, data: T): GenericEntityProperties | null { const entity = validatedGet(type, this.entityTypeToEntity); return entity.getGenericEntityProperties(data); diff --git a/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx b/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx index f60eb959374527..90fac38ebd6b3c 100644 --- a/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx +++ b/datahub-web-react/src/app/entity/dataset/DatasetEntity.tsx @@ -190,51 +190,51 @@ export class DatasetEntity implements Entity { }, }, ]} - sidebarSections={[ - { - component: SidebarAboutSection, - }, - { - component: SidebarOwnerSection, - properties: { - defaultOwnerType: OwnershipType.TechnicalOwner, - }, - }, - { - component: SidebarSiblingsSection, - display: { - visible: (_, dataset: GetDatasetQuery) => - (dataset?.dataset?.siblings?.siblings?.length || 0) > 0, - }, - }, - { - component: SidebarViewDefinitionSection, - display: { - visible: (_, dataset: GetDatasetQuery) => - (dataset?.dataset?.viewProperties?.logic && true) || false, - }, - }, - { - component: SidebarTagsSection, - properties: { - hasTags: true, - hasTerms: true, - }, - }, - { - component: SidebarDomainSection, - }, - { - component: DataProductSection, - }, - // TODO: Add back once entity-level recommendations are complete. 
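A quick aside on the refactor above (this note and the snippet are illustrative, not part of the patch): DatasetEntity moves its inline sidebarSections array into a getSidebarSections() method, and EntityRegistry gains a matching getSidebarSections(type) accessor that falls back to an empty array for entity types that do not implement it. A minimal sketch of a caller, assuming the import paths; only getSidebarSections and useEntityRegistry come from the codebase:

```typescript
import { EntityType } from '../../../types.generated';
import { useEntityRegistry } from '../../useEntityRegistry';
import { EntitySidebarSection } from './shared/types';

// Hypothetical hook: resolves the sidebar sections registered for a given entity
// type. Because the registry returns [] for types without getSidebarSections(),
// the call site needs no special-casing for unsupported types.
export function useSidebarSections(entityType: EntityType): EntitySidebarSection[] {
    const entityRegistry = useEntityRegistry();
    return entityRegistry.getSidebarSections(entityType);
}
```

The empty-array fallback keeps callers unconditional, which is also why DatasetEntity can pass this.getSidebarSections() straight into sidebarSections below.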
- // { - // component: SidebarRecommendationsSection, - // }, - ]} + sidebarSections={this.getSidebarSections()} /> ); + getSidebarSections = () => [ + { + component: SidebarAboutSection, + }, + { + component: SidebarOwnerSection, + properties: { + defaultOwnerType: OwnershipType.TechnicalOwner, + }, + }, + { + component: SidebarSiblingsSection, + display: { + visible: (_, dataset: GetDatasetQuery) => (dataset?.dataset?.siblings?.siblings?.length || 0) > 0, + }, + }, + { + component: SidebarViewDefinitionSection, + display: { + visible: (_, dataset: GetDatasetQuery) => (dataset?.dataset?.viewProperties?.logic && true) || false, + }, + }, + { + component: SidebarTagsSection, + properties: { + hasTags: true, + hasTerms: true, + }, + }, + { + component: SidebarDomainSection, + }, + { + component: DataProductSection, + }, + // TODO: Add back once entity-level recommendations are complete. + // { + // component: SidebarRecommendationsSection, + // }, + ]; + getOverridePropertiesFromEntity = (dataset?: Dataset | null): GenericEntityProperties => { // if dataset has subTypes filled out, pick the most specific subtype and return it const subTypes = dataset?.subTypes; diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx index a781c732c9de63..60d67355d5d7dd 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx @@ -45,6 +45,7 @@ import { import { useAppConfig } from '../../../../useAppConfig'; import { useUpdateDomainEntityDataOnChange } from '../../../../domain/utils'; import ProfileSidebar from './sidebar/ProfileSidebar'; +import SidebarFormInfoWrapper from './sidebar/FormInfo/SidebarFormInfoWrapper'; type Props = { urn: string; @@ -333,7 +334,10 @@ export const EntityProfile = ({ - + )} diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/EntityInfo/EntityInfo.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/EntityInfo/EntityInfo.tsx new file mode 100644 index 00000000000000..1d1400a8cc7539 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/EntityInfo/EntityInfo.tsx @@ -0,0 +1,66 @@ +import Link from 'antd/lib/typography/Link'; +import React from 'react'; +import styled from 'styled-components'; +import PlatformContent from '../../header/PlatformContent'; +import { useEntityData } from '../../../../EntityContext'; +import { useEntityRegistry } from '../../../../../../useEntityRegistry'; +import { StyledDivider } from '../FormInfo/components'; +import { DatasetStatsSummarySubHeader } from '../../../../../dataset/profile/stats/stats/DatasetStatsSummarySubHeader'; +import LinkOut from '../../../../../../../images/link-out.svg?react'; +import FormInfo from '../FormInfo/FormInfo'; + +const EntityName = styled.div` + font-size: 16px; + font-weight: 700; + line-height: 24px; + margin-bottom: 8px; +`; + +const EntityInfoWrapper = styled.div` + padding-top: 20px; +`; + +const StyledLink = styled(Link)` + font-size: 14px; + line-height: 18px; + display: inline-flex; + align-items: center; + + svg { + height: 14px; + width: 14px; + } +`; + +const FormInfoWrapper = styled.div` + margin-top: 12px; +`; + +interface Props { + formUrn: string; +} + +export default function EntityInfo({ formUrn }: Props) { + const entityRegistry = useEntityRegistry(); + const { entityType, entityData } = 
useEntityData(); + const entityName = entityData ? entityRegistry.getDisplayName(entityType, entityData) : ''; + + return ( + + + {entityName} + + View Profile + + + + + + + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/CompletedView.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/CompletedView.tsx new file mode 100644 index 00000000000000..f8c0b74cc2cd4f --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/CompletedView.tsx @@ -0,0 +1,59 @@ +import Link from 'antd/lib/typography/Link'; +import React from 'react'; +import styled from 'styled-components'; +import GreenVerificationLogo from '../../../../../../../images/verificationGreen.svg?react'; +import PurpleVerificationLogo from '../../../../../../../images/verificationPurple.svg?react'; +import { CTAWrapper, FlexWrapper, StyledIcon, StyledReadOutlined, Title } from './components'; +import OptionalPromptsRemaining from './OptionalPromptsRemaining'; +import VerificationAuditStamp from './VerificationAuditStamp'; + +const StyledLink = styled(Link)` + margin-top: 8px; + font-size: 12px; + display: block; +`; + +interface Props { + showVerificationStyles: boolean; + numOptionalPromptsRemaining: number; + isUserAssigned: boolean; + formUrn?: string; + shouldDisplayBackground?: boolean; + openFormModal?: () => void; +} + +export default function CompletedView({ + showVerificationStyles, + numOptionalPromptsRemaining, + isUserAssigned, + formUrn, + shouldDisplayBackground, + openFormModal, +}: Props) { + return ( + + + {showVerificationStyles ? ( + + ) : ( + + )} +
+ {showVerificationStyles ? 'Verified' : 'Documented'} + + {isUserAssigned && ( + <> + + {!!openFormModal && ( + View and edit responses + )} + + )} +
+
+
+ ); +} diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/FormInfo.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/FormInfo.tsx new file mode 100644 index 00000000000000..681555a919b636 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/FormInfo.tsx @@ -0,0 +1,48 @@ +import React from 'react'; +import { useEntityData } from '../../../../EntityContext'; +import useGetPromptInfo from './useGetPromptInfo'; +import { isVerificationComplete, shouldShowVerificationInfo } from './utils'; +import CompletedView from './CompletedView'; +import IncompleteView from './IncompleteView'; +import useIsUserAssigned from './useIsUserAssigned'; + +interface Props { + formUrn?: string; + shouldDisplayBackground?: boolean; + openFormModal?: () => void; +} + +export default function FormInfo({ formUrn, shouldDisplayBackground, openFormModal }: Props) { + const { entityData } = useEntityData(); + const { numRequiredPromptsRemaining, numOptionalPromptsRemaining } = useGetPromptInfo(formUrn); + const showVerificationInfo = shouldShowVerificationInfo(entityData, formUrn); + const isUserAssigned = useIsUserAssigned(); + const allRequiredPromptsAreComplete = numRequiredPromptsRemaining === 0; + + const shouldShowCompletedView = showVerificationInfo + ? allRequiredPromptsAreComplete && isVerificationComplete(entityData, formUrn) + : allRequiredPromptsAreComplete; + + if (shouldShowCompletedView) { + return ( + + ); + } + + return ( + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/IncompleteView.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/IncompleteView.tsx new file mode 100644 index 00000000000000..b08e1baec5f54a --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/IncompleteView.tsx @@ -0,0 +1,65 @@ +import { Button } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; +import PurpleVerificationLogo from '../../../../../../../images/verificationPurple.svg?react'; +import GrayVerificationIcon from '../../../../../../../images/verificationWarningGray.svg?react'; +import { CTAWrapper, FlexWrapper, StyledIcon, StyledReadFilled, Title } from './components'; +import OptionalPromptsRemaining from './OptionalPromptsRemaining'; +import RequiredPromptsRemaining from './RequiredPromptsRemaining'; + +const StyledButton = styled(Button)` + width: 100%; + margin-top: 12px; + font-size: 14px; + display: flex; + align-items: center; + justify-content: center; +`; + +interface Props { + showVerificationStyles: boolean; + numOptionalPromptsRemaining: number; + numRequiredPromptsRemaining: number; + isUserAssigned: boolean; + openFormModal?: () => void; +} + +export default function IncompleteView({ + showVerificationStyles, + numOptionalPromptsRemaining, + numRequiredPromptsRemaining, + isUserAssigned, + openFormModal, +}: Props) { + return ( + + + {isUserAssigned && ( + <> + {showVerificationStyles ? ( + + ) : ( + + )} + + )} + {!isUserAssigned && } +
+ Awaiting {showVerificationStyles ? 'Verification' : 'Documentation'} + {isUserAssigned && ( + <> + You are being asked to complete a set of requirements for this entity. + + + + )} +
+
+ {!!openFormModal && isUserAssigned && ( + + {showVerificationStyles ? 'Complete Verification' : 'Complete Documentation'} + + )} +
+ ); +} diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/OptionalPromptsRemaining.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/OptionalPromptsRemaining.tsx new file mode 100644 index 00000000000000..3198453ce467a1 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/OptionalPromptsRemaining.tsx @@ -0,0 +1,23 @@ +import React from 'react'; +import styled from 'styled-components'; +import { pluralize } from '../../../../../../shared/textUtil'; +import { ANTD_GRAY_V2 } from '../../../../constants'; + +const OptionalPromptsWrapper = styled.div` + color: ${ANTD_GRAY_V2[8]}; + margin-top: 4px; +`; + +interface Props { + numRemaining: number; +} + +export default function OptionalPromptsRemaining({ numRemaining }: Props) { + if (numRemaining <= 0) return null; + + return ( + + {numRemaining} additional {pluralize(numRemaining, 'question', 's')} remaining + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/RequiredPromptsRemaining.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/RequiredPromptsRemaining.tsx new file mode 100644 index 00000000000000..e275b2d75146ac --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/RequiredPromptsRemaining.tsx @@ -0,0 +1,15 @@ +import React from 'react'; +import { pluralize } from '../../../../../../shared/textUtil'; +import { SubTitle } from './components'; + +interface Props { + numRemaining: number; +} + +export default function RequiredPromptsRemaining({ numRemaining }: Props) { + return ( + + {numRemaining} required {pluralize(numRemaining, 'question', 's')} remaining + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/SidebarFormInfoWrapper.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/SidebarFormInfoWrapper.tsx new file mode 100644 index 00000000000000..9340a7f51d537d --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/SidebarFormInfoWrapper.tsx @@ -0,0 +1,56 @@ +import React, { useState } from 'react'; +import styled from 'styled-components'; +import EntityFormModal from '../../../../entityForm/EntityFormModal'; +import FormInfo from './FormInfo'; +import { StyledDivider } from './components'; +import { useEntityData } from '../../../../EntityContext'; +import { getFormAssociations } from './utils'; +import FormSelectionModal from '../../../../entityForm/FormSelectionModal/FormSelectionModal'; + +const FormInfoWrapper = styled.div` + margin-top: 16px; +`; + +export default function SidebarFormInfoWrapper() { + const { entityData } = useEntityData(); + const [isFormSelectionModalVisible, setIsFormSelectionModalVisible] = useState(false); + const [isFormVisible, setIsFormVisible] = useState(false); + const [selectedFormUrn, setSelectedFormUrn] = useState(null); + const formAssociations = getFormAssociations(entityData); + + if (!formAssociations.length) return null; + + function openFormModal() { + if (formAssociations.length === 1) { + setSelectedFormUrn(formAssociations[0].form.urn); + setIsFormVisible(true); + } else { + setIsFormSelectionModalVisible(true); + } + } + + function selectFormUrn(urn: string) { + setSelectedFormUrn(urn); + setIsFormVisible(true); + setIsFormSelectionModalVisible(false); + } + + return ( + <> + + + + + setIsFormVisible(false)} + /> + 
setIsFormSelectionModalVisible(false)} + selectFormUrn={selectFormUrn} + /> + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/VerificationAuditStamp.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/VerificationAuditStamp.tsx new file mode 100644 index 00000000000000..f4373632418f42 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/VerificationAuditStamp.tsx @@ -0,0 +1,26 @@ +import dayjs from 'dayjs'; +import React from 'react'; +import { useEntityRegistry } from '../../../../../../useEntityRegistry'; +import { getVerificationAuditStamp } from './utils'; +import { useEntityData } from '../../../../EntityContext'; + +interface Props { + formUrn?: string; +} + +export default function VerificationAuditStamp({ formUrn }: Props) { + const entityRegistry = useEntityRegistry(); + const { entityData } = useEntityData(); + const verifiedAuditStamp = getVerificationAuditStamp(entityData, formUrn); + const verifiedTimestamp = verifiedAuditStamp?.time; + const verifiedActor = verifiedAuditStamp?.actor; + + if (!verifiedTimestamp) return null; + + return ( +
+ On {dayjs(verifiedTimestamp).format('ll')}{' '} + {verifiedActor && <>by {entityRegistry.getDisplayName(verifiedActor.type, verifiedActor)}} +
+ ); +} diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/__tests__/utils.test.ts b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/__tests__/utils.test.ts new file mode 100644 index 00000000000000..7898befaf7ee3c --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/__tests__/utils.test.ts @@ -0,0 +1,187 @@ +import { FormAssociation, FormPrompt } from '../../../../../../../../types.generated'; +import { mockEntityData, mockEntityDataAllVerified } from '../../../../../entityForm/mocks'; +import { isAssignedToForm } from '../useIsUserAssigned'; +import { + getEntityPromptsInfo, + getFieldPromptsInfo, + getFormAssociations, + getNumEntityPromptsRemaining, + getNumPromptsCompletedForField, + getNumSchemaFieldPromptsRemaining, + getPromptsForForm, + getVerificationAuditStamp, + isVerificationComplete, + shouldShowVerificationInfo, +} from '../utils'; + +// only looking at IDs +const prompts = [{ id: '1' }, { id: '2' }, { id: '3' }, { id: '4' }] as FormPrompt[]; + +describe('form prompt utils', () => { + it('should get the correct number of top-level prompts remaining', () => { + const numPromptsRemaining = getNumEntityPromptsRemaining(prompts, mockEntityData); + expect(numPromptsRemaining).toBe(2); + }); + + // if there are 2 top level prompts for schema fields, 8 fields in the schema, then there are 16 total schema-field prompts + // there are 5 completed prompts in the mock data, should have 11 remaining + it('should get the correct number of field-level prompts remaining', () => { + const fieldFormPrompts = [{ id: '1' }, { id: '2' }] as FormPrompt[]; + const numPromptsRemaining = getNumSchemaFieldPromptsRemaining(mockEntityData, fieldFormPrompts, 8); + expect(numPromptsRemaining).toBe(11); + }); + + it('should get the correct number of field-level prompts remaining given a form urn', () => { + const fieldFormPrompts = [{ id: '1' }, { id: '2' }] as FormPrompt[]; + const numPromptsRemaining = getNumSchemaFieldPromptsRemaining( + mockEntityData, + fieldFormPrompts, + 8, + 'urn:li:form:1', + ); + expect(numPromptsRemaining).toBe(11); + }); + + it('should get the correct number of field-level prompts remaining given a form urn with no field level prompts completed', () => { + const fieldFormPrompts = [{ id: '3' }] as FormPrompt[]; + const numPromptsRemaining = getNumSchemaFieldPromptsRemaining( + mockEntityData, + fieldFormPrompts, + 8, + 'urn:li:form:2', + ); + // none are completed in this form, with only 1 schema field prompt with 8 schema fields, so all 8 should be remaining + expect(numPromptsRemaining).toBe(8); + }); + + it('should get the numer of completed prompts for a given schema field in incompletePrompts', () => { + const numCompleted = getNumPromptsCompletedForField('test2', mockEntityData, 'urn:li:form:1'); + expect(numCompleted).toBe(1); + }); + + it('should get the numer of completed prompts for a given schema field in completedPrompts and incompletePrompts', () => { + const numCompleted = getNumPromptsCompletedForField('test3', mockEntityData, 'urn:li:form:1'); + expect(numCompleted).toBe(2); + }); + + it('should get the prompts for a given form urn correctly', () => { + const promptsForForm = getPromptsForForm('urn:li:form:1', mockEntityData); + expect(promptsForForm.length).toBe(2); + expect(promptsForForm.map((p) => p.id)).toMatchObject(['1', '2']); + }); + + it('should get information for entity specific prompts', () => { + const promptsForForm = 
getPromptsForForm('urn:li:form:2', mockEntityData); + const { entityPrompts, numOptionalEntityPromptsRemaining, numRequiredEntityPromptsRemaining } = + getEntityPromptsInfo(promptsForForm, mockEntityData); + + expect(entityPrompts.length).toBe(2); + expect(entityPrompts.map((p) => p.id)).toMatchObject(['3', '5']); + expect(numOptionalEntityPromptsRemaining).toBe(1); + expect(numRequiredEntityPromptsRemaining).toBe(0); + }); + + it('should get information for field specific prompts', () => { + const promptsForForm = getPromptsForForm('urn:li:form:1', mockEntityData); + const { fieldPrompts, numOptionalFieldPromptsRemaining, numRequiredFieldPromptsRemaining } = + getFieldPromptsInfo(promptsForForm, mockEntityData, 8, 'urn:li:form:1'); + + expect(fieldPrompts.length).toBe(2); + expect(fieldPrompts.map((p) => p.id)).toMatchObject(['1', '2']); + expect(numOptionalFieldPromptsRemaining).toBe(11); + expect(numRequiredFieldPromptsRemaining).toBe(0); + }); + + it('should get all form associations for an entity', () => { + const formAssociations = getFormAssociations(mockEntityData); + expect(formAssociations.length).toBe(3); + expect(formAssociations.map((f) => f.form.urn)).toMatchObject([ + 'urn:li:form:1', + 'urn:li:form:2', + 'urn:li:form:3', + ]); + }); +}); + +describe('useIsUserAssigned utils tests', () => { + it('should return true if user is an owner and the form is assigned to owners', () => { + const isAssigned = isAssignedToForm(mockEntityData.forms?.incompleteForms[0] as FormAssociation, true); + expect(isAssigned).toBe(true); + }); + + it('should return false if user is not an owner and the form is assigned to owners', () => { + const isAssigned = isAssignedToForm(mockEntityData.forms?.incompleteForms[0] as FormAssociation, false); + expect(isAssigned).toBe(false); + }); + + it('should return true if the user is explicitly assigned', () => { + const isAssigned = isAssignedToForm(mockEntityData.forms?.completedForms[0] as FormAssociation, false); + expect(isAssigned).toBe(true); + }); +}); + +describe('shouldShowVerificationInfo', () => { + it('should return true if a form is supplied that is a verification form', () => { + const showVerificationInfo = shouldShowVerificationInfo(mockEntityData, 'urn:li:form:1'); + expect(showVerificationInfo).toBe(true); + }); + + it('should return false if a form is supplied that is not a verification form', () => { + const isAssigned = shouldShowVerificationInfo(mockEntityData, 'urn:li:form:3'); + expect(isAssigned).toBe(false); + }); + + it('should return true if no formUrn is supplied and there is a verification form', () => { + const isAssigned = shouldShowVerificationInfo(mockEntityData); + expect(isAssigned).toBe(true); + }); +}); + +describe('getVerificationAuditStamp', () => { + it('should return the audit stamp for a given form', () => { + const auditStamp = getVerificationAuditStamp(mockEntityData, 'urn:li:form:2'); + expect(auditStamp).toMatchObject({ + actor: { + urn: 'urn:li:corpuser:test', + }, + time: 100, + }); + }); + + it('should return undefined for audit stamp for a given form with no verifications', () => { + const auditStamp = getVerificationAuditStamp(mockEntityData, 'urn:li:form:1'); + expect(auditStamp).toBe(null); + }); + + it('should return the most recent audit stamp when not given form', () => { + const auditStamp = getVerificationAuditStamp(mockEntityData); + expect(auditStamp).toMatchObject({ + actor: { + urn: 'urn:li:corpuser:test', + }, + time: 101, + }); + }); +}); + +describe('isVerificationComplete', () => { + 
it('should return true if the given form is verified', () => { + const isComplete = isVerificationComplete(mockEntityData, 'urn:li:form:2'); + expect(isComplete).toBe(true); + }); + + it('should return false if the given form is not verified', () => { + const isComplete = isVerificationComplete(mockEntityData, 'urn:li:form:1'); + expect(isComplete).toBe(false); + }); + + it('should return false if no form is given and not all verification forms are complete', () => { + const isComplete = isVerificationComplete(mockEntityData); + expect(isComplete).toBe(false); + }); + + it('should return true if no form is given and all verification forms are complete', () => { + const isComplete = isVerificationComplete(mockEntityDataAllVerified); + expect(isComplete).toBe(true); + }); +}); diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/components.ts b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/components.ts new file mode 100644 index 00000000000000..3c6aef5517d3db --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/components.ts @@ -0,0 +1,60 @@ +import { ReadFilled, ReadOutlined } from '@ant-design/icons'; +import Icon from '@ant-design/icons/lib/components/Icon'; +import { Divider } from 'antd'; +import styled from 'styled-components'; + +export const FlexWrapper = styled.div` + display: flex; + line-height: 18px; +`; + +export const StyledIcon = styled(Icon)<{ addLineHeight?: boolean }>` + font-size: 18px; + margin-right: 8px; + ${(props) => props.addLineHeight && `line-height: 24px;`} +`; + +export const SubTitle = styled.div<{ addMargin?: boolean }>` + font-weight: 600; + margin-bottom: 4px; + ${(props) => props.addMargin && `margin-top: 8px;`} +`; + +export const Title = styled.div` + font-size: 16px; + font-weight: 600; + margin-bottom: 4px; +`; + +export const StyledDivider = styled(Divider)` + margin: 12px 0 0 0; +`; + +export const StyledReadOutlined = styled(ReadOutlined)<{ addLineHeight?: boolean }>` + margin-right: 8px; + height: 13.72px; + width: 17.5px; + color: #373d44; + ${(props) => props.addLineHeight && `line-height: 24px;`} +`; + +export const StyledReadFilled = styled(ReadFilled)<{ addLineHeight?: boolean }>` + margin-right: 8px; + height: 13.72px; + width: 17.5px; + color: #7532a4; + ${(props) => props.addLineHeight && `line-height: 24px;`} +`; + +export const CTAWrapper = styled.div<{ shouldDisplayBackground?: boolean }>` + color: #373d44; + font-size: 14px; + ${(props) => + props.shouldDisplayBackground && + ` + border-radius: 8px; + padding: 16px; + background-color: #f9f0ff; + border: 1px solid #8338b8; + `} +`; diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/useGetPromptInfo.ts b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/useGetPromptInfo.ts new file mode 100644 index 00000000000000..252cefb9f6f4f2 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/useGetPromptInfo.ts @@ -0,0 +1,38 @@ +import { useMemo } from 'react'; +import { useEntityData } from '../../../../EntityContext'; +import { useGetEntityWithSchema } from '../../../../tabs/Dataset/Schema/useGetEntitySchema'; +import { getAllPrompts, getEntityPromptsInfo, getFieldPromptsInfo, getPromptsForForm } from './utils'; + +export default function useGetPromptInfo(formUrn?: string) { + const { entityData } = useEntityData(); + const { entityWithSchema } = useGetEntityWithSchema(); + const 
prompts = useMemo( + () => (formUrn ? getPromptsForForm(formUrn, entityData) : getAllPrompts(entityData)), + [formUrn, entityData], + ); + + const { + entityPrompts, + numRequiredEntityPromptsRemaining, + numOptionalEntityPromptsRemaining, + requiredEntityPrompts, + } = getEntityPromptsInfo(prompts, entityData); + const { fieldPrompts, numRequiredFieldPromptsRemaining, numOptionalFieldPromptsRemaining, requiredFieldPrompts } = + getFieldPromptsInfo(prompts, entityData, entityWithSchema?.schemaMetadata?.fields.length || 0, formUrn); + // Multiply number of field prompts by number of schema fields for total number of schema field prompts + const totalRequiredSchemaFieldPrompts = + (entityWithSchema?.schemaMetadata?.fields.length || 0) * requiredFieldPrompts.length; + + const numRequiredPromptsRemaining = numRequiredEntityPromptsRemaining + numRequiredFieldPromptsRemaining; + const numOptionalPromptsRemaining = numOptionalEntityPromptsRemaining + numOptionalFieldPromptsRemaining; + + return { + prompts, + fieldPrompts, + totalRequiredSchemaFieldPrompts, + entityPrompts, + numRequiredPromptsRemaining, + numOptionalPromptsRemaining, + requiredEntityPrompts, + }; +} diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/useIsUserAssigned.ts b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/useIsUserAssigned.ts new file mode 100644 index 00000000000000..067513bfef03d0 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/useIsUserAssigned.ts @@ -0,0 +1,24 @@ +import { FormAssociation } from '../../../../../../../types.generated'; +import { useUserContext } from '../../../../../../context/useUserContext'; +import { useEntityData } from '../../../../EntityContext'; +import { getFormAssociations } from './utils'; + +export function isAssignedToForm(formAssociation: FormAssociation, isUserAnOwner: boolean) { + const { isAssignedToMe, owners: isAssignedToOwners } = formAssociation.form.info.actors; + return isAssignedToMe || (isAssignedToOwners && isUserAnOwner); +} + +// returns true if this user is assigned (explicitly or by ownership) to a given form or any forms on this entity +export default function useIsUserAssigned(formUrn?: string) { + const { entityData } = useEntityData(); + const owners = entityData?.ownership?.owners; + const { user: loggedInUser } = useUserContext(); + const isUserAnOwner = !!owners?.find((owner) => owner.owner.urn === loggedInUser?.urn); + + const formAssociations = getFormAssociations(entityData); + if (formUrn) { + const formAssociation = formAssociations.find((association) => association.form.urn === formUrn); + return formAssociation ? 
isAssignedToForm(formAssociation, isUserAnOwner) : false; + } + return formAssociations.some((formAssociation) => isAssignedToForm(formAssociation, isUserAnOwner)); +} diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/utils.ts b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/utils.ts new file mode 100644 index 00000000000000..48206b411374c4 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/FormInfo/utils.ts @@ -0,0 +1,324 @@ +import { Maybe } from 'graphql/jsutils/Maybe'; +import { + FieldFormPromptAssociation, + FormAssociation, + FormPrompt, + FormPromptAssociation, + FormType, + ResolvedAuditStamp, + SchemaField, +} from '../../../../../../../types.generated'; +import { SCHEMA_FIELD_PROMPT_TYPES } from '../../../../entityForm/constants'; +import { GenericEntityProperties } from '../../../../types'; + +export function getFormAssociations(entityData: GenericEntityProperties | null) { + return [...(entityData?.forms?.incompleteForms || []), ...(entityData?.forms?.completedForms || [])]; +} + +export function getFormAssociation(formUrn: string, entityData: GenericEntityProperties | null) { + return ( + entityData?.forms?.incompleteForms?.find((formAssoc) => formAssoc.form.urn === formUrn) || + entityData?.forms?.completedForms?.find((formAssoc) => formAssoc.form.urn === formUrn) + ); +} + +/* + * For a given prompt, get all of the completed field prompts. + * Takes in an optional set of prompt IDs, if this exists, only return the + * completed field prompts for this prompt if this prompt ID is in the set. + */ +function getCompletedFieldPromptsFromPrompt(prompt: FormPromptAssociation, relevantFieldFormPromptIds?: Set) { + if (relevantFieldFormPromptIds && relevantFieldFormPromptIds.has(prompt.id)) { + return prompt.fieldAssociations?.completedFieldPrompts || []; + } + if (!relevantFieldFormPromptIds) { + return prompt.fieldAssociations?.completedFieldPrompts || []; + } + return []; +} + +/* + * For a given form, loop over all prompts and retrieve completedFieldPrompts from each. + * Note: we pass in an optional set of prompt IDs to choose from in order to get completed + * field prompts for a certain set of entity prompts id we choose. + */ +export function getCompletedFieldPromptsFromForm( + formAssociation: FormAssociation, + relevantFieldFormPromptIds?: Set, +) { + let completedFieldPromptAssociations: FieldFormPromptAssociation[] = []; + formAssociation.completedPrompts?.forEach((completedPrompt) => { + completedFieldPromptAssociations = completedFieldPromptAssociations.concat( + getCompletedFieldPromptsFromPrompt(completedPrompt, relevantFieldFormPromptIds), + ); + }); + formAssociation.incompletePrompts?.forEach((incompletPrompt) => { + completedFieldPromptAssociations = completedFieldPromptAssociations.concat( + getCompletedFieldPromptsFromPrompt(incompletPrompt, relevantFieldFormPromptIds), + ); + }); + return completedFieldPromptAssociations; +} + +/* + * Gets a list of the completed field prompt associations which live as children to top + * level prompt associations for each schema field. + * We need to loop over every prompt both completed and incomplete, form complete and incomplete forms. + * For each prompt, get their list of completedFieldPrompts + * Takes in an optional list of relevant prompt IDs to filter results down. 
+ */ +export function getCompletedFieldPromptAssociations( + entityData: GenericEntityProperties | null, + relevantFieldFormPromptIds?: Set, +) { + let completedFieldPromptAssociations: FieldFormPromptAssociation[] = []; + + const forms = entityData?.forms; + forms?.completedForms?.forEach((formAssociation) => { + completedFieldPromptAssociations = completedFieldPromptAssociations.concat( + getCompletedFieldPromptsFromForm(formAssociation, relevantFieldFormPromptIds), + ); + }); + forms?.incompleteForms?.forEach((formAssociation) => { + completedFieldPromptAssociations = completedFieldPromptAssociations.concat( + getCompletedFieldPromptsFromForm(formAssociation, relevantFieldFormPromptIds), + ); + }); + + return completedFieldPromptAssociations; +} + +/* + * For a given form, gets a list of the completed field prompt associations which live + * as children to top level prompt associations for each schema field. + */ +export function getCompletedFieldPromptAssociationsForForm( + formUrn: string, + entityData: GenericEntityProperties | null, + relevantFieldFormPromptIds?: Set, +) { + const formAssociation = getFormAssociation(formUrn, entityData); + return formAssociation ? getCompletedFieldPromptsFromForm(formAssociation, relevantFieldFormPromptIds) : []; +} + +export function getNumPromptsCompletedForField( + fieldPath: string, + entityData: GenericEntityProperties | null, + formUrn: string, +) { + const completedFieldPromptAssociations = getCompletedFieldPromptAssociationsForForm(formUrn, entityData); + return completedFieldPromptAssociations.filter((association) => association.fieldPath === fieldPath).length; +} + +/* + * Returns the number of schema field prompts not completed yet. + * The total number of schema field prompts equals the top level number of field prompts + * on the form multiplied by the number of schema fields. + * Optionally takes in a formUrn to look at one specific form or all forms + */ +export function getNumSchemaFieldPromptsRemaining( + entityData: GenericEntityProperties | null, + fieldFormPrompts: FormPrompt[], + numSchemaFields: number, + formUrn?: string, +) { + const numFieldPromptsAvailable = numSchemaFields * fieldFormPrompts.length; + // we pass in either required or optional fieldFormPrompts that we care about in this method, need to check against these + const relevantFieldFormPromptIds = new Set(fieldFormPrompts.map((prompt) => prompt.id)); + const completedFieldPromptAssociations = formUrn + ? getCompletedFieldPromptAssociationsForForm(formUrn, entityData, relevantFieldFormPromptIds) + : getCompletedFieldPromptAssociations(entityData, relevantFieldFormPromptIds); + + return numFieldPromptsAvailable - completedFieldPromptAssociations.length; +} + +// Get completed prompts from both complete and incomplete forms for this entity +export function getCompletedPrompts(entityData: GenericEntityProperties | null) { + const forms = entityData?.forms; + let completedPrompts = + forms?.incompleteForms?.flatMap((form) => (form.completedPrompts ? form.completedPrompts : [])) || []; + completedPrompts = completedPrompts.concat( + forms?.completedForms?.flatMap((form) => (form.completedPrompts ? 
form.completedPrompts : [])) || [], + ); + console.log('entityData', entityData); + console.log('getCompletedPrompts', completedPrompts); + return completedPrompts; +} + +// Get incomplete prompts from both complete and incomplete forms for this entity +export function getIncompletePrompts(entityData: GenericEntityProperties | null) { + const forms = entityData?.forms; + let incompletePrompts = + forms?.incompleteForms?.flatMap((form) => (form.incompletePrompts ? form.incompletePrompts : [])) || []; + incompletePrompts = incompletePrompts.concat( + forms?.completedForms?.flatMap((form) => (form.incompletePrompts ? form.incompletePrompts : [])) || [], + ); + return incompletePrompts; +} + +export function isPromptComplete(prompt: FormPrompt, completedPrompts: FormPromptAssociation[]) { + return !!completedPrompts.find((completedPrompt) => completedPrompt.id === prompt.id); +} + +export function findCompletedFieldPrompt(fieldPrompt?: SchemaField, promptAssociation?: FormPromptAssociation) { + return promptAssociation?.fieldAssociations?.completedFieldPrompts?.find( + (fieldPath) => fieldPath.fieldPath === fieldPrompt?.fieldPath, + ); +} + +export function isFieldPromptComplete(fieldPrompt, promptAssociation) { + return !!findCompletedFieldPrompt(fieldPrompt, promptAssociation); +} + +// For every prompt provided, check if it's in our list of completed prompts and return number prompts not completed +export function getNumEntityPromptsRemaining(entityPrompts: FormPrompt[], entityData: GenericEntityProperties | null) { + const completedPrompts = getCompletedPrompts(entityData); + let numPromptsRemaining = 0; + + entityPrompts.forEach((prompt) => { + if (prompt && !isPromptComplete(prompt, completedPrompts)) { + numPromptsRemaining += 1; + } + }); + + return numPromptsRemaining; +} + +// Get prompts from both complete and incomplete forms +export function getAllPrompts(entityData: GenericEntityProperties | null) { + let prompts = entityData?.forms?.incompleteForms?.flatMap((form) => form.form.info.prompts) || []; + prompts = prompts.concat(entityData?.forms?.completedForms?.flatMap((form) => form.form.info.prompts) || []); + return prompts; +} + +// Find a specific prompt association from both complete and incomplete prompts +export function findPromptAssociation(prompt: FormPrompt, allPrompts: Array) { + return allPrompts?.find((myprompt) => myprompt.id === prompt.id); +} + +// Get the prompts for a given form +export function getPromptsForForm(formUrn: string, entityData: GenericEntityProperties | null) { + const formAssociation = getFormAssociation(formUrn, entityData); + return formAssociation?.form.info.prompts || []; +} + +/* + * Gets information for entity level prompts + */ +export function getEntityPromptsInfo(prompts: FormPrompt[], entityData: GenericEntityProperties | null) { + const entityPrompts = prompts.filter((prompt) => !SCHEMA_FIELD_PROMPT_TYPES.includes(prompt.type)); + const requiredEntityPrompts = entityPrompts.filter((prompt) => prompt.required); + const optionalEntityPrompts = entityPrompts.filter((prompt) => !prompt.required); + + const numRequiredEntityPromptsRemaining = getNumEntityPromptsRemaining(requiredEntityPrompts, entityData); + const numOptionalEntityPromptsRemaining = getNumEntityPromptsRemaining(optionalEntityPrompts, entityData); + + return { + entityPrompts, + numRequiredEntityPromptsRemaining, + numOptionalEntityPromptsRemaining, + requiredEntityPrompts, + }; +} + +/* + * Gets information for schema field level prompts + */ +export function 
getFieldPromptsInfo( + prompts: FormPrompt[], + entityData: GenericEntityProperties | null, + numSchemaFields: number, + formUrn?: string, +) { + const fieldPrompts = prompts.filter((prompt) => SCHEMA_FIELD_PROMPT_TYPES.includes(prompt.type)); + const requiredFieldPrompts = fieldPrompts.filter((prompt) => prompt.required); + const optionalFieldPrompts = fieldPrompts.filter((prompt) => !prompt.required); + + const numRequiredFieldPromptsRemaining = getNumSchemaFieldPromptsRemaining( + entityData, + requiredFieldPrompts, + numSchemaFields, + formUrn, + ); + const numOptionalFieldPromptsRemaining = getNumSchemaFieldPromptsRemaining( + entityData, + optionalFieldPrompts, + numSchemaFields, + formUrn, + ); + + return { fieldPrompts, numRequiredFieldPromptsRemaining, numOptionalFieldPromptsRemaining, requiredFieldPrompts }; +} + +export function getFormVerification(formUrn: string, entityData: GenericEntityProperties | null) { + return entityData?.forms?.verifications?.find((verification) => verification.form.urn === formUrn); +} + +export function getVerificationForms(entityData: GenericEntityProperties | null) { + const formAssociations = getFormAssociations(entityData); + return formAssociations.filter((formAssociation) => formAssociation.form.info.type === FormType.Verification); +} + +export function areAllFormsVerified(formAssociations: FormAssociation[], entityData: GenericEntityProperties | null) { + return formAssociations.every((formAssociation) => !!getFormVerification(formAssociation.form.urn, entityData)); +} + +/* + * If a form urn is supplied, return true if that form is verified. + * If no form is supplied, return true if all verification type forms are verified. + * If there are no verification type forms or any are missing verification, return false. + */ +export function isVerificationComplete(entityData: GenericEntityProperties | null, formUrn?: string) { + if (formUrn) { + return !!getFormVerification(formUrn, entityData); + } + const verificationForms = getVerificationForms(entityData); + if (verificationForms.length) { + return areAllFormsVerified(verificationForms, entityData); + } + return false; +} + +export function isFormVerificationType(entityData: GenericEntityProperties | null, formUrn: string) { + const formAssociation = getFormAssociation(formUrn, entityData); + return formAssociation?.form.info.type === FormType.Verification; +} + +/* + * If given a single form we should show verification copy and styles if it is of type verification. + * If no formUrn is supplied, return true if any of our multiple forms are of type verification. + */ +export function shouldShowVerificationInfo(entityData: GenericEntityProperties | null, formUrn?: string) { + if (formUrn) { + return isFormVerificationType(entityData, formUrn); + } + return getVerificationForms(entityData).length > 0; +} + +function getMostRecentVerificationAuditStamp(entityData: GenericEntityProperties | null) { + let mostRecentTimestamp: Maybe = null; + entityData?.forms?.verifications?.forEach((verification) => { + if (mostRecentTimestamp === null || (verification.lastModified?.time || 0) > (mostRecentTimestamp?.time || 0)) { + mostRecentTimestamp = verification.lastModified; + } + }); + return mostRecentTimestamp; +} + +/* + * If given one form, return the verification lastModified for it. 
Otherwise, find the most + * recently completed verification time stamp from any of the forms on this entity + */ +export function getVerificationAuditStamp(entityData: GenericEntityProperties | null, formUrn?: string) { + if (formUrn) { + return getFormVerification(formUrn, entityData)?.lastModified || null; + } + return getMostRecentVerificationAuditStamp(entityData); +} + +export function getBulkByQuestionPrompts(formUrn: string, entityData: GenericEntityProperties | null) { + const formAssociation = getFormAssociation(formUrn, entityData); + return ( + formAssociation?.form.info.prompts.filter((prompt) => !SCHEMA_FIELD_PROMPT_TYPES.includes(prompt.type)) || [] + ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/EntityForm.tsx b/datahub-web-react/src/app/entity/shared/entityForm/EntityForm.tsx new file mode 100644 index 00000000000000..136bbabb61e0b4 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/EntityForm.tsx @@ -0,0 +1,15 @@ +import React from 'react'; +import FormByEntity from './FormByEntity'; +import { FormView, useEntityFormContext } from './EntityFormContext'; + +interface Props { + formUrn: string; +} + +export default function EntityForm({ formUrn }: Props) { + const { formView } = useEntityFormContext(); + + if (formView === FormView.BY_ENTITY) return ; + + return null; +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/EntityFormContext.tsx b/datahub-web-react/src/app/entity/shared/entityForm/EntityFormContext.tsx new file mode 100644 index 00000000000000..aa5e1e3c4a8fed --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/EntityFormContext.tsx @@ -0,0 +1,54 @@ +import React, { useContext } from 'react'; +import { Entity } from '../../../../types.generated'; +import { EntityAndType, GenericEntityProperties } from '../types'; + +export enum FormView { + BY_ENTITY, +} + +export type EntityFormContextType = { + formUrn: string; + isInFormContext: boolean; + entityData: GenericEntityProperties | undefined; + loading: boolean; + selectedEntity: Entity | undefined; + selectedPromptId: string | null; + formView: FormView; + selectedEntities: EntityAndType[]; + setSelectedEntities: (entities: EntityAndType[]) => void; + setFormView: (formView: FormView) => void; + refetch: () => Promise; + setSelectedEntity: (sortOption: Entity) => void; + setSelectedPromptId: (promptId: string) => void; + shouldRefetchSearchResults: boolean; + setShouldRefetchSearchResults: (shouldRefetch: boolean) => void; + isVerificationType: boolean; +}; + +export const DEFAULT_CONTEXT = { + formUrn: '', + isInFormContext: false, + entityData: undefined, + loading: false, + refetch: () => Promise.resolve({}), + selectedEntity: undefined, + setSelectedEntity: (_: Entity) => null, + selectedEntities: [], + setSelectedEntities: (_: EntityAndType[]) => null, + formView: FormView.BY_ENTITY, + setFormView: (_: FormView) => null, + selectedPromptId: null, + setSelectedPromptId: (_: string) => null, + shouldRefetchSearchResults: false, + setShouldRefetchSearchResults: () => null, + isVerificationType: true, +}; + +export const EntityFormContext = React.createContext(DEFAULT_CONTEXT); + +export function useEntityFormContext() { + const context = useContext(EntityFormContext); + if (context === null) + throw new Error(`${useEntityFormContext.name} must be used under a EntityFormContextProvider`); + return context; +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/EntityFormContextProvider.tsx 
b/datahub-web-react/src/app/entity/shared/entityForm/EntityFormContextProvider.tsx new file mode 100644 index 00000000000000..41d7fcf4bd8998 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/EntityFormContextProvider.tsx @@ -0,0 +1,78 @@ +import React, { useEffect, useState } from 'react'; +import { EntityFormContext, FormView } from './EntityFormContext'; +import { useEntityContext } from '../EntityContext'; +import { Entity } from '../../../../types.generated'; +import { useGetDatasetQuery } from '../../../../graphql/dataset.generated'; +import { EntityAndType, GenericEntityProperties } from '../types'; +import { getFormAssociation, isFormVerificationType } from '../containers/profile/sidebar/FormInfo/utils'; +import usePrevious from '../../../shared/usePrevious'; +import { SCHEMA_FIELD_PROMPT_TYPES } from './constants'; + +interface Props { + children: React.ReactNode; + formUrn: string; +} + +export default function EntityFormContextProvider({ children, formUrn }: Props) { + const { entityData, refetch: refetchEntityProfile, loading: profileLoading } = useEntityContext(); + const formAssociation = getFormAssociation(formUrn, entityData); + const initialPromptId = + formAssociation?.form.info.prompts.filter((prompt) => !SCHEMA_FIELD_PROMPT_TYPES.includes(prompt.type))[0] + ?.id || null; + const isVerificationType = isFormVerificationType(entityData, formUrn); + const [formView, setFormView] = useState(FormView.BY_ENTITY); + const [selectedEntity, setSelectedEntity] = useState(entityData as Entity); + const [selectedPromptId, setSelectedPromptId] = useState(initialPromptId); + const [selectedEntities, setSelectedEntities] = useState([]); + const [shouldRefetchSearchResults, setShouldRefetchSearchResults] = useState(false); + + useEffect(() => { + if (!selectedPromptId && formAssociation) { + setSelectedPromptId(initialPromptId); + } + }, [selectedPromptId, formAssociation, initialPromptId]); + + const previousFormUrn = usePrevious(formUrn); + useEffect(() => { + if (formUrn && previousFormUrn !== formUrn) { + setFormView(FormView.BY_ENTITY); + setSelectedPromptId(initialPromptId); + } + }, [formUrn, previousFormUrn, initialPromptId]); + + const { + data: fetchedData, + refetch, + loading, + } = useGetDatasetQuery({ + variables: { urn: selectedEntity.urn }, + }); + + const isOnEntityProfilePage = selectedEntity.urn === entityData?.urn; + const selectedEntityData = isOnEntityProfilePage ? 
entityData : (fetchedData?.dataset as GenericEntityProperties); + + return ( + + {children} + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/EntityFormModal.tsx b/datahub-web-react/src/app/entity/shared/entityForm/EntityFormModal.tsx new file mode 100644 index 00000000000000..47026472c43f9c --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/EntityFormModal.tsx @@ -0,0 +1,59 @@ +import { CloseOutlined } from '@ant-design/icons'; +import React from 'react'; +import { Modal } from 'antd'; +import styled from 'styled-components'; +import EntityForm from './EntityForm'; +import FormPageHeader from './FormHeader/FormPageHeader'; +import EntityFormContextProvider from './EntityFormContextProvider'; + +const StyledModal = styled(Modal)` + &&& .ant-modal-content { + display: flex; + flex-direction: column; + height: calc(100vh); + } + + .ant-modal-header { + padding: 0; + } + + .ant-modal-body { + flex: 1; + max-height: 100%; + overflow: hidden; + padding: 0; + display: flex; + } +`; + +const StyledClose = styled(CloseOutlined)` + && { + color: white; + font-size: 24px; + margin: 18px 12px 0 0; + } +`; + +interface Props { + selectedFormUrn: string | null; + isFormVisible: boolean; + hideFormModal: () => void; +} + +export default function EntityFormModal({ selectedFormUrn, isFormVisible, hideFormModal }: Props) { + return ( + + } + closeIcon={} + style={{ top: 0, height: '100vh', minWidth: '100vw' }} + destroyOnClose + > + + + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/Form.tsx b/datahub-web-react/src/app/entity/shared/entityForm/Form.tsx new file mode 100644 index 00000000000000..bf1b093d984345 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/Form.tsx @@ -0,0 +1,100 @@ +import React from 'react'; +import styled from 'styled-components'; +import { useEntityData } from '../EntityContext'; +import { FormPrompt } from '../../../../types.generated'; +import Prompt, { PromptWrapper } from './prompts/Prompt'; +import { ANTD_GRAY_V2 } from '../constants'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { PromptSubTitle } from './prompts/StructuredPropertyPrompt/StructuredPropertyPrompt'; +import SchemaFieldPrompts from './schemaFieldPrompts/SchemaFieldPrompts'; +import useGetPromptInfo from '../containers/profile/sidebar/FormInfo/useGetPromptInfo'; +import VerificationPrompt from './prompts/VerificationPrompt'; +import useShouldShowVerificationPrompt from './useShouldShowVerificationPrompt'; +import { getFormAssociation } from '../containers/profile/sidebar/FormInfo/utils'; +import FormRequestedBy from './FormSelectionModal/FormRequestedBy'; +import useHasComponentRendered from '../../../shared/useHasComponentRendered'; +import Loading from '../../../shared/Loading'; +import { DeferredRenderComponent } from '../../../shared/DeferredRenderComponent'; +import { OnboardingTour } from '../../../onboarding/OnboardingTour'; +import { FORM_ASSET_COMPLETION } from '../../../onboarding/config/FormOnboardingConfig'; + +const TabWrapper = styled.div` + background-color: ${ANTD_GRAY_V2[1]}; + overflow: auto; + padding: 24px; + flex: 1; + max-height: 100%; +`; + +const IntroTitle = styled.div` + font-size: 20px; + font-weight: 600; +`; + +const HeaderWrapper = styled(PromptWrapper)``; + +const SubTitle = styled(PromptSubTitle)` + margin-top: 16px; +`; + +const RequestedByWrapper = styled(PromptSubTitle)` + color: ${ANTD_GRAY_V2[8]}; +`; + +interface Props { + formUrn: string; +} + +function Form({ 
formUrn }: Props) { + const entityRegistry = useEntityRegistry(); + const { entityType, entityData } = useEntityData(); + const { entityPrompts, fieldPrompts } = useGetPromptInfo(formUrn); + const shouldShowVerificationPrompt = useShouldShowVerificationPrompt(formUrn); + const { hasRendered } = useHasComponentRendered(); + + if (!hasRendered) return ; + + const formAssociation = getFormAssociation(formUrn, entityData); + const title = formAssociation?.form.info.name; + const associatedUrn = formAssociation?.associatedUrn; + const description = formAssociation?.form.info.description; + const owners = formAssociation?.form.ownership?.owners; + + return ( + + + + + {title ? <>{title} : <>{entityRegistry.getEntityName(entityType)} Requirements} + + {owners && owners.length > 0 && ( + + + + )} + {description ? ( + {description} + ) : ( + + Please fill out the following information for this {entityRegistry.getEntityName(entityType)} so + that we can keep track of the status of the asset + + )} + + {entityPrompts?.map((prompt, index) => ( + + ))} + {fieldPrompts.length > 0 && } + {shouldShowVerificationPrompt && } + + ); +} + +export default function FormContainer({ formUrn }: Props) { + return } />; +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/FormByEntity.tsx b/datahub-web-react/src/app/entity/shared/entityForm/FormByEntity.tsx new file mode 100644 index 00000000000000..23550e8fcca5f9 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/FormByEntity.tsx @@ -0,0 +1,71 @@ +import React from 'react'; +import styled from 'styled-components'; +import Form from './Form'; +import { ANTD_GRAY_V2 } from '../constants'; +import ProfileSidebar from '../containers/profile/sidebar/ProfileSidebar'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import EntityContext, { useEntityContext } from '../EntityContext'; +import EntityInfo from '../containers/profile/sidebar/EntityInfo/EntityInfo'; +import { useEntityFormContext } from './EntityFormContext'; +import ProgressBar from './ProgressBar'; +import { OnboardingTour } from '../../../onboarding/OnboardingTour'; +import { + FORM_QUESTION_VIEW_BUTTON, + WELCOME_TO_BULK_BY_ENTITY_ID, +} from '../../../onboarding/config/FormOnboardingConfig'; + +const ContentWrapper = styled.div` + background-color: ${ANTD_GRAY_V2[1]}; + max-height: 100%; + display: flex; + flex-direction: column; + width: 100%; + flex: 1; +`; + +const FlexWrapper = styled.div` + display: flex; + max-height: 100%; + overflow: auto; + width: 100%; +`; + +interface Props { + formUrn: string; +} + +export default function FormByEntity({ formUrn }: Props) { + const { selectedEntity, entityData: selectedEntityData, refetch, loading } = useEntityFormContext(); + const { entityType } = useEntityContext(); + const entityRegistry = useEntityRegistry(); + const sidebarSections = entityRegistry.getSidebarSections(selectedEntity?.type || entityType); + + return ( + {}, + refetch, + }} + > + + + + + }} + backgroundColor="white" + alignLeft + /> +
+ + + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/FormHeader/FormPageHeader.tsx b/datahub-web-react/src/app/entity/shared/entityForm/FormHeader/FormPageHeader.tsx new file mode 100644 index 00000000000000..4baa762fcf88d4 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/FormHeader/FormPageHeader.tsx @@ -0,0 +1,40 @@ +import React from 'react'; +import styled from 'styled-components'; +import AppLogoLink from '../../../../shared/AppLogoLink'; + +const Header = styled.div` + padding: 12px 24px; + background-color: black; + font-size: 24px; + display: flex; + align-items: center; + color: white; + justify-content: space-between; +`; + +const HeaderText = styled.div` + margin-left: 24px; +`; + +const StyledDivider = styled.div` + display: flex; + flex-direction: column; +`; + +const TitleWrapper = styled.div` + display: flex; + align-items: center; +`; + +export default function FormPageHeader() { + return ( + +
+ + + Complete Documentation Requests + +
+
+ ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/FormHeader/components.ts b/datahub-web-react/src/app/entity/shared/entityForm/FormHeader/components.ts new file mode 100644 index 00000000000000..c94dc0ef633003 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/FormHeader/components.ts @@ -0,0 +1,36 @@ +import { ArrowLeftOutlined, ArrowRightOutlined } from '@ant-design/icons'; +import styled from 'styled-components'; +import { ANTD_GRAY_V2 } from '../../constants'; +import BackgroundDots from '../../../../../images/background_dots.svg'; + +export const BulkNavigationWrapper = styled.div<{ $hideBackground?: boolean }>` + padding: 16px 68px 16px 24px; + background-color: ${ANTD_GRAY_V2[10]}; + display: flex; + justify-content: flex-end; + ${(props) => + !props.$hideBackground && + ` + background-image: url(${BackgroundDots}); + background-position: right; + background-repeat: no-repeat; + `} +`; + +export const NavigationWrapper = styled.div<{ isHidden: boolean }>` + font-size: 20px; + color: white; + display: flex; + flex-wrap: nowrap; + ${(props) => props.isHidden && 'opacity: 0;'} +`; + +export const ArrowLeft = styled(ArrowLeftOutlined)` + margin-right: 24px; + cursor: pointer; +`; + +export const ArrowRight = styled(ArrowRightOutlined)` + margin-left: 24px; + cursor: pointer; +`; diff --git a/datahub-web-react/src/app/entity/shared/entityForm/FormSelectionModal/FormItem.tsx b/datahub-web-react/src/app/entity/shared/entityForm/FormSelectionModal/FormItem.tsx new file mode 100644 index 00000000000000..c23fd39d8a10fe --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/FormSelectionModal/FormItem.tsx @@ -0,0 +1,101 @@ +import { Tooltip } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; +import { FormAssociation } from '../../../../../types.generated'; +import useGetPromptInfo from '../../containers/profile/sidebar/FormInfo/useGetPromptInfo'; +import { useEntityData } from '../../EntityContext'; +import { isVerificationComplete, shouldShowVerificationInfo } from '../../containers/profile/sidebar/FormInfo/utils'; +import { pluralize } from '../../../../shared/textUtil'; +import { WhiteButton } from '../../../../shared/components'; +import { ANTD_GRAY_V2 } from '../../constants'; +import useIsUserAssigned from '../../containers/profile/sidebar/FormInfo/useIsUserAssigned'; +import FormRequestedBy from './FormRequestedBy'; + +const FormItemWrapper = styled.div` + display: flex; + padding: 16px; + justify-content: space-between; +`; + +const FormName = styled.div` + font-size: 16px; + font-weight: 600; + margin-bottom: 4px; +`; + +const FormAssigner = styled.div` + font-size: 14px; + color: #373d44; + margin-top: -4px; + margin-bottom: 4px; +`; + +const OptionalText = styled.div` + color: ${ANTD_GRAY_V2[8]}; + font-weight: normal; +`; + +const CompleteWrapper = styled.div` + display: flex; + align-items: center; +`; + +const FormInfoWrapper = styled.div` + font-size: 12px; + color: #373d44; + font-weight: 600; +`; + +interface Props { + formAssociation: FormAssociation; + selectFormUrn: (urn: string) => void; +} + +export default function FormItem({ formAssociation, selectFormUrn }: Props) { + const { entityData } = useEntityData(); + const { form } = formAssociation; + const { numRequiredPromptsRemaining, numOptionalPromptsRemaining } = useGetPromptInfo(form.urn); + const allRequiredPromptsAreComplete = numRequiredPromptsRemaining === 0; + const showVerificationInfo = 
shouldShowVerificationInfo(entityData, form.urn); + const isComplete = showVerificationInfo + ? isVerificationComplete(entityData, form.urn) + : allRequiredPromptsAreComplete; + const isUserAssigned = useIsUserAssigned(form.urn); + const owners = form.ownership?.owners; + + return ( + +
+ {form.info.name} + {owners && owners.length > 0 && ( + + + + )} + + {isComplete && ( + {showVerificationInfo ? <>Verified : <>Complete} + )} + {!isComplete && ( +
+ {numRequiredPromptsRemaining} required {pluralize(numRequiredPromptsRemaining, 'response')}{' '} + remaining +
+ )} + {numOptionalPromptsRemaining > 0 && ( + + {numOptionalPromptsRemaining} optional {pluralize(numOptionalPromptsRemaining, 'response')}{' '} + remaining + + )} +
+
+ + selectFormUrn(form.urn)} disabled={!isUserAssigned}> + {isComplete && 'View'} + {!isComplete && <>{showVerificationInfo ? 'Verify' : 'Complete'}} + + +
+ ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/FormSelectionModal/FormRequestedBy.tsx b/datahub-web-react/src/app/entity/shared/entityForm/FormSelectionModal/FormRequestedBy.tsx new file mode 100644 index 00000000000000..fa4834b5a4f851 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/FormSelectionModal/FormRequestedBy.tsx @@ -0,0 +1,24 @@ +import React from 'react'; +import { Owner } from '../../../../../types.generated'; +import { useEntityRegistry } from '../../../../useEntityRegistry'; + +interface Props { + owners: Owner[]; +} + +export default function FormRequestedBy({ owners }: Props) { + const entityRegistry = useEntityRegistry(); + + return ( + <> + Requested by:{' '} + {owners.map((ownerAssoc, index) => ( + <> + {owners.length > 1 && index === owners.length - 1 && 'and '} + {entityRegistry.getDisplayName(ownerAssoc.owner.type, ownerAssoc.owner)} + {owners.length > 1 && index !== owners.length - 1 && ', '} + + ))} + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/FormSelectionModal/FormSelectionModal.tsx b/datahub-web-react/src/app/entity/shared/entityForm/FormSelectionModal/FormSelectionModal.tsx new file mode 100644 index 00000000000000..17452b30f6c117 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/FormSelectionModal/FormSelectionModal.tsx @@ -0,0 +1,21 @@ +import { Modal } from 'antd'; +import React from 'react'; +import FormSelector from './FormSelector'; + +interface Props { + isFormSelectionModalVisible: boolean; + hideFormSelectionModal: () => void; + selectFormUrn: (urn: string) => void; +} + +export default function FormSelectionModal({ + isFormSelectionModalVisible, + hideFormSelectionModal, + selectFormUrn, +}: Props) { + return ( + + + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/FormSelectionModal/FormSelector.tsx b/datahub-web-react/src/app/entity/shared/entityForm/FormSelectionModal/FormSelector.tsx new file mode 100644 index 00000000000000..6c9d593b38f0ea --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/FormSelectionModal/FormSelector.tsx @@ -0,0 +1,48 @@ +import { Divider } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; +import { useEntityData } from '../../EntityContext'; +import { getFormAssociations } from '../../containers/profile/sidebar/FormInfo/utils'; +import FormItem from './FormItem'; + +const FormSelectorWrapper = styled.div` + font-size: 14px; +`; + +const HeaderText = styled.div` + font-size: 16px; + font-weight: 600; + margin-bottom: 8px; +`; + +const Subheader = styled.div` + margin-bottom: 8px; +`; + +const StyledDivider = styled(Divider)` + margin: 8px 0; +`; + +interface Props { + selectFormUrn: (urn: string) => void; +} + +export default function FormSelector({ selectFormUrn }: Props) { + const { entityData } = useEntityData(); + const formAssociations = getFormAssociations(entityData); + + return ( + + Choose Which Form to View + + There are multiple open requests for this entity. Choose which one you’d like to view or complete. + + {formAssociations.map((formAssociation, index) => ( +
+ + {index !== formAssociations.length - 1 && } +
+ ))} +
+ ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/ProgressBar.tsx b/datahub-web-react/src/app/entity/shared/entityForm/ProgressBar.tsx new file mode 100644 index 00000000000000..a4473fc825e905 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/ProgressBar.tsx @@ -0,0 +1,38 @@ +import { Progress } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; +import useGetPromptInfo from '../containers/profile/sidebar/FormInfo/useGetPromptInfo'; +import { ANTD_GRAY } from '../constants'; + +const StyledProgress = styled(Progress)` + &&& .ant-progress-outer { + display: flex; + flex-direction: column; + align-items: flex-start; + gap: 8px; + align-self: stretch; + } + + .ant-progress-bg { + height: 4px !important; + } +`; + +interface Props { + formUrn: string; +} +export default function ProgressBar({ formUrn }: Props) { + const { totalRequiredSchemaFieldPrompts, numRequiredPromptsRemaining, requiredEntityPrompts } = + useGetPromptInfo(formUrn); + const totalRequiredPrompts = requiredEntityPrompts.length + totalRequiredSchemaFieldPrompts; + const percent = ((totalRequiredPrompts - numRequiredPromptsRemaining) / totalRequiredPrompts) * 100; + + return ( + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/__tests__/Form.test.tsx b/datahub-web-react/src/app/entity/shared/entityForm/__tests__/Form.test.tsx new file mode 100644 index 00000000000000..d1e458d37bd5ed --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/__tests__/Form.test.tsx @@ -0,0 +1,62 @@ +import { MockedProvider } from '@apollo/client/testing'; +import React from 'react'; +import { render, waitFor } from '@testing-library/react'; +import EntityContext from '../../EntityContext'; +import { mockEntityDataWithFieldPrompts, mockEntityData } from '../mocks'; +import { EntityType } from '../../../../../types.generated'; +import Form from '../Form'; +import TestPageContainer from '../../../../../utils/test-utils/TestPageContainer'; +import { mocks } from '../../../../../Mocks'; + +beforeEach(() => { + // IntersectionObserver isn't available in test environment + const mockIntersectionObserver = vi.fn(); + mockIntersectionObserver.mockReturnValue({ + observe: () => null, + unobserve: () => null, + disconnect: () => null, + }); + window.IntersectionObserver = mockIntersectionObserver; +}); + +describe('Form', () => { + it('should show field-level header if there are schema field prompts', async () => { + const { getByTestId, findByTestId } = render( + + + + + + + , + ); + // DeferredRenderComponent defers rendering for a short period, wait for that + await waitFor(() => findByTestId('field-level-requirements')); + expect(getByTestId('field-level-requirements')).toBeInTheDocument(); + }); + + it('should not show field-level header if there are no schema field prompts', () => { + const { queryByTestId } = render( + + + + + + + , + ); + expect(queryByTestId('field-level-requirements')).not.toBeInTheDocument(); + }); +}); diff --git a/datahub-web-react/src/app/entity/shared/entityForm/__tests__/useShouldShowVerificationPrompt.test.ts b/datahub-web-react/src/app/entity/shared/entityForm/__tests__/useShouldShowVerificationPrompt.test.ts new file mode 100644 index 00000000000000..93413fcf634946 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/__tests__/useShouldShowVerificationPrompt.test.ts @@ -0,0 +1,48 @@ +import { FormType, FormVerificationAssociation } from '../../../../../types.generated'; +import { 
shouldShowVerificationPrompt } from '../useShouldShowVerificationPrompt'; + +describe('shouldShowVerificationPrompt', () => { + const formVerification = { + form: { urn: 'urn:li:form:1' }, + lastModified: { time: 100 }, + } as FormVerificationAssociation; + + it('should return true if the form is verification, there are no prompts remaining, and no verification', () => { + const shouldShow = shouldShowVerificationPrompt({ + formType: FormType.Verification, + numRequiredPromptsRemaining: 0, + }); + + expect(shouldShow).toBe(true); + }); + + it('should return false if the form was verified', () => { + const shouldShow = shouldShowVerificationPrompt({ + formType: FormType.Verification, + numRequiredPromptsRemaining: 0, + formVerification, + }); + + expect(shouldShow).toBe(false); + }); + + it('should return false if the form is not of type verification', () => { + const shouldShow = shouldShowVerificationPrompt({ + formType: FormType.Completion, + numRequiredPromptsRemaining: 0, + formVerification, + }); + + expect(shouldShow).toBe(false); + }); + + it('should return false if the form has prompts remaining', () => { + const shouldShow = shouldShowVerificationPrompt({ + formType: FormType.Verification, + numRequiredPromptsRemaining: 1, + formVerification, + }); + + expect(shouldShow).toBe(false); + }); +}); diff --git a/datahub-web-react/src/app/entity/shared/entityForm/constants.ts b/datahub-web-react/src/app/entity/shared/entityForm/constants.ts new file mode 100644 index 00000000000000..fb62ab5de7323e --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/constants.ts @@ -0,0 +1,3 @@ +import { FormPromptType } from '../../../../types.generated'; + +export const SCHEMA_FIELD_PROMPT_TYPES = [FormPromptType.FieldsStructuredProperty]; diff --git a/datahub-web-react/src/app/entity/shared/entityForm/mocks.ts b/datahub-web-react/src/app/entity/shared/entityForm/mocks.ts new file mode 100644 index 00000000000000..b29848e4b119b8 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/mocks.ts @@ -0,0 +1,221 @@ +import { EntityType, FormPromptType, FormType } from '../../../../types.generated'; +import { GenericEntityProperties } from '../types'; + +const form1 = { + urn: 'urn:li:form:1', + type: EntityType.Form, + info: { + name: '', + type: FormType.Verification, + prompts: [ + { + id: '1', + type: FormPromptType.FieldsStructuredProperty, + title: '', + formUrn: 'urn:li:form:1', + required: false, + }, + { + id: '2', + type: FormPromptType.FieldsStructuredProperty, + title: '', + formUrn: 'urn:li:form:1', + required: false, + }, + ], + actors: { + owners: true, + users: null, + groups: null, + isAssignedToMe: false, + }, + }, +}; + +const form2 = { + urn: 'urn:li:form:2', + type: EntityType.Form, + info: { + name: '', + prompts: [ + { + id: '3', + type: FormPromptType.StructuredProperty, + title: '', + formUrn: 'urn:li:form:2', + required: true, + }, + { + id: '4', + type: FormPromptType.FieldsStructuredProperty, + title: '', + formUrn: 'urn:li:form:2', + required: false, + }, + { + id: '5', + type: FormPromptType.StructuredProperty, + title: '', + formUrn: 'urn:li:form:2', + required: false, + }, + ], + type: FormType.Verification, + actors: { + owners: false, + users: null, + groups: null, + isAssignedToMe: true, + }, + }, +}; + +export const mockEntityData = { + schemaMetadata: { fields: [{ fieldPath: 'test' }] }, + forms: { + verifications: [ + { + form: form2, + lastModified: { + actor: { + urn: 'urn:li:corpuser:test', + }, + time: 100, + }, + }, + { + form: 
form2, + lastModified: { + actor: { + urn: 'urn:li:corpuser:test', + }, + time: 101, + }, + }, + ], + incompleteForms: [ + { + completedPrompts: [ + { + id: '1', + lastModified: { time: 123 }, + fieldAssociations: { + completedFieldPrompts: [ + { fieldPath: 'test3', lastModified: { time: 123 } }, + { fieldPath: 'test4', lastModified: { time: 123 } }, + ], + }, + }, + ], + incompletePrompts: [ + { + id: '2', + lastModified: { time: 1234 }, + fieldAssociations: { + completedFieldPrompts: [ + { fieldPath: 'test1', lastModified: { time: 123 } }, + { fieldPath: 'test2', lastModified: { time: 123 } }, + { fieldPath: 'test3', lastModified: { time: 123 } }, + ], + }, + }, + ], + associatedUrn: '', + form: form1, + }, + ], + completedForms: [ + { + completedPrompts: [{ id: '3', lastModified: { time: 1234 } }], + incompletePrompts: [ + { id: '4', lastModified: { time: 123 } }, + { id: '5', lastModified: { time: 123 } }, + ], + associatedUrn: '', + form: form2, + }, + { + completedPrompts: [{ id: '6', lastModified: { time: 1234 } }], + associatedUrn: '', + form: { + urn: 'urn:li:form:3', + type: EntityType.Form, + info: { + name: '', + prompts: [ + { + id: '6', + type: FormPromptType.StructuredProperty, + title: '', + formUrn: 'urn:li:form:3', + required: true, + }, + ], + type: FormType.Completion, + actors: { + owners: true, + users: null, + groups: null, + isAssignedToMe: false, + }, + }, + }, + }, + ], + }, +} as GenericEntityProperties; + +export const mockEntityDataAllVerified = { + ...mockEntityData, + forms: { + ...mockEntityData.forms, + verifications: [ + { + form: form2, + lastModified: { + actor: { + urn: 'urn:li:corpuser:test', + }, + time: 100, + }, + }, + { + form: form1, + lastModified: { + actor: { + urn: 'urn:li:corpuser:test', + }, + time: 101, + }, + }, + ], + }, +} as GenericEntityProperties; + +export const mockEntityDataWithFieldPrompts = { + ...mockEntityData, + forms: { + ...mockEntityData.forms, + incompleteForms: [ + { + ...(mockEntityData as any).forms.incompleteForms[0], + form: { + urn: 'urn:li:form:1', + type: EntityType.Form, + info: { + name: '', + prompts: [ + { + id: '1', + type: FormPromptType.FieldsStructuredProperty, + title: '', + formUrn: 'urn:li:form:1', + required: false, + }, + ], + }, + }, + }, + ], + }, +} as GenericEntityProperties; diff --git a/datahub-web-react/src/app/entity/shared/entityForm/prompts/Prompt.tsx b/datahub-web-react/src/app/entity/shared/entityForm/prompts/Prompt.tsx new file mode 100644 index 00000000000000..23512dff599393 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/prompts/Prompt.tsx @@ -0,0 +1,65 @@ +import { message } from 'antd'; +import React, { useState } from 'react'; +import styled from 'styled-components'; +import { + FormPrompt as PromptEntity, + FormPromptType, + SubmitFormPromptInput, + SchemaField, +} from '../../../../../types.generated'; +import StructuredPropertyPrompt from './StructuredPropertyPrompt/StructuredPropertyPrompt'; +import { useSubmitFormPromptMutation } from '../../../../../graphql/form.generated'; +import { useMutationUrn } from '../../EntityContext'; + +export const PromptWrapper = styled.div` + background-color: white; + border-radius: 8px; + padding: 24px; + margin-bottom: 8px; +`; + +interface Props { + promptNumber?: number; + prompt: PromptEntity; + field?: SchemaField; + associatedUrn?: string; +} + +export default function Prompt({ promptNumber, prompt, field, associatedUrn }: Props) { + const [optimisticCompletedTimestamp, setOptimisticCompletedTimestamp] = 
useState(null); + const urn = useMutationUrn(); + const [submitFormPrompt] = useSubmitFormPromptMutation(); + + function submitResponse(input: SubmitFormPromptInput, onSuccess: () => void) { + submitFormPrompt({ variables: { urn: associatedUrn || urn, input } }) + .then(() => { + onSuccess(); + setOptimisticCompletedTimestamp(Date.now()); + }) + .catch(() => { + message.error('Unknown error while submitting form response'); + }); + } + + return ( + + {prompt.type === FormPromptType.StructuredProperty && ( + + )} + {prompt.type === FormPromptType.FieldsStructuredProperty && ( + + )} + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/CompletedPromptAuditStamp.tsx b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/CompletedPromptAuditStamp.tsx new file mode 100644 index 00000000000000..ff11f0db5f8bc4 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/CompletedPromptAuditStamp.tsx @@ -0,0 +1,69 @@ +import { Typography } from 'antd'; +import React from 'react'; +import Icon from '@ant-design/icons'; +import styled from 'styled-components'; +import GreenCircleIcon from '../../../../../../images/greenCircleTwoTone.svg?react'; +import { ANTD_GRAY_V2 } from '../../../constants'; + +const PadIcon = styled.div` + align-items: flex-start; + padding-top: 1px; + padding-right: 2px; +`; + +const CompletedPromptContainer = styled.div` + display: flex; + align-self: end; + max-width: 350px; +`; + +const AuditStamp = styled.div` + color: #373d44; + font-size: 14px; + font-family: Manrope; + font-weight: 600; + line-height: 18px; + overflow: hidden; + white-space: nowrap; + display: flex; +`; + +const AuditStampSubTitle = styled.div` + color: ${ANTD_GRAY_V2[8]}; + font-size: 12px; + font-family: Manrope; + font-weight: 500; + line-height: 16px; + word-wrap: break-word; +`; + +const StyledIcon = styled(Icon)` + font-size: 16px; + margin-right: 4px; +`; + +const AuditWrapper = styled.div` + max-width: 95%; +`; + +interface Props { + completedByName: string; + completedByTime: string; +} + +export default function CompletedPromptAuditStamp({ completedByName, completedByTime }: Props) { + return ( + + + + + + + Completed by  + {completedByName} + + {completedByTime} + + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/DateInput.tsx b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/DateInput.tsx new file mode 100644 index 00000000000000..23c322ea4c8f2b --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/DateInput.tsx @@ -0,0 +1,18 @@ +import { DatePicker } from 'antd'; +import React from 'react'; +import moment, { Moment } from 'moment'; + +interface Props { + selectedValues: any[]; + updateSelectedValues: (values: string[] | number[]) => void; +} + +export default function DateInput({ selectedValues, updateSelectedValues }: Props) { + function updateInput(_: Moment | null, value: string) { + updateSelectedValues([value]); + } + + const currentValue = selectedValues[0] ? 
moment(selectedValues[0]) : undefined; + + return ; +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/DropdownLabel.tsx b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/DropdownLabel.tsx new file mode 100644 index 00000000000000..0e002f8cc6150f --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/DropdownLabel.tsx @@ -0,0 +1,34 @@ +import React from 'react'; +import styled from 'styled-components'; + +const StyledValue = styled.div` + font-family: Manrope; + font-size: 14px; + font-style: normal; + font-weight: 400; + line-height: 22px; + color: #373d44; +`; + +const StyledDescription = styled.div` + font-family: Manrope; + font-size: 12px; + font-style: normal; + font-weight: 500; + line-height: 16px; + color: #5e666e; +`; + +interface Props { + value: string | number | null; + description?: string | null; +} + +export default function DropdownLabel({ value, description }: Props) { + return ( + <> + {value} + {description} + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/MultiSelectInput.tsx b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/MultiSelectInput.tsx new file mode 100644 index 00000000000000..606430e68400fe --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/MultiSelectInput.tsx @@ -0,0 +1,82 @@ +import { Checkbox, Select, Tag } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; +import { ANTD_GRAY_V2 } from '../../../constants'; +import { getStructuredPropertyValue } from '../../../utils'; +import ValueDescription from './ValueDescription'; +import { AllowedValue } from '../../../../../../types.generated'; +import DropdownLabel from './DropdownLabel'; + +const StyledCheckbox = styled(Checkbox)` + display: flex; + margin: 0 0 4px 0; + .ant-checkbox-inner { + border-color: ${ANTD_GRAY_V2[8]}; + } + &&& { + margin-left: 0; + } +`; + +const StyleTag = styled(Tag)` + font-family: Manrope; + font-size: 14px; + font-style: normal; + font-weight: 400; +`; + +const DROPDOWN_STYLE = { minWidth: 320, maxWidth: 320, textAlign: 'left' }; + +interface Props { + selectedValues: any[]; + allowedValues: AllowedValue[]; + toggleSelectedValue: (value: string | number) => void; + updateSelectedValues: (values: string[] | number[]) => void; +} + +export default function MultiSelectInput({ + toggleSelectedValue, + updateSelectedValues, + allowedValues, + selectedValues, +}: Props) { + return allowedValues.length > 5 ? 
( + selectSingleValue(value)} + optionLabelProp="value" + > + {allowedValues.map((allowedValue) => ( + + + + ))} + + ) : ( + selectSingleValue(e.target.value)}> + {allowedValues.map((allowedValue) => ( + + {getStructuredPropertyValue(allowedValue.value)} + {allowedValue.description && } + + ))} + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/StringInput.tsx b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/StringInput.tsx new file mode 100644 index 00000000000000..8c69174a35bf3d --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/StringInput.tsx @@ -0,0 +1,31 @@ +import { Input } from 'antd'; +import React, { ChangeEvent } from 'react'; +import styled from 'styled-components'; +import { ANTD_GRAY_V2 } from '../../../constants'; +import { PropertyCardinality } from '../../../../../../types.generated'; +import MultipleStringInput from './MultipleStringInput'; + +const StyledInput = styled(Input)` + width: 75%; + min-width: 350px; + max-width: 500px; + border: 1px solid ${ANTD_GRAY_V2[6]}; +`; + +interface Props { + selectedValues: any[]; + cardinality?: PropertyCardinality | null; + updateSelectedValues: (values: string[] | number[]) => void; +} + +export default function StringInput({ selectedValues, cardinality, updateSelectedValues }: Props) { + function updateInput(event: ChangeEvent) { + updateSelectedValues([event.target.value]); + } + + if (cardinality === PropertyCardinality.Multiple) { + return ; + } + + return ; +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/StructuredPropertyPrompt.tsx b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/StructuredPropertyPrompt.tsx new file mode 100644 index 00000000000000..d7f29779156816 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/StructuredPropertyPrompt.tsx @@ -0,0 +1,207 @@ +import { Button } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; +import { + EntityType, + FormPrompt, + PropertyCardinality, + SchemaField, + StdDataType, + SubmitFormPromptInput, +} from '../../../../../../types.generated'; +import SingleSelectInput from './SingleSelectInput'; +import MultiSelectInput from './MultiSelectInput'; +import useStructuredPropertyPrompt from './useStructuredPropertyPrompt'; +import StringInput from './StringInput'; +import RichTextInput from './RichTextInput'; +import DateInput from './DateInput'; +import NumberInput from './NumberInput'; +import UrnInput from './UrnInput/UrnInput'; +import { useEntityData } from '../../../EntityContext'; +import { + findCompletedFieldPrompt, + findPromptAssociation, + getCompletedPrompts, + getIncompletePrompts, + isFieldPromptComplete, + isPromptComplete, +} from '../../../containers/profile/sidebar/FormInfo/utils'; +import { useEntityRegistry } from '../../../../../useEntityRegistry'; +import { getTimeFromNow } from '../../../../../shared/time/timeUtils'; +import CompletedPromptAuditStamp from './CompletedPromptAuditStamp'; +import { applyOpacity } from '../../../../../shared/styleUtils'; +import { useUserContext } from '../../../../../context/useUserContext'; + +const PromptWrapper = styled.div<{ displayBulkStyles?: boolean }>` + display: flex; + justify-content: space-between; + height: min-content; + ${(props) => props.displayBulkStyles && `color: white;`} +`; + +const PromptTitle = 
styled.div<{ displayBulkStyles?: boolean }>` + font-size: 16px; + font-weight: 600; + line-height: 20px; + ${(props) => props.displayBulkStyles && `font-size: 20px;`} +`; + +const RequiredText = styled.span<{ displayBulkStyles?: boolean }>` + font-size: 12px; + margin-left: 4px; + color: #a8071a; + ${(props) => + props.displayBulkStyles && + ` + color: #FFCCC7; + margin-left: 8px; + `} +`; + +export const PromptSubTitle = styled.div` + font-size: 14px; + font-weight: 500; + line-height: 18px; + margin-top: 4px; +`; + +const InputSection = styled.div` + margin-top: 8px; +`; + +const StyledButton = styled(Button)` + align-self: end; + margin-left: 8px; + + &:focus { + box-shadow: 0 0 3px 2px ${(props) => applyOpacity(props.theme.styles['primary-color'] || '', 50)}; + } +`; + +const PromptInputWrapper = styled.div` + flex: 1; +`; + +interface Props { + promptNumber?: number; + prompt: FormPrompt; + submitResponse: (input: SubmitFormPromptInput, onSuccess: () => void) => void; + field?: SchemaField; + optimisticCompletedTimestamp?: number | null; +} + +export default function StructuredPropertyPrompt({ + promptNumber, + prompt, + submitResponse, + field, + optimisticCompletedTimestamp, +}: Props) { + const { + isSaveVisible, + selectedValues, + selectSingleValue, + toggleSelectedValue, + submitStructuredPropertyResponse, + updateSelectedValues, + } = useStructuredPropertyPrompt({ prompt, submitResponse, field }); + const { entityData } = useEntityData(); + const { user } = useUserContext(); + const entityRegistry = useEntityRegistry(); + const completedPrompts = getCompletedPrompts(entityData); + const incompletePrompts = getIncompletePrompts(entityData); + const promptAssociation = findPromptAssociation(prompt, completedPrompts.concat(incompletePrompts)); + const completedFieldPrompt = findCompletedFieldPrompt(field, promptAssociation); + + const structuredProperty = prompt.structuredPropertyParams?.structuredProperty; + if (!structuredProperty) return null; + + const { displayName, description, allowedValues, cardinality, valueType } = structuredProperty.definition; + + function getCompletedByName() { + let actor = completedFieldPrompt?.lastModified?.actor || promptAssociation?.lastModified?.actor; + if (optimisticCompletedTimestamp) { + actor = user; + } + return actor ? entityRegistry.getDisplayName(EntityType.CorpUser, actor) : ''; + } + + function getCompletedByRelativeTime() { + let completedTimestamp = completedFieldPrompt?.lastModified?.time || promptAssociation?.lastModified?.time; + if (optimisticCompletedTimestamp) { + completedTimestamp = optimisticCompletedTimestamp; + } + return completedTimestamp ? getTimeFromNow(completedTimestamp) : ''; + } + + return ( + + + + {promptNumber !== undefined && <>{promptNumber}. 
} + {displayName} + {prompt.required && required} + + {description && {description}} + + {allowedValues && allowedValues.length > 0 && ( + <> + {cardinality === PropertyCardinality.Single && ( + + )} + {cardinality === PropertyCardinality.Multiple && ( + + )} + + )} + {!allowedValues && valueType.info.type === StdDataType.String && ( + + )} + {!allowedValues && valueType.info.type === StdDataType.RichText && ( + + )} + {!allowedValues && valueType.info.type === StdDataType.Date && ( + + )} + {!allowedValues && valueType.info.type === StdDataType.Number && ( + + )} + {!allowedValues && valueType.info.type === StdDataType.Urn && ( + + )} + + + {isSaveVisible && selectedValues.length > 0 && ( + + Save + + )} + {(isPromptComplete(prompt, completedPrompts) || + isFieldPromptComplete(field, promptAssociation) || + optimisticCompletedTimestamp) && + !isSaveVisible && ( + + )} + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/UrnInput/SelectedEntity.tsx b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/UrnInput/SelectedEntity.tsx new file mode 100644 index 00000000000000..d5ed2e9693fc9c --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/UrnInput/SelectedEntity.tsx @@ -0,0 +1,40 @@ +import { Typography } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; +import { Entity } from '../../../../../../../types.generated'; +import { useEntityRegistry } from '../../../../../../useEntityRegistry'; +import EntityIcon from '../../../../components/styled/EntityIcon'; + +const SelectedEntityWrapper = styled.div` + display: flex; + align-items: center; + font-size: 14px; + overflow: hidden; +`; + +const IconWrapper = styled.span` + margin-right: 4px; + display: flex; +`; + +const NameWrapper = styled(Typography.Text)` + margin-right: 4px; +`; + +interface Props { + entity: Entity; +} + +export default function SelectedEntity({ entity }: Props) { + const entityRegistry = useEntityRegistry(); + const displayName = entityRegistry.getDisplayName(entity.type, entity); + + return ( + + + + + {displayName} + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/UrnInput/UrnInput.tsx b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/UrnInput/UrnInput.tsx new file mode 100644 index 00000000000000..54d53c75607e5f --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/UrnInput/UrnInput.tsx @@ -0,0 +1,78 @@ +import { LoadingOutlined } from '@ant-design/icons'; +import { Select } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; +import { StructuredPropertyEntity } from '../../../../../../../types.generated'; +import useUrnInput from './useUrnInput'; +import SelectedEntity from './SelectedEntity'; + +const EntitySelect = styled(Select)` + width: 75%; + min-width: 400px; + max-width: 600px; + + .ant-select-selector { + padding: 4px; + } +`; + +const LoadingWrapper = styled.div` + padding: 8px; + display: flex; + justify-content: center; + + svg { + height: 24px; + width: 24px; + } +`; + +interface Props { + structuredProperty: StructuredPropertyEntity; + selectedValues: any[]; + updateSelectedValues: (values: string[] | number[]) => void; +} + +export default function UrnInput({ structuredProperty, selectedValues, updateSelectedValues }: Props) { + const { + onSelectValue, + 
onDeselectValue, + handleSearch, + tagRender, + selectedEntities, + searchResults, + loading, + entityTypeNames, + } = useUrnInput({ structuredProperty, selectedValues, updateSelectedValues }); + + const placeholder = `Search for ${entityTypeNames ? entityTypeNames.map((name) => ` ${name}`) : 'entities'}...`; + + return ( + onSelectValue(urn)} + onDeselect={(urn: any) => onDeselectValue(urn)} + onSearch={(value: string) => handleSearch(value.trim())} + tagRender={tagRender} + value={selectedEntities.map((e) => e.urn)} + loading={loading} + notFoundContent={ + loading ? ( + + + + ) : undefined + } + > + {searchResults?.map((searchResult) => ( + + + + ))} + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/UrnInput/useUrnInput.tsx b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/UrnInput/useUrnInput.tsx new file mode 100644 index 00000000000000..4f621f7018f12e --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/UrnInput/useUrnInput.tsx @@ -0,0 +1,108 @@ +import { Tag } from 'antd'; +import React, { useEffect, useMemo, useState } from 'react'; +import styled from 'styled-components'; +import { Entity, PropertyCardinality, StructuredPropertyEntity } from '../../../../../../../types.generated'; +import { useGetSearchResultsForMultipleLazyQuery } from '../../../../../../../graphql/search.generated'; +import { useEntityData } from '../../../../EntityContext'; +import { getInitialEntitiesForUrnPrompt } from '../utils'; +import SelectedEntity from './SelectedEntity'; +import { useEntityRegistry } from '../../../../../../useEntityRegistry'; +import usePrevious from '../../../../../../shared/usePrevious'; + +const StyleTag = styled(Tag)` + margin: 2px; + padding: 4px 6px; + display: flex; + justify-content: start; + align-items: center; + white-space: nowrap; + opacity: 1; + color: #434343; + line-height: 16px; + font-size: 12px; + max-width: 100%; +`; + +interface Props { + structuredProperty: StructuredPropertyEntity; + selectedValues: any[]; + updateSelectedValues: (values: any[]) => void; +} + +export default function useUrnInput({ structuredProperty, selectedValues, updateSelectedValues }: Props) { + const entityRegistry = useEntityRegistry(); + const { entityData } = useEntityData(); + const initialEntities = useMemo( + () => getInitialEntitiesForUrnPrompt(structuredProperty.urn, entityData, selectedValues), + [structuredProperty.urn, entityData, selectedValues], + ); + + // we store the selected entity objects here to render display name, platform, etc. 
+ // selectedValues contains a list of urns that we store for the structured property values + const [selectedEntities, setSelectedEntities] = useState(initialEntities); + const [searchAcrossEntities, { data: searchData, loading }] = useGetSearchResultsForMultipleLazyQuery(); + const searchResults = + searchData?.searchAcrossEntities?.searchResults?.map((searchResult) => searchResult.entity) || []; + const allowedEntityTypes = structuredProperty.definition.typeQualifier?.allowedTypes?.map( + (allowedType) => allowedType.info.type, + ); + const entityTypeNames: string[] | undefined = allowedEntityTypes?.map( + (entityType) => entityRegistry.getEntityName(entityType) || '', + ); + const isMultiple = structuredProperty.definition.cardinality === PropertyCardinality.Multiple; + + const previousEntityUrn = usePrevious(entityData?.urn); + useEffect(() => { + if (entityData?.urn !== previousEntityUrn) { + setSelectedEntities(initialEntities || []); + } + }, [entityData?.urn, previousEntityUrn, initialEntities]); + + function handleSearch(query: string) { + if (query.length > 0) { + searchAcrossEntities({ variables: { input: { query, types: allowedEntityTypes } } }); + } + } + + const onSelectValue = (urn: string) => { + const newValues = isMultiple ? [...selectedValues, urn] : [urn]; + updateSelectedValues(newValues); + + const selectedEntity = searchResults?.find((result) => result.urn === urn) as Entity; + const newEntities = isMultiple ? [...selectedEntities, selectedEntity] : [selectedEntity]; + setSelectedEntities(newEntities); + }; + + const onDeselectValue = (urn: string) => { + const newValues = selectedValues.filter((value) => value !== urn); + updateSelectedValues(newValues); + + const newSelectedEntities = selectedEntities.filter((entity) => entity.urn !== urn); + setSelectedEntities(newSelectedEntities); + }; + + const tagRender = (props: any) => { + // eslint-disable-next-line react/prop-types + const { closable, onClose, value } = props; + const selectedEntity = selectedEntities.find((term) => term.urn === value); + + if (!selectedEntity) return <>; + + return ( + + + + ); + }; + + return { + tagRender, + handleSearch, + onSelectValue, + onDeselectValue, + selectedEntities, + searchResults, + loading, + entityTypeNames, + }; +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/ValueDescription.tsx b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/ValueDescription.tsx new file mode 100644 index 00000000000000..716bd74fe6630d --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/ValueDescription.tsx @@ -0,0 +1,24 @@ +import React from 'react'; +import styled from 'styled-components'; +import { ANTD_GRAY_V2 } from '../../../constants'; + +const DescriptionText = styled.span` + color: ${ANTD_GRAY_V2[8]}; +`; + +const DescriptionSeparator = styled.span` + margin: 0 8px; +`; + +interface Props { + description: string; +} + +export default function ValueDescription({ description }: Props) { + return ( + <> + - + {description} + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/useStructuredPropertyPrompt.ts b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/useStructuredPropertyPrompt.ts new file mode 100644 index 00000000000000..d238a17b097992 --- /dev/null +++ 
b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/useStructuredPropertyPrompt.ts @@ -0,0 +1,99 @@ +import { useEffect, useMemo, useState } from 'react'; +import { useEntityContext } from '../../../EntityContext'; +import { FormPrompt, FormPromptType, SchemaField, SubmitFormPromptInput } from '../../../../../../types.generated'; +import { getInitialValues } from './utils'; +import usePrevious from '../../../../../shared/usePrevious'; +import { useGetEntityWithSchema } from '../../../tabs/Dataset/Schema/useGetEntitySchema'; +import { FormView, useEntityFormContext } from '../../EntityFormContext'; + +interface Props { + prompt: FormPrompt; + submitResponse: (input: SubmitFormPromptInput, onSuccess: () => void) => void; + field?: SchemaField; +} + +export default function useStructuredPropertyPrompt({ prompt, submitResponse, field }: Props) { + const { refetch: refetchSchema } = useGetEntityWithSchema(); + const { refetch, entityData } = useEntityContext(); + const { selectedPromptId, formView } = useEntityFormContext(); + const [isSaveVisible, setIsSaveVisible] = useState(false); + const initialValues = useMemo( + () => (formView === FormView.BY_ENTITY ? getInitialValues(prompt, entityData, field) : []), + [formView, entityData, prompt, field], + ); + const [selectedValues, setSelectedValues] = useState(initialValues || []); + + const structuredProperty = prompt.structuredPropertyParams?.structuredProperty; + + const previousEntityUrn = usePrevious(entityData?.urn); + useEffect(() => { + if (entityData?.urn !== previousEntityUrn) { + setSelectedValues(initialValues || []); + } + }, [entityData?.urn, previousEntityUrn, initialValues]); + + const previousSelectedPromptId = usePrevious(selectedPromptId); + useEffect(() => { + if (selectedPromptId !== previousSelectedPromptId) { + setIsSaveVisible(false); + setSelectedValues(initialValues || []); + } + }, [previousSelectedPromptId, selectedPromptId, initialValues]); + + // respond to prompts + function selectSingleValue(value: string | number) { + setIsSaveVisible(true); + setSelectedValues([value as string]); + } + + function toggleSelectedValue(value: string | number) { + setIsSaveVisible(true); + if (selectedValues.includes(value)) { + setSelectedValues((prev) => prev.filter((v) => v !== value)); + } else { + setSelectedValues((prev) => [...prev, value]); + } + } + + function updateSelectedValues(values: any[]) { + setSelectedValues(values); + setIsSaveVisible(true); + } + + // submit structured property prompt + function submitStructuredPropertyResponse() { + submitResponse( + { + promptId: prompt.id, + formUrn: prompt.formUrn, + type: field ? 
FormPromptType.FieldsStructuredProperty : FormPromptType.StructuredProperty, + fieldPath: field?.fieldPath, + structuredPropertyParams: { + structuredPropertyUrn: structuredProperty?.urn as string, + values: selectedValues.map((value) => { + if (typeof value === 'string') { + return { stringValue: value as string }; + } + return { numberValue: value as number }; + }), + }, + }, + () => { + refetch(); + setIsSaveVisible(false); + if (field) { + refetchSchema(); + } + }, + ); + } + + return { + isSaveVisible, + selectedValues, + selectSingleValue, + toggleSelectedValue, + submitStructuredPropertyResponse, + updateSelectedValues, + }; +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/utils.ts b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/utils.ts new file mode 100644 index 00000000000000..1050c5fcde7283 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/prompts/StructuredPropertyPrompt/utils.ts @@ -0,0 +1,36 @@ +import { getStructuredPropertyValue } from '../../../utils'; +import { GenericEntityProperties } from '../../../types'; +import { + Entity, + FormPrompt, + PropertyValue, + SchemaField, + StructuredPropertiesEntry, +} from '../../../../../../types.generated'; + +export function getInitialValues(prompt: FormPrompt, entityData: GenericEntityProperties | null, field?: SchemaField) { + const structuredProperty = prompt.structuredPropertyParams?.structuredProperty; + let structuredPropertyAssignment: StructuredPropertiesEntry | undefined; + if (field) { + structuredPropertyAssignment = field?.schemaFieldEntity?.structuredProperties?.properties?.find( + (propAssignment) => propAssignment.structuredProperty.urn === structuredProperty?.urn, + ); + } else { + structuredPropertyAssignment = entityData?.structuredProperties?.properties?.find( + (propAssignment) => propAssignment.structuredProperty.urn === structuredProperty?.urn, + ); + } + return structuredPropertyAssignment?.values.map((value) => getStructuredPropertyValue(value as PropertyValue)); +} + +export function getInitialEntitiesForUrnPrompt( + structuredPropertyUrn: string, + entityData: GenericEntityProperties | null, + selectedValues: any[], +) { + const structuredPropertyEntry = entityData?.structuredProperties?.properties?.find( + (p) => p.structuredProperty.urn === structuredPropertyUrn, + ); + const entities = structuredPropertyEntry?.valueEntities?.filter((e) => selectedValues.includes(e?.urn)); + return entities ? 
(entities as Entity[]) : []; +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/prompts/VerificationPrompt.tsx b/datahub-web-react/src/app/entity/shared/entityForm/prompts/VerificationPrompt.tsx new file mode 100644 index 00000000000000..7578436cc993ac --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/prompts/VerificationPrompt.tsx @@ -0,0 +1,72 @@ +import React, { useEffect, useRef } from 'react'; +import styled from 'styled-components'; +import { Button, Divider, message } from 'antd'; +import { useVerifyFormMutation } from '../../../../../graphql/form.generated'; +import { useEntityContext, useMutationUrn } from '../../EntityContext'; +import { PromptWrapper } from './Prompt'; +import { useUpdateEducationStepsAllowList } from '../../../../onboarding/useUpdateEducationStepsAllowList'; +import { FORM_ASSET_COMPLETION } from '../../../../onboarding/config/FormOnboardingConfig'; + +const ContentWrapper = styled.div` + display: flex; + flex-direction: column; + justify-content: center; + align-items: center; + font-size: 16px; + font-weight: 600; +`; + +const VerifyButton = styled(Button)` + margin-top: 16px; + width: 60%; + max-width: 600px; + font-size: 16px; + font-weight: 600; + height: auto; +`; + +interface Props { + formUrn: string; + associatedUrn?: string; +} + +export default function VerificationPrompt({ formUrn, associatedUrn }: Props) { + const urn = useMutationUrn(); + const { refetch } = useEntityContext(); + const [verifyFormMutation] = useVerifyFormMutation(); + const { addIdToAllowList } = useUpdateEducationStepsAllowList(); + + function verifyForm() { + verifyFormMutation({ variables: { input: { entityUrn: associatedUrn || urn || '', formUrn } } }) + .then(() => { + refetch(); + addIdToAllowList(FORM_ASSET_COMPLETION); + }) + .catch(() => { + message.error('Error when verifying responses on form'); + }); + } + + const verificationPrompt = useRef(null); + useEffect(() => { + (verificationPrompt?.current as any)?.scrollIntoView({ + behavior: 'smooth', + block: 'start', + inline: 'nearest', + }); + }, []); + + return ( + <> + + + + All questions for verification have been completed. Please verify your responses. 
+ + Verify Responses + + + + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/schemaFieldPrompts/DropdownHeader.tsx b/datahub-web-react/src/app/entity/shared/entityForm/schemaFieldPrompts/DropdownHeader.tsx new file mode 100644 index 00000000000000..0d09cce4a97aa8 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/schemaFieldPrompts/DropdownHeader.tsx @@ -0,0 +1,62 @@ +import Icon from '@ant-design/icons/lib/components/Icon'; +import React, { useMemo } from 'react'; +import styled from 'styled-components'; +import GreenCircleIcon from '../../../../../images/greenCircleTwoTone.svg?react'; +import { SchemaField } from '../../../../../types.generated'; +import translateFieldPath from '../../../dataset/profile/schema/utils/translateFieldPath'; +import { getNumPromptsCompletedForField } from '../../containers/profile/sidebar/FormInfo/utils'; +import { useEntityData } from '../../EntityContext'; +import { ANTD_GRAY_V2 } from '../../constants'; +import { pluralize } from '../../../../shared/textUtil'; +import { useEntityFormContext } from '../EntityFormContext'; + +const HeaderWrapper = styled.div` + display: flex; + justify-content: space-between; + font-size: 16px; + align-items: center; +`; + +const PromptsRemainingText = styled.span` + font-size: 14px; + color: ${ANTD_GRAY_V2[8]}; + font-weight: 400; +`; + +const PromptsCompletedText = styled.span` + font-size: 14px; + color: #373d44; + font-weight: 600; +`; + +interface Props { + field: SchemaField; + numPrompts: number; + isExpanded: boolean; +} + +export default function DropdownHeader({ field, numPrompts, isExpanded }: Props) { + const { entityData } = useEntityData(); + const { formUrn } = useEntityFormContext(); + const numPromptsCompletedForField = useMemo( + () => getNumPromptsCompletedForField(field.fieldPath, entityData, formUrn), + [entityData, field.fieldPath, formUrn], + ); + const numPromptsRemaining = numPrompts - numPromptsCompletedForField; + + return ( + + Field: {translateFieldPath(field.fieldPath)} + {numPromptsRemaining > 0 && ( + + {numPromptsRemaining} {pluralize(numPrompts, 'question')} remaining + + )} + {numPromptsRemaining === 0 && !isExpanded && ( + + {numPrompts} {pluralize(numPrompts, 'Question')} Completed + + )} + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/schemaFieldPrompts/SchemaFieldDropdown.tsx b/datahub-web-react/src/app/entity/shared/entityForm/schemaFieldPrompts/SchemaFieldDropdown.tsx new file mode 100644 index 00000000000000..bdb6b99dc1dbf6 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/schemaFieldPrompts/SchemaFieldDropdown.tsx @@ -0,0 +1,45 @@ +import React, { useState } from 'react'; +import styled from 'styled-components'; +import { Collapse } from 'antd'; +import { FormPrompt, SchemaField } from '../../../../../types.generated'; +import Prompt from '../prompts/Prompt'; +import DropdownHeader from './DropdownHeader'; + +const StyledCollapse = styled(Collapse)` + margin-bottom: 16px; + + .ant-collapse-header { + font-size: 14px; + font-weight: bold; + padding: 12px 0; + } + &&& .ant-collapse-item { + background-color: white; + border-radius: 5px; + } + .ant-collapse-content-box { + padding: 0; + } +`; + +interface Props { + field: SchemaField; + prompts: FormPrompt[]; + associatedUrn?: string; +} + +export default function SchemaFieldDropdown({ field, prompts, associatedUrn }: Props) { + const [isExpanded, setIsExpanded] = useState(false); + return ( + setIsExpanded(!isExpanded)}> + } + key="0" + > + 
{prompts.map((prompt) => ( + + ))} + + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/schemaFieldPrompts/SchemaFieldPrompts.tsx b/datahub-web-react/src/app/entity/shared/entityForm/schemaFieldPrompts/SchemaFieldPrompts.tsx new file mode 100644 index 00000000000000..087a42e3f8000c --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/schemaFieldPrompts/SchemaFieldPrompts.tsx @@ -0,0 +1,36 @@ +import React from 'react'; +import styled from 'styled-components'; +import { Divider } from 'antd'; +import { FormPrompt, SchemaField } from '../../../../../types.generated'; +import { useGetEntityWithSchema } from '../../tabs/Dataset/Schema/useGetEntitySchema'; +import SchemaFieldDropdown from './SchemaFieldDropdown'; +import VirtualScrollChild from '../../../../shared/VirtualScrollChild'; + +const FieldPromptsTitle = styled.div` + margin-bottom: 16px; + font-size: 16px; + font-weight: 600; +`; + +interface Props { + prompts: FormPrompt[]; + associatedUrn?: string; +} + +export default function SchemaFieldPrompts({ prompts, associatedUrn }: Props) { + const { entityWithSchema } = useGetEntityWithSchema(); + + if (!entityWithSchema?.schemaMetadata || !entityWithSchema.schemaMetadata.fields.length) return null; + + return ( + <> + + Field-Level Requirements + {entityWithSchema?.schemaMetadata?.fields.map((field) => ( + + + + ))} + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/entityForm/useShouldShowVerificationPrompt.ts b/datahub-web-react/src/app/entity/shared/entityForm/useShouldShowVerificationPrompt.ts new file mode 100644 index 00000000000000..d7a8a417a0c866 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/entityForm/useShouldShowVerificationPrompt.ts @@ -0,0 +1,38 @@ +import { FormType, FormVerificationAssociation } from '../../../../types.generated'; +import { useEntityData } from '../EntityContext'; +import useGetPromptInfo from '../containers/profile/sidebar/FormInfo/useGetPromptInfo'; +import { getFormAssociation, getFormVerification } from '../containers/profile/sidebar/FormInfo/utils'; + +interface ShowVerificationPromptProps { + formType?: FormType; + numRequiredPromptsRemaining: number; + formVerification?: FormVerificationAssociation; +} + +export function shouldShowVerificationPrompt({ + formType, + numRequiredPromptsRemaining, + formVerification, +}: ShowVerificationPromptProps) { + return formType === FormType.Verification && numRequiredPromptsRemaining === 0 && !formVerification; +} + +/* + * Returns whether or not we should show ther verification prompt for a given form. + * We want to show this prompt if (1) the form is a VERIFICATION form (2) there are no more + * require prompts remaining and either (3a) the form is not verified or (3b) it has been + * edited more recently than the verification timestamp. 
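+ *
+ * Illustrative usage (a sketch, not part of this change; the VerificationPrompt props follow the component defined above):
+ *   const showVerificationPrompt = useShouldShowVerificationPrompt(formUrn);
+ *   return showVerificationPrompt ? <VerificationPrompt formUrn={formUrn} /> : null;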
+ */ +export default function useShouldShowVerificationPrompt(formUrn: string) { + const { numRequiredPromptsRemaining } = useGetPromptInfo(formUrn); + const { entityData } = useEntityData(); + const formVerification = getFormVerification(formUrn, entityData); + const formAssociation = getFormAssociation(formUrn, entityData); + const formType = formAssociation?.form.info.type; + + return shouldShowVerificationPrompt({ + formType, + numRequiredPromptsRemaining, + formVerification, + }); +} diff --git a/datahub-web-react/src/app/entity/shared/siblingUtils.ts b/datahub-web-react/src/app/entity/shared/siblingUtils.ts index 66481051055ec1..5e21c2a7c5ac44 100644 --- a/datahub-web-react/src/app/entity/shared/siblingUtils.ts +++ b/datahub-web-react/src/app/entity/shared/siblingUtils.ts @@ -117,6 +117,9 @@ const customMerge = (isPrimary, key) => { if (key === 'platform' || key === 'siblings') { return (secondary, primary) => (isPrimary ? primary : secondary); } + if (key === 'forms') { + return (_secondary, primary) => primary; + } if ( key === 'tags' || key === 'terms' || diff --git a/datahub-web-react/src/app/entity/shared/types.ts b/datahub-web-react/src/app/entity/shared/types.ts index 47cad4a69096de..919dfb78a52f6b 100644 --- a/datahub-web-react/src/app/entity/shared/types.ts +++ b/datahub-web-react/src/app/entity/shared/types.ts @@ -39,6 +39,7 @@ import { DataJobInputOutput, ParentDomainsResult, StructuredProperties, + Forms, } from '../../../types.generated'; import { FetchedEntity } from '../../lineage/types'; @@ -119,6 +120,7 @@ export type GenericEntityProperties = { origin?: Maybe; browsePathV2?: Maybe; inputOutput?: Maybe; + forms?: Maybe; }; export type GenericEntityUpdate = { diff --git a/datahub-web-react/src/app/home/HomePageRecommendations.tsx b/datahub-web-react/src/app/home/HomePageRecommendations.tsx index 6ce7735c4a7c85..cc9f4b265455b2 100644 --- a/datahub-web-react/src/app/home/HomePageRecommendations.tsx +++ b/datahub-web-react/src/app/home/HomePageRecommendations.tsx @@ -20,7 +20,7 @@ import { HOME_PAGE_MOST_POPULAR_ID, HOME_PAGE_PLATFORMS_ID, } from '../onboarding/config/HomePageOnboardingConfig'; -import { useUpdateEducationStepIdsAllowlist } from '../onboarding/useUpdateEducationStepIdsAllowlist'; +import { useToggleEducationStepIdsAllowList } from '../onboarding/useToggleEducationStepIdsAllowList'; const PLATFORMS_MODULE_ID = 'Platforms'; const MOST_POPULAR_MODULE_ID = 'HighUsageEntities'; @@ -147,15 +147,15 @@ export const HomePageRecommendations = ({ user }: Props) => { // Render domain onboarding step if the domains module exists const hasDomains = !!domainRecommendationModule; - useUpdateEducationStepIdsAllowlist(hasDomains, HOME_PAGE_DOMAINS_ID); + useToggleEducationStepIdsAllowList(hasDomains, HOME_PAGE_DOMAINS_ID); // Render platforms onboarding step if the platforms module exists const hasPlatforms = !!recommendationModules?.some((module) => module?.moduleId === PLATFORMS_MODULE_ID); - useUpdateEducationStepIdsAllowlist(hasPlatforms, HOME_PAGE_PLATFORMS_ID); + useToggleEducationStepIdsAllowList(hasPlatforms, HOME_PAGE_PLATFORMS_ID); // Render most popular onboarding step if the most popular module exists const hasMostPopular = !!recommendationModules?.some((module) => module?.moduleId === MOST_POPULAR_MODULE_ID); - useUpdateEducationStepIdsAllowlist(hasMostPopular, HOME_PAGE_MOST_POPULAR_ID); + useToggleEducationStepIdsAllowList(hasMostPopular, HOME_PAGE_MOST_POPULAR_ID); return ( diff --git a/datahub-web-react/src/app/identity/user/UserList.tsx 
b/datahub-web-react/src/app/identity/user/UserList.tsx index 178f54325ecde0..4a9e84d6e2248a 100644 --- a/datahub-web-react/src/app/identity/user/UserList.tsx +++ b/datahub-web-react/src/app/identity/user/UserList.tsx @@ -21,7 +21,7 @@ import { USERS_INVITE_LINK_ID, USERS_SSO_ID, } from '../../onboarding/config/UsersOnboardingConfig'; -import { useUpdateEducationStepIdsAllowlist } from '../../onboarding/useUpdateEducationStepIdsAllowlist'; +import { useToggleEducationStepIdsAllowList } from '../../onboarding/useToggleEducationStepIdsAllowList'; import { DEFAULT_USER_LIST_PAGE_SIZE, removeUserFromListUsersCache } from './cacheUtils'; import { useUserContext } from '../../context/useUserContext'; @@ -113,7 +113,7 @@ export const UserList = () => { const error = usersError || rolesError; const selectRoleOptions = rolesData?.listRoles?.roles?.map((role) => role as DataHubRole) || []; - useUpdateEducationStepIdsAllowlist(canManagePolicies, USERS_INVITE_LINK_ID); + useToggleEducationStepIdsAllowList(canManagePolicies, USERS_INVITE_LINK_ID); return ( <> diff --git a/datahub-web-react/src/app/onboarding/OnboardingConfig.tsx b/datahub-web-react/src/app/onboarding/OnboardingConfig.tsx index 7cc382fe8f2798..83fa6acd0cc252 100644 --- a/datahub-web-react/src/app/onboarding/OnboardingConfig.tsx +++ b/datahub-web-react/src/app/onboarding/OnboardingConfig.tsx @@ -1,6 +1,7 @@ import { BusinessGlossaryOnboardingConfig } from './config/BusinessGlossaryOnboardingConfig'; import { DomainsOnboardingConfig } from './config/DomainsOnboardingConfig'; import { EntityProfileOnboardingConfig } from './config/EntityProfileOnboardingConfig'; +import { FormOnboardingConfig } from './config/FormOnboardingConfig'; import { GroupsOnboardingConfig } from './config/GroupsOnboardingConfig'; import { HomePageOnboardingConfig } from './config/HomePageOnboardingConfig'; import { IngestionOnboardingConfig } from './config/IngestionOnboardingConfig'; @@ -23,6 +24,7 @@ const ALL_ONBOARDING_CONFIGS: OnboardingStep[][] = [ RolesOnboardingConfig, PoliciesOnboardingConfig, LineageGraphOnboardingConfig, + FormOnboardingConfig, ]; export const OnboardingConfig: OnboardingStep[] = ALL_ONBOARDING_CONFIGS.reduce( (acc, config) => [...acc, ...config], diff --git a/datahub-web-react/src/app/onboarding/config/FormOnboardingConfig.tsx b/datahub-web-react/src/app/onboarding/config/FormOnboardingConfig.tsx new file mode 100644 index 00000000000000..d50a25badfabb0 --- /dev/null +++ b/datahub-web-react/src/app/onboarding/config/FormOnboardingConfig.tsx @@ -0,0 +1,178 @@ +import { SmileOutlined } from '@ant-design/icons'; +import React from 'react'; +import { Typography } from 'antd'; +import styled from 'styled-components'; +import { OnboardingStep } from '../OnboardingStep'; +import BulkTypeComparions from '../../../images/bulk-form-type-comparison.svg'; + +const DiagramHeader = styled.div` + display: flex; + justify-content: center; + margin: 16px 0 4px 0; +`; + +const AssetCompletionHeader = styled.div` + font-size: 20px; + font-weight: normal; +`; + +const ByAssetWrapper = styled.span` + margin-left: 10px; + font-size: 14px; +`; + +const ByQuestionWrapper = styled.span` + margin-left: 80px; + font-size: 14px; +`; + +const StyledSmile = styled(SmileOutlined)` + color: ${(props) => props.theme.styles['primary-color']}; + margin-right: 4px; +`; + +export const WELCOME_TO_BULK_BY_ENTITY_ID = 'welcome-to-bulk-by-entity'; +export const FORM_QUESTION_VIEW_BUTTON = 'form-question-view-button'; +export const FORM_ASSET_COMPLETION = 
'form-asset-completion'; +export const WELCOME_TO_BULK_BY_QUESTION_ID = 'welcome-to-bulk-by-question'; +export const FORM_ASSETS_ASSIGNED_ID = 'form-assets-assigned'; +export const FORM_FILTER_AND_BROWSE_ID = 'form-filter-and-browse'; +export const FORM_ANSWER_IN_BULK_ID = 'form-answer-in-bulk'; +export const FORM_BULK_VERIFY_INTRO_ID = 'form-bulk-verify-intro'; +export const FORM_CHECK_RESPONSES_ID = 'form-check-responses'; +export const FORM_BULK_VERIFY_ID = 'form-bulk-verify'; + +export const FormOnboardingConfig: OnboardingStep[] = [ + { + id: WELCOME_TO_BULK_BY_ENTITY_ID, + selector: `#${WELCOME_TO_BULK_BY_ENTITY_ID}`, + title: 'Let’s complete your documentation requests!', + style: { width: '520px', maxWidth: '520px' }, + content: ( + + Here you can easily respond to all documentation requests efficiently. We’ll track your progress and + move you seamlessly through all your requests. +
+ Let’s get started completing the needs for this form. +
+ ), + }, + { + id: FORM_QUESTION_VIEW_BUTTON, + selector: `#${FORM_QUESTION_VIEW_BUTTON}`, + title: "Switch to the 'Complete by Question' view.", + style: { width: '520px', maxWidth: '520px' }, + content: ( + + If an answer fits multiple assets, this view lets you tackle questions across different assets at once, + making documentation even faster and more efficient. + + ), + }, + { + id: FORM_ASSET_COMPLETION, + selector: `#${FORM_ASSET_COMPLETION}`, + isActionStep: true, + title: ( + + Congratulations, You’ve Completed 1 Asset! + + ), + style: { width: '640px', maxWidth: '640px' }, + content: ( + + Now that you’ve completed one asset, try switching to the ‘Complete by Question’ view. If an answer fits + multiple assets, this view lets you tackle questions across different assets at once, making + documentation even faster and more efficient. + + By Asset + By Question + + bulk form type comparions + + ), + }, + { + id: WELCOME_TO_BULK_BY_QUESTION_ID, + selector: `#${WELCOME_TO_BULK_BY_QUESTION_ID}`, + title: "Welcome to the 'Complete by Question' view!", + style: { width: '520px', maxWidth: '520px' }, + content: ( + + Here, you can easily provide the same response for multiple assets at once for a faster documenting + experience. + + ), + }, + { + id: FORM_ASSETS_ASSIGNED_ID, + selector: `#${FORM_ASSETS_ASSIGNED_ID}`, + title: 'Focus on only the assets that require your attention', + style: { width: '520px', maxWidth: '520px' }, + content: ( + + In this view, we’ve simplified your workflow by only showing assets that require documentation from you. + + ), + }, + { + id: FORM_FILTER_AND_BROWSE_ID, + selector: `#${FORM_FILTER_AND_BROWSE_ID}`, + title: 'Filter and Browse to Select the Specific Assets', + style: { width: '520px', maxWidth: '520px' }, + content: ( + + Filter by type, terms, or browse by platform, database and schemas to select only the assets that you’d + like to set the response for. + + ), + }, + { + id: FORM_ANSWER_IN_BULK_ID, + selector: `#${FORM_ANSWER_IN_BULK_ID}`, + title: 'Answer in Bulk', + style: { width: '520px', maxWidth: '520px' }, + content: ( + + After selecting your assets, set a collective response and start answering for groups of 1,000 assets at + a time. + + ), + }, + { + id: FORM_BULK_VERIFY_INTRO_ID, + selector: `#${FORM_BULK_VERIFY_INTRO_ID}`, + title: 'Streamline Verification in Bulk!', + style: { width: '520px', maxWidth: '520px' }, + content: ( + + Here you can quickly review responses for a few datasets, ensuring accuracy. When you're ready, + proceed to verify all assets at once, simplifying the entire verification process. + + ), + }, + { + id: FORM_CHECK_RESPONSES_ID, + selector: `#${FORM_CHECK_RESPONSES_ID}`, + title: 'Check Responses', + style: { width: '520px', maxWidth: '520px' }, + content: ( + + Click on "View Responses" to easily spot-check your responses before the final Verification + step. + + ), + }, + { + id: FORM_BULK_VERIFY_ID, + selector: `#${FORM_BULK_VERIFY_ID}`, + title: 'Bulk Verify Assets', + style: { width: '520px', maxWidth: '520px' }, + content: ( + + Once you're confident in your responses, verify up to 1,000 assets at a time for this form with a + click of a button. 
+ + ), + }, +]; diff --git a/datahub-web-react/src/app/onboarding/config/HomePageOnboardingConfig.tsx b/datahub-web-react/src/app/onboarding/config/HomePageOnboardingConfig.tsx index 8b361db5ab344c..65c703db38fe9b 100644 --- a/datahub-web-react/src/app/onboarding/config/HomePageOnboardingConfig.tsx +++ b/datahub-web-react/src/app/onboarding/config/HomePageOnboardingConfig.tsx @@ -2,6 +2,7 @@ import React from 'react'; import { Image, Typography } from 'antd'; import { OnboardingStep } from '../OnboardingStep'; import { ANTD_GRAY } from '../../entity/shared/constants'; +import dataHubFlowDiagram from '../../../images/datahub-flow-diagram-light.png'; export const GLOBAL_WELCOME_TO_DATAHUB_ID = 'global-welcome-to-datahub'; export const HOME_PAGE_INGESTION_ID = 'home-page-ingestion'; @@ -20,7 +21,7 @@ export const HomePageOnboardingConfig: OnboardingStep[] = [ height={184} width={500} style={{ marginLeft: '50px' }} - src="https://datahubproject.io/assets/ideal-img/datahub-flow-diagram-light.5ce651b.1600.png" + src={dataHubFlowDiagram} /> Welcome to DataHub! 👋 diff --git a/datahub-web-react/src/app/onboarding/useToggleEducationStepIdsAllowList.tsx b/datahub-web-react/src/app/onboarding/useToggleEducationStepIdsAllowList.tsx new file mode 100644 index 00000000000000..acf85d0a87b109 --- /dev/null +++ b/datahub-web-react/src/app/onboarding/useToggleEducationStepIdsAllowList.tsx @@ -0,0 +1,18 @@ +import { useContext, useEffect } from 'react'; +import { EducationStepsContext } from '../../providers/EducationStepsContext'; +import { useUpdateEducationStepsAllowList } from './useUpdateEducationStepsAllowList'; + +export function useToggleEducationStepIdsAllowList(condition: boolean, id: string) { + const { educationStepIdsAllowlist } = useContext(EducationStepsContext); + const { addIdToAllowList, removeIdFromAllowList } = useUpdateEducationStepsAllowList(); + + useEffect(() => { + const allowlistIncludesStepId = educationStepIdsAllowlist.has(id); + + if (condition && !allowlistIncludesStepId) { + addIdToAllowList(id); + } else if (!condition && allowlistIncludesStepId) { + removeIdFromAllowList(id); + } + }, [condition, id, addIdToAllowList, removeIdFromAllowList, educationStepIdsAllowlist]); +} diff --git a/datahub-web-react/src/app/onboarding/useUpdateEducationStepIdsAllowlist.tsx b/datahub-web-react/src/app/onboarding/useUpdateEducationStepIdsAllowlist.tsx deleted file mode 100644 index 4eb1f6c02b6b81..00000000000000 --- a/datahub-web-react/src/app/onboarding/useUpdateEducationStepIdsAllowlist.tsx +++ /dev/null @@ -1,20 +0,0 @@ -import { useContext, useEffect } from 'react'; -import { EducationStepsContext } from '../../providers/EducationStepsContext'; - -export function useUpdateEducationStepIdsAllowlist(condition: boolean, id: string) { - const { educationStepIdsAllowlist, setEducationStepIdsAllowlist } = useContext(EducationStepsContext); - - useEffect(() => { - const allowlistIncludesStepId = educationStepIdsAllowlist.has(id); - - if (condition && !allowlistIncludesStepId) { - const newStepdIdsAllowlist: Set = new Set(educationStepIdsAllowlist); - newStepdIdsAllowlist.add(id); - setEducationStepIdsAllowlist(newStepdIdsAllowlist); - } else if (!condition && allowlistIncludesStepId) { - const newStepdIdsAllowlist: Set = new Set(educationStepIdsAllowlist); - newStepdIdsAllowlist.delete(id); - setEducationStepIdsAllowlist(newStepdIdsAllowlist); - } - }, [condition, id, educationStepIdsAllowlist, setEducationStepIdsAllowlist]); -} diff --git 
a/datahub-web-react/src/app/onboarding/useUpdateEducationStepsAllowList.tsx b/datahub-web-react/src/app/onboarding/useUpdateEducationStepsAllowList.tsx new file mode 100644 index 00000000000000..86b9000205b8b5 --- /dev/null +++ b/datahub-web-react/src/app/onboarding/useUpdateEducationStepsAllowList.tsx @@ -0,0 +1,22 @@ +import { useContext } from 'react'; +import { EducationStepsContext } from '../../providers/EducationStepsContext'; + +// function use + +export function useUpdateEducationStepsAllowList() { + const { educationStepIdsAllowlist, setEducationStepIdsAllowlist } = useContext(EducationStepsContext); + + function removeIdFromAllowList(id: string) { + const newStepdIdsAllowlist: Set = new Set(educationStepIdsAllowlist); + newStepdIdsAllowlist.delete(id); + setEducationStepIdsAllowlist(newStepdIdsAllowlist); + } + + function addIdToAllowList(id: string) { + const newStepdIdsAllowlist: Set = new Set(educationStepIdsAllowlist); + newStepdIdsAllowlist.add(id); + setEducationStepIdsAllowlist(newStepdIdsAllowlist); + } + + return { removeIdFromAllowList, addIdToAllowList }; +} diff --git a/datahub-web-react/src/app/permissions/policy/ManagePolicies.tsx b/datahub-web-react/src/app/permissions/policy/ManagePolicies.tsx index 72c22f3bddc2cd..5765babcb575ef 100644 --- a/datahub-web-react/src/app/permissions/policy/ManagePolicies.tsx +++ b/datahub-web-react/src/app/permissions/policy/ManagePolicies.tsx @@ -370,6 +370,7 @@ export const ManagePolicies = () => { {showPolicyBuilderModal && ( ; setPolicy: (policy: Omit) => void; visible: boolean; + focusPolicyUrn: string | undefined; onClose: () => void; onSave: (savePolicy: Omit) => void; }; @@ -39,9 +40,11 @@ const NextButtonContainer = styled.div` * Component used for constructing new policies. The purpose of this flow is to populate or edit a Policy * object through a sequence of steps. */ -export default function PolicyBuilderModal({ policy, setPolicy, visible, onClose, onSave }: Props) { +export default function PolicyBuilderModal({ policy, setPolicy, visible, onClose, onSave, focusPolicyUrn }: Props) { // Step control-flow. 
const [activeStepIndex, setActiveStepIndex] = useState(0); + const [selectedTags, setSelectedTags] = useState([]); + const [isEditState,setEditState] = useState(true) // Go to next step const next = () => { @@ -90,12 +93,17 @@ export default function PolicyBuilderModal({ policy, setPolicy, visible, onClose title: 'Configure Privileges', content: ( { setPolicy({ ...policy, resources }); }} + setSelectedTags={setSelectedTags} + selectedTags={selectedTags} + setEditState={setEditState} + isEditState={isEditState} privileges={policy.privileges} setPrivileges={(privileges: string[]) => setPolicy({ ...policy, privileges })} /> diff --git a/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx b/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx index ac73a1f5ece7ce..7a0de67f414192 100644 --- a/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx +++ b/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx @@ -1,6 +1,6 @@ -import React, { useMemo, useState } from 'react'; +import React, { useEffect, useMemo, useRef, useState } from 'react'; import { Link } from 'react-router-dom'; -import { Form, Select, Tag, Tooltip, Typography } from 'antd'; +import { Form, Select, Tag, Tooltip, Typography, Tag as CustomTag } from 'antd'; import styled from 'styled-components/macro'; import { useEntityRegistry } from '../../useEntityRegistry'; @@ -9,13 +9,14 @@ import { useGetSearchResultsForMultipleLazyQuery, useGetSearchResultsLazyQuery, } from '../../../graphql/search.generated'; -import { ResourceFilter, PolicyType, EntityType, Domain } from '../../../types.generated'; +import { ResourceFilter, PolicyType, EntityType, Domain, Entity } from '../../../types.generated'; import { convertLegacyResourceFilter, createCriterionValue, createCriterionValueWithEntity, EMPTY_POLICY, getFieldValues, + getFieldValuesOfTags, mapResourceTypeToDisplayName, mapResourceTypeToEntityType, mapResourceTypeToPrivileges, @@ -24,20 +25,28 @@ import { import DomainNavigator from '../../domain/nestedDomains/domainNavigator/DomainNavigator'; import { BrowserWrapper } from '../../shared/tags/AddTagsTermsModal'; import ClickOutside from '../../shared/ClickOutside'; +import { TagTermLabel } from '../../shared/tags/TagTermLabel'; +import { ENTER_KEY_CODE } from '../../shared/constants'; +import { useGetRecommendations } from '../../shared/recommendation'; type Props = { policyType: PolicyType; resources?: ResourceFilter; setResources: (resources: ResourceFilter) => void; + selectedTags?: any[]; + setSelectedTags: (data: any) => void; + setEditState: (data: boolean) => void; + isEditState: boolean; privileges: Array; setPrivileges: (newPrivs: Array) => void; + focusPolicyUrn: string | undefined; }; const SearchResultContainer = styled.div` display: flex; justify-content: space-between; align-items: center; - padding: 12px; + padding: 4px; `; const PrivilegesForm = styled(Form)` @@ -46,6 +55,21 @@ const PrivilegesForm = styled(Form)` margin-bottom: 40px; `; +const TagSelect = styled(Select)` + width: 480px; +`; + +const StyleTag = styled(CustomTag)` + margin: 2px; + display: flex; + justify-content: start; + align-items: center; + white-space: nowrap; + opacity: 1; + color: #434343; + line-height: 16px; +`; + /** * Component used to construct the "privileges" and "resources" portion of a DataHub * access Policy. 
@@ -56,10 +80,21 @@ export default function PolicyPrivilegeForm({ setResources, privileges, setPrivileges, + setSelectedTags, + selectedTags, + setEditState, + isEditState, + focusPolicyUrn, }: Props) { const entityRegistry = useEntityRegistry(); const [domainInputValue, setDomainInputValue] = useState(''); const [isFocusedOnInput, setIsFocusedOnInput] = useState(false); + const [inputValue, setInputValue] = useState(''); + const [tagTermSearch, { data: tagTermSearchData }] = useGetSearchResultsLazyQuery(); + const [recommendedData] = useGetRecommendations([EntityType.Tag]); + const tagSearchResults = tagTermSearchData?.search?.searchResults?.map((searchResult) => searchResult.entity) || []; + + const inputEl = useRef(null); // Configuration used for displaying options const { @@ -295,6 +330,131 @@ export default function PolicyPrivilegeForm({ setDomainInputValue(''); } + function handleBlurTag() { + setInputValue(''); + } + + const renderSearchResultTags = (entity: Entity) => { + const displayName = + entity.type === EntityType.Tag ? (entity as any).name : entityRegistry.getDisplayName(entity.type, entity); + const tagOrTermComponent = ; + return ( + + {tagOrTermComponent} + + ); + }; + const tags = getFieldValues(resources.filter, 'TAG') || []; + const newTag = getFieldValues(resources.filter, 'TAG').map((criterionValue) => { + if (criterionValue?.value) { + return criterionValue?.value; + } + return criterionValue; + }); + + const editTags = getFieldValuesOfTags(resources.filter, 'TAG').map((criterionValue) => { + if (criterionValue?.value) { + return criterionValue?.entity; + } + return criterionValue; + }); + const tagResult = !inputValue || inputValue.length === 0 ? recommendedData : tagSearchResults; + useEffect(() => { + if (focusPolicyUrn && isEditState && setEditState && editTags && newTag) { + setEditState(false); + const filter = resources.filter || { + criteria: [], + }; + setSelectedTags(editTags); + setResources({ + ...resources, + filter: setFieldValues(filter, 'TAG', [...(newTag as any)]), + }); + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [focusPolicyUrn, isEditState, setEditState, editTags, newTag]); + + const onSelectValue = (urn: string) => { + const filter = resources.filter || { + criteria: [], + }; + const selectedTagOption = tagResult?.find((tag) => tag.urn === urn); + + setResources({ + ...resources, + filter: setFieldValues(filter, 'TAG', [...tags, urn as any]), + }); + setSelectedTags([...(selectedTags as any), selectedTagOption]); + if (inputEl && inputEl.current) { + (inputEl.current as any).blur(); + } + }; + + // When a Tag search result is deselected, remove the Tags + const onDeselectValue = (urn: string) => { + const filter = resources.filter || { + criteria: [], + }; + setInputValue(''); + setSelectedTags(selectedTags?.filter((term) => term.urn !== urn)); + + setResources({ + ...resources, + filter: setFieldValues( + filter, + 'TAG', + tags?.filter((criterionValue) => (criterionValue as any) !== urn), + ), + }); + }; + + const type = EntityType.Tag; + const handleSearch = (text: string) => { + if (text.length > 0) { + tagTermSearch({ + variables: { + input: { + type, + query: text, + start: 0, + count: 10, + }, + }, + }); + } + }; + + const tagSearchOptions = tagResult?.map((result) => { + return renderSearchResultTags(result); + }); + + function clearInput() { + setInputValue(''); + setTimeout(() => setIsFocusedOnInput(true), 0); // call after click outside + } + + const tagRender = (props) => { + // eslint-disable-next-line 
react/prop-types + const { closable, onClose, value } = props; + const onPreventMouseDown = (event) => { + event.preventDefault(); + event.stopPropagation(); + }; + + const selectedItem = selectedTags?.find((term) => term?.urn === value); + return ( + + {selectedItem?.name} + + ); + }; + + function handleKeyDown(event) { + if (event.keyCode === ENTER_KEY_CODE) { + (inputEl.current as any).blur(); + } + } + return ( {showResourceFilterInput && ( @@ -362,6 +522,38 @@ export default function PolicyPrivilegeForm({ )} + {showResourceFilterInput && ( + Select Tags}> + + The policy will apply to all entities containing all of the chosen tags. If no tags are + selected, the policy will not account for tags. + + onSelectValue(asset)} + onDeselect={(asset: any) => onDeselectValue(asset)} + onSearch={(value: string) => { + // eslint-disable-next-line react/prop-types + handleSearch(value.trim()); + // eslint-disable-next-line react/prop-types + setInputValue(value.trim()); + }} + tagRender={tagRender} + value={tags} + onClear={clearInput} + onBlur={handleBlurTag} + onInputKeyDown={handleKeyDown} + > + {tagSearchOptions} + + + )} {showResourceFilterInput && ( Select Domains}> diff --git a/datahub-web-react/src/app/permissions/policy/policyUtils.ts b/datahub-web-react/src/app/permissions/policy/policyUtils.ts index 27aa8fcd351e9b..c7ec171bc2c290 100644 --- a/datahub-web-react/src/app/permissions/policy/policyUtils.ts +++ b/datahub-web-react/src/app/permissions/policy/policyUtils.ts @@ -118,6 +118,10 @@ export const getFieldValues = (filter: Maybe | undefined, res return filter?.criteria?.find((criterion) => criterion.field === resourceFieldType)?.values || []; }; +export const getFieldValuesOfTags = (filter: Maybe | undefined, resourceFieldType: string) => { + return filter?.criteria?.find((criterion) => criterion.field === resourceFieldType)?.values || []; +}; + export const setFieldValues = ( filter: PolicyMatchFilter, resourceFieldType: string, diff --git a/datahub-web-react/src/app/permissions/policy/usePolicy.ts b/datahub-web-react/src/app/permissions/policy/usePolicy.ts index 6f359805e42db1..d04ea25d20b239 100644 --- a/datahub-web-react/src/app/permissions/policy/usePolicy.ts +++ b/datahub-web-react/src/app/permissions/policy/usePolicy.ts @@ -44,19 +44,22 @@ export function usePolicy( const [deletePolicy, { error: deletePolicyError }] = useDeletePolicyMutation(); - const toFilterInput = (filter: PolicyMatchFilter): PolicyMatchFilterInput => { + const toFilterInput = (filter: PolicyMatchFilter,state?:string | undefined): PolicyMatchFilterInput => { + console.log({state}) return { criteria: filter.criteria?.map((criterion): PolicyMatchCriterionInput => { return { field: criterion.field, - values: criterion.values.map((criterionValue) => criterionValue.value), + values: criterion.values.map((criterionValue) => + criterion.field === 'TAG' && state !=='TOGGLE' ? 
(criterionValue as any) : criterionValue.value, + ), condition: criterion.condition, }; }), }; }; - const toPolicyInput = (policy: Omit): PolicyUpdateInput => { + const toPolicyInput = (policy: Omit,state?:string | undefined): PolicyUpdateInput => { let policyInput: PolicyUpdateInput = { type: policy.type, name: policy.name, @@ -79,7 +82,7 @@ export function usePolicy( allResources: policy.resources.allResources, }; if (policy.resources.filter) { - resourceFilter = { ...resourceFilter, filter: toFilterInput(policy.resources.filter) }; + resourceFilter = { ...resourceFilter, filter: toFilterInput(policy.resources.filter,state) }; } // Add the resource filters. policyInput = { @@ -151,7 +154,7 @@ export function usePolicy( updatePolicy({ variables: { urn: policy?.urn as string, // There must be a focus policy urn. - input: toPolicyInput(newPolicy), + input: toPolicyInput(newPolicy,'TOGGLE'), }, }).then(()=>{ const updatePolicies= { @@ -178,6 +181,7 @@ export function usePolicy( __typename: 'ListPoliciesResult', urn: focusPolicyUrn, ...savePolicy, + resources: null, }; analytics.event({ type: EventType.UpdatePolicyEvent, diff --git a/datahub-web-react/src/app/search/SearchHeader.tsx b/datahub-web-react/src/app/search/SearchHeader.tsx index 76e78a11d3e9d9..0b6bf5488a3013 100644 --- a/datahub-web-react/src/app/search/SearchHeader.tsx +++ b/datahub-web-react/src/app/search/SearchHeader.tsx @@ -1,7 +1,6 @@ import React, { useState } from 'react'; -import { Image, Layout } from 'antd'; -import { Link } from 'react-router-dom'; -import styled, { useTheme } from 'styled-components'; +import { Layout } from 'antd'; +import styled from 'styled-components'; import { SearchBar } from './SearchBar'; import { ManageAccount } from '../shared/ManageAccount'; @@ -10,8 +9,8 @@ import EntityRegistry from '../entity/EntityRegistry'; import { ANTD_GRAY } from '../entity/shared/constants'; import { HeaderLinks } from '../shared/admin/HeaderLinks'; import { useAppConfig, useIsShowAcrylInfoEnabled } from '../useAppConfig'; -import { DEFAULT_APP_CONFIG } from '../../appConfigContext'; import DemoButton from '../entity/shared/components/styled/DemoButton'; +import AppLogoLink from '../shared/AppLogoLink'; const { Header } = Layout; @@ -29,13 +28,6 @@ const styles = { }, }; -const LogoImage = styled(Image)` - display: inline-block; - height: 32px; - width: auto; - margin-top: 2px; -`; - const LogoSearchContainer = styled.div` display: flex; flex: 1; @@ -77,7 +69,6 @@ export const SearchHeader = ({ entityRegistry, }: Props) => { const [isSearchBarFocused, setIsSearchBarFocused] = useState(false); - const themeConfig = useTheme(); const showAcrylInfo = useIsShowAcrylInfoEnabled(); const appConfig = useAppConfig(); const viewsEnabled = appConfig.config?.viewsConfig?.enabled || false; @@ -85,16 +76,7 @@ export const SearchHeader = ({ return (
- - - + { }, [isSelectMode]); // Render new search filters v2 onboarding step if the feature flag is on - useUpdateEducationStepIdsAllowlist(showSearchFiltersV2, SEARCH_RESULTS_FILTERS_V2_INTRO); + useToggleEducationStepIdsAllowList(showSearchFiltersV2, SEARCH_RESULTS_FILTERS_V2_INTRO); // Render new browse v2 onboarding step if the feature flag is on - useUpdateEducationStepIdsAllowlist(showBrowseV2, SEARCH_RESULTS_BROWSE_SIDEBAR_ID); + useToggleEducationStepIdsAllowList(showBrowseV2, SEARCH_RESULTS_BROWSE_SIDEBAR_ID); return ( <> diff --git a/datahub-web-react/src/app/shared/AppLogoLink.tsx b/datahub-web-react/src/app/shared/AppLogoLink.tsx new file mode 100644 index 00000000000000..7d647194b07c4d --- /dev/null +++ b/datahub-web-react/src/app/shared/AppLogoLink.tsx @@ -0,0 +1,31 @@ +import { Image } from 'antd'; +import React from 'react'; +import { Link } from 'react-router-dom'; +import styled, { useTheme } from 'styled-components'; +import { useAppConfig } from '../useAppConfig'; +import { DEFAULT_APP_CONFIG } from '../../appConfigContext'; + +const LogoImage = styled(Image)` + display: inline-block; + height: 32px; + width: auto; + margin-top: 2px; +`; + +export default function AppLogoLink() { + const appConfig = useAppConfig(); + const themeConfig = useTheme(); + + return ( + + + + ); +} diff --git a/datahub-web-react/src/app/shared/DeferredRenderComponent.tsx b/datahub-web-react/src/app/shared/DeferredRenderComponent.tsx new file mode 100644 index 00000000000000..7b5bdb949fc3db --- /dev/null +++ b/datahub-web-react/src/app/shared/DeferredRenderComponent.tsx @@ -0,0 +1,23 @@ +import React, { useEffect, useState } from 'react'; + +interface Props { + wrappedComponent: React.ReactNode; + loadingComponent?: React.ReactNode; + delay?: number; +} + +export function DeferredRenderComponent({ wrappedComponent, loadingComponent, delay = 250 }: Props) { + const [shouldRender, setShouldRender] = useState(false); + + useEffect(() => { + setTimeout(() => { + setShouldRender(true); + }, delay); + }, [delay]); + + if (shouldRender) { + return <>{wrappedComponent}; + } + + return loadingComponent ? <>{loadingComponent} : null; +} diff --git a/datahub-web-react/src/app/shared/Loading.tsx b/datahub-web-react/src/app/shared/Loading.tsx new file mode 100644 index 00000000000000..d03fb0a585dc1d --- /dev/null +++ b/datahub-web-react/src/app/shared/Loading.tsx @@ -0,0 +1,27 @@ +import { LoadingOutlined } from '@ant-design/icons'; +import React from 'react'; +import styled from 'styled-components'; + +const LoadingWrapper = styled.div` + display: flex; + justify-content: center; + margin-top: 25%; + width: 100%; +`; + +const StyledLoading = styled(LoadingOutlined)<{ $height: number }>` + font-size: ${(props) => props.$height}px; + height: ${(props) => props.$height}px; +`; + +interface Props { + height?: number; +} + +export default function Loading({ height = 32 }: Props) { + return ( + + + + ); +} diff --git a/datahub-web-react/src/app/shared/VirtualScrollChild.tsx b/datahub-web-react/src/app/shared/VirtualScrollChild.tsx new file mode 100644 index 00000000000000..4cd539e0943062 --- /dev/null +++ b/datahub-web-react/src/app/shared/VirtualScrollChild.tsx @@ -0,0 +1,24 @@ +import React from 'react'; +import styled from 'styled-components'; +import { useInView } from 'react-intersection-observer'; + +const VirtualChildWrapper = styled.div<{ $inView: boolean; $height: number }>` + height: ${(props) => (props.$inView ? 
'auto' : `${props.$height}px`)}; + ${(props) => !props.$inView && 'visibility: hidden;'} +`; + +interface VirtualProps { + height: number; + children: React.ReactNode; + triggerOnce?: boolean; +} + +export default function VirtualScrollChild({ height, children, triggerOnce }: VirtualProps) { + const [ref, inView] = useInView({ triggerOnce }); + + return ( + + {inView ? children : null} + + ); +} diff --git a/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx b/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx index 4a7a4938ea9709..7d53afda2aa3a6 100644 --- a/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx +++ b/datahub-web-react/src/app/shared/admin/HeaderLinks.tsx @@ -13,7 +13,7 @@ import { Button, Dropdown, Menu, Tooltip } from 'antd'; import { useAppConfig } from '../../useAppConfig'; import { ANTD_GRAY } from '../../entity/shared/constants'; import { HOME_PAGE_INGESTION_ID } from '../../onboarding/config/HomePageOnboardingConfig'; -import { useUpdateEducationStepIdsAllowlist } from '../../onboarding/useUpdateEducationStepIdsAllowlist'; +import { useToggleEducationStepIdsAllowList } from '../../onboarding/useToggleEducationStepIdsAllowList'; import { useUserContext } from '../../context/useUserContext'; import DomainIcon from '../../domain/DomainIcon'; @@ -74,7 +74,7 @@ export function HeaderLinks(props: Props) { const showIngestion = isIngestionEnabled && me && me.platformPrivileges?.manageIngestion && me.platformPrivileges?.manageSecrets; - useUpdateEducationStepIdsAllowlist(!!showIngestion, HOME_PAGE_INGESTION_ID); + useToggleEducationStepIdsAllowList(!!showIngestion, HOME_PAGE_INGESTION_ID); return ( diff --git a/datahub-web-react/src/app/shared/components.tsx b/datahub-web-react/src/app/shared/components.tsx index 68d2fb52cfdba0..3977a9c36b4026 100644 --- a/datahub-web-react/src/app/shared/components.tsx +++ b/datahub-web-react/src/app/shared/components.tsx @@ -47,3 +47,9 @@ export const BodyGridExpander = styled.div<{ isOpen: boolean }>` export const BodyContainer = styled.div` min-height: 0; `; + +export const WhiteButton = styled(Button)` + background-color: white; + color: ${(props) => props.theme.styles['primary-color']}; + text-shadow: none; +`; diff --git a/datahub-web-react/src/app/shared/useHasComponentRendered.ts b/datahub-web-react/src/app/shared/useHasComponentRendered.ts new file mode 100644 index 00000000000000..ff9b6987ecf11d --- /dev/null +++ b/datahub-web-react/src/app/shared/useHasComponentRendered.ts @@ -0,0 +1,18 @@ +import { useEffect, useState } from 'react'; + +/* + * Returns whether a desired component is rendered or not. + * By setting a time out we place the state update at the + * end of the queue after this component has rendered. 
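+ *
+ * Illustrative usage (a sketch, not part of this change; HeavyContent is a hypothetical consumer,
+ * Loading is the spinner component added alongside this hook):
+ *   const { hasRendered } = useHasComponentRendered();
+ *   return hasRendered ? <HeavyContent /> : <Loading />;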
+ */ +export default function useHasComponentRendered() { + const [hasRendered, setHasRendered] = useState(false); + + useEffect(() => { + setTimeout(() => { + setHasRendered(true); + }, 0); + }, []); + + return { hasRendered }; +} diff --git a/datahub-web-react/src/graphql/dataset.graphql b/datahub-web-react/src/graphql/dataset.graphql index 57c74e0c65d69a..e25d4fe6c86355 100644 --- a/datahub-web-react/src/graphql/dataset.graphql +++ b/datahub-web-react/src/graphql/dataset.graphql @@ -168,6 +168,27 @@ fragment nonSiblingDatasetFields on Dataset { canEditEmbed canEditQueries } + forms { + completedForms { + ...formAssociationFields + } + incompleteForms { + ...formAssociationFields + } + verifications { + form { + urn + } + lastModified { + time + actor { + urn + type + ...entityDisplayNameFields + } + } + } + } } query getRecentQueries($urn: String!) { diff --git a/datahub-web-react/src/graphql/form.graphql b/datahub-web-react/src/graphql/form.graphql new file mode 100644 index 00000000000000..3cd09697f79bd9 --- /dev/null +++ b/datahub-web-react/src/graphql/form.graphql @@ -0,0 +1,7 @@ +mutation submitFormPrompt($urn: String!, $input: SubmitFormPromptInput!) { + submitFormPrompt(urn: $urn, input: $input) +} + +mutation verifyForm($input: VerifyFormInput!) { + verifyForm(input: $input) +} diff --git a/datahub-web-react/src/graphql/fragments.graphql b/datahub-web-react/src/graphql/fragments.graphql index e901c9af554ad1..dc534b315aadfa 100644 --- a/datahub-web-react/src/graphql/fragments.graphql +++ b/datahub-web-react/src/graphql/fragments.graphql @@ -1249,3 +1249,67 @@ fragment autoRenderAspectFields on RawAspect { key } } + +fragment formAssociationFields on FormAssociation { + associatedUrn + incompletePrompts { + ...formPromptAssociationFields + } + completedPrompts { + ...formPromptAssociationFields + } + form { + urn + type + info { + name + description + type + prompts { + id + formUrn + title + description + type + required + structuredPropertyParams { + structuredProperty { + ...structuredPropertyFields + } + } + } + actors { + owners + isAssignedToMe + } + } + ownership { + ...ownershipFields + } + } +} + +fragment formPromptAssociationFields on FormPromptAssociation { + id + lastModified { + time + actor { + urn + type + ...entityDisplayNameFields + } + } + fieldAssociations { + completedFieldPrompts { + fieldPath + lastModified { + time + actor { + urn + type + ...entityDisplayNameFields + } + } + } + } +} diff --git a/datahub-web-react/src/images/background_dots.svg b/datahub-web-react/src/images/background_dots.svg new file mode 100644 index 00000000000000..f4b3b917808b4d --- /dev/null +++ b/datahub-web-react/src/images/background_dots.svg @@ -0,0 +1,330 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/datahub-web-react/src/images/bulk-form-type-comparison.svg b/datahub-web-react/src/images/bulk-form-type-comparison.svg new file mode 100644 index 
00000000000000..ac94f42394e361 --- /dev/null +++ b/datahub-web-react/src/images/bulk-form-type-comparison.svg @@ -0,0 +1,68 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/datahub-web-react/src/images/datahub-flow-diagram-light.png b/datahub-web-react/src/images/datahub-flow-diagram-light.png new file mode 100644 index 00000000000000..cf110f7fc48761 Binary files /dev/null and b/datahub-web-react/src/images/datahub-flow-diagram-light.png differ diff --git a/datahub-web-react/src/images/greenCircleTwoTone.svg b/datahub-web-react/src/images/greenCircleTwoTone.svg new file mode 100644 index 00000000000000..ca9fd1ae742412 --- /dev/null +++ b/datahub-web-react/src/images/greenCircleTwoTone.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/datahub-web-react/src/images/verificationBlue.svg b/datahub-web-react/src/images/verificationBlue.svg new file mode 100644 index 00000000000000..dafdd60fb156d8 --- /dev/null +++ b/datahub-web-react/src/images/verificationBlue.svg @@ -0,0 +1,4 @@ + + + + diff --git a/datahub-web-react/src/images/verificationGreen.svg b/datahub-web-react/src/images/verificationGreen.svg new file mode 100644 index 00000000000000..d082db5dfc456b --- /dev/null +++ b/datahub-web-react/src/images/verificationGreen.svg @@ -0,0 +1,4 @@ + + + + diff --git a/datahub-web-react/src/images/verificationPurple.svg b/datahub-web-react/src/images/verificationPurple.svg new file mode 100644 index 00000000000000..a9549195779f06 --- /dev/null +++ b/datahub-web-react/src/images/verificationPurple.svg @@ -0,0 +1,4 @@ + + + + diff --git a/datahub-web-react/src/images/verificationPurpleWhite.svg b/datahub-web-react/src/images/verificationPurpleWhite.svg new file mode 100644 index 00000000000000..c57d8b3105ebed --- /dev/null +++ b/datahub-web-react/src/images/verificationPurpleWhite.svg @@ -0,0 +1,4 @@ + + + + diff --git a/datahub-web-react/src/images/verificationWarningGray.svg b/datahub-web-react/src/images/verificationWarningGray.svg new file mode 100644 index 00000000000000..725f448894532d --- /dev/null +++ b/datahub-web-react/src/images/verificationWarningGray.svg @@ -0,0 +1,4 @@ + + + + diff --git a/datahub-web-react/src/images/verticalogo copy.png b/datahub-web-react/src/images/verticalogo copy.png new file mode 100644 index 00000000000000..5da38f4e67c7d4 Binary files /dev/null and b/datahub-web-react/src/images/verticalogo copy.png differ diff --git a/datahub-web-react/yarn.lock b/datahub-web-react/yarn.lock index 37801e42b3eab9..c57454107ee0e7 100644 --- a/datahub-web-react/yarn.lock +++ b/datahub-web-react/yarn.lock @@ -9165,6 +9165,11 @@ react-icons@4.3.1: resolved "https://registry.yarnpkg.com/react-icons/-/react-icons-4.3.1.tgz#2fa92aebbbc71f43d2db2ed1aed07361124e91ca" integrity sha512-cB10MXLTs3gVuXimblAdI71jrJx8njrJZmNMEMC+sQu5B/BIOmlsAjskdqpn81y8UBVEGuHODd7/ci5DvoSzTQ== +react-intersection-observer@^9.5.3: + version "9.5.3" + resolved "https://registry.yarnpkg.com/react-intersection-observer/-/react-intersection-observer-9.5.3.tgz#f47a31ed3a0359cbbfdb91a53d7470ac2ab7b3c7" + integrity sha512-NJzagSdUPS5rPhaLsHXYeJbsvdpbJwL6yCHtMk91hc0ufQ2BnXis+0QQ9NBh6n9n+Q3OyjR6OQLShYbaNBkThQ== + react-is@^16.12.0, react-is@^16.13.1, react-is@^16.6.0, react-is@^16.7.0, react-is@^16.8.1: version "16.13.1" resolved "https://registry.yarnpkg.com/react-is/-/react-is-16.13.1.tgz#789729a4dc36de2999dc156dd6c1d9c18cea56a4" diff --git a/docker/datahub-ingestion-base/smoke.Dockerfile 
b/docker/datahub-ingestion-base/smoke.Dockerfile index 5c6738720e05ec..34654faaad729d 100644 --- a/docker/datahub-ingestion-base/smoke.Dockerfile +++ b/docker/datahub-ingestion-base/smoke.Dockerfile @@ -1,6 +1,6 @@ FROM acryldata/datahub-ingestion-base as base -RUN apt-get update && apt-get install -y \ +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ sudo \ python3-dev \ libgtk2.0-0 \ @@ -13,14 +13,16 @@ RUN apt-get update && apt-get install -y \ libasound2 \ libxtst6 \ xauth \ - xvfb - -RUN DEBIAN_FRONTEND=noninteractive apt-get install -y openjdk-17-jdk + xvfb \ + openjdk-17-jdk && \ + rm -rf /var/lib/apt/lists/* /var/cache/apk/* COPY . /datahub-src ARG RELEASE_VERSION -RUN cd /datahub-src/metadata-ingestion && \ - sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/datahub/__init__.py && \ - cat src/datahub/__init__.py && \ - cd ../ && \ - ./gradlew :metadata-ingestion:installAll +RUN cd /datahub-src && \ + sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" metadata-ingestion/src/datahub/__init__.py && \ + sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py && \ + cat metadata-ingestion/src/datahub/__init__.py && \ + ./gradlew :metadata-ingestion:codegen && \ + pip install file:metadata-ingestion-modules/airflow-plugin#egg=acryl-datahub-airflow-plugin file:metadata-ingestion#egg=acryl-datahub + diff --git a/docker/profiles/docker-compose.prerequisites.yml b/docker/profiles/docker-compose.prerequisites.yml index b587f811128fe5..47df17f3c5e46d 100644 --- a/docker/profiles/docker-compose.prerequisites.yml +++ b/docker/profiles/docker-compose.prerequisites.yml @@ -117,10 +117,10 @@ services: restart: on-failure healthcheck: test: mysqladmin ping -h mysql -u $$MYSQL_USER --password=$$MYSQL_PASSWORD - start_period: 10s - interval: 1s - retries: 3 - timeout: 5s + start_period: 20s + interval: 2s + timeout: 10s + retries: 5 volumes: - ./mysql/init.sql:/docker-entrypoint-initdb.d/init.sql - mysqldata:/var/lib/mysql diff --git a/docs-website/src/pages/champions/index.js b/docs-website/src/pages/champions/index.js index ca161354ecbdcd..7109edde6a8805 100644 --- a/docs-website/src/pages/champions/index.js +++ b/docs-website/src/pages/champions/index.js @@ -166,7 +166,7 @@ const championSections = [ bio: ( <>

- Reliably provided dirxection to Community Members across all support channels in Slack. + Reliably provides direction to community members and has submitted 5 pull requests, including improvements to Athena ingestion (support for nested schemas) and the REST emitter.

), diff --git a/docs/cli.md b/docs/cli.md index 927270b42259d4..3f67f1de6204d0 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -180,7 +180,7 @@ failure_log: ### init The init command is used to tell `datahub` about where your DataHub instance is located. The CLI will point to localhost DataHub by default. -Running `datahub init` will allow you to customize the datahub instance you are communicating with. +Running `datahub init` will allow you to customize the datahub instance you are communicating with. It has an optional `--use-password` option which allows to initialise the config using username, password. We foresee this mainly being used by admins as majority of organisations will be using SSO and there won't be any passwords to use. **_Note_**: Provide your GMS instance's host when the prompt asks you for the DataHub host. diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java index 806fd47c721ec8..3d803d238b4f92 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java @@ -3,6 +3,7 @@ import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; import com.linkedin.mxe.SystemMetadata; import com.linkedin.util.Pair; +import java.util.Collection; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -17,14 +18,14 @@ * SystemMetadata} and record/message created time */ public interface AspectsBatch { - List getItems(); + Collection getItems(); /** * Returns MCP items. Can be patch, upsert, etc. * * @return batch items */ - default List getMCPItems() { + default Collection getMCPItems() { return getItems().stream() .filter(item -> item instanceof MCPBatchItem) .map(item -> (MCPBatchItem) item) diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/BatchItem.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/BatchItem.java index a4c0624150532c..60033cd6919d60 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/BatchItem.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/BatchItem.java @@ -2,11 +2,13 @@ import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.mxe.SystemMetadata; import javax.annotation.Nonnull; +import javax.annotation.Nullable; public interface BatchItem { /** @@ -63,4 +65,12 @@ default String getAspectName() { */ @Nonnull AspectSpec getAspectSpec(); + + /** + * The aspect's record template. 
Null when patch + * + * @return record template if it exists + */ + @Nullable + RecordTemplate getRecordTemplate(); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCLBatchItem.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCLBatchItem.java index 30e882705da453..17a910b125a34f 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCLBatchItem.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCLBatchItem.java @@ -26,7 +26,7 @@ default String getAspectName() { if (getMetadataChangeLog().getAspectName() != null) { return getMetadataChangeLog().getAspectName(); } else { - return getAspect().schema().getName(); + return getRecordTemplate().schema().getName(); } } @@ -40,10 +40,7 @@ default SystemMetadata getPreviousSystemMetadata() { } @Nullable - RecordTemplate getPreviousAspect(); - - @Nonnull - RecordTemplate getAspect(); + RecordTemplate getPreviousRecordTemplate(); @Override @Nonnull diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/UpsertItem.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/UpsertItem.java index c337e4f848e5c7..c64105637dfcc6 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/UpsertItem.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/UpsertItem.java @@ -11,8 +11,6 @@ * related data stored along with the aspect */ public abstract class UpsertItem extends MCPBatchItem { - public abstract RecordTemplate getAspect(); - public abstract SystemAspect toLatestEntityAspect(); public abstract void validatePreCommit( diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/GlobalTagsPatchBuilder.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/GlobalTagsPatchBuilder.java index ff34b187f6151e..bfd10da37bb3f0 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/GlobalTagsPatchBuilder.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/builder/GlobalTagsPatchBuilder.java @@ -4,7 +4,7 @@ import static com.linkedin.metadata.Constants.GLOBAL_TAGS_ASPECT_NAME; import com.fasterxml.jackson.databind.node.ObjectNode; -import com.linkedin.common.TagUrn; +import com.linkedin.common.urn.TagUrn; import com.linkedin.metadata.aspect.patch.PatchOperationType; import javax.annotation.Nonnull; import javax.annotation.Nullable; diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index f8d51997330a9d..bbbab73fd1cf54 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -186,8 +186,6 @@ "cryptography", "msal", "acryl-datahub-classify==0.0.9", - # spacy version restricted to reduce backtracking, used by acryl-datahub-classify, - "spacy==3.4.3", } trino = { @@ -309,10 +307,8 @@ # https://github.com/elastic/elasticsearch-py/issues/1639#issuecomment-883587433 "elasticsearch": {"elasticsearch==7.13.4"}, "feast": { - "feast~=0.35.0", + "feast>=0.34.0,<1", "flask-openid>=1.3.0", - # typeguard 3.x, released on 2023-03-14, seems to cause issues with Feast. 
- "typeguard<3", }, "glue": aws_common, # hdbcli is supported officially by SAP, sqlalchemy-hana is built on top but not officially supported @@ -468,7 +464,7 @@ pytest_dep, "pytest-asyncio>=0.16.0", "pytest-cov>=2.8.1", - "pytest-docker>=1.0.1", + "pytest-docker>=1.1.0", deepdiff_dep, "requests-mock", "freezegun", diff --git a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py index 28e4a03b8f75f7..61bda90447c624 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py +++ b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py @@ -69,23 +69,9 @@ class Ownership(ConfigModel): type: str @pydantic.validator("type") - def ownership_type_must_be_mappable(cls, v: str) -> str: - _ownership_types = [ - OwnershipTypeClass.BUSINESS_OWNER, - OwnershipTypeClass.CONSUMER, - OwnershipTypeClass.DATA_STEWARD, - OwnershipTypeClass.DATAOWNER, - OwnershipTypeClass.DELEGATE, - OwnershipTypeClass.DEVELOPER, - OwnershipTypeClass.NONE, - OwnershipTypeClass.PRODUCER, - OwnershipTypeClass.STAKEHOLDER, - OwnershipTypeClass.TECHNICAL_OWNER, - ] - if v.upper() not in _ownership_types: - raise ValueError(f"Ownership type {v} not in {_ownership_types}") - - return v.upper() + def ownership_type_must_be_mappable_or_custom(cls, v: str) -> str: + _, _ = builder.validate_ownership_type(v) + return v class DataProduct(ConfigModel): @@ -155,9 +141,13 @@ def _mint_owner(self, owner: Union[str, Ownership]) -> OwnerClass: ) else: assert isinstance(owner, Ownership) + ownership_type, ownership_type_urn = builder.validate_ownership_type( + owner.type + ) return OwnerClass( owner=builder.make_user_urn(owner.id), - type=owner.type, + type=ownership_type, + typeUrn=ownership_type_urn, ) def _generate_properties_mcp( @@ -314,6 +304,8 @@ def from_datahub(cls, graph: DataHubGraph, id: str) -> "DataProduct": for o in owners.owners: if o.type == OwnershipTypeClass.TECHNICAL_OWNER: yaml_owners.append(o.owner) + elif o.type == OwnershipTypeClass.CUSTOM: + yaml_owners.append(Ownership(id=o.owner, type=str(o.typeUrn))) else: yaml_owners.append(Ownership(id=o.owner, type=str(o.type))) glossary_terms: Optional[GlossaryTermsClass] = graph.get_aspect( @@ -355,7 +347,7 @@ def _patch_ownership( if isinstance(new_owner, Ownership): new_owner_type_map[new_owner.id] = new_owner.type else: - new_owner_type_map[new_owner] = "TECHNICAL_OWNER" + new_owner_type_map[new_owner] = OwnershipTypeClass.TECHNICAL_OWNER owners_matched = set() patches_add: list = [] patches_drop: dict = {} @@ -385,7 +377,7 @@ def _patch_ownership( owners_matched.add(owner_urn) if new_owner_type_map[owner_urn] != o.type: patches_replace[i] = { - "id": o, + "id": o.id, "type": new_owner_type_map[owner_urn], } else: diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py index 8ac9a101121be6..1bb3b01e078dd9 100644 --- a/metadata-ingestion/src/datahub/cli/cli_utils.py +++ b/metadata-ingestion/src/datahub/cli/cli_utils.py @@ -9,12 +9,11 @@ import click import requests -import yaml from deprecated import deprecated -from pydantic import BaseModel, ValidationError from requests.models import Response from requests.sessions import Session +from datahub.cli import config_utils from datahub.emitter.aspect import ASPECT_MAP, TIMESERIES_ASPECT_MAP from datahub.emitter.request_helper import make_curl_command from datahub.emitter.serialization_helper import post_json_transform @@ -23,13 +22,6 @@ log = 
logging.getLogger(__name__) -DEFAULT_GMS_HOST = "http://localhost:8080" -CONDENSED_DATAHUB_CONFIG_PATH = "~/.datahubenv" -DATAHUB_CONFIG_PATH = os.path.expanduser(CONDENSED_DATAHUB_CONFIG_PATH) - -DATAHUB_ROOT_FOLDER = os.path.expanduser("~/.datahub") - -ENV_SKIP_CONFIG = "DATAHUB_SKIP_CONFIG" ENV_METADATA_HOST_URL = "DATAHUB_GMS_URL" ENV_METADATA_HOST = "DATAHUB_GMS_HOST" ENV_METADATA_PORT = "DATAHUB_GMS_PORT" @@ -45,25 +37,6 @@ # For the methods that aren't duplicates, that logic should be moved to the client. -class GmsConfig(BaseModel): - server: str - token: Optional[str] = None - - -class DatahubConfig(BaseModel): - gms: GmsConfig - - -def get_boolean_env_variable(key: str, default: bool = False) -> bool: - value = os.environ.get(key) - if value is None: - return default - elif value.lower() in ("true", "1"): - return True - else: - return False - - def set_env_variables_override_config(url: str, token: Optional[str]) -> None: """Should be used to override the config when using rest emitter""" config_override[ENV_METADATA_HOST_URL] = url @@ -71,78 +44,6 @@ def set_env_variables_override_config(url: str, token: Optional[str]) -> None: config_override[ENV_METADATA_TOKEN] = token -def persist_datahub_config(config: dict) -> None: - with open(DATAHUB_CONFIG_PATH, "w+") as outfile: - yaml.dump(config, outfile, default_flow_style=False) - return None - - -def write_gms_config( - host: str, token: Optional[str], merge_with_previous: bool = True -) -> None: - config = DatahubConfig(gms=GmsConfig(server=host, token=token)) - if merge_with_previous: - try: - previous_config = get_client_config(as_dict=True) - assert isinstance(previous_config, dict) - except Exception as e: - # ok to fail on this - previous_config = {} - log.debug( - f"Failed to retrieve config from file {DATAHUB_CONFIG_PATH}: {e}. This isn't fatal." 
- ) - config_dict = {**previous_config, **config.dict()} - else: - config_dict = config.dict() - persist_datahub_config(config_dict) - - -def should_skip_config() -> bool: - return get_boolean_env_variable(ENV_SKIP_CONFIG, False) - - -def ensure_datahub_config() -> None: - if not os.path.isfile(DATAHUB_CONFIG_PATH): - click.secho( - f"No {CONDENSED_DATAHUB_CONFIG_PATH} file found, generating one for you...", - bold=True, - ) - write_gms_config(DEFAULT_GMS_HOST, None) - - -def get_client_config(as_dict: bool = False) -> Union[Optional[DatahubConfig], dict]: - with open(DATAHUB_CONFIG_PATH, "r") as stream: - try: - config_json = yaml.safe_load(stream) - if as_dict: - return config_json - try: - datahub_config = DatahubConfig.parse_obj(config_json) - return datahub_config - except ValidationError as e: - click.echo( - f"Received error, please check your {CONDENSED_DATAHUB_CONFIG_PATH}" - ) - click.echo(e, err=True) - sys.exit(1) - except yaml.YAMLError as exc: - click.secho(f"{DATAHUB_CONFIG_PATH} malformed, error: {exc}", bold=True) - return None - - -def get_details_from_config(): - datahub_config = get_client_config(as_dict=False) - assert isinstance(datahub_config, DatahubConfig) - if datahub_config is not None: - gms_config = datahub_config.gms - - gms_host = gms_config.server - gms_token = gms_config.token - return gms_host, gms_token - else: - return None, None - - def get_details_from_env() -> Tuple[Optional[str], Optional[str]]: host = os.environ.get(ENV_METADATA_HOST) port = os.environ.get(ENV_METADATA_PORT) @@ -178,12 +79,12 @@ def get_url_and_token(): if len(config_override.keys()) > 0: gms_host = config_override.get(ENV_METADATA_HOST_URL) gms_token = config_override.get(ENV_METADATA_TOKEN) - elif should_skip_config(): + elif config_utils.should_skip_config(): gms_host = gms_host_env gms_token = gms_token_env else: - ensure_datahub_config() - gms_host_conf, gms_token_conf = get_details_from_config() + config_utils.ensure_datahub_config() + gms_host_conf, gms_token_conf = config_utils.get_details_from_config() gms_host = first_non_null([gms_host_env, gms_host_conf]) gms_token = first_non_null([gms_token_env, gms_token_conf]) return gms_host, gms_token @@ -253,14 +154,18 @@ def parse_run_restli_response(response: requests.Response) -> dict: exit() if not isinstance(response_json, dict): - click.echo(f"Received error, please check your {CONDENSED_DATAHUB_CONFIG_PATH}") + click.echo( + f"Received error, please check your {config_utils.CONDENSED_DATAHUB_CONFIG_PATH}" + ) click.echo() click.echo(response_json) exit() summary = response_json.get("value") if not isinstance(summary, dict): - click.echo(f"Received error, please check your {CONDENSED_DATAHUB_CONFIG_PATH}") + click.echo( + f"Received error, please check your {config_utils.CONDENSED_DATAHUB_CONFIG_PATH}" + ) click.echo() click.echo(response_json) exit() @@ -686,3 +591,95 @@ def command(ctx: click.Context) -> None: ctx.exit(1) return command + + +def get_session_login_as( + username: str, password: str, frontend_url: str +) -> requests.Session: + session = requests.Session() + headers = { + "Content-Type": "application/json", + } + system_auth = get_system_auth() + if system_auth is not None: + session.headers.update({"Authorization": system_auth}) + else: + data = '{"username":"' + username + '", "password":"' + password + '"}' + response = session.post(f"{frontend_url}/logIn", headers=headers, data=data) + response.raise_for_status() + return session + + +def _ensure_valid_gms_url_acryl_cloud(url: str) -> str: + if "acryl.io" 
not in url: + return url + if url.startswith("http://"): + url = url.replace("http://", "https://") + if url.endswith("acryl.io"): + url = f"{url}/gms" + return url + + +def fixup_gms_url(url: str) -> str: + if url is None: + return "" + if url.endswith("/"): + url = url.rstrip("/") + url = _ensure_valid_gms_url_acryl_cloud(url) + return url + + +def guess_frontend_url_from_gms_url(gms_url: str) -> str: + gms_url = fixup_gms_url(gms_url) + url = gms_url + if url.endswith("/gms"): + url = gms_url.rstrip("/gms") + if url.endswith("8080"): + url = url[:-4] + "9002" + return url + + +def generate_access_token( + username: str, + password: str, + gms_url: str, + token_name: Optional[str] = None, + validity: str = "ONE_HOUR", +) -> Tuple[str, str]: + frontend_url = guess_frontend_url_from_gms_url(gms_url) + session = get_session_login_as( + username=username, + password=password, + frontend_url=frontend_url, + ) + now = datetime.now() + timestamp = now.astimezone().isoformat() + if token_name is None: + token_name = f"cli token {timestamp}" + json = { + "query": """mutation createAccessToken($input: CreateAccessTokenInput!) { + createAccessToken(input: $input) { + accessToken + metadata { + id + actorUrn + ownerUrn + name + description + } + } + }""", + "variables": { + "input": { + "type": "PERSONAL", + "actorUrn": f"urn:li:corpuser:{username}", + "duration": validity, + "name": token_name, + } + }, + } + response = session.post(f"{frontend_url}/api/v2/graphql", json=json) + response.raise_for_status() + return token_name, response.json().get("data", {}).get("createAccessToken", {}).get( + "accessToken", None + ) diff --git a/metadata-ingestion/src/datahub/cli/config_utils.py b/metadata-ingestion/src/datahub/cli/config_utils.py new file mode 100644 index 00000000000000..7877a6bf6df593 --- /dev/null +++ b/metadata-ingestion/src/datahub/cli/config_utils.py @@ -0,0 +1,103 @@ +""" +For helper methods to contain manipulation of the config file in local system. +""" + +import logging +import os +import sys +from typing import Optional, Union + +import click +import yaml +from pydantic import BaseModel, ValidationError + +from datahub.cli.env_utils import get_boolean_env_variable + +log = logging.getLogger(__name__) + +DEFAULT_GMS_HOST = "http://localhost:8080" +CONDENSED_DATAHUB_CONFIG_PATH = "~/.datahubenv" +DATAHUB_CONFIG_PATH = os.path.expanduser(CONDENSED_DATAHUB_CONFIG_PATH) +DATAHUB_ROOT_FOLDER = os.path.expanduser("~/.datahub") +ENV_SKIP_CONFIG = "DATAHUB_SKIP_CONFIG" + + +class GmsConfig(BaseModel): + server: str + token: Optional[str] = None + + +class DatahubConfig(BaseModel): + gms: GmsConfig + + +def persist_datahub_config(config: dict) -> None: + with open(DATAHUB_CONFIG_PATH, "w+") as outfile: + yaml.dump(config, outfile, default_flow_style=False) + return None + + +def write_gms_config( + host: str, token: Optional[str], merge_with_previous: bool = True +) -> None: + config = DatahubConfig(gms=GmsConfig(server=host, token=token)) + if merge_with_previous: + try: + previous_config = get_client_config(as_dict=True) + assert isinstance(previous_config, dict) + except Exception as e: + # ok to fail on this + previous_config = {} + log.debug( + f"Failed to retrieve config from file {DATAHUB_CONFIG_PATH}: {e}. This isn't fatal." 
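
Editor's note: the new CLI helpers above normalize GMS URLs (forcing https and appending `/gms` for Acryl-hosted instances) and guess the frontend URL by swapping port 8080 for 9002; `generate_access_token` then logs in against that frontend to mint a personal access token. A minimal sketch of the URL behaviour, assuming the helpers ship in `datahub.cli.cli_utils` as this patch adds them (sample URLs are illustrative):

```python
# Minimal sketch exercising the URL helpers added to datahub.cli.cli_utils in this patch.
from datahub.cli.cli_utils import fixup_gms_url, guess_frontend_url_from_gms_url

# Acryl-hosted instances are upgraded to https and given a /gms suffix.
assert fixup_gms_url("http://customer.acryl.io") == "https://customer.acryl.io/gms"

# For a local quickstart, the frontend is assumed to live on port 9002 next to GMS on 8080.
assert guess_frontend_url_from_gms_url("http://localhost:8080") == "http://localhost:9002"
```
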
+ ) + config_dict = {**previous_config, **config.dict()} + else: + config_dict = config.dict() + persist_datahub_config(config_dict) + + +def get_details_from_config(): + datahub_config = get_client_config(as_dict=False) + assert isinstance(datahub_config, DatahubConfig) + if datahub_config is not None: + gms_config = datahub_config.gms + + gms_host = gms_config.server + gms_token = gms_config.token + return gms_host, gms_token + else: + return None, None + + +def should_skip_config() -> bool: + return get_boolean_env_variable(ENV_SKIP_CONFIG, False) + + +def ensure_datahub_config() -> None: + if not os.path.isfile(DATAHUB_CONFIG_PATH): + click.secho( + f"No {CONDENSED_DATAHUB_CONFIG_PATH} file found, generating one for you...", + bold=True, + ) + write_gms_config(DEFAULT_GMS_HOST, None) + + +def get_client_config(as_dict: bool = False) -> Union[Optional[DatahubConfig], dict]: + with open(DATAHUB_CONFIG_PATH, "r") as stream: + try: + config_json = yaml.safe_load(stream) + if as_dict: + return config_json + try: + datahub_config = DatahubConfig.parse_obj(config_json) + return datahub_config + except ValidationError as e: + click.echo( + f"Received error, please check your {CONDENSED_DATAHUB_CONFIG_PATH}" + ) + click.echo(e, err=True) + sys.exit(1) + except yaml.YAMLError as exc: + click.secho(f"{DATAHUB_CONFIG_PATH} malformed, error: {exc}", bold=True) + return None diff --git a/metadata-ingestion/src/datahub/cli/docker_cli.py b/metadata-ingestion/src/datahub/cli/docker_cli.py index 0e0bc37c61573d..099f57d975bbb3 100644 --- a/metadata-ingestion/src/datahub/cli/docker_cli.py +++ b/metadata-ingestion/src/datahub/cli/docker_cli.py @@ -21,7 +21,7 @@ from expandvars import expandvars from requests_file import FileAdapter -from datahub.cli.cli_utils import DATAHUB_ROOT_FOLDER +from datahub.cli.config_utils import DATAHUB_ROOT_FOLDER from datahub.cli.docker_check import ( DATAHUB_COMPOSE_LEGACY_VOLUME_FILTERS, DATAHUB_COMPOSE_PROJECT_FILTER, diff --git a/metadata-ingestion/src/datahub/cli/env_utils.py b/metadata-ingestion/src/datahub/cli/env_utils.py new file mode 100644 index 00000000000000..8909036eab6cdf --- /dev/null +++ b/metadata-ingestion/src/datahub/cli/env_utils.py @@ -0,0 +1,11 @@ +import os + + +def get_boolean_env_variable(key: str, default: bool = False) -> bool: + value = os.environ.get(key) + if value is None: + return default + elif value.lower() in ("true", "1"): + return True + else: + return False diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py index 9c55f52497c0ea..2e66b18e481453 100644 --- a/metadata-ingestion/src/datahub/cli/ingest_cli.py +++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py @@ -15,12 +15,7 @@ import datahub as datahub_package from datahub.cli import cli_utils -from datahub.cli.cli_utils import ( - CONDENSED_DATAHUB_CONFIG_PATH, - format_aspect_summaries, - get_session_and_host, - post_rollback_endpoint, -) +from datahub.cli.config_utils import CONDENSED_DATAHUB_CONFIG_PATH from datahub.configuration.config_loader import load_config_file from datahub.ingestion.graph.client import get_default_graph from datahub.ingestion.run.connection import ConnectionManager @@ -436,7 +431,7 @@ def mcps(path: str) -> None: def list_runs(page_offset: int, page_size: int, include_soft_deletes: bool) -> None: """List recent ingestion runs to datahub""" - session, gms_host = get_session_and_host() + session, gms_host = cli_utils.get_session_and_host() url = f"{gms_host}/runs?action=list" @@ -485,7 +480,7 @@ def 
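
Editor's note: the extracted `config_utils` module now owns reading and writing `~/.datahubenv`; `cli_utils`, `lite_cli`, `docker_cli`, and `entrypoints` import these helpers instead of carrying their own copies. A rough sketch of the round trip, assuming the module path `datahub.cli.config_utils` introduced by this patch; note that running it really does rewrite `~/.datahubenv`:

```python
# Rough sketch of the ~/.datahubenv round trip handled by the new config_utils module.
# Warning: this actually rewrites ~/.datahubenv on the machine it runs on.
from datahub.cli import config_utils

config_utils.write_gms_config("http://localhost:8080", token=None)
# ~/.datahubenv now contains roughly:
#   gms:
#     server: http://localhost:8080
#     token: null

cfg = config_utils.get_client_config()
assert isinstance(cfg, config_utils.DatahubConfig)
print(cfg.gms.server)  # -> http://localhost:8080
```
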
show( run_id: str, start: int, count: int, include_soft_deletes: bool, show_aspect: bool ) -> None: """Describe a provided ingestion run to datahub""" - session, gms_host = get_session_and_host() + session, gms_host = cli_utils.get_session_and_host() url = f"{gms_host}/runs?action=describe" @@ -504,7 +499,11 @@ def show( rows = parse_restli_response(response) if not show_aspect: click.echo( - tabulate(format_aspect_summaries(rows), RUN_TABLE_COLUMNS, tablefmt="grid") + tabulate( + cli_utils.format_aspect_summaries(rows), + RUN_TABLE_COLUMNS, + tablefmt="grid", + ) ) else: for row in rows: @@ -546,7 +545,7 @@ def rollback( aspects_affected, unsafe_entity_count, unsafe_entities, - ) = post_rollback_endpoint(payload_obj, "/runs?action=rollback") + ) = cli_utils.post_rollback_endpoint(payload_obj, "/runs?action=rollback") click.echo( "Rolling back deletes the entities created by a run and reverts the updated aspects" diff --git a/metadata-ingestion/src/datahub/cli/lite_cli.py b/metadata-ingestion/src/datahub/cli/lite_cli.py index 8636187a51d09f..7e2ad23a7753f4 100644 --- a/metadata-ingestion/src/datahub/cli/lite_cli.py +++ b/metadata-ingestion/src/datahub/cli/lite_cli.py @@ -9,7 +9,7 @@ from click.shell_completion import CompletionItem from click_default_group import DefaultGroup -from datahub.cli.cli_utils import ( +from datahub.cli.config_utils import ( DATAHUB_ROOT_FOLDER, DatahubConfig, get_client_config, diff --git a/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py b/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py index a52a9dddff1276..afac38e29722e9 100644 --- a/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py +++ b/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py @@ -14,7 +14,11 @@ from datahub.api.entities.dataproduct.dataproduct import DataProduct from datahub.cli.specific.file_loader import load_file -from datahub.emitter.mce_builder import make_group_urn, make_user_urn +from datahub.emitter.mce_builder import ( + make_group_urn, + make_user_urn, + validate_ownership_type, +) from datahub.ingestion.graph.client import DataHubGraph, get_default_graph from datahub.metadata.schema_classes import OwnerClass, OwnershipTypeClass from datahub.specific.dataproduct import DataProductPatchBuilder @@ -332,8 +336,11 @@ def add_owner(urn: str, owner: str, owner_type: str) -> None: if not urn.startswith("urn:li:dataProduct:"): urn = f"urn:li:dataProduct:{urn}" dataproduct_patcher: DataProductPatchBuilder = DataProduct.get_patch_builder(urn) + owner_type, owner_type_urn = validate_ownership_type(owner_type) dataproduct_patcher.add_owner( - owner=OwnerClass(owner=_get_owner_urn(owner), type=owner_type) + owner=OwnerClass( + owner=_get_owner_urn(owner), type=owner_type, typeUrn=owner_type_urn + ) ) with get_default_graph() as graph: _abort_if_non_existent_urn(graph, urn, "add owners") diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py index 9da1b0ab56f890..fe9ecee8f80d0b 100644 --- a/metadata-ingestion/src/datahub/emitter/mce_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py @@ -1,4 +1,5 @@ """Convenience functions for creating MCEs""" + import hashlib import json import logging diff --git a/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py index be68d46472a553..5a9eb074f1b096 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py +++ 
b/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py @@ -12,6 +12,7 @@ MetadataChangeProposalClass, SystemMetadataClass, ) +from datahub.utilities.urns.urn import guess_entity_type def _recursive_to_obj(obj: Any) -> Any: @@ -47,13 +48,11 @@ class MetadataPatchProposal: def __init__( self, urn: str, - entity_type: str, system_metadata: Optional[SystemMetadataClass] = None, audit_header: Optional[KafkaAuditHeaderClass] = None, ) -> None: self.urn = urn - # TODO: Remove the entity_type parameter, as MCPW can infer it from the URN. - self.entity_type = entity_type + self.entity_type = guess_entity_type(urn) self.system_metadata = system_metadata self.audit_header = audit_header self.patches = defaultdict(list) diff --git a/metadata-ingestion/src/datahub/entrypoints.py b/metadata-ingestion/src/datahub/entrypoints.py index 1bf090a2e514ec..4f6c596b7bf20a 100644 --- a/metadata-ingestion/src/datahub/entrypoints.py +++ b/metadata-ingestion/src/datahub/entrypoints.py @@ -9,9 +9,13 @@ import datahub as datahub_package from datahub.cli.check_cli import check from datahub.cli.cli_utils import ( + fixup_gms_url, + generate_access_token, + make_shim_command, +) +from datahub.cli.config_utils import ( DATAHUB_CONFIG_PATH, get_boolean_env_variable, - make_shim_command, write_gms_config, ) from datahub.cli.delete_cli import delete @@ -99,8 +103,15 @@ def version() -> None: @datahub.command() +@click.option( + "--use-password", + type=bool, + is_flag=True, + default=False, + help="If passed then uses password to initialise token.", +) @telemetry.with_telemetry() -def init() -> None: +def init(use_password: bool = False) -> None: """Configure which datahub instance to connect to""" if os.path.isfile(DATAHUB_CONFIG_PATH): @@ -110,11 +121,22 @@ def init() -> None: host = click.prompt( "Enter your DataHub host", type=str, default="http://localhost:8080" ) - token = click.prompt( - "Enter your DataHub access token (Supports env vars via `{VAR_NAME}` syntax)", - type=str, - default="", - ) + host = fixup_gms_url(host) + if use_password: + username = click.prompt("Enter your DataHub username", type=str) + password = click.prompt( + "Enter your DataHub password", + type=str, + ) + _, token = generate_access_token( + username=username, password=password, gms_url=host + ) + else: + token = click.prompt( + "Enter your DataHub access token (Supports env vars via `{VAR_NAME}` syntax)", + type=str, + default="", + ) write_gms_config(host, token) click.echo(f"Written to {DATAHUB_CONFIG_PATH}") diff --git a/metadata-ingestion/src/datahub/ingestion/api/common.py b/metadata-ingestion/src/datahub/ingestion/api/common.py index a6761a3c77d5e8..097859939cfea5 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/common.py +++ b/metadata-ingestion/src/datahub/ingestion/api/common.py @@ -64,7 +64,7 @@ def _set_dataset_urn_to_lower_if_needed(self) -> None: # TODO: Get rid of this function once lower-casing is the standard. 
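
Editor's note: with the `entity_type` parameter dropped, `MetadataPatchProposal` (and the patch builders built on it) derive the entity type from the URN via `guess_entity_type`. A small illustration, assuming `guess_entity_type` simply returns the entity-type segment of the URN, as its use here implies (the sample URNs are made up):

```python
# Illustrative only: the patch builder now infers the entity type from the URN itself.
from datahub.utilities.urns.urn import guess_entity_type

assert guess_entity_type(
    "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.orders,PROD)"
) == "dataset"
assert guess_entity_type("urn:li:dataProduct:pet_of_the_week") == "dataProduct"
```
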
if self.graph: server_config = self.graph.get_config() - if server_config and server_config.get("datasetUrnNameCasing"): + if server_config and server_config.get("datasetUrnNameCasing") is True: set_dataset_urn_to_lower(True) def register_checkpointer(self, committable: Committable) -> None: diff --git a/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py b/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py index 7ec71a22f6fe69..653b80c116adfd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py +++ b/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py @@ -299,9 +299,13 @@ class ElasticsearchSourceConfig(PlatformInstanceConfigMixin, EnvConfigMixin): profiling: ElasticProfiling = Field( default_factory=ElasticProfiling, + description="Configs to ingest data profiles from ElasticSearch.", ) collapse_urns: CollapseUrns = Field( default_factory=CollapseUrns, + description="""List of regex patterns to remove from the name of the URN. All of the indices before removal of URNs are considered as the same dataset. These are applied in order for each URN. + The main case where you would want to have multiple of these if the name where you are trying to remove suffix from have different formats. + e.g. ending with -YYYY-MM-DD as well as ending -epochtime would require you to have 2 regex patterns to remove the suffixes across all URNs.""", ) def is_profiling_enabled(self) -> bool: diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py index bdef28e30db2b4..93e2e51acba19f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py @@ -158,11 +158,13 @@ def _get_user_name(self, user_id: Optional[str]) -> Optional[str]: return None user_details = self._query( self.fivetran_log_query.get_user_query(user_id=user_id) - )[0] - return ( - f"{user_details[Constant.GIVEN_NAME]} {user_details[Constant.FAMILY_NAME]}" ) + if not user_details: + return None + + return f"{user_details[0][Constant.GIVEN_NAME]} {user_details[0][Constant.FAMILY_NAME]}" + def get_allowed_connectors_list( self, connector_patterns: AllowDenyPattern, report: FivetranSourceReport ) -> List[Connector]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py index d22bfb2b8b52ff..a65bb76345f4b0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metabase.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py @@ -6,7 +6,7 @@ import dateutil.parser as dp import pydantic import requests -from pydantic import Field, validator +from pydantic import Field, root_validator, validator from requests.models import HTTPError import datahub.emitter.mce_builder as builder @@ -53,6 +53,10 @@ class MetabaseConfig(DatasetLineageProviderConfigBase): # See the Metabase /api/session endpoint for details # https://www.metabase.com/docs/latest/api-documentation.html#post-apisession connect_uri: str = Field(default="localhost:3000", description="Metabase host URL.") + display_uri: Optional[str] = Field( + default=None, + description="optional URL to use in links (if `connect_uri` is only for ingestion)", + ) username: Optional[str] = Field(default=None, description="Metabase username.") password: Optional[pydantic.SecretStr] = Field( default=None, description="Metabase 
password." @@ -76,10 +80,17 @@ class MetabaseConfig(DatasetLineageProviderConfigBase): description="Default schema name to use when schema is not provided in an SQL query", ) - @validator("connect_uri") + @validator("connect_uri", "display_uri") def remove_trailing_slash(cls, v): return config_clean.remove_trailing_slashes(v) + @root_validator(skip_on_failure=True) + def default_display_uri_to_connect_uri(cls, values): + base = values.get("display_uri") + if base is None: + values["display_uri"] = values.get("connect_uri") + return values + @platform_name("Metabase") @config_class(MetabaseConfig) @@ -239,7 +250,7 @@ def construct_dashboard_from_api_data( self, dashboard_info: dict ) -> Optional[DashboardSnapshot]: dashboard_id = dashboard_info.get("id", "") - dashboard_url = f"{self.config.connect_uri}/api/dashboard/{dashboard_id}" + dashboard_url = f"{self.config.display_uri}/api/dashboard/{dashboard_id}" try: dashboard_response = self.session.get(dashboard_url) dashboard_response.raise_for_status() @@ -297,7 +308,7 @@ def construct_dashboard_from_api_data( @lru_cache(maxsize=None) def _get_ownership(self, creator_id: int) -> Optional[OwnershipClass]: - user_info_url = f"{self.config.connect_uri}/api/user/{creator_id}" + user_info_url = f"{self.config.display_uri}/api/user/{creator_id}" try: user_info_response = self.session.get(user_info_url) user_info_response.raise_for_status() @@ -362,7 +373,7 @@ def get_card_details_by_id(self, card_id: Union[int, str]) -> dict: :param int datasource_id: Numeric datasource ID received from Metabase API :return: dict with info or empty dict """ - card_url = f"{self.config.connect_uri}/api/card/{card_id}" + card_url = f"{self.config.display_uri}/api/card/{card_id}" try: card_response = self.session.get(card_url) card_response.raise_for_status() diff --git a/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py b/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py index 675c87b13313d2..e3f0fd118625a3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py @@ -34,44 +34,44 @@ class Owners(ConfigModel): - users: Optional[List[str]] - groups: Optional[List[str]] + users: Optional[List[str]] = None + groups: Optional[List[str]] = None class KnowledgeCard(ConfigModel): - url: Optional[str] - label: Optional[str] + url: Optional[str] = None + label: Optional[str] = None class GlossaryTermConfig(ConfigModel): - id: Optional[str] + id: Optional[str] = None name: str description: str - term_source: Optional[str] - source_ref: Optional[str] - source_url: Optional[str] - owners: Optional[Owners] - inherits: Optional[List[str]] - contains: Optional[List[str]] - values: Optional[List[str]] - related_terms: Optional[List[str]] - custom_properties: Optional[Dict[str, str]] - knowledge_links: Optional[List[KnowledgeCard]] - domain: Optional[str] + term_source: Optional[str] = None + source_ref: Optional[str] = None + source_url: Optional[str] = None + owners: Optional[Owners] = None + inherits: Optional[List[str]] = None + contains: Optional[List[str]] = None + values: Optional[List[str]] = None + related_terms: Optional[List[str]] = None + custom_properties: Optional[Dict[str, str]] = None + knowledge_links: Optional[List[KnowledgeCard]] = None + domain: Optional[str] = None # Private fields. 
_urn: str class GlossaryNodeConfig(ConfigModel): - id: Optional[str] + id: Optional[str] = None name: str description: str - owners: Optional[Owners] - terms: Optional[List["GlossaryTermConfig"]] - nodes: Optional[List["GlossaryNodeConfig"]] - knowledge_links: Optional[List[KnowledgeCard]] - custom_properties: Optional[Dict[str, str]] + owners: Optional[Owners] = None + terms: Optional[List["GlossaryTermConfig"]] = None + nodes: Optional[List["GlossaryNodeConfig"]] = None + knowledge_links: Optional[List[KnowledgeCard]] = None + custom_properties: Optional[Dict[str, str]] = None # Private fields. _urn: str @@ -80,7 +80,7 @@ class GlossaryNodeConfig(ConfigModel): class DefaultConfig(ConfigModel): """Holds defaults for populating fields in glossary terms""" - source: Optional[str] + source: Optional[str] = None owners: Owners url: Optional[str] = None source_type: str = "INTERNAL" @@ -98,8 +98,8 @@ class BusinessGlossarySourceConfig(ConfigModel): class BusinessGlossaryConfig(DefaultConfig): version: str - terms: Optional[List["GlossaryTermConfig"]] - nodes: Optional[List["GlossaryNodeConfig"]] + terms: Optional[List["GlossaryTermConfig"]] = None + nodes: Optional[List["GlossaryNodeConfig"]] = None @validator("version") def version_must_be_1(cls, v): @@ -337,12 +337,14 @@ def get_mces_from_term( ] = [] term_info = models.GlossaryTermInfoClass( definition=glossaryTerm.description, - termSource=glossaryTerm.term_source - if glossaryTerm.term_source is not None - else defaults.source_type, - sourceRef=glossaryTerm.source_ref - if glossaryTerm.source_ref - else defaults.source, + termSource=( + glossaryTerm.term_source + if glossaryTerm.term_source is not None + else defaults.source_type + ), + sourceRef=( + glossaryTerm.source_ref if glossaryTerm.source_ref else defaults.source + ), sourceUrl=glossaryTerm.source_url if glossaryTerm.source_url else defaults.url, parentNode=parentNode, customProperties=glossaryTerm.custom_properties, diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py index 304e999f81a843..d72624bd70512a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py @@ -195,6 +195,7 @@ def get_workspaces(self) -> List[Workspace]: groups = self._get_resolver().get_groups() except: self.log_http_error(message="Unable to fetch list of workspaces") + raise # we want this exception to bubble up workspaces = [ Workspace( diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py index 540adbf4bfd15a..fe66ef006ec692 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py @@ -94,10 +94,10 @@ class RedshiftConfig( description="The default schema to use if the sql parser fails to parse the schema with `sql_based` lineage collector", ) - include_table_lineage: Optional[bool] = Field( + include_table_lineage: bool = Field( default=True, description="Whether table lineage should be ingested." ) - include_copy_lineage: Optional[bool] = Field( + include_copy_lineage: bool = Field( default=True, description="Whether lineage should be collected from copy commands", ) @@ -107,17 +107,15 @@ class RedshiftConfig( description="Generate usage statistic. 
email_domain config parameter needs to be set if enabled", ) - include_unload_lineage: Optional[bool] = Field( + include_unload_lineage: bool = Field( default=True, description="Whether lineage should be collected from unload commands", ) - capture_lineage_query_parser_failures: Optional[bool] = Field( - hide_from_schema=True, + include_table_rename_lineage: bool = Field( default=False, - description="Whether to capture lineage query parser errors with dataset properties for debugging", + description="Whether we should follow `alter table ... rename to` statements when computing lineage. ", ) - table_lineage_mode: Optional[LineageMode] = Field( default=LineageMode.STL_SCAN_BASED, description="Which table lineage collector mode to use. Available modes are: [stl_scan_based, sql_based, mixed]", @@ -139,6 +137,11 @@ class RedshiftConfig( description="When enabled, emits lineage as incremental to existing lineage already in DataHub. When disabled, re-states lineage on each run. This config works with rest-sink only.", ) + resolve_temp_table_in_lineage: bool = Field( + default=False, + description="Whether to resolve temp table appear in lineage to upstream permanent tables.", + ) + @root_validator(pre=True) def check_email_is_set_on_usage(cls, values): if values.get("include_usage_statistics"): diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py index 3efef58737c6e9..898e6db0b14b06 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py @@ -4,11 +4,12 @@ from dataclasses import dataclass, field from datetime import datetime from enum import Enum -from typing import Dict, List, Optional, Set, Tuple, Union +from typing import Dict, List, Optional, Set, Tuple, Union, cast from urllib.parse import urlparse import humanfriendly import redshift_connector +import sqlglot import datahub.emitter.mce_builder as builder import datahub.utilities.sqlglot_lineage as sqlglot_l @@ -24,17 +25,24 @@ RedshiftSchema, RedshiftTable, RedshiftView, + TempTableRow, ) from datahub.ingestion.source.redshift.report import RedshiftReport from datahub.ingestion.source.state.redundant_run_skip_handler import ( RedundantLineageRunSkipHandler, ) +from datahub.metadata._schema_classes import SchemaFieldDataTypeClass from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( FineGrainedLineage, FineGrainedLineageDownstreamType, FineGrainedLineageUpstreamType, UpstreamLineage, ) +from datahub.metadata.com.linkedin.pegasus2avro.schema import ( + OtherSchema, + SchemaField, + SchemaMetadata, +) from datahub.metadata.schema_classes import ( DatasetLineageTypeClass, UpstreamClass, @@ -111,6 +119,34 @@ def merge_lineage( self.cll = self.cll or None +def parse_alter_table_rename(default_schema: str, query: str) -> Tuple[str, str, str]: + """ + Parses an ALTER TABLE ... RENAME TO ... query and returns the schema, previous table name, and new table name. 
+ """ + + parsed_query = sqlglot.parse_one(query, dialect="redshift") + assert isinstance(parsed_query, sqlglot.exp.AlterTable) + prev_name = parsed_query.this.name + rename_clause = parsed_query.args["actions"][0] + assert isinstance(rename_clause, sqlglot.exp.RenameTable) + new_name = rename_clause.this.name + + schema = parsed_query.this.db or default_schema + + return schema, prev_name, new_name + + +def split_qualified_table_name(urn: str) -> Tuple[str, str, str]: + qualified_table_name = dataset_urn.DatasetUrn.create_from_string( + urn + ).get_entity_id()[1] + + # -3 because platform instance is optional and that can cause the split to have more than 3 elements + db, schema, table = qualified_table_name.split(".")[-3:] + + return db, schema, table + + class RedshiftLineageExtractor: def __init__( self, @@ -130,6 +166,95 @@ def __init__( self.report.lineage_end_time, ) = self.get_time_window() + self.temp_tables: Dict[str, TempTableRow] = {} + + def _init_temp_table_schema( + self, database: str, temp_tables: List[TempTableRow] + ) -> None: + if self.context.graph is None: # to silent lint + return + + schema_resolver: sqlglot_l.SchemaResolver = ( + self.context.graph._make_schema_resolver( + platform=LineageDatasetPlatform.REDSHIFT.value, + platform_instance=self.config.platform_instance, + env=self.config.env, + ) + ) + + dataset_vs_columns: Dict[str, List[SchemaField]] = {} + # prepare dataset_urn vs List of schema fields + for table in temp_tables: + logger.debug( + f"Processing temp table: {table.create_command} with query text {table.query_text}" + ) + result = sqlglot_l.create_lineage_sql_parsed_result( + platform=LineageDatasetPlatform.REDSHIFT.value, + platform_instance=self.config.platform_instance, + env=self.config.env, + default_db=database, + default_schema=self.config.default_schema, + query=table.query_text, + graph=self.context.graph, + ) + + if ( + result is None + or result.column_lineage is None + or result.query_type != sqlglot_l.QueryType.CREATE + or not result.out_tables + ): + logger.debug(f"Unsupported temp table query found: {table.query_text}") + continue + + table.parsed_result = result + if result.column_lineage[0].downstream.table: + table.urn = result.column_lineage[0].downstream.table + + self.temp_tables[result.out_tables[0]] = table + + for table in self.temp_tables.values(): + if ( + table.parsed_result is None + or table.parsed_result.column_lineage is None + ): + continue + for column_lineage in table.parsed_result.column_lineage: + if column_lineage.downstream.table not in dataset_vs_columns: + dataset_vs_columns[cast(str, column_lineage.downstream.table)] = [] + # Initialise the temp table urn, we later need this to merge CLL + + dataset_vs_columns[cast(str, column_lineage.downstream.table)].append( + SchemaField( + fieldPath=column_lineage.downstream.column, + type=cast( + SchemaFieldDataTypeClass, + column_lineage.downstream.column_type, + ), + nativeDataType=cast( + str, column_lineage.downstream.native_column_type + ), + ) + ) + + # Add datasets, and it's respective fields in schema_resolver, so that later schema_resolver would be able + # correctly generates the upstreams for temporary tables + for urn in dataset_vs_columns: + db, schema, table_name = split_qualified_table_name(urn) + schema_resolver.add_schema_metadata( + urn=urn, + schema_metadata=SchemaMetadata( + schemaName=table_name, + platform=builder.make_data_platform_urn( + LineageDatasetPlatform.REDSHIFT.value + ), + version=0, + hash="", + 
platformSchema=OtherSchema(rawSchema=""), + fields=dataset_vs_columns[urn], + ), + ) + def get_time_window(self) -> Tuple[datetime, datetime]: if self.redundant_run_skip_handler: self.report.stateful_lineage_ingestion_enabled = True @@ -157,25 +282,32 @@ def _get_s3_path(self, path: str) -> str: return path def _get_sources_from_query( - self, db_name: str, query: str + self, + db_name: str, + query: str, + parsed_result: Optional[sqlglot_l.SqlParsingResult] = None, ) -> Tuple[List[LineageDataset], Optional[List[sqlglot_l.ColumnLineageInfo]]]: sources: List[LineageDataset] = list() - parsed_result: Optional[ - sqlglot_l.SqlParsingResult - ] = sqlglot_l.create_lineage_sql_parsed_result( - query=query, - platform=LineageDatasetPlatform.REDSHIFT.value, - platform_instance=self.config.platform_instance, - default_db=db_name, - default_schema=str(self.config.default_schema), - graph=self.context.graph, - env=self.config.env, - ) + if parsed_result is None: + parsed_result = sqlglot_l.create_lineage_sql_parsed_result( + query=query, + platform=LineageDatasetPlatform.REDSHIFT.value, + platform_instance=self.config.platform_instance, + default_db=db_name, + default_schema=str(self.config.default_schema), + graph=self.context.graph, + env=self.config.env, + ) if parsed_result is None: logger.debug(f"native query parsing failed for {query}") return sources, None + elif parsed_result.debug_info.table_error: + logger.debug( + f"native query parsing failed for {query} with error: {parsed_result.debug_info.table_error}" + ) + return sources, None logger.debug(f"parsed_result = {parsed_result}") @@ -277,7 +409,7 @@ def _populate_lineage_map( database: str, lineage_type: LineageCollectorType, connection: redshift_connector.Connection, - all_tables: Dict[str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]], + all_tables_set: Dict[str, Dict[str, Set[str]]], ) -> None: """ This method generate table level lineage based with the given query. @@ -292,7 +424,10 @@ def _populate_lineage_map( return: The method does not return with anything as it directly modify the self._lineage_map property. 
:rtype: None """ + + logger.info(f"Extracting {lineage_type.name} lineage for db {database}") try: + logger.debug(f"Processing lineage query: {query}") cll: Optional[List[sqlglot_l.ColumnLineageInfo]] = None raw_db_name = database alias_db_name = self.config.database @@ -301,11 +436,18 @@ def _populate_lineage_map( conn=connection, query=query ): target = self._get_target_lineage( - alias_db_name, lineage_row, lineage_type + alias_db_name, + lineage_row, + lineage_type, + all_tables_set=all_tables_set, ) if not target: continue + logger.debug( + f"Processing {lineage_type.name} lineage row: {lineage_row}" + ) + sources, cll = self._get_sources( lineage_type, alias_db_name, @@ -318,9 +460,12 @@ def _populate_lineage_map( target.upstreams.update( self._get_upstream_lineages( sources=sources, - all_tables=all_tables, + target_table=target.dataset.urn, + target_dataset_cll=cll, + all_tables_set=all_tables_set, alias_db_name=alias_db_name, raw_db_name=raw_db_name, + connection=connection, ) ) target.cll = cll @@ -344,21 +489,50 @@ def _populate_lineage_map( ) self.report_status(f"extract-{lineage_type.name}", False) + def _update_lineage_map_for_table_renames( + self, table_renames: Dict[str, str] + ) -> None: + if not table_renames: + return + + logger.info(f"Updating lineage map for {len(table_renames)} table renames") + for new_table_urn, prev_table_urn in table_renames.items(): + # This table was renamed from some other name, copy in the lineage + # for the previous name as well. + prev_table_lineage = self._lineage_map.get(prev_table_urn) + if prev_table_lineage: + logger.debug( + f"including lineage for {prev_table_urn} in {new_table_urn} due to table rename" + ) + self._lineage_map[new_table_urn].merge_lineage( + upstreams=prev_table_lineage.upstreams, + cll=prev_table_lineage.cll, + ) + def _get_target_lineage( self, alias_db_name: str, lineage_row: LineageRow, lineage_type: LineageCollectorType, + all_tables_set: Dict[str, Dict[str, Set[str]]], ) -> Optional[LineageItem]: if ( lineage_type != LineageCollectorType.UNLOAD and lineage_row.target_schema and lineage_row.target_table ): - if not self.config.schema_pattern.allowed( - lineage_row.target_schema - ) or not self.config.table_pattern.allowed( - f"{alias_db_name}.{lineage_row.target_schema}.{lineage_row.target_table}" + if ( + not self.config.schema_pattern.allowed(lineage_row.target_schema) + or not self.config.table_pattern.allowed( + f"{alias_db_name}.{lineage_row.target_schema}.{lineage_row.target_table}" + ) + ) and not ( + # We also check the all_tables_set, since this might be a renamed table + # that we don't want to drop lineage for. 
+ alias_db_name in all_tables_set + and lineage_row.target_schema in all_tables_set[alias_db_name] + and lineage_row.target_table + in all_tables_set[alias_db_name][lineage_row.target_schema] ): return None # Target @@ -400,18 +574,19 @@ def _get_target_lineage( def _get_upstream_lineages( self, sources: List[LineageDataset], - all_tables: Dict[str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]], + target_table: str, + all_tables_set: Dict[str, Dict[str, Set[str]]], alias_db_name: str, raw_db_name: str, + connection: redshift_connector.Connection, + target_dataset_cll: Optional[List[sqlglot_l.ColumnLineageInfo]], ) -> List[LineageDataset]: - targe_source = [] + target_source = [] + probable_temp_tables: List[str] = [] + for source in sources: if source.platform == LineageDatasetPlatform.REDSHIFT: - qualified_table_name = dataset_urn.DatasetUrn.create_from_string( - source.urn - ).get_entity_id()[1] - # -3 because platform instance is optional and that can cause the split to have more than 3 elements - db, schema, table = qualified_table_name.split(".")[-3:] + db, schema, table = split_qualified_table_name(source.urn) if db == raw_db_name: db = alias_db_name path = f"{db}.{schema}.{table}" @@ -427,19 +602,40 @@ def _get_upstream_lineages( # Filtering out tables which does not exist in Redshift # It was deleted in the meantime or query parser did not capture well the table name + # Or it might be a temp table if ( - db not in all_tables - or schema not in all_tables[db] - or not any(table == t.name for t in all_tables[db][schema]) + db not in all_tables_set + or schema not in all_tables_set[db] + or table not in all_tables_set[db][schema] ): logger.debug( - f"{source.urn} missing table, dropping from lineage.", + f"{source.urn} missing table. Adding it to temp table list for target table {target_table}.", ) + probable_temp_tables.append(f"{schema}.{table}") self.report.num_lineage_tables_dropped += 1 continue - targe_source.append(source) - return targe_source + target_source.append(source) + + if probable_temp_tables and self.config.resolve_temp_table_in_lineage: + self.report.num_lineage_processed_temp_tables += len(probable_temp_tables) + # Generate lineage dataset from temporary tables + number_of_permanent_dataset_found: int = ( + self.update_table_and_column_lineage( + db_name=raw_db_name, + connection=connection, + temp_table_names=probable_temp_tables, + target_source_dataset=target_source, + target_dataset_cll=target_dataset_cll, + ) + ) + + logger.debug( + f"Number of permanent datasets found for {target_table} = {number_of_permanent_dataset_found} in " + f"temp tables {probable_temp_tables}" + ) + + return target_source def populate_lineage( self, @@ -447,8 +643,27 @@ def populate_lineage( connection: redshift_connector.Connection, all_tables: Dict[str, Dict[str, List[Union[RedshiftView, RedshiftTable]]]], ) -> None: + if self.config.resolve_temp_table_in_lineage: + self._init_temp_table_schema( + database=database, + temp_tables=self.get_temp_tables(connection=connection), + ) + populate_calls: List[Tuple[str, LineageCollectorType]] = [] + all_tables_set: Dict[str, Dict[str, Set[str]]] = { + db: {schema: {t.name for t in tables} for schema, tables in schemas.items()} + for db, schemas in all_tables.items() + } + + table_renames: Dict[str, str] = {} + if self.config.include_table_rename_lineage: + table_renames, all_tables_set = self._process_table_renames( + database=database, + connection=connection, + all_tables=all_tables_set, + ) + if self.config.table_lineage_mode in { 
LineageMode.STL_SCAN_BASED, LineageMode.MIXED, @@ -504,9 +719,12 @@ def populate_lineage( database=database, lineage_type=lineage_type, connection=connection, - all_tables=all_tables, + all_tables_set=all_tables_set, ) + # Handling for alter table statements. + self._update_lineage_map_for_table_renames(table_renames=table_renames) + self.report.lineage_mem_size[self.config.database] = humanfriendly.format_size( memory_footprint.total_size(self._lineage_map) ) @@ -613,3 +831,271 @@ def get_lineage( def report_status(self, step: str, status: bool) -> None: if self.redundant_run_skip_handler: self.redundant_run_skip_handler.report_current_run_status(step, status) + + def _process_table_renames( + self, + database: str, + connection: redshift_connector.Connection, + all_tables: Dict[str, Dict[str, Set[str]]], + ) -> Tuple[Dict[str, str], Dict[str, Dict[str, Set[str]]]]: + logger.info(f"Processing table renames for db {database}") + + # new urn -> prev urn + table_renames: Dict[str, str] = {} + + query = RedshiftQuery.alter_table_rename_query( + db_name=database, + start_time=self.start_time, + end_time=self.end_time, + ) + + for rename_row in RedshiftDataDictionary.get_alter_table_commands( + connection, query + ): + schema, prev_name, new_name = parse_alter_table_rename( + default_schema=self.config.default_schema, + query=rename_row.query_text, + ) + + prev_urn = make_dataset_urn_with_platform_instance( + platform=LineageDatasetPlatform.REDSHIFT.value, + platform_instance=self.config.platform_instance, + name=f"{database}.{schema}.{prev_name}", + env=self.config.env, + ) + new_urn = make_dataset_urn_with_platform_instance( + platform=LineageDatasetPlatform.REDSHIFT.value, + platform_instance=self.config.platform_instance, + name=f"{database}.{schema}.{new_name}", + env=self.config.env, + ) + + table_renames[new_urn] = prev_urn + + # We want to generate lineage for the previous name too. + all_tables[database][schema].add(prev_name) + + logger.info(f"Discovered {len(table_renames)} table renames") + return table_renames, all_tables + + def get_temp_tables( + self, connection: redshift_connector.Connection + ) -> List[TempTableRow]: + ddl_query: str = RedshiftQuery.temp_table_ddl_query( + start_time=self.config.start_time, + end_time=self.config.end_time, + ) + + logger.debug(f"Temporary table ddl query = {ddl_query}") + + temp_table_rows: List[TempTableRow] = [] + + for row in RedshiftDataDictionary.get_temporary_rows( + conn=connection, + query=ddl_query, + ): + temp_table_rows.append(row) + + return temp_table_rows + + def find_temp_tables( + self, temp_table_rows: List[TempTableRow], temp_table_names: List[str] + ) -> List[TempTableRow]: + matched_temp_tables: List[TempTableRow] = [] + + for table_name in temp_table_names: + prefixes = RedshiftQuery.get_temp_table_clause(table_name) + prefixes.extend( + RedshiftQuery.get_temp_table_clause(table_name.split(".")[-1]) + ) + + for row in temp_table_rows: + if any( + row.create_command.lower().startswith(prefix) for prefix in prefixes + ): + matched_temp_tables.append(row) + + return matched_temp_tables + + def resolve_column_refs( + self, column_refs: List[sqlglot_l.ColumnRef], depth: int = 0 + ) -> List[sqlglot_l.ColumnRef]: + """ + This method resolves the column reference to the original column reference. + For example, if the column reference is to a temporary table, it will be resolved to the original column + reference. 
+ """ + max_depth = 10 + + resolved_column_refs: List[sqlglot_l.ColumnRef] = [] + if not column_refs: + return column_refs + + if depth >= max_depth: + logger.warning( + f"Max depth reached for resolving temporary columns: {column_refs}" + ) + self.report.num_unresolved_temp_columns += 1 + return column_refs + + for ref in column_refs: + resolved = False + if ref.table in self.temp_tables: + table = self.temp_tables[ref.table] + if table.parsed_result and table.parsed_result.column_lineage: + for column_lineage in table.parsed_result.column_lineage: + if ( + column_lineage.downstream.table == ref.table + and column_lineage.downstream.column == ref.column + ): + resolved_column_refs.extend( + self.resolve_column_refs( + column_lineage.upstreams, depth=depth + 1 + ) + ) + resolved = True + break + # If we reach here, it means that we were not able to resolve the column reference. + if resolved is False: + logger.warning( + f"Unable to resolve column reference {ref} to a permanent table" + ) + else: + logger.debug( + f"Resolved column reference {ref} is not resolved because referenced table {ref.table} is not a temp table or not found. Adding reference as non-temp table. This is normal." + ) + resolved_column_refs.append(ref) + return resolved_column_refs + + def _update_target_dataset_cll( + self, + temp_table_urn: str, + target_dataset_cll: List[sqlglot_l.ColumnLineageInfo], + source_dataset_cll: List[sqlglot_l.ColumnLineageInfo], + ) -> None: + for target_column_lineage in target_dataset_cll: + upstreams: List[sqlglot_l.ColumnRef] = [] + # Look for temp_table_urn in upstream of column_lineage, if found then we need to replace it with + # column of permanent table + for target_column_ref in target_column_lineage.upstreams: + if target_column_ref.table == temp_table_urn: + # Look for column_ref.table and column_ref.column in downstream of source_dataset_cll. 
+ # The source_dataset_cll contains CLL generated from create statement of temp table (temp_table_urn) + for source_column_lineage in source_dataset_cll: + if ( + source_column_lineage.downstream.table + == target_column_ref.table + and source_column_lineage.downstream.column + == target_column_ref.column + ): + resolved_columns = self.resolve_column_refs( + source_column_lineage.upstreams + ) + # Add all upstream of above temporary column into upstream of target column + upstreams.extend(resolved_columns) + continue + + upstreams.append(target_column_ref) + + if upstreams: + # update the upstreams + target_column_lineage.upstreams = upstreams + + def _add_permanent_datasets_recursively( + self, + db_name: str, + temp_table_rows: List[TempTableRow], + visited_tables: Set[str], + connection: redshift_connector.Connection, + permanent_lineage_datasets: List[LineageDataset], + target_dataset_cll: Optional[List[sqlglot_l.ColumnLineageInfo]], + ) -> None: + transitive_temp_tables: List[TempTableRow] = [] + + for temp_table in temp_table_rows: + logger.debug( + f"Processing temp table with transaction id: {temp_table.transaction_id} and query text {temp_table.query_text}" + ) + + intermediate_l_datasets, cll = self._get_sources_from_query( + db_name=db_name, + query=temp_table.query_text, + parsed_result=temp_table.parsed_result, + ) + + if ( + temp_table.urn is not None + and target_dataset_cll is not None + and cll is not None + ): # condition to silent the lint + self._update_target_dataset_cll( + temp_table_urn=temp_table.urn, + target_dataset_cll=target_dataset_cll, + source_dataset_cll=cll, + ) + + # make sure lineage dataset should not contain a temp table + # if such dataset is present then add it to transitive_temp_tables to resolve it to original permanent table + for lineage_dataset in intermediate_l_datasets: + db, schema, table = split_qualified_table_name(lineage_dataset.urn) + + if table in visited_tables: + # The table is already processed + continue + + # Check if table found is again a temp table + repeated_temp_table: List[TempTableRow] = self.find_temp_tables( + temp_table_rows=list(self.temp_tables.values()), + temp_table_names=[table], + ) + + if not repeated_temp_table: + logger.debug(f"Unable to find table {table} in temp tables.") + + if repeated_temp_table: + transitive_temp_tables.extend(repeated_temp_table) + visited_tables.add(table) + continue + + permanent_lineage_datasets.append(lineage_dataset) + + if transitive_temp_tables: + # recursive call + self._add_permanent_datasets_recursively( + db_name=db_name, + temp_table_rows=transitive_temp_tables, + visited_tables=visited_tables, + connection=connection, + permanent_lineage_datasets=permanent_lineage_datasets, + target_dataset_cll=target_dataset_cll, + ) + + def update_table_and_column_lineage( + self, + db_name: str, + temp_table_names: List[str], + connection: redshift_connector.Connection, + target_source_dataset: List[LineageDataset], + target_dataset_cll: Optional[List[sqlglot_l.ColumnLineageInfo]], + ) -> int: + permanent_lineage_datasets: List[LineageDataset] = [] + + temp_table_rows: List[TempTableRow] = self.find_temp_tables( + temp_table_rows=list(self.temp_tables.values()), + temp_table_names=temp_table_names, + ) + + visited_tables: Set[str] = set(temp_table_names) + + self._add_permanent_datasets_recursively( + db_name=db_name, + temp_table_rows=temp_table_rows, + visited_tables=visited_tables, + connection=connection, + permanent_lineage_datasets=permanent_lineage_datasets, + 
target_dataset_cll=target_dataset_cll, + ) + + target_source_dataset.extend(permanent_lineage_datasets) + + return len(permanent_lineage_datasets) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py index 92e36fffd6bb41..93beb5980ea624 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/query.py @@ -1,9 +1,14 @@ from datetime import datetime +from typing import List redshift_datetime_format = "%Y-%m-%d %H:%M:%S" class RedshiftQuery: + CREATE_TEMP_TABLE_CLAUSE = "create temp table" + CREATE_TEMPORARY_TABLE_CLAUSE = "create temporary table" + CREATE_TABLE_CLAUSE = "create table" + list_databases: str = """SELECT datname FROM pg_database WHERE (datname <> ('padb_harvest')::name) AND (datname <> ('template0')::name) @@ -97,7 +102,7 @@ class RedshiftQuery: NULL as table_description FROM pg_catalog.svv_external_tables ORDER BY "schema", - "relname"; + "relname" """ list_columns: str = """ SELECT @@ -379,7 +384,8 @@ def list_insert_create_queries_sql( target_schema, target_table, username, - querytxt as ddl + query as query_id, + LISTAGG(CASE WHEN LEN(RTRIM(querytxt)) = 0 THEN querytxt ELSE RTRIM(querytxt) END) WITHIN GROUP (ORDER BY sequence) as ddl from ( select @@ -388,7 +394,9 @@ def list_insert_create_queries_sql( sti.table as target_table, sti.database as cluster, usename as username, - querytxt, + text as querytxt, + sq.query, + sequence, si.starttime as starttime from stl_insert as si @@ -396,19 +404,20 @@ def list_insert_create_queries_sql( sti.table_id = tbl left join svl_user_info sui on si.userid = sui.usesysid - left join stl_query sq on + left join STL_QUERYTEXT sq on si.query = sq.query left join stl_load_commits slc on slc.query = si.query where sui.usename <> 'rdsdb' - and sq.aborted = 0 and slc.query IS NULL and cluster = '{db_name}' and si.starttime >= '{start_time}' and si.starttime < '{end_time}' + and sequence < 320 ) as target_tables - order by cluster, target_schema, target_table, starttime asc + group by cluster, query_id, target_schema, target_table, username, starttime + order by cluster, query_id, target_schema, target_table, starttime asc """.format( # We need the original database name for filtering db_name=db_name, @@ -443,3 +452,118 @@ def list_copy_commands_sql( start_time=start_time.strftime(redshift_datetime_format), end_time=end_time.strftime(redshift_datetime_format), ) + + @staticmethod + def get_temp_table_clause(table_name: str) -> List[str]: + return [ + f"{RedshiftQuery.CREATE_TABLE_CLAUSE} {table_name}", + f"{RedshiftQuery.CREATE_TEMP_TABLE_CLAUSE} {table_name}", + f"{RedshiftQuery.CREATE_TEMPORARY_TABLE_CLAUSE} {table_name}", + ] + + @staticmethod + def temp_table_ddl_query(start_time: datetime, end_time: datetime) -> str: + start_time_str: str = start_time.strftime(redshift_datetime_format) + + end_time_str: str = end_time.strftime(redshift_datetime_format) + + return rf"""-- DataHub Redshift Source temp table DDL query + select + * + from + ( + select + session_id, + transaction_id, + start_time, + userid, + REGEXP_REPLACE(REGEXP_SUBSTR(REGEXP_REPLACE(query_text,'\\\\n','\\n'), '(CREATE(?:[\\n\\s\\t]+(?:temp|temporary))?(?:[\\n\\s\\t]+)table(?:[\\n\\s\\t]+)[^\\n\\s\\t()-]+)', 0, 1, 'ipe'),'[\\n\\s\\t]+',' ',1,'p') as create_command, + query_text, + row_number() over ( + partition by TRIM(query_text) + order by start_time desc + ) rn + from + ( + select + pid as 
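
Editor's note: `find_temp_tables` matches candidate names against the prefixes produced by `get_temp_table_clause`, for both the qualified name and the bare table name. What those prefixes look like for an illustrative table, assuming the class path from this patch:

```python
# Illustrative only: the CREATE-statement prefixes used to match a candidate temp table.
from datahub.ingestion.source.redshift.query import RedshiftQuery

assert RedshiftQuery.get_temp_table_clause("staging.orders_tmp") == [
    "create table staging.orders_tmp",
    "create temp table staging.orders_tmp",
    "create temporary table staging.orders_tmp",
]
```
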
session_id, + xid as transaction_id, + starttime as start_time, + type, + query_text, + userid + from + ( + select + starttime, + pid, + xid, + type, + userid, + LISTAGG(case + when LEN(RTRIM(text)) = 0 then text + else RTRIM(text) + end, + '') within group ( + order by sequence + ) as query_text + from + SVL_STATEMENTTEXT + where + type in ('DDL', 'QUERY') + AND starttime >= '{start_time_str}' + AND starttime < '{end_time_str}' + -- See https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext + AND sequence < 320 + group by + starttime, + pid, + xid, + type, + userid + order by + starttime, + pid, + xid, + type, + userid + asc) + where + type in ('DDL', 'QUERY') + ) + where + (create_command ilike 'create temp table %' + or create_command ilike 'create temporary table %' + -- we want to get all the create table statements and not just temp tables if non temp table is created and dropped in the same transaction + or create_command ilike 'create table %') + -- Redshift creates temp tables with the following names: volt_tt_%. We need to filter them out. + and query_text not ilike 'CREATE TEMP TABLE volt_tt_%' + and create_command not like 'CREATE TEMP TABLE volt_tt_' + -- We need to filter out our query and it was not possible earlier when we did not have any comment in the query + and query_text not ilike '%https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl-statementtext%' + + ) + where + rn = 1; + """ + + @staticmethod + def alter_table_rename_query( + db_name: str, start_time: datetime, end_time: datetime + ) -> str: + start_time_str: str = start_time.strftime(redshift_datetime_format) + end_time_str: str = end_time.strftime(redshift_datetime_format) + + return f""" + SELECT transaction_id, + session_id, + start_time, + query_text + FROM sys_query_history SYS + WHERE SYS.status = 'success' + AND SYS.query_type = 'DDL' + AND SYS.database_name = '{db_name}' + AND SYS.start_time >= '{start_time_str}' + AND SYS.end_time < '{end_time_str}' + AND SYS.query_text ILIKE 'alter table % rename to %' + """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py index ca81682ae00e49..0ea073c050502c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift_schema.py @@ -9,6 +9,7 @@ from datahub.ingestion.source.sql.sql_generic import BaseColumn, BaseTable from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField from datahub.utilities.hive_schema_to_avro import get_schema_fields_for_hive_column +from datahub.utilities.sqlglot_lineage import SqlParsingResult logger: logging.Logger = logging.getLogger(__name__) @@ -80,6 +81,26 @@ class LineageRow: filename: Optional[str] +@dataclass +class TempTableRow: + transaction_id: int + session_id: str + query_text: str + create_command: str + start_time: datetime + urn: Optional[str] + parsed_result: Optional[SqlParsingResult] = None + + +@dataclass +class AlterTableRow: + # TODO unify this type with TempTableRow + transaction_id: int + session_id: str + query_text: str + start_time: datetime + + # this is a class to be a proxy to query Redshift class RedshiftDataDictionary: @staticmethod @@ -359,9 +380,62 @@ def get_lineage_rows( target_table=row[field_names.index("target_table")] if "target_table" in field_names else None, - 
ddl=row[field_names.index("ddl")] if "ddl" in field_names else None, + # See https://docs.aws.amazon.com/redshift/latest/dg/r_STL_QUERYTEXT.html + # for why we need to remove the \\n. + ddl=row[field_names.index("ddl")].replace("\\n", "\n") + if "ddl" in field_names + else None, filename=row[field_names.index("filename")] if "filename" in field_names else None, ) rows = cursor.fetchmany() + + @staticmethod + def get_temporary_rows( + conn: redshift_connector.Connection, + query: str, + ) -> Iterable[TempTableRow]: + cursor = conn.cursor() + + cursor.execute(query) + + field_names = [i[0] for i in cursor.description] + + rows = cursor.fetchmany() + while rows: + for row in rows: + yield TempTableRow( + transaction_id=row[field_names.index("transaction_id")], + session_id=row[field_names.index("session_id")], + # See https://docs.aws.amazon.com/redshift/latest/dg/r_STL_QUERYTEXT.html + # for why we need to replace the \n with a newline. + query_text=row[field_names.index("query_text")].replace( + r"\n", "\n" + ), + create_command=row[field_names.index("create_command")], + start_time=row[field_names.index("start_time")], + urn=None, + ) + rows = cursor.fetchmany() + + @staticmethod + def get_alter_table_commands( + conn: redshift_connector.Connection, + query: str, + ) -> Iterable[AlterTableRow]: + # TODO: unify this with get_temporary_rows + cursor = RedshiftDataDictionary.get_query_result(conn, query) + + field_names = [i[0] for i in cursor.description] + + rows = cursor.fetchmany() + while rows: + for row in rows: + yield AlterTableRow( + transaction_id=row[field_names.index("transaction_id")], + session_id=row[field_names.index("session_id")], + query_text=row[field_names.index("query_text")], + start_time=row[field_names.index("start_time")], + ) + rows = cursor.fetchmany() diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py index 333c851650fb3a..36ac7955f15d51 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py @@ -35,6 +35,7 @@ class RedshiftReport(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowRep num_lineage_tables_dropped: int = 0 num_lineage_dropped_query_parser: int = 0 num_lineage_dropped_not_support_copy_path: int = 0 + num_lineage_processed_temp_tables = 0 lineage_start_time: Optional[datetime] = None lineage_end_time: Optional[datetime] = None @@ -43,6 +44,7 @@ class RedshiftReport(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowRep usage_start_time: Optional[datetime] = None usage_end_time: Optional[datetime] = None stateful_usage_ingestion_enabled: bool = False + num_unresolved_temp_columns: int = 0 def report_dropped(self, key: str) -> None: self.filtered.append(key) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py index b896df1fa340e3..aad4a6ed27cb89 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py @@ -140,7 +140,9 @@ class SnowflakeV2Config( # This is required since access_history table does not capture whether the table was temporary table. temporary_tables_pattern: List[str] = Field( default=DEFAULT_TABLES_DENY_LIST, - description="[Advanced] Regex patterns for temporary tables to filter in lineage ingestion. 
Specify regex to match the entire table name in database.schema.table format. Defaults are to set in such a way to ignore the temporary staging tables created by known ETL tools.", + description="[Advanced] Regex patterns for temporary tables to filter in lineage ingestion. Specify regex to " + "match the entire table name in database.schema.table format. Defaults are set in such a way " + "to ignore the temporary staging tables created by known ETL tools.", ) rename_upstreams_deny_pattern_to_temporary_table_pattern = pydantic_renamed_field( @@ -150,13 +152,16 @@ class SnowflakeV2Config( shares: Optional[Dict[str, SnowflakeShareConfig]] = Field( default=None, description="Required if current account owns or consumes snowflake share." - " If specified, connector creates lineage and siblings relationship between current account's database tables and consumer/producer account's database tables." + " If specified, connector creates lineage and siblings relationship between current account's database tables " + "and consumer/producer account's database tables." " Map of share name -> details of share.", ) email_as_user_identifier: bool = Field( default=True, - description="Format user urns as an email, if the snowflake user's email is set. If `email_domain` is provided, generates email addresses for snowflake users with unset emails, based on their username.", + description="Format user urns as an email, if the snowflake user's email is set. If `email_domain` is " + "provided, generates email addresses for snowflake users with unset emails, based on their " + "username.", ) @validator("convert_urns_to_lowercase") diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py b/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py index e8e25a061a665e..fb776ca8d23281 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py @@ -20,8 +20,8 @@ def _update_work_unit_id( envelope: RecordEnvelope, urn: str, aspect_name: str ) -> Dict[Any, Any]: - structured_urn = Urn.create_from_string(urn) - simple_name = "-".join(structured_urn.get_entity_id()) + structured_urn = Urn.from_string(urn) + simple_name = "-".join(structured_urn.entity_ids) record_metadata = envelope.metadata.copy() record_metadata.update({"workunit_id": f"txform-{simple_name}-{aspect_name}"}) return record_metadata diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py index 76f883b629d505..6266e0bca6c614 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py @@ -1,11 +1,14 @@ +import logging import re from functools import lru_cache -from typing import List, Optional, cast +from typing import List, Optional, Sequence, Union, cast from datahub.configuration.common import TransformerSemanticsConfigModel from datahub.emitter.mce_builder import Aspect +from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.transformer.dataset_transformer import DatasetTagsTransformer +from datahub.metadata._schema_classes import MetadataChangeProposalClass from datahub.metadata.schema_classes import ( GlobalTagsClass, OwnerClass, @@ -16,6 +19,8 @@ from
datahub.utilities.urns.corpuser_urn import CorpuserUrn from datahub.utilities.urns.tag_urn import TagUrn +logger = logging.getLogger(__name__) + class ExtractOwnersFromTagsConfig(TransformerSemanticsConfigModel): tag_prefix: str @@ -38,11 +43,13 @@ class ExtractOwnersFromTagsTransformer(DatasetTagsTransformer): ctx: PipelineContext config: ExtractOwnersFromTagsConfig + owner_mcps: List[MetadataChangeProposalWrapper] def __init__(self, config: ExtractOwnersFromTagsConfig, ctx: PipelineContext): super().__init__() self.ctx = ctx self.config = config + self.owner_mcps = [] @classmethod def create( @@ -56,6 +63,12 @@ def get_owner_urn(self, owner_str: str) -> str: return owner_str + "@" + self.config.email_domain return owner_str + def handle_end_of_stream( + self, + ) -> Sequence[Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]]: + + return self.owner_mcps + def transform_aspect( self, entity_urn: str, aspect_name: str, aspect: Optional[Aspect] ) -> Optional[Aspect]: @@ -64,28 +77,39 @@ def transform_aspect( return None tags = in_tags_aspect.tags owners: List[OwnerClass] = [] + for tag_class in tags: tag_urn = TagUrn.from_string(tag_class.tag) - tag_str = tag_urn.get_entity_id()[0] + tag_str = tag_urn.entity_ids[0] re_match = re.search(self.config.tag_prefix, tag_str) if re_match: owner_str = tag_str[re_match.end() :].strip() owner_urn_str = self.get_owner_urn(owner_str) if self.config.is_user: - owner_urn = str(CorpuserUrn.create_from_id(owner_urn_str)) + owner_urn = str(CorpuserUrn(owner_urn_str)) else: - owner_urn = str(CorpGroupUrn.create_from_id(owner_urn_str)) + owner_urn = str(CorpGroupUrn(owner_urn_str)) owner_type = get_owner_type(self.config.owner_type) if owner_type == OwnershipTypeClass.CUSTOM: assert ( self.config.owner_type_urn is not None ), "owner_type_urn must be set if owner_type is CUSTOM" - owner = OwnerClass( - owner=owner_urn, - type=owner_type, - typeUrn=self.config.owner_type_urn, + + owners.append( + OwnerClass( + owner=owner_urn, + type=owner_type, + typeUrn=self.config.owner_type_urn, + ) ) - owners.append(owner) - owner_aspect = OwnershipClass(owners=owners) - return cast(Aspect, owner_aspect) + self.owner_mcps.append( + MetadataChangeProposalWrapper( + entityUrn=entity_urn, + aspect=OwnershipClass( + owners=owners, + ), + ) + ) + + return None diff --git a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py index 8b393a8f6f1c68..9890fea990c4ed 100644 --- a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py +++ b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py @@ -32,7 +32,7 @@ from sqlalchemy.engine.url import make_url import datahub.emitter.mce_builder as builder -from datahub.cli.cli_utils import get_boolean_env_variable +from datahub.cli.env_utils import get_boolean_env_variable from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.rest_emitter import DatahubRestEmitter from datahub.emitter.serialization_helper import pre_json_transform diff --git a/metadata-ingestion/src/datahub/specific/chart.py b/metadata-ingestion/src/datahub/specific/chart.py index 5dc394e8ebe0fb..47ea539491359d 100644 --- a/metadata-ingestion/src/datahub/specific/chart.py +++ b/metadata-ingestion/src/datahub/specific/chart.py @@ -40,7 +40,7 @@ def __init__( audit_header: The Kafka audit header of the chart (optional). 
""" super().__init__( - urn, "chart", system_metadata=system_metadata, audit_header=audit_header + urn, system_metadata=system_metadata, audit_header=audit_header ) self.custom_properties_patch_helper = CustomPropertiesPatchHelper( self, ChartInfo.ASPECT_NAME diff --git a/metadata-ingestion/src/datahub/specific/dashboard.py b/metadata-ingestion/src/datahub/specific/dashboard.py index 855dcc5685cea4..64f28a57cb1800 100644 --- a/metadata-ingestion/src/datahub/specific/dashboard.py +++ b/metadata-ingestion/src/datahub/specific/dashboard.py @@ -40,7 +40,7 @@ def __init__( audit_header: The Kafka audit header of the dashboard (optional). """ super().__init__( - urn, "dashboard", system_metadata=system_metadata, audit_header=audit_header + urn, system_metadata=system_metadata, audit_header=audit_header ) self.custom_properties_patch_helper = CustomPropertiesPatchHelper( self, DashboardInfo.ASPECT_NAME diff --git a/metadata-ingestion/src/datahub/specific/datajob.py b/metadata-ingestion/src/datahub/specific/datajob.py index 0338a1320c15b8..174749b3268bf2 100644 --- a/metadata-ingestion/src/datahub/specific/datajob.py +++ b/metadata-ingestion/src/datahub/specific/datajob.py @@ -41,7 +41,7 @@ def __init__( audit_header: The Kafka audit header of the data job (optional). """ super().__init__( - urn, "datajob", system_metadata=system_metadata, audit_header=audit_header + urn, system_metadata=system_metadata, audit_header=audit_header ) self.custom_properties_patch_helper = CustomPropertiesPatchHelper( self, DataJobInfo.ASPECT_NAME diff --git a/metadata-ingestion/src/datahub/specific/dataproduct.py b/metadata-ingestion/src/datahub/specific/dataproduct.py index 2c174e0c9a6cbb..c698c511fd9b58 100644 --- a/metadata-ingestion/src/datahub/specific/dataproduct.py +++ b/metadata-ingestion/src/datahub/specific/dataproduct.py @@ -30,7 +30,6 @@ def __init__( ) -> None: super().__init__( urn, - "dataProduct", system_metadata=system_metadata, audit_header=audit_header, ) diff --git a/metadata-ingestion/src/datahub/specific/dataset.py b/metadata-ingestion/src/datahub/specific/dataset.py index d3c3de36198e3f..c59cdb8ddfa382 100644 --- a/metadata-ingestion/src/datahub/specific/dataset.py +++ b/metadata-ingestion/src/datahub/specific/dataset.py @@ -98,7 +98,7 @@ def __init__( audit_header: Optional[KafkaAuditHeaderClass] = None, ) -> None: super().__init__( - urn, "dataset", system_metadata=system_metadata, audit_header=audit_header + urn, system_metadata=system_metadata, audit_header=audit_header ) self.custom_properties_patch_helper = CustomPropertiesPatchHelper( self, DatasetProperties.ASPECT_NAME diff --git a/metadata-ingestion/src/datahub/telemetry/telemetry.py b/metadata-ingestion/src/datahub/telemetry/telemetry.py index c399f2e1a27e55..a802125e76b4e1 100644 --- a/metadata-ingestion/src/datahub/telemetry/telemetry.py +++ b/metadata-ingestion/src/datahub/telemetry/telemetry.py @@ -13,7 +13,8 @@ from typing_extensions import ParamSpec import datahub as datahub_package -from datahub.cli.cli_utils import DATAHUB_ROOT_FOLDER, get_boolean_env_variable +from datahub.cli.config_utils import DATAHUB_ROOT_FOLDER +from datahub.cli.env_utils import get_boolean_env_variable from datahub.configuration.common import ExceptionWithProps from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.schema_classes import _custom_package_path diff --git a/metadata-ingestion/src/datahub/utilities/mapping.py b/metadata-ingestion/src/datahub/utilities/mapping.py index 00f7d370d16765..5d26c3af54d5ef 100644 --- 
a/metadata-ingestion/src/datahub/utilities/mapping.py +++ b/metadata-ingestion/src/datahub/utilities/mapping.py @@ -31,6 +31,23 @@ def _get_best_match(the_match: Match, group_name: str) -> str: return the_match.group(0) +def _make_owner_category_list( + owner_type: OwnerType, + owner_category: Any, + owner_category_urn: Optional[str], + owner_ids: List[str], +) -> List[Dict]: + + return [ + { + "urn": mce_builder.make_owner_urn(owner_id, owner_type), + "category": owner_category, + "categoryUrn": owner_category_urn, + } + for owner_id in owner_ids + ] + + _match_regexp = re.compile(r"{{\s*\$match\s*}}", flags=re.MULTILINE) @@ -149,13 +166,26 @@ def process(self, raw_props: Mapping[str, Any]) -> Dict[str, Any]: operation = self.get_operation_value( operation_key, operation_type, operation_config, maybe_match ) + if operation_type == Constants.ADD_TERMS_OPERATION: # add_terms operation is a special case where the operation value is a list of terms. # We want to aggregate these values with the add_term operation. operation_type = Constants.ADD_TERM_OPERATION if operation: - if isinstance(operation, (str, list)): + if ( + isinstance(operation, list) + and operation_type == Constants.ADD_OWNER_OPERATION + ): + operation_value_list = operations_map.get( + operation_type, list() + ) + cast(List, operation_value_list).extend( + operation + ) # cast to silent the lint + operations_map[operation_type] = operation_value_list + + elif isinstance(operation, (str, list)): operations_value_set = operations_map.get( operation_type, set() ) @@ -184,8 +214,11 @@ def convert_to_aspects( tag_aspect = mce_builder.make_global_tag_aspect_with_tag_list( sorted(operation_map[Constants.ADD_TAG_OPERATION]) ) + aspect_map[Constants.ADD_TAG_OPERATION] = tag_aspect + if Constants.ADD_OWNER_OPERATION in operation_map: + owner_aspect = OwnershipClass( owners=[ OwnerClass( @@ -202,6 +235,7 @@ def convert_to_aspects( ) ] ) + aspect_map[Constants.ADD_OWNER_OPERATION] = owner_aspect if Constants.ADD_TERM_OPERATION in operation_map: @@ -262,7 +296,7 @@ def get_operation_value( operation_type: str, operation_config: Dict, match: Match, - ) -> Optional[Union[str, Dict, List[str]]]: + ) -> Optional[Union[str, Dict, List[str], List[Dict]]]: if ( operation_type == Constants.ADD_TAG_OPERATION and operation_config[Constants.TAG] @@ -278,30 +312,39 @@ def get_operation_value( and operation_config[Constants.OWNER_TYPE] ): owner_id = _get_best_match(match, "owner") + + owner_ids: List[str] = [_id.strip() for _id in owner_id.split(",")] + owner_category = ( operation_config.get(Constants.OWNER_CATEGORY) or OwnershipTypeClass.DATAOWNER ) - owner_category_urn = None + owner_category_urn: Optional[str] = None if owner_category.startswith("urn:li:"): owner_category_urn = owner_category owner_category = OwnershipTypeClass.DATAOWNER else: owner_category = owner_category.upper() + if self.strip_owner_email_id: - owner_id = self.sanitize_owner_ids(owner_id) - if operation_config[Constants.OWNER_TYPE] == Constants.USER_OWNER: - return { - "urn": mce_builder.make_owner_urn(owner_id, OwnerType.USER), - "category": owner_category, - "categoryUrn": owner_category_urn, - } - elif operation_config[Constants.OWNER_TYPE] == Constants.GROUP_OWNER: - return { - "urn": mce_builder.make_owner_urn(owner_id, OwnerType.GROUP), - "category": owner_category, - "categoryUrn": owner_category_urn, - } + owner_ids = [ + self.sanitize_owner_ids(owner_id) for owner_id in owner_ids + ] + + owner_type_mapping: Dict[str, OwnerType] = { + Constants.USER_OWNER: 
OwnerType.USER, + Constants.GROUP_OWNER: OwnerType.GROUP, + } + if operation_config[Constants.OWNER_TYPE] in owner_type_mapping: + return _make_owner_category_list( + owner_ids=owner_ids, + owner_category=owner_category, + owner_category_urn=owner_category_urn, + owner_type=owner_type_mapping[ + operation_config[Constants.OWNER_TYPE] + ], + ) + elif ( operation_type == Constants.ADD_TERM_OPERATION and operation_config[Constants.TERM] diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index abe4f826737778..5b063451df9cf4 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -1037,6 +1037,14 @@ def _sqlglot_lineage_inner( default_db = default_db.upper() if default_schema: default_schema = default_schema.upper() + if _is_dialect_instance(dialect, "redshift") and not default_schema: + # On Redshift, there's no "USE SCHEMA " command. The default schema + # is public, and "current schema" is the one at the front of the search path. + # See https://docs.aws.amazon.com/redshift/latest/dg/r_search_path.html + # and https://stackoverflow.com/questions/9067335/how-does-the-search-path-influence-identifier-resolution-and-the-current-schema?noredirect=1&lq=1 + # default_schema = "public" + # TODO: Re-enable this. + pass logger.debug("Parsing lineage from sql statement: %s", sql) statement = _parse_statement(sql, dialect=dialect) diff --git a/metadata-ingestion/tests/integration/fivetran/fivetran_snowflake_empty_connection_user_golden.json b/metadata-ingestion/tests/integration/fivetran/fivetran_snowflake_empty_connection_user_golden.json new file mode 100644 index 00000000000000..65f8620d0b4c42 --- /dev/null +++ b/metadata-ingestion/tests/integration/fivetran/fivetran_snowflake_empty_connection_user_golden.json @@ -0,0 +1,618 @@ +[ +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": {}, + "name": "postgres" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:fivetran" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "paused": "False", + "sync_frequency": "1440", + "destination_id": "'interval_unconstitutional'" + }, + "name": "postgres", + "type": { + "string": "COMMAND" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, 
+{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.employee,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.company,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.employee,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.employee,PROD),name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.company,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.company,PROD),name)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:fivetran" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": 
"urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "4c9a03d6-eded-4422-a46a-163266e58243", + "type": "BATCH_SCHEDULED", + "created": { + "time": 1695191853000, + "actor": "urn:li:corpuser:datahub" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "upstreamInstances": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.employee,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.company,PROD)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1695191853000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1695191885000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "fivetran" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": 
"dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "f773d1e9-c791-48f4-894f-8cf9b3dfc834", + "type": "BATCH_SCHEDULED", + "created": { + "time": 1696343730000, + "actor": "urn:li:corpuser:datahub" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "upstreamInstances": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.employee,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.company,PROD)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696343730000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696343732000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SKIPPED", + "nativeResultType": "fivetran" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "63c2fc85-600b-455f-9ba0-f576522465be", + "type": "BATCH_SCHEDULED", + "created": { + "time": 1696343755000, + 
"actor": "urn:li:corpuser:datahub" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "upstreamInstances": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.employee,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,test_database.postgres_public.company,PROD)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696343755000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696343790000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "FAILURE", + "nativeResultType": "fivetran" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file 
diff --git a/metadata-ingestion/tests/integration/fivetran/test_fivetran.py b/metadata-ingestion/tests/integration/fivetran/test_fivetran.py index 22dbd58acf1e51..a757975b2e1379 100644 --- a/metadata-ingestion/tests/integration/fivetran/test_fivetran.py +++ b/metadata-ingestion/tests/integration/fivetran/test_fivetran.py @@ -1,4 +1,5 @@ import datetime +from functools import partial from unittest import mock from unittest.mock import MagicMock @@ -18,24 +19,28 @@ FROZEN_TIME = "2022-06-07 17:00:00" +default_connector_query_results = [ + { + "connector_id": "calendar_elected", + "connecting_user_id": "reapply_phone", + "connector_type_id": "postgres", + "connector_name": "postgres", + "paused": False, + "sync_frequency": 1440, + "destination_id": "interval_unconstitutional", + }, +] -def default_query_results(query): + +def default_query_results( + query, connector_query_results=default_connector_query_results +): fivetran_log_query = FivetranLogQuery() fivetran_log_query.set_db("test") if query == fivetran_log_query.use_database("test_database"): return [] elif query == fivetran_log_query.get_connectors_query(): - return [ - { - "connector_id": "calendar_elected", - "connecting_user_id": "reapply_phone", - "connector_type_id": "postgres", - "connector_name": "postgres", - "paused": False, - "sync_frequency": 1440, - "destination_id": "interval_unconstitutional", - }, - ] + return connector_query_results elif query == fivetran_log_query.get_table_lineage_query("calendar_elected"): return [ { @@ -182,6 +187,92 @@ def test_fivetran_with_snowflake_dest(pytestconfig, tmp_path): ) +@freeze_time(FROZEN_TIME) +@pytest.mark.integration +def test_fivetran_with_snowflake_dest_and_null_connector_user(pytestconfig, tmp_path): + test_resources_dir = pytestconfig.rootpath / "tests/integration/fivetran" + + # Run the metadata ingestion pipeline. 
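+ # The mocked connector rows defined below set connecting_user_id to None, so the
+ # emitted ownership aspects are expected to contain empty owner lists; the pipeline
+ # output is compared against fivetran_snowflake_empty_connection_user_golden.json.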
+ output_file = tmp_path / "fivetran_test_events.json" + golden_file = ( + test_resources_dir / "fivetran_snowflake_empty_connection_user_golden.json" + ) + + with mock.patch( + "datahub.ingestion.source.fivetran.fivetran_log_api.create_engine" + ) as mock_create_engine: + connection_magic_mock = MagicMock() + + connector_query_results = [ + { + "connector_id": "calendar_elected", + "connecting_user_id": None, + "connector_type_id": "postgres", + "connector_name": "postgres", + "paused": False, + "sync_frequency": 1440, + "destination_id": "interval_unconstitutional", + }, + ] + + connection_magic_mock.execute.side_effect = partial( + default_query_results, connector_query_results=connector_query_results + ) + + mock_create_engine.return_value = connection_magic_mock + + pipeline = Pipeline.create( + { + "run_id": "powerbi-test", + "source": { + "type": "fivetran", + "config": { + "fivetran_log_config": { + "destination_platform": "snowflake", + "snowflake_destination_config": { + "account_id": "testid", + "warehouse": "test_wh", + "username": "test", + "password": "test@123", + "database": "test_database", + "role": "testrole", + "log_schema": "test", + }, + }, + "connector_patterns": { + "allow": [ + "postgres", + ] + }, + "sources_to_database": { + "calendar_elected": "postgres_db", + }, + "sources_to_platform_instance": { + "calendar_elected": { + "env": "DEV", + } + }, + }, + }, + "sink": { + "type": "file", + "config": { + "filename": f"{output_file}", + }, + }, + } + ) + + pipeline.run() + pipeline.raise_from_status() + + mce_helpers.check_golden_file( + pytestconfig, + output_path=f"{output_file}", + golden_path=f"{golden_file}", + ) + + @freeze_time(FROZEN_TIME) @pytest.mark.integration def test_fivetran_with_bigquery_dest(pytestconfig, tmp_path): diff --git a/metadata-ingestion/tests/test_helpers/docker_helpers.py b/metadata-ingestion/tests/test_helpers/docker_helpers.py index 2eb61068196a23..bacb8d80b9e721 100644 --- a/metadata-ingestion/tests/test_helpers/docker_helpers.py +++ b/metadata-ingestion/tests/test_helpers/docker_helpers.py @@ -2,7 +2,7 @@ import logging import os import subprocess -from typing import Callable, Optional, Union +from typing import Callable, Iterator, List, Optional, Union import pytest import pytest_docker.plugin @@ -37,9 +37,11 @@ def wait_for_port( docker_services.wait_until_responsive( timeout=timeout, pause=pause, - check=checker - if checker - else lambda: is_responsive(container_name, container_port, hostname), + check=( + checker + if checker + else lambda: is_responsive(container_name, container_port, hostname) + ), ) logger.info(f"Container {container_name} is ready!") finally: @@ -62,14 +64,16 @@ def docker_compose_runner( ): @contextlib.contextmanager def run( - compose_file_path: Union[str, list], key: str, cleanup: bool = True - ) -> pytest_docker.plugin.Services: + compose_file_path: Union[str, List[str]], key: str, cleanup: bool = True + ) -> Iterator[pytest_docker.plugin.Services]: with pytest_docker.plugin.get_docker_services( docker_compose_command=docker_compose_command, - docker_compose_file=compose_file_path, + # We can remove the type ignore once this is merged: + # https://github.com/avast/pytest-docker/pull/108 + docker_compose_file=compose_file_path, # type: ignore docker_compose_project_name=f"{docker_compose_project_name}-{key}", docker_setup=docker_setup, - docker_cleanup=docker_cleanup if cleanup else False, + docker_cleanup=docker_cleanup if cleanup else [], ) as docker_services: yield docker_services diff --git 
a/metadata-ingestion/tests/unit/api/entities/dataproducts/dataproduct_ownership_type_urn.yaml b/metadata-ingestion/tests/unit/api/entities/dataproducts/dataproduct_ownership_type_urn.yaml new file mode 100644 index 00000000000000..543f9db6abd225 --- /dev/null +++ b/metadata-ingestion/tests/unit/api/entities/dataproducts/dataproduct_ownership_type_urn.yaml @@ -0,0 +1,25 @@ +id: pet_of_the_week +domain: Marketing +display_name: Pet of the Week Campaign +description: |- + This campaign includes Pet of the Week data. + +assets: + - urn:li:container:DATABASE + - urn:li:container:SCHEMA + - urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,test_feature_table_all_feature_dtypes) + +owners: + - id: urn:li:corpuser:jdoe + type: urn:li:ownershipType:abc +properties: + version: 2.0 + classification: pii + +tags: + - urn:li:tag:awesome + +terms: + - urn:li:glossaryTerm:ClientsAndAccounts.AccountBalance + +external_url: https://github.com/datahub-project/datahub diff --git a/metadata-ingestion/tests/unit/api/entities/dataproducts/dataproduct_ownership_type_urn_different_owner.yaml b/metadata-ingestion/tests/unit/api/entities/dataproducts/dataproduct_ownership_type_urn_different_owner.yaml new file mode 100644 index 00000000000000..fcc79bcbe5edb7 --- /dev/null +++ b/metadata-ingestion/tests/unit/api/entities/dataproducts/dataproduct_ownership_type_urn_different_owner.yaml @@ -0,0 +1,28 @@ +id: pet_of_the_week +domain: Marketing +display_name: Pet of the Week Campaign +description: |- + This campaign includes Pet of the Week data. + +assets: + - urn:li:container:DATABASE + - urn:li:container:SCHEMA + - urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,test_feature_table_all_feature_dtypes) + +owners: + - id: urn:li:corpuser:jsmith + type: urn:li:ownershipType:abc + - id: urn:li:corpuser:jdoe + type: TECHNICAL_OWNER + +properties: + version: 2.0 + classification: pii + +tags: + - urn:li:tag:awesome + +terms: + - urn:li:glossaryTerm:ClientsAndAccounts.AccountBalance + +external_url: https://github.com/datahub-project/datahub diff --git a/metadata-ingestion/tests/unit/api/entities/dataproducts/golden_dataproduct_out_ownership_type_urn.json b/metadata-ingestion/tests/unit/api/entities/dataproducts/golden_dataproduct_out_ownership_type_urn.json new file mode 100644 index 00000000000000..35c46caa682db7 --- /dev/null +++ b/metadata-ingestion/tests/unit/api/entities/dataproducts/golden_dataproduct_out_ownership_type_urn.json @@ -0,0 +1,125 @@ +[ + { + "entityType": "dataProduct", + "entityUrn": "urn:li:dataProduct:pet_of_the_week", + "changeType": "UPSERT", + "aspectName": "dataProductProperties", + "aspect": { + "json": { + "customProperties": { + "version": "2.0", + "classification": "pii" + }, + "externalUrl": "https://github.com/datahub-project/datahub", + "name": "Pet of the Week Campaign", + "description": "This campaign includes Pet of the Week data.", + "assets": [ + { + "destinationUrn": "urn:li:container:DATABASE", + "created": { + "time": 1681455600000, + "actor": "urn:li:corpuser:datahub", + "message": "yaml" + } + }, + { + "destinationUrn": "urn:li:container:SCHEMA", + "created": { + "time": 1681455600000, + "actor": "urn:li:corpuser:datahub", + "message": "yaml" + } + }, + { + "destinationUrn": "urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,test_feature_table_all_feature_dtypes)", + "created": { + "time": 1681455600000, + "actor": "urn:li:corpuser:datahub", + "message": "yaml" + } + } + ] + } + } + }, + { + "entityType": "dataProduct", + "entityUrn": "urn:li:dataProduct:pet_of_the_week", + 
"changeType": "UPSERT", + "aspectName": "domains", + "aspect": { + "json": { + "domains": [ + "urn:li:domain:12345" + ] + } + } + }, + { + "entityType": "dataProduct", + "entityUrn": "urn:li:dataProduct:pet_of_the_week", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:awesome" + } + ] + } + } + }, + { + "entityType": "dataProduct", + "entityUrn": "urn:li:dataProduct:pet_of_the_week", + "changeType": "UPSERT", + "aspectName": "glossaryTerms", + "aspect": { + "json": { + "terms": [ + { + "urn": "urn:li:glossaryTerm:ClientsAndAccounts.AccountBalance" + } + ], + "auditStamp": { + "time": 1681455600000, + "actor": "urn:li:corpuser:datahub", + "message": "yaml" + } + } + } + }, + { + "entityType": "dataProduct", + "entityUrn": "urn:li:dataProduct:pet_of_the_week", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:jdoe", + "type": "CUSTOM", + "typeUrn": "urn:li:ownershipType:abc" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + }, + { + "entityType": "dataProduct", + "entityUrn": "urn:li:dataProduct:pet_of_the_week", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } + } +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/api/entities/dataproducts/test_dataproduct.py b/metadata-ingestion/tests/unit/api/entities/dataproducts/test_dataproduct.py index 9900eb1e7f2838..e796f0b3f37219 100644 --- a/metadata-ingestion/tests/unit/api/entities/dataproducts/test_dataproduct.py +++ b/metadata-ingestion/tests/unit/api/entities/dataproducts/test_dataproduct.py @@ -153,3 +153,53 @@ def test_dataproduct_patch_yaml( ) is False ) + + +@freeze_time(FROZEN_TIME) +def test_dataproduct_ownership_type_urn_from_yaml( + pytestconfig: pytest.Config, + tmp_path: Path, + test_resources_dir: Path, + base_mock_graph: MockDataHubGraph, +) -> None: + data_product_file = test_resources_dir / "dataproduct_ownership_type_urn.yaml" + mock_graph = base_mock_graph + data_product = DataProduct.from_yaml(data_product_file, mock_graph) + + for mcp in data_product.generate_mcp(upsert=False): + mock_graph.emit(mcp) + + output_file = tmp_path / "test_dataproduct_out.json" + mock_graph.sink_to_file(output_file) + golden_file = test_resources_dir / "golden_dataproduct_out_ownership_type_urn.json" + check_golden_file(pytestconfig, output_file, golden_file) + + +@freeze_time(FROZEN_TIME) +def test_dataproduct_ownership_type_urn_patch_yaml( + tmp_path: Path, test_resources_dir: Path, base_mock_graph: MockDataHubGraph +) -> None: + mock_graph = base_mock_graph + source_file = test_resources_dir / "golden_dataproduct_out_ownership_type_urn.json" + mock_graph.import_file(source_file) + + data_product_file = ( + test_resources_dir / "dataproduct_ownership_type_urn_different_owner.yaml" + ) + original_data_product: DataProduct = DataProduct.from_yaml( + data_product_file, mock_graph + ) + + data_product: DataProduct = DataProduct.from_datahub( + mock_graph, id="urn:li:dataProduct:pet_of_the_week" + ) + + dataproduct_output_file = ( + tmp_path / "patch_dataproduct_ownership_type_urn_different_owner.yaml" + ) + data_product.patch_yaml(original_data_product, dataproduct_output_file) + + assert not check_yaml_golden_file( + str(dataproduct_output_file), + str(test_resources_dir / "dataproduct_ownership_type_urn.yaml"), + ) diff --git a/metadata-ingestion/tests/unit/redshift_query_mocker.py 
b/metadata-ingestion/tests/unit/redshift_query_mocker.py new file mode 100644 index 00000000000000..631e6e7ceaf1f6 --- /dev/null +++ b/metadata-ingestion/tests/unit/redshift_query_mocker.py @@ -0,0 +1,104 @@ +from datetime import datetime +from unittest.mock import MagicMock + + +def mock_temp_table_cursor(cursor: MagicMock) -> None: + cursor.description = [ + ["transaction_id"], + ["session_id"], + ["query_text"], + ["create_command"], + ["start_time"], + ] + + cursor.fetchmany.side_effect = [ + [ + ( + 126, + "abc", + "CREATE TABLE #player_price distkey(player_id) AS SELECT player_id, SUM(price) AS " + "price_usd from player_activity group by player_id", + "CREATE TABLE #player_price", + datetime.now(), + ) + ], + [ + # Empty result to stop the while loop + ], + ] + + +def mock_stl_insert_table_cursor(cursor: MagicMock) -> None: + cursor.description = [ + ["source_schema"], + ["source_table"], + ["target_schema"], + ["target_table"], + ["ddl"], + ] + + cursor.fetchmany.side_effect = [ + [ + ( + "public", + "#player_price", + "public", + "player_price_with_hike_v6", + "INSERT INTO player_price_with_hike_v6 SELECT (price_usd + 0.2 * price_usd) as price, '20%' FROM " + "#player_price", + ) + ], + [ + # Empty result to stop the while loop + ], + ] + + +query_vs_cursor_mocker = { + ( + "-- DataHub Redshift Source temp table DDL query\n select\n *\n " + "from\n (\n select\n session_id,\n " + " transaction_id,\n start_time,\n userid,\n " + " REGEXP_REPLACE(REGEXP_SUBSTR(REGEXP_REPLACE(query_text,'\\\\\\\\n','\\\\n'), '(CREATE(?:[" + "\\\\n\\\\s\\\\t]+(?:temp|temporary))?(?:[\\\\n\\\\s\\\\t]+)table(?:[\\\\n\\\\s\\\\t]+)[" + "^\\\\n\\\\s\\\\t()-]+)', 0, 1, 'ipe'),'[\\\\n\\\\s\\\\t]+',' ',1,'p') as create_command,\n " + " query_text,\n row_number() over (\n partition " + "by TRIM(query_text)\n order by start_time desc\n ) rn\n " + " from\n (\n select\n pid " + "as session_id,\n xid as transaction_id,\n starttime " + "as start_time,\n type,\n query_text,\n " + " userid\n from\n (\n " + "select\n starttime,\n pid,\n " + " xid,\n type,\n userid,\n " + " LISTAGG(case\n when LEN(RTRIM(text)) = 0 then text\n " + " else RTRIM(text)\n end,\n " + " '') within group (\n order by sequence\n " + " ) as query_text\n from\n " + "SVL_STATEMENTTEXT\n where\n type in ('DDL', " + "'QUERY')\n AND starttime >= '2024-01-01 12:00:00'\n " + " AND starttime < '2024-01-10 12:00:00'\n -- See " + "https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl" + "-statementtext\n AND sequence < 320\n group by\n " + " starttime,\n pid,\n " + "xid,\n type,\n userid\n " + " order by\n starttime,\n pid,\n " + " xid,\n type,\n userid\n " + " asc)\n where\n type in ('DDL', " + "'QUERY')\n )\n where\n (create_command ilike " + "'create temp table %'\n or create_command ilike 'create temporary table %'\n " + " -- we want to get all the create table statements and not just temp tables " + "if non temp table is created and dropped in the same transaction\n or " + "create_command ilike 'create table %')\n -- Redshift creates temp tables with " + "the following names: volt_tt_%. 
We need to filter them out.\n and query_text not " + "ilike 'CREATE TEMP TABLE volt_tt_%'\n and create_command not like 'CREATE TEMP " + "TABLE volt_tt_'\n -- We need to filter out our query and it was not possible " + "earlier when we did not have any comment in the query\n and query_text not ilike " + "'%https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl" + "-statementtext%'\n\n )\n where\n rn = 1;\n " + ): mock_temp_table_cursor, + "select * from test_collapse_temp_lineage": mock_stl_insert_table_cursor, +} + + +def mock_cursor(cursor: MagicMock, query: str) -> None: + query_vs_cursor_mocker[query](cursor=cursor) diff --git a/metadata-ingestion/tests/unit/test_cli_utils.py b/metadata-ingestion/tests/unit/test_cli_utils.py index cb0b7c734ee0ac..bc1826d422e38a 100644 --- a/metadata-ingestion/tests/unit/test_cli_utils.py +++ b/metadata-ingestion/tests/unit/test_cli_utils.py @@ -59,3 +59,24 @@ def test_correct_url_when_gms_host_port_url_protocol_set(): ) def test_correct_url_when_url_set(): assert cli_utils.get_details_from_env() == ("https://example.com", None) + + +def test_fixup_gms_url(): + assert cli_utils.fixup_gms_url("http://localhost:8080") == "http://localhost:8080" + assert cli_utils.fixup_gms_url("http://localhost:8080/") == "http://localhost:8080" + assert cli_utils.fixup_gms_url("http://abc.acryl.io") == "https://abc.acryl.io/gms" + + +def test_guess_frontend_url_from_gms_url(): + assert ( + cli_utils.guess_frontend_url_from_gms_url("http://localhost:8080") + == "http://localhost:9002" + ) + assert ( + cli_utils.guess_frontend_url_from_gms_url("http://localhost:8080/") + == "http://localhost:9002" + ) + assert ( + cli_utils.guess_frontend_url_from_gms_url("https://abc.acryl.io/gms") + == "https://abc.acryl.io" + ) diff --git a/metadata-ingestion/tests/unit/test_mapping.py b/metadata-ingestion/tests/unit/test_mapping.py index de35451c9ec4b5..755a62fa329123 100644 --- a/metadata-ingestion/tests/unit/test_mapping.py +++ b/metadata-ingestion/tests/unit/test_mapping.py @@ -25,6 +25,11 @@ def get_operation_defs() -> Dict[str, Any]: "operation": "add_owner", "config": {"owner_type": "user"}, }, + "multi_user": { + "match": ".*", + "operation": "add_owner", + "config": {"owner_type": "user"}, + }, "group.owner": { "match": ".*", "operation": "add_owner", @@ -78,6 +83,7 @@ def test_operation_processor_not_matching(): def test_operation_processor_matching(): raw_props = { "user_owner": "test_user@abc.com", + "multi_user": "sales_member1@abc.com, sales_member2@abc.com", "user_owner_2": "test_user_2", "group.owner": "test.group@abc.co.in", "governance.team_owner": "Finance", @@ -86,6 +92,7 @@ def test_operation_processor_matching(): "double_property": 2.5, "tag": "Finance", } + processor = OperationProcessor( operation_defs=get_operation_defs(), owner_source_type="SOURCE_CONTROL", @@ -116,11 +123,13 @@ def test_operation_processor_matching(): ) ownership_aspect: OwnershipClass = aspect_map["add_owner"] - assert len(ownership_aspect.owners) == 3 + assert len(ownership_aspect.owners) == 5 owner_set = { "urn:li:corpuser:test_user", "urn:li:corpuser:test_user_2", "urn:li:corpGroup:test.group", + "urn:li:corpuser:sales_member1", + "urn:li:corpuser:sales_member2", } for single_owner in ownership_aspect.owners: assert single_owner.owner in owner_set diff --git a/metadata-ingestion/tests/unit/test_metabase_source.py b/metadata-ingestion/tests/unit/test_metabase_source.py index 642752b8ca0e67..08c0ddb5036648 100644 --- 
a/metadata-ingestion/tests/unit/test_metabase_source.py +++ b/metadata-ingestion/tests/unit/test_metabase_source.py @@ -40,3 +40,12 @@ def test_get_platform_instance(): # database_id_to_instance_map is missing, platform_instance_map is defined and key missing assert metabase.get_platform_instance("missing-platform", 999) is None + + +def test_set_display_uri(): + display_uri = "some_host:1234" + + config = MetabaseConfig.parse_obj({"display_uri": display_uri}) + + assert config.connect_uri == "localhost:3000" + assert config.display_uri == display_uri diff --git a/metadata-ingestion/tests/unit/test_redshift_lineage.py b/metadata-ingestion/tests/unit/test_redshift_lineage.py index db5af3a71efb99..6a3e6e47bd96aa 100644 --- a/metadata-ingestion/tests/unit/test_redshift_lineage.py +++ b/metadata-ingestion/tests/unit/test_redshift_lineage.py @@ -1,8 +1,31 @@ +from datetime import datetime +from functools import partial +from typing import List +from unittest.mock import MagicMock + +import datahub.utilities.sqlglot_lineage as sqlglot_l from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.graph.client import DataHubGraph from datahub.ingestion.source.redshift.config import RedshiftConfig -from datahub.ingestion.source.redshift.lineage import RedshiftLineageExtractor +from datahub.ingestion.source.redshift.lineage import ( + LineageCollectorType, + LineageDataset, + LineageDatasetPlatform, + LineageItem, + RedshiftLineageExtractor, + parse_alter_table_rename, +) +from datahub.ingestion.source.redshift.redshift_schema import TempTableRow from datahub.ingestion.source.redshift.report import RedshiftReport -from datahub.utilities.sqlglot_lineage import ColumnLineageInfo, DownstreamColumnRef +from datahub.metadata._schema_classes import NumberTypeClass, SchemaFieldDataTypeClass +from datahub.utilities.sqlglot_lineage import ( + ColumnLineageInfo, + DownstreamColumnRef, + QueryType, + SqlParsingDebugInfo, + SqlParsingResult, +) +from tests.unit.redshift_query_mocker import mock_cursor def test_get_sources_from_query(): @@ -120,16 +143,45 @@ def test_get_sources_from_query_with_only_table(): ) -def test_cll(): - config = RedshiftConfig(host_port="localhost:5439", database="test") +def test_parse_alter_table_rename(): + assert parse_alter_table_rename("public", "alter table foo rename to bar") == ( + "public", + "foo", + "bar", + ) + assert parse_alter_table_rename( + "public", "alter table second_schema.storage_v2_stg rename to storage_v2; " + ) == ( + "second_schema", + "storage_v2_stg", + "storage_v2", + ) + + +def get_lineage_extractor() -> RedshiftLineageExtractor: + config = RedshiftConfig( + host_port="localhost:5439", + database="test", + resolve_temp_table_in_lineage=True, + start_time=datetime(2024, 1, 1, 12, 0, 0).isoformat() + "Z", + end_time=datetime(2024, 1, 10, 12, 0, 0).isoformat() + "Z", + ) report = RedshiftReport() + lineage_extractor = RedshiftLineageExtractor( + config, report, PipelineContext(run_id="foo", graph=mock_graph()) + ) + + return lineage_extractor + + +def test_cll(): test_query = """ select a,b,c from db.public.customer inner join db.public.order on db.public.customer.id = db.public.order.customer_id """ - lineage_extractor = RedshiftLineageExtractor( - config, report, PipelineContext(run_id="foo") - ) + + lineage_extractor = get_lineage_extractor() + _, cll = lineage_extractor._get_sources_from_query(db_name="db", query=test_query) assert cll == [ @@ -149,3 +201,600 @@ def test_cll(): logic=None, ), ] + + +def 
cursor_execute_side_effect(cursor: MagicMock, query: str) -> None: + mock_cursor(cursor=cursor, query=query) + + +def mock_redshift_connection() -> MagicMock: + connection = MagicMock() + + cursor = MagicMock() + + connection.cursor.return_value = cursor + + cursor.execute.side_effect = partial(cursor_execute_side_effect, cursor) + + return connection + + +def mock_graph() -> DataHubGraph: + + graph = MagicMock() + + graph._make_schema_resolver.return_value = sqlglot_l.SchemaResolver( + platform="redshift", + env="PROD", + platform_instance=None, + graph=None, + ) + + return graph + + +def test_collapse_temp_lineage(): + lineage_extractor = get_lineage_extractor() + + connection: MagicMock = mock_redshift_connection() + + lineage_extractor._init_temp_table_schema( + database=lineage_extractor.config.database, + temp_tables=lineage_extractor.get_temp_tables(connection=connection), + ) + + lineage_extractor._populate_lineage_map( + query="select * from test_collapse_temp_lineage", + database=lineage_extractor.config.database, + all_tables_set={ + lineage_extractor.config.database: {"public": {"player_price_with_hike_v6"}} + }, + connection=connection, + lineage_type=LineageCollectorType.QUERY_SQL_PARSER, + ) + + print(lineage_extractor._lineage_map) + + target_urn: str = "urn:li:dataset:(urn:li:dataPlatform:redshift,test.public.player_price_with_hike_v6,PROD)" + + assert lineage_extractor._lineage_map.get(target_urn) is not None + + lineage_item: LineageItem = lineage_extractor._lineage_map[target_urn] + + assert list(lineage_item.upstreams)[0].urn == ( + "urn:li:dataset:(urn:li:dataPlatform:redshift," + "test.public.player_activity,PROD)" + ) + + assert lineage_item.cll is not None + + assert lineage_item.cll[0].downstream.table == ( + "urn:li:dataset:(urn:li:dataPlatform:redshift," + "test.public.player_price_with_hike_v6,PROD)" + ) + + assert lineage_item.cll[0].downstream.column == "price" + + assert lineage_item.cll[0].upstreams[0].table == ( + "urn:li:dataset:(urn:li:dataPlatform:redshift," + "test.public.player_activity,PROD)" + ) + + assert lineage_item.cll[0].upstreams[0].column == "price" + + +def test_collapse_temp_recursive_cll_lineage(): + lineage_extractor = get_lineage_extractor() + + temp_table: TempTableRow = TempTableRow( + transaction_id=126, + query_text="CREATE TABLE #player_price distkey(player_id) AS SELECT player_id, SUM(price_usd) AS price_usd " + "from #player_activity_temp group by player_id", + start_time=datetime.now(), + session_id="abc", + create_command="CREATE TABLE #player_price", + parsed_result=SqlParsingResult( + query_type=QueryType.CREATE, + in_tables=[ + "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)" + ], + out_tables=[ + "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_price,PROD)" + ], + debug_info=SqlParsingDebugInfo(), + column_lineage=[ + ColumnLineageInfo( + downstream=DownstreamColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_price,PROD)", + column="player_id", + column_type=SchemaFieldDataTypeClass(NumberTypeClass()), + native_column_type="INTEGER", + ), + upstreams=[ + sqlglot_l.ColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)", + column="player_id", + ) + ], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_price,PROD)", + column="price_usd", + column_type=SchemaFieldDataTypeClass(NumberTypeClass()), + 
native_column_type="BIGINT", + ), + upstreams=[ + sqlglot_l.ColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)", + column="price_usd", + ) + ], + logic=None, + ), + ], + ), + urn="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_price,PROD)", + ) + + temp_table_activity: TempTableRow = TempTableRow( + transaction_id=127, + query_text="CREATE TABLE #player_activity_temp SELECT player_id, SUM(price) AS price_usd " + "from player_activity", + start_time=datetime.now(), + session_id="abc", + create_command="CREATE TABLE #player_activity_temp", + parsed_result=SqlParsingResult( + query_type=QueryType.CREATE, + in_tables=[ + "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_activity,PROD)" + ], + out_tables=[ + "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)" + ], + debug_info=SqlParsingDebugInfo(), + column_lineage=[ + ColumnLineageInfo( + downstream=DownstreamColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)", + column="player_id", + column_type=SchemaFieldDataTypeClass(NumberTypeClass()), + native_column_type="INTEGER", + ), + upstreams=[ + sqlglot_l.ColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_activity,PROD)", + column="player_id", + ) + ], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)", + column="price_usd", + column_type=SchemaFieldDataTypeClass(NumberTypeClass()), + native_column_type="BIGINT", + ), + upstreams=[ + sqlglot_l.ColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_activity,PROD)", + column="price", + ) + ], + logic=None, + ), + ], + ), + urn="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)", + ) + + assert temp_table.urn + assert temp_table_activity.urn + + lineage_extractor.temp_tables[temp_table.urn] = temp_table + lineage_extractor.temp_tables[temp_table_activity.urn] = temp_table_activity + + target_dataset_cll: List[sqlglot_l.ColumnLineageInfo] = [ + ColumnLineageInfo( + downstream=DownstreamColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_price_with_hike_v6,PROD)", + column="price", + column_type=SchemaFieldDataTypeClass(type=NumberTypeClass()), + native_column_type="DOUBLE PRECISION", + ), + upstreams=[ + sqlglot_l.ColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_price,PROD)", + column="price_usd", + ) + ], + logic=None, + ) + ] + + datasets = lineage_extractor._get_upstream_lineages( + sources=[ + LineageDataset( + platform=LineageDatasetPlatform.REDSHIFT, + urn="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_price,PROD)", + ) + ], + target_table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_price_with_hike_v4,PROD)", + raw_db_name="dev", + alias_db_name="dev", + all_tables_set={ + "dev": { + "public": set(), + } + }, + connection=MagicMock(), + target_dataset_cll=target_dataset_cll, + ) + + assert len(datasets) == 1 + + assert ( + datasets[0].urn + == "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_activity,PROD)" + ) + + assert target_dataset_cll[0].upstreams[0].table == ( + "urn:li:dataset:(urn:li:dataPlatform:redshift," + "dev.public.player_activity,PROD)" + ) + assert target_dataset_cll[0].upstreams[0].column == "price" + + +def 
test_collapse_temp_recursive_with_compex_column_cll_lineage(): + lineage_extractor = get_lineage_extractor() + + temp_table: TempTableRow = TempTableRow( + transaction_id=126, + query_text="CREATE TABLE #player_price distkey(player_id) AS SELECT player_id, SUM(price+tax) AS price_usd " + "from #player_activity_temp group by player_id", + start_time=datetime.now(), + session_id="abc", + create_command="CREATE TABLE #player_price", + parsed_result=SqlParsingResult( + query_type=QueryType.CREATE, + in_tables=[ + "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)" + ], + out_tables=[ + "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_price,PROD)" + ], + debug_info=SqlParsingDebugInfo(), + column_lineage=[ + ColumnLineageInfo( + downstream=DownstreamColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_price,PROD)", + column="player_id", + column_type=SchemaFieldDataTypeClass(NumberTypeClass()), + native_column_type="INTEGER", + ), + upstreams=[ + sqlglot_l.ColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)", + column="player_id", + ) + ], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_price,PROD)", + column="price_usd", + column_type=SchemaFieldDataTypeClass(NumberTypeClass()), + native_column_type="BIGINT", + ), + upstreams=[ + sqlglot_l.ColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)", + column="price", + ), + sqlglot_l.ColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)", + column="tax", + ), + ], + logic=None, + ), + ], + ), + urn="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_price,PROD)", + ) + + temp_table_activity: TempTableRow = TempTableRow( + transaction_id=127, + query_text="CREATE TABLE #player_activity_temp SELECT player_id, price, tax " + "from player_activity", + start_time=datetime.now(), + session_id="abc", + create_command="CREATE TABLE #player_activity_temp", + parsed_result=SqlParsingResult( + query_type=QueryType.CREATE, + in_tables=[ + "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_activity,PROD)" + ], + out_tables=[ + "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)" + ], + debug_info=SqlParsingDebugInfo(), + column_lineage=[ + ColumnLineageInfo( + downstream=DownstreamColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)", + column="player_id", + column_type=SchemaFieldDataTypeClass(NumberTypeClass()), + native_column_type="INTEGER", + ), + upstreams=[ + sqlglot_l.ColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_activity,PROD)", + column="player_id", + ) + ], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)", + column="price", + column_type=SchemaFieldDataTypeClass(NumberTypeClass()), + native_column_type="BIGINT", + ), + upstreams=[ + sqlglot_l.ColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_activity,PROD)", + column="price", + ) + ], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)", + column="tax", + 
column_type=SchemaFieldDataTypeClass(NumberTypeClass()), + native_column_type="BIGINT", + ), + upstreams=[ + sqlglot_l.ColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_activity,PROD)", + column="tax", + ) + ], + logic=None, + ), + ], + ), + urn="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)", + ) + assert temp_table.urn + assert temp_table_activity.urn + + lineage_extractor.temp_tables[temp_table.urn] = temp_table + lineage_extractor.temp_tables[temp_table_activity.urn] = temp_table_activity + + target_dataset_cll: List[sqlglot_l.ColumnLineageInfo] = [ + ColumnLineageInfo( + downstream=DownstreamColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_price_with_hike_v6,PROD)", + column="price", + column_type=SchemaFieldDataTypeClass(type=NumberTypeClass()), + native_column_type="DOUBLE PRECISION", + ), + upstreams=[ + sqlglot_l.ColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_price,PROD)", + column="price_usd", + ) + ], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_price_with_hike_v6,PROD)", + column="player_id", + column_type=SchemaFieldDataTypeClass(type=NumberTypeClass()), + native_column_type="BIGINT", + ), + upstreams=[ + sqlglot_l.ColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_price,PROD)", + column="player_id", + ) + ], + logic=None, + ), + ] + + datasets = lineage_extractor._get_upstream_lineages( + sources=[ + LineageDataset( + platform=LineageDatasetPlatform.REDSHIFT, + urn="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_price,PROD)", + ) + ], + target_table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_price_with_hike_v4,PROD)", + raw_db_name="dev", + alias_db_name="dev", + all_tables_set={ + "dev": { + "public": set(), + } + }, + connection=MagicMock(), + target_dataset_cll=target_dataset_cll, + ) + + assert len(datasets) == 1 + + assert ( + datasets[0].urn + == "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_activity,PROD)" + ) + + assert target_dataset_cll[0].upstreams[0].table == ( + "urn:li:dataset:(urn:li:dataPlatform:redshift," + "dev.public.player_activity,PROD)" + ) + assert target_dataset_cll[0].upstreams[0].column == "price" + assert target_dataset_cll[0].upstreams[1].column == "tax" + assert target_dataset_cll[1].upstreams[0].column == "player_id" + + +def test_collapse_temp_recursive_cll_lineage_with_circular_reference(): + lineage_extractor = get_lineage_extractor() + + temp_table: TempTableRow = TempTableRow( + transaction_id=126, + query_text="CREATE TABLE #player_price distkey(player_id) AS SELECT player_id, SUM(price_usd) AS price_usd " + "from #player_activity_temp group by player_id", + start_time=datetime.now(), + session_id="abc", + create_command="CREATE TABLE #player_price", + parsed_result=SqlParsingResult( + query_type=QueryType.CREATE, + in_tables=[ + "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)" + ], + out_tables=[ + "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_price,PROD)" + ], + debug_info=SqlParsingDebugInfo(), + column_lineage=[ + ColumnLineageInfo( + downstream=DownstreamColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_price,PROD)", + column="player_id", + column_type=SchemaFieldDataTypeClass(NumberTypeClass()), + 
native_column_type="INTEGER", + ), + upstreams=[ + sqlglot_l.ColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)", + column="player_id", + ) + ], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_price,PROD)", + column="price_usd", + column_type=SchemaFieldDataTypeClass(NumberTypeClass()), + native_column_type="BIGINT", + ), + upstreams=[ + sqlglot_l.ColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)", + column="price_usd", + ) + ], + logic=None, + ), + ], + ), + urn="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_price,PROD)", + ) + + temp_table_activity: TempTableRow = TempTableRow( + transaction_id=127, + query_text="CREATE TABLE #player_activity_temp SELECT player_id, SUM(price) AS price_usd " + "from #player_price", + start_time=datetime.now(), + session_id="abc", + create_command="CREATE TABLE #player_activity_temp", + parsed_result=SqlParsingResult( + query_type=QueryType.CREATE, + in_tables=[ + "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_activity,PROD)" + ], + out_tables=[ + "urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)" + ], + debug_info=SqlParsingDebugInfo(), + column_lineage=[ + ColumnLineageInfo( + downstream=DownstreamColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)", + column="player_id", + column_type=SchemaFieldDataTypeClass(NumberTypeClass()), + native_column_type="INTEGER", + ), + upstreams=[ + sqlglot_l.ColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)", + column="player_id", + ) + ], + logic=None, + ), + ColumnLineageInfo( + downstream=DownstreamColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)", + column="price_usd", + column_type=SchemaFieldDataTypeClass(NumberTypeClass()), + native_column_type="BIGINT", + ), + upstreams=[ + sqlglot_l.ColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)", + column="price_usd", + ) + ], + logic=None, + ), + ], + ), + urn="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_activity_temp,PROD)", + ) + + assert temp_table.urn + assert temp_table_activity.urn + + lineage_extractor.temp_tables[temp_table.urn] = temp_table + lineage_extractor.temp_tables[temp_table_activity.urn] = temp_table_activity + + target_dataset_cll: List[sqlglot_l.ColumnLineageInfo] = [ + ColumnLineageInfo( + downstream=DownstreamColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_price_with_hike_v6,PROD)", + column="price", + column_type=SchemaFieldDataTypeClass(type=NumberTypeClass()), + native_column_type="DOUBLE PRECISION", + ), + upstreams=[ + sqlglot_l.ColumnRef( + table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_price,PROD)", + column="price_usd", + ) + ], + logic=None, + ) + ] + + datasets = lineage_extractor._get_upstream_lineages( + sources=[ + LineageDataset( + platform=LineageDatasetPlatform.REDSHIFT, + urn="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.#player_price,PROD)", + ) + ], + target_table="urn:li:dataset:(urn:li:dataPlatform:redshift,dev.public.player_price_with_hike_v4,PROD)", + raw_db_name="dev", + alias_db_name="dev", + all_tables_set={ + "dev": { + "public": set(), + } + }, + 
connection=MagicMock(), + target_dataset_cll=target_dataset_cll, + ) + + assert len(datasets) == 1 + # Here we only interested if it fails or not diff --git a/metadata-ingestion/tests/unit/test_transform_dataset.py b/metadata-ingestion/tests/unit/test_transform_dataset.py index 5152f406ed3ce0..2a6176906a0c3e 100644 --- a/metadata-ingestion/tests/unit/test_transform_dataset.py +++ b/metadata-ingestion/tests/unit/test_transform_dataset.py @@ -648,22 +648,35 @@ def _test_owner( ) ] ) + transformer = ExtractOwnersFromTagsTransformer.create( config, PipelineContext(run_id="test"), ) - transformed = list( + + record_envelops: List[RecordEnvelope] = list( transformer.transform( [ RecordEnvelope(dataset, metadata={}), + RecordEnvelope(record=EndOfStream(), metadata={}), ] ) ) - owners_aspect = transformed[0].record.proposedSnapshot.aspects[0] + + assert len(record_envelops) == 3 + + mcp: MetadataChangeProposalWrapper = record_envelops[1].record + + owners_aspect = cast(OwnershipClass, mcp.aspect) + owners = owners_aspect.owners + owner = owners[0] - if expected_owner_type is not None: - assert owner.type == expected_owner_type + + assert expected_owner_type is not None + + assert owner.type == expected_owner_type + assert owner.owner == expected_owner _test_owner( @@ -672,6 +685,7 @@ def _test_owner( "tag_prefix": "owner:", }, expected_owner="urn:li:corpuser:foo", + expected_owner_type=OwnershipTypeClass.TECHNICAL_OWNER, ) _test_owner( tag="abcdef-owner:foo", @@ -679,6 +693,7 @@ def _test_owner( "tag_prefix": ".*owner:", }, expected_owner="urn:li:corpuser:foo", + expected_owner_type=OwnershipTypeClass.TECHNICAL_OWNER, ) _test_owner( tag="owner:foo", @@ -687,6 +702,7 @@ def _test_owner( "is_user": False, }, expected_owner="urn:li:corpGroup:foo", + expected_owner_type=OwnershipTypeClass.TECHNICAL_OWNER, ) _test_owner( tag="owner:foo", @@ -695,6 +711,7 @@ def _test_owner( "email_domain": "example.com", }, expected_owner="urn:li:corpuser:foo@example.com", + expected_owner_type=OwnershipTypeClass.TECHNICAL_OWNER, ) _test_owner( tag="owner:foo", diff --git a/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestCoalesceJobLineage.java b/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestCoalesceJobLineage.java index 053055716eaa07..17aea13dbb94e3 100644 --- a/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestCoalesceJobLineage.java +++ b/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestCoalesceJobLineage.java @@ -99,7 +99,9 @@ public static void resetBaseExpectations() { @BeforeClass public static void initMockServer() { - mockServer = startClientAndServer(GMS_PORT); + if (mockServer == null) { + mockServer = startClientAndServer(GMS_PORT); + } resetBaseExpectations(); } diff --git a/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestSparkJobsLineage.java b/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestSparkJobsLineage.java index a4eb035b0abce5..885be6d00fee85 100644 --- a/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestSparkJobsLineage.java +++ b/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestSparkJobsLineage.java @@ -138,7 +138,9 @@ public static void resetBaseExpectations() { @BeforeClass public static void init() { - mockServer = startClientAndServer(GMS_PORT); + if (mockServer == null) { + mockServer = startClientAndServer(GMS_PORT); + } resetBaseExpectations(); } diff --git 
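Before the server-side changes below, one note on the Redshift temp-table tests above: they pin down that column lineage through #temp tables is collapsed back onto permanent tables, including multi-column expressions and the circular temp-table chain in the last test, which must terminate rather than recurse forever. The function below is only an illustration of that resolution idea, assuming temp tables are keyed by URN and carry a parsed column-lineage result as the test fixtures do; it is not the extractor's actual code.

def resolve_to_permanent(column_ref, temp_tables, visited=None):
    # Walk a (table URN, column) reference back through known temp tables until a
    # permanent table is reached; the visited set guards against circular chains
    # like the one in the last test, where temp tables reference each other.
    table, column = column_ref
    visited = visited or set()
    if table not in temp_tables or (table, column) in visited:
        return [(table, column)]
    visited.add((table, column))
    resolved = []
    for cll in temp_tables[table].parsed_result.column_lineage:
        if cll.downstream.column != column:
            continue
        for upstream in cll.upstreams:
            resolved.extend(
                resolve_to_permanent((upstream.table, upstream.column), temp_tables, visited)
            )
    return resolved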
a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java new file mode 100644 index 00000000000000..a3711afb753dc8 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java @@ -0,0 +1,312 @@ +package com.linkedin.metadata.aspect.utils; + +import static com.linkedin.metadata.Constants.BROWSE_PATHS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.BROWSE_PATHS_V2_ASPECT_NAME; +import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME; +import static com.linkedin.metadata.search.utils.BrowsePathUtils.buildDataPlatformUrn; +import static com.linkedin.metadata.search.utils.BrowsePathUtils.getDefaultBrowsePath; +import static com.linkedin.metadata.search.utils.BrowsePathV2Utils.getDefaultBrowsePathV2; + +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.BrowsePaths; +import com.linkedin.common.BrowsePathsV2; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.data.template.StringArray; +import com.linkedin.dataplatform.DataPlatformInfo; +import com.linkedin.entity.EntityResponse; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.batch.AspectsBatch; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.MCPBatchItem; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.EntityUtils; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.utils.DataPlatformInstanceUtils; +import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.mxe.GenericAspect; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.util.Pair; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; + +/** Consolidates logic for default aspects */ +@Slf4j +public class DefaultAspectsUtil { + private DefaultAspectsUtil() {} + + public static final Set SUPPORTED_TYPES = + Set.of(ChangeType.UPSERT, ChangeType.CREATE, ChangeType.PATCH); + + public static List getAdditionalChanges( + @Nonnull AspectsBatch batch, @Nonnull EntityService entityService, boolean browsePathV2) { + + Map> itemsByUrn = + batch.getMCPItems().stream() + .filter(item -> SUPPORTED_TYPES.contains(item.getChangeType())) + .collect(Collectors.groupingBy(BatchItem::getUrn)); + + Set urnsWithExistingKeyAspects = entityService.exists(itemsByUrn.keySet()); + + // create default aspects when key aspect is missing + return itemsByUrn.entrySet().stream() + .filter(aspectsEntry -> !urnsWithExistingKeyAspects.contains(aspectsEntry.getKey())) + .flatMap( + aspectsEntry -> { + // Exclude aspects already in the batch + Set currentBatchAspectNames = + aspectsEntry.getValue().stream() + .map(BatchItem::getAspectName) + .collect(Collectors.toSet()); + + // Generate key aspect and defaults + List> defaultAspects = + generateDefaultAspects( + entityService, aspectsEntry.getKey(), currentBatchAspectNames, browsePathV2); + + // First is the key aspect + 
RecordTemplate entityKeyAspect = defaultAspects.get(0).getSecond(); + + // pick the first item as a template (use entity information) + MCPBatchItem templateItem = aspectsEntry.getValue().get(0); + + // generate default aspects (including key aspect, always upserts) + return defaultAspects.stream() + .map( + entry -> + MCPUpsertBatchItem.MCPUpsertBatchItemBuilder.build( + getProposalFromAspect( + entry.getKey(), entry.getValue(), entityKeyAspect, templateItem), + templateItem.getAuditStamp(), + entityService)) + .filter(Objects::nonNull); + }) + .collect(Collectors.toList()); + } + + /** + * Generate default aspects + * + * @param entityService entity service + * @param urn entity urn + * @return a list of aspect name/aspect pairs to be written + */ + public static List> generateDefaultAspects( + @Nonnull EntityService entityService, + @Nonnull final Urn urn, + @Nonnull Set currentBatchAspectNames, + boolean browsePathV2) { + + final List> defaultAspects = new LinkedList<>(); + + // Key Aspect + final String keyAspectName = entityService.getKeyAspectName(urn); + defaultAspects.add( + Pair.of(keyAspectName, EntityUtils.buildKeyAspect(entityService.getEntityRegistry(), urn))); + + // Other Aspects + defaultAspects.addAll( + generateDefaultAspectsIfMissing( + entityService, + urn, + defaultAspects.get(0).getSecond(), + currentBatchAspectNames, + browsePathV2)); + + return defaultAspects; + } + + /** + * Generate default aspects if the aspect is NOT in the database. + * + *
<p>
Does not automatically create key aspects. + * + * @see #generateDefaultAspectsIfMissing if key aspects need autogeneration + * @param entityService + * @param urn entity urn + * @param entityKeyAspect entity's key aspect + * @return additional aspects to be written + */ + private static List> generateDefaultAspectsIfMissing( + @Nonnull EntityService entityService, + @Nonnull final Urn urn, + RecordTemplate entityKeyAspect, + @Nonnull Set currentAspectNames, + boolean browsePathV2) { + EntityRegistry entityRegistry = entityService.getEntityRegistry(); + + Set fetchAspects = + Stream.of( + BROWSE_PATHS_ASPECT_NAME, + BROWSE_PATHS_V2_ASPECT_NAME, + DATA_PLATFORM_INSTANCE_ASPECT_NAME) + // If browsePathV2 then exclude v1 + .filter(aspectName -> !(BROWSE_PATHS_ASPECT_NAME.equals(aspectName) && browsePathV2)) + // Exclude currently ingesting aspects + .filter(aspectName -> !currentAspectNames.contains(aspectName)) + // Exclude in case when we have limited test entity registry which doesn't include these + .filter( + aspectName -> + entityRegistry + .getEntitySpec(urn.getEntityType()) + .getAspectSpecMap() + .containsKey(aspectName)) + .collect(Collectors.toSet()); + + if (!fetchAspects.isEmpty()) { + + Set latestAspects = entityService.getLatestAspectsForUrn(urn, fetchAspects).keySet(); + + return fetchAspects.stream() + .filter(aspectName -> !latestAspects.contains(aspectName)) + .map( + aspectName -> { + switch (aspectName) { + case BROWSE_PATHS_ASPECT_NAME: + return Pair.of( + BROWSE_PATHS_ASPECT_NAME, + (RecordTemplate) buildDefaultBrowsePath(urn, entityService)); + case BROWSE_PATHS_V2_ASPECT_NAME: + return Pair.of( + BROWSE_PATHS_V2_ASPECT_NAME, + (RecordTemplate) buildDefaultBrowsePathV2(urn, false, entityService)); + case DATA_PLATFORM_INSTANCE_ASPECT_NAME: + return DataPlatformInstanceUtils.buildDataPlatformInstance( + urn.getEntityType(), entityKeyAspect) + .map( + aspect -> + Pair.of( + DATA_PLATFORM_INSTANCE_ASPECT_NAME, (RecordTemplate) aspect)) + .orElse(null); + default: + return null; + } + }) + .filter(Objects::nonNull) + .collect(Collectors.toList()); + } + + return Collections.emptyList(); + } + + /** + * Builds the default browse path aspects for a subset of well-supported entities. + * + *
<p>
This method currently supports datasets, charts, dashboards, data flows, data jobs, and + * glossary terms. + */ + @Nonnull + public static BrowsePaths buildDefaultBrowsePath( + final @Nonnull Urn urn, EntityService entityService) { + Character dataPlatformDelimiter = getDataPlatformDelimiter(urn, entityService); + String defaultBrowsePath = + getDefaultBrowsePath(urn, entityService.getEntityRegistry(), dataPlatformDelimiter); + StringArray browsePaths = new StringArray(); + browsePaths.add(defaultBrowsePath); + BrowsePaths browsePathAspect = new BrowsePaths(); + browsePathAspect.setPaths(browsePaths); + return browsePathAspect; + } + + /** + * Builds the default browse path V2 aspects for all entities. + * + *
<p>
This method currently supports datasets, charts, dashboards, and data jobs best. Everything + * else will have a basic "Default" folder added to their browsePathV2. + */ + @Nonnull + public static BrowsePathsV2 buildDefaultBrowsePathV2( + final @Nonnull Urn urn, boolean useContainerPaths, EntityService entityService) { + Character dataPlatformDelimiter = getDataPlatformDelimiter(urn, entityService); + return getDefaultBrowsePathV2( + urn, + entityService.getEntityRegistry(), + dataPlatformDelimiter, + entityService, + useContainerPaths); + } + + /** Returns a delimiter on which the name of an asset may be split. */ + private static Character getDataPlatformDelimiter(Urn urn, EntityService entityService) { + // Attempt to construct the appropriate Data Platform URN + Urn dataPlatformUrn = buildDataPlatformUrn(urn, entityService.getEntityRegistry()); + if (dataPlatformUrn != null) { + // Attempt to resolve the delimiter from Data Platform Info + DataPlatformInfo dataPlatformInfo = getDataPlatformInfo(dataPlatformUrn, entityService); + if (dataPlatformInfo != null && dataPlatformInfo.hasDatasetNameDelimiter()) { + return dataPlatformInfo.getDatasetNameDelimiter().charAt(0); + } + } + // Else, fallback to a default delimiter (period) if one cannot be resolved. + return '.'; + } + + @Nullable + private static DataPlatformInfo getDataPlatformInfo(Urn urn, EntityService entityService) { + try { + final EntityResponse entityResponse = + entityService.getEntityV2( + Constants.DATA_PLATFORM_ENTITY_NAME, + urn, + ImmutableSet.of(Constants.DATA_PLATFORM_INFO_ASPECT_NAME)); + if (entityResponse != null + && entityResponse.hasAspects() + && entityResponse.getAspects().containsKey(Constants.DATA_PLATFORM_INFO_ASPECT_NAME)) { + return new DataPlatformInfo( + entityResponse + .getAspects() + .get(Constants.DATA_PLATFORM_INFO_ASPECT_NAME) + .getValue() + .data()); + } + } catch (Exception e) { + log.warn(String.format("Failed to find Data Platform Info for urn %s", urn)); + } + return null; + } + + private static MetadataChangeProposal getProposalFromAspect( + String aspectName, + RecordTemplate aspect, + RecordTemplate entityKeyAspect, + MCPBatchItem templateItem) { + MetadataChangeProposal proposal = new MetadataChangeProposal(); + GenericAspect genericAspect = GenericRecordUtils.serializeAspect(aspect); + + // Set net new fields + proposal.setAspect(genericAspect); + proposal.setAspectName(aspectName); + + // Set fields determined from original + // Additional changes should never be set as PATCH, if a PATCH is coming across it should be an + // UPSERT + proposal.setChangeType(templateItem.getChangeType()); + if (ChangeType.PATCH.equals(proposal.getChangeType())) { + proposal.setChangeType(ChangeType.UPSERT); + } + + if (templateItem.getSystemMetadata() != null) { + proposal.setSystemMetadata(templateItem.getSystemMetadata()); + } + if (templateItem.getUrn() != null) { + proposal.setEntityUrn(templateItem.getUrn()); + } + if (entityKeyAspect != null) { + proposal.setEntityKeyAspect(GenericRecordUtils.serializeAspect(entityKeyAspect)); + } + proposal.setEntityType(templateItem.getUrn().getEntityType()); + + return proposal; + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java index 0ebe9ed1d1b666..9a3bc9e319d2bd 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java @@ 
-26,7 +26,6 @@ import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.browse.BrowseResult; import com.linkedin.metadata.browse.BrowseResultV2; -import com.linkedin.metadata.entity.AspectUtils; import com.linkedin.metadata.entity.DeleteEntityService; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.IngestResult; @@ -67,7 +66,6 @@ import java.util.Set; import java.util.function.Supplier; import java.util.stream.Collectors; -import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; @@ -706,14 +704,10 @@ public String ingestProposal( : Constants.UNKNOWN_ACTOR; final AuditStamp auditStamp = new AuditStamp().setTime(_clock.millis()).setActor(UrnUtils.getUrn(actorUrnStr)); - final List additionalChanges = - AspectUtils.getAdditionalChanges(metadataChangeProposal, _entityService); - Stream proposalStream = - Stream.concat(Stream.of(metadataChangeProposal), additionalChanges.stream()); AspectsBatch batch = AspectsBatchImpl.builder() - .mcps(proposalStream.collect(Collectors.toList()), auditStamp, _entityService) + .mcps(List.of(metadataChangeProposal), auditStamp, _entityService) .build(); IngestResult one = _entityService.ingestProposal(batch, async).stream().findFirst().get(); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java index e00a696a095a15..b031377842176b 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java @@ -148,11 +148,11 @@ T runInTransactionWithRetry( @Nonnull final Function block, final int maxTransactionRetry); @Nonnull - default T runInTransactionWithRetry( + default List runInTransactionWithRetry( @Nonnull final Function block, AspectsBatch batch, final int maxTransactionRetry) { - return runInTransactionWithRetry(block, maxTransactionRetry); + return List.of(runInTransactionWithRetry(block, maxTransactionRetry)); } default void incrementWriteMetrics(String aspectName, long count, long bytes) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index e6e69c96c15421..7f15e3a7fd8fc9 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -2,16 +2,11 @@ import static com.linkedin.metadata.Constants.APP_SOURCE; import static com.linkedin.metadata.Constants.ASPECT_LATEST_VERSION; -import static com.linkedin.metadata.Constants.BROWSE_PATHS_ASPECT_NAME; -import static com.linkedin.metadata.Constants.BROWSE_PATHS_V2_ASPECT_NAME; -import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME; import static com.linkedin.metadata.Constants.DEFAULT_RUN_ID; import static com.linkedin.metadata.Constants.FORCE_INDEXING_KEY; import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; import static com.linkedin.metadata.Constants.SYSTEM_ACTOR; import static com.linkedin.metadata.Constants.UI_SOURCE; -import static com.linkedin.metadata.search.utils.BrowsePathUtils.buildDataPlatformUrn; -import static com.linkedin.metadata.search.utils.BrowsePathUtils.getDefaultBrowsePath; import static com.linkedin.metadata.utils.GenericRecordUtils.entityResponseToAspectMap; import static 
com.linkedin.metadata.utils.PegasusUtils.constructMCL; import static com.linkedin.metadata.utils.PegasusUtils.getDataTemplateClassFromSchema; @@ -25,8 +20,6 @@ import com.google.common.collect.Iterators; import com.google.common.collect.Streams; import com.linkedin.common.AuditStamp; -import com.linkedin.common.BrowsePaths; -import com.linkedin.common.BrowsePathsV2; import com.linkedin.common.Status; import com.linkedin.common.UrnArray; import com.linkedin.common.VersionedUrn; @@ -38,10 +31,8 @@ import com.linkedin.data.template.GetMode; import com.linkedin.data.template.RecordTemplate; import com.linkedin.data.template.SetMode; -import com.linkedin.data.template.StringArray; import com.linkedin.data.template.StringMap; import com.linkedin.data.template.UnionTemplate; -import com.linkedin.dataplatform.DataPlatformInfo; import com.linkedin.entity.AspectType; import com.linkedin.entity.Entity; import com.linkedin.entity.EntityResponse; @@ -57,6 +48,7 @@ import com.linkedin.metadata.aspect.batch.SystemAspect; import com.linkedin.metadata.aspect.batch.UpsertItem; import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.aspect.utils.DefaultAspectsUtil; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.ebean.EbeanAspectV2; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; @@ -72,10 +64,8 @@ import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.ListUrnsResult; import com.linkedin.metadata.run.AspectRowSummary; -import com.linkedin.metadata.search.utils.BrowsePathV2Utils; import com.linkedin.metadata.service.UpdateIndicesService; import com.linkedin.metadata.snapshot.Snapshot; -import com.linkedin.metadata.utils.DataPlatformInstanceUtils; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.metadata.utils.PegasusUtils; @@ -166,6 +156,7 @@ public class EntityServiceImpl implements EntityService { protected static final int MAX_KEYS_PER_QUERY = 500; private final Integer ebeanMaxTransactionRetry; + private final boolean enableBrowseV2; public EntityServiceImpl( @Nonnull final AspectDao aspectDao, @@ -173,7 +164,8 @@ public EntityServiceImpl( @Nonnull final EntityRegistry entityRegistry, final boolean alwaysEmitChangeLog, @Nullable final UpdateIndicesService updateIndicesService, - final PreProcessHooks preProcessHooks) { + final PreProcessHooks preProcessHooks, + final boolean enableBrowsePathV2) { this( aspectDao, producer, @@ -181,7 +173,8 @@ public EntityServiceImpl( alwaysEmitChangeLog, updateIndicesService, preProcessHooks, - DEFAULT_MAX_TRANSACTION_RETRY); + DEFAULT_MAX_TRANSACTION_RETRY, + enableBrowsePathV2); } public EntityServiceImpl( @@ -191,7 +184,8 @@ public EntityServiceImpl( final boolean alwaysEmitChangeLog, @Nullable final UpdateIndicesService updateIndicesService, final PreProcessHooks preProcessHooks, - @Nullable final Integer retry) { + @Nullable final Integer retry, + final boolean enableBrowseV2) { _aspectDao = aspectDao; _producer = producer; @@ -204,6 +198,7 @@ public EntityServiceImpl( } _preProcessHooks = preProcessHooks; ebeanMaxTransactionRetry = retry != null ? 
retry : DEFAULT_MAX_TRANSACTION_RETRY; + this.enableBrowseV2 = enableBrowseV2; } @Override @@ -622,7 +617,7 @@ public List ingestAspects( MCPUpsertBatchItem.builder() .urn(entityUrn) .aspectName(pair.getKey()) - .aspect(pair.getValue()) + .recordTemplate(pair.getValue()) .systemMetadata(systemMetadata) .auditStamp(auditStamp) .build(this)) @@ -670,162 +665,166 @@ private List ingestAspectsToLocalDB( log.warn(String.format("Batch contains duplicates: %s", aspectsBatch)); } - return _aspectDao.runInTransactionWithRetry( - (tx) -> { - // Read before write is unfortunate, however batch it - final Map> urnAspects = aspectsBatch.getUrnAspectsMap(); - // read #1 - final Map> latestAspects = - toSystemEntityAspects(_aspectDao.getLatestAspects(urnAspects)); - // read #2 - final Map> nextVersions = - _aspectDao.getNextVersions(urnAspects); - - // 1. Convert patches to full upserts - // 2. Run any entity/aspect level hooks - Pair>, List> updatedItems = - aspectsBatch.toUpsertBatchItems(latestAspects, this); - - // Fetch additional information if needed - final Map> updatedLatestAspects; - final Map> updatedNextVersions; - if (!updatedItems.getFirst().isEmpty()) { - Map> newLatestAspects = - toSystemEntityAspects(_aspectDao.getLatestAspects(updatedItems.getFirst())); - Map> newNextVersions = - _aspectDao.getNextVersions(updatedItems.getFirst()); - // merge - updatedLatestAspects = aspectsBatch.merge(latestAspects, newLatestAspects); - updatedNextVersions = aspectsBatch.merge(nextVersions, newNextVersions); - } else { - updatedLatestAspects = latestAspects; - updatedNextVersions = nextVersions; - } - - // do final pre-commit checks with previous aspect value - updatedItems - .getSecond() - .forEach( - item -> { - SystemAspect previousAspect = - updatedLatestAspects - .getOrDefault(item.getUrn().toString(), Map.of()) - .get(item.getAspectSpec().getName()); - try { - item.validatePreCommit( - previousAspect == null - ? null - : previousAspect.getRecordTemplate(_entityRegistry), - this); - } catch (AspectValidationException e) { - throw new RuntimeException(e); - } - }); + return _aspectDao + .runInTransactionWithRetry( + (tx) -> { + // Read before write is unfortunate, however batch it + final Map> urnAspects = aspectsBatch.getUrnAspectsMap(); + // read #1 + final Map> latestAspects = + toSystemEntityAspects(_aspectDao.getLatestAspects(urnAspects)); + // read #2 + final Map> nextVersions = + _aspectDao.getNextVersions(urnAspects); + + // 1. Convert patches to full upserts + // 2. 
Run any entity/aspect level hooks + Pair>, List> updatedItems = + aspectsBatch.toUpsertBatchItems(latestAspects, this); + + // Fetch additional information if needed + final Map> updatedLatestAspects; + final Map> updatedNextVersions; + if (!updatedItems.getFirst().isEmpty()) { + Map> newLatestAspects = + toSystemEntityAspects(_aspectDao.getLatestAspects(updatedItems.getFirst())); + Map> newNextVersions = + _aspectDao.getNextVersions(updatedItems.getFirst()); + // merge + updatedLatestAspects = aspectsBatch.merge(latestAspects, newLatestAspects); + updatedNextVersions = aspectsBatch.merge(nextVersions, newNextVersions); + } else { + updatedLatestAspects = latestAspects; + updatedNextVersions = nextVersions; + } - // Database Upsert results - List upsertResults = - updatedItems.getSecond().stream() - .map( + // do final pre-commit checks with previous aspect value + updatedItems + .getSecond() + .forEach( item -> { - final String urnStr = item.getUrn().toString(); - final SystemAspect latest = + SystemAspect previousAspect = updatedLatestAspects - .getOrDefault(urnStr, Map.of()) - .get(item.getAspectName()); - final long nextVersion = - updatedNextVersions - .getOrDefault(urnStr, Map.of()) - .getOrDefault(item.getAspectName(), 0L); - - final UpdateAspectResult result; - if (overwrite || latest == null) { - result = - ingestAspectToLocalDB( - tx, - item.getUrn(), - item.getAspectName(), - item.getAspect(), - item.getAuditStamp(), - item.getSystemMetadata(), - latest == null - ? null - : ((EntityAspect.EntitySystemAspect) latest).asRaw(), - nextVersion) - .toBuilder() - .request(item) - .build(); - - // support inner-batch upserts - latestAspects - .computeIfAbsent(urnStr, key -> new HashMap<>()) - .put(item.getAspectName(), item.toLatestEntityAspect()); - nextVersions - .computeIfAbsent(urnStr, key -> new HashMap<>()) - .put(item.getAspectName(), nextVersion + 1); - } else { - RecordTemplate oldValue = latest.getRecordTemplate(_entityRegistry); - SystemMetadata oldMetadata = latest.getSystemMetadata(); - result = - UpdateAspectResult.builder() - .urn(item.getUrn()) - .request(item) - .oldValue(oldValue) - .newValue(oldValue) - .oldSystemMetadata(oldMetadata) - .newSystemMetadata(oldMetadata) - .operation(MetadataAuditOperation.UPDATE) - .auditStamp(item.getAuditStamp()) - .maxVersion(latest.getVersion()) - .build(); + .getOrDefault(item.getUrn().toString(), Map.of()) + .get(item.getAspectSpec().getName()); + try { + item.validatePreCommit( + previousAspect == null + ? null + : previousAspect.getRecordTemplate(_entityRegistry), + this); + } catch (AspectValidationException e) { + throw new RuntimeException(e); } + }); - return result; - }) - .collect(Collectors.toList()); + // Database Upsert results + List upsertResults = + updatedItems.getSecond().stream() + .map( + item -> { + final String urnStr = item.getUrn().toString(); + final SystemAspect latest = + updatedLatestAspects + .getOrDefault(urnStr, Map.of()) + .get(item.getAspectName()); + final long nextVersion = + updatedNextVersions + .getOrDefault(urnStr, Map.of()) + .getOrDefault(item.getAspectName(), 0L); + + final UpdateAspectResult result; + if (overwrite || latest == null) { + result = + ingestAspectToLocalDB( + tx, + item.getUrn(), + item.getAspectName(), + item.getRecordTemplate(), + item.getAuditStamp(), + item.getSystemMetadata(), + latest == null + ? 
null + : ((EntityAspect.EntitySystemAspect) latest).asRaw(), + nextVersion) + .toBuilder() + .request(item) + .build(); + + // support inner-batch upserts + latestAspects + .computeIfAbsent(urnStr, key -> new HashMap<>()) + .put(item.getAspectName(), item.toLatestEntityAspect()); + nextVersions + .computeIfAbsent(urnStr, key -> new HashMap<>()) + .put(item.getAspectName(), nextVersion + 1); + } else { + RecordTemplate oldValue = latest.getRecordTemplate(_entityRegistry); + SystemMetadata oldMetadata = latest.getSystemMetadata(); + result = + UpdateAspectResult.builder() + .urn(item.getUrn()) + .request(item) + .oldValue(oldValue) + .newValue(oldValue) + .oldSystemMetadata(oldMetadata) + .newSystemMetadata(oldMetadata) + .operation(MetadataAuditOperation.UPDATE) + .auditStamp(item.getAuditStamp()) + .maxVersion(latest.getVersion()) + .build(); + } + + return result; + }) + .collect(Collectors.toList()); - // commit upserts prior to retention or kafka send, if supported by impl - if (tx != null) { - tx.commitAndContinue(); - } + // commit upserts prior to retention or kafka send, if supported by impl + if (tx != null) { + tx.commitAndContinue(); + } - // Retention optimization and tx - if (_retentionService != null) { - List retentionBatch = - upsertResults.stream() - // Only consider retention when there was a previous version - .filter( - result -> - latestAspects.containsKey(result.getUrn().toString()) - && latestAspects - .get(result.getUrn().toString()) - .containsKey(result.getRequest().getAspectName())) - .filter( - result -> { - RecordTemplate oldAspect = result.getOldValue(); - RecordTemplate newAspect = result.getNewValue(); - // Apply retention policies if there was an update to existing aspect - // value - return oldAspect != newAspect - && oldAspect != null - && _retentionService != null; - }) - .map( - result -> - RetentionService.RetentionContext.builder() - .urn(result.getUrn()) - .aspectName(result.getRequest().getAspectName()) - .maxVersion(Optional.of(result.getMaxVersion())) - .build()) - .collect(Collectors.toList()); - _retentionService.applyRetentionWithPolicyDefaults(retentionBatch); - } else { - log.warn("Retention service is missing!"); - } + // Retention optimization and tx + if (_retentionService != null) { + List retentionBatch = + upsertResults.stream() + // Only consider retention when there was a previous version + .filter( + result -> + latestAspects.containsKey(result.getUrn().toString()) + && latestAspects + .get(result.getUrn().toString()) + .containsKey(result.getRequest().getAspectName())) + .filter( + result -> { + RecordTemplate oldAspect = result.getOldValue(); + RecordTemplate newAspect = result.getNewValue(); + // Apply retention policies if there was an update to existing aspect + // value + return oldAspect != newAspect + && oldAspect != null + && _retentionService != null; + }) + .map( + result -> + RetentionService.RetentionContext.builder() + .urn(result.getUrn()) + .aspectName(result.getRequest().getAspectName()) + .maxVersion(Optional.of(result.getMaxVersion())) + .build()) + .collect(Collectors.toList()); + _retentionService.applyRetentionWithPolicyDefaults(retentionBatch); + } else { + log.warn("Retention service is missing!"); + } - return upsertResults; - }, - aspectsBatch, - DEFAULT_MAX_TRANSACTION_RETRY); + return upsertResults; + }, + aspectsBatch, + DEFAULT_MAX_TRANSACTION_RETRY) + .stream() + .flatMap(List::stream) + .collect(Collectors.toList()); } /** @@ -921,7 +920,7 @@ public RecordTemplate ingestAspectIfNotPresent( 
MCPUpsertBatchItem.builder() .urn(urn) .aspectName(aspectName) - .aspect(newValue) + .recordTemplate(newValue) .systemMetadata(systemMetadata) .auditStamp(auditStamp) .build(this)) @@ -965,7 +964,6 @@ public IngestResult ingestProposal( */ @Override public Set ingestProposal(AspectsBatch aspectsBatch, final boolean async) { - Stream timeseriesIngestResults = ingestTimeseriesProposal(aspectsBatch); Stream nonTimeseriesIngestResults = async ? ingestProposalAsync(aspectsBatch) : ingestProposalSync(aspectsBatch); @@ -1005,7 +1003,7 @@ private Stream ingestTimeseriesProposal(AspectsBatch aspectsBatch) conditionallyProduceMCLAsync( null, null, - item.getAspect(), + item.getRecordTemplate(), item.getSystemMetadata(), item.getMetadataChangeProposal(), item.getUrn(), @@ -1082,10 +1080,17 @@ private Stream ingestProposalAsync(AspectsBatch aspectsBatch) { } private Stream ingestProposalSync(AspectsBatch aspectsBatch) { + Set items = new HashSet<>(aspectsBatch.getItems()); + + // Generate additional items as needed + items.addAll(DefaultAspectsUtil.getAdditionalChanges(aspectsBatch, this, enableBrowseV2)); + + AspectsBatch withDefaults = AspectsBatchImpl.builder().items(items).build(); + AspectsBatchImpl nonTimeseries = AspectsBatchImpl.builder() .items( - aspectsBatch.getItems().stream() + withDefaults.getItems().stream() .filter(item -> !item.getAspectSpec().isTimeseries()) .collect(Collectors.toList())) .build(); @@ -1542,116 +1547,6 @@ protected Map> getLatestAspectUnions( .collect(Collectors.toList()))); } - /** - * Returns true if entityType should have some aspect as per its definition but aspects given does - * not have that aspect - */ - private boolean isAspectMissing(String entityType, String aspectName, Set aspects) { - return _entityRegistry.getEntitySpec(entityType).getAspectSpecMap().containsKey(aspectName) - && !aspects.contains(aspectName); - } - - @Override - public Pair>> generateDefaultAspectsOnFirstWrite( - @Nonnull final Urn urn, Map includedAspects) { - List> returnAspects = new ArrayList<>(); - - final String keyAspectName = getKeyAspectName(urn); - final Map latestAspects = - new HashMap<>(getLatestAspectsForUrn(urn, Set.of(keyAspectName))); - - // key aspect: does not exist in database && is being written - boolean generateDefaults = - !latestAspects.containsKey(keyAspectName) && includedAspects.containsKey(keyAspectName); - - // conditionally generate defaults - if (generateDefaults) { - String entityType = urnToEntityName(urn); - Set aspectsToGet = new HashSet<>(); - - boolean shouldCheckBrowsePath = - isAspectMissing(entityType, BROWSE_PATHS_ASPECT_NAME, includedAspects.keySet()); - if (shouldCheckBrowsePath) { - aspectsToGet.add(BROWSE_PATHS_ASPECT_NAME); - } - - boolean shouldCheckBrowsePathV2 = - isAspectMissing(entityType, BROWSE_PATHS_V2_ASPECT_NAME, includedAspects.keySet()); - if (shouldCheckBrowsePathV2) { - aspectsToGet.add(BROWSE_PATHS_V2_ASPECT_NAME); - } - - boolean shouldCheckDataPlatform = - isAspectMissing(entityType, DATA_PLATFORM_INSTANCE_ASPECT_NAME, includedAspects.keySet()); - if (shouldCheckDataPlatform) { - aspectsToGet.add(DATA_PLATFORM_INSTANCE_ASPECT_NAME); - } - - // fetch additional aspects - latestAspects.putAll(getLatestAspectsForUrn(urn, aspectsToGet)); - - if (shouldCheckBrowsePath - && latestAspects.get(BROWSE_PATHS_ASPECT_NAME) == null - && !includedAspects.containsKey(BROWSE_PATHS_ASPECT_NAME)) { - try { - BrowsePaths generatedBrowsePath = buildDefaultBrowsePath(urn); - returnAspects.add(Pair.of(BROWSE_PATHS_ASPECT_NAME, 
generatedBrowsePath)); - } catch (URISyntaxException e) { - log.error("Failed to parse urn: {}", urn); - } - } - - if (shouldCheckBrowsePathV2 - && latestAspects.get(BROWSE_PATHS_V2_ASPECT_NAME) == null - && !includedAspects.containsKey(BROWSE_PATHS_V2_ASPECT_NAME)) { - try { - BrowsePathsV2 generatedBrowsePathV2 = buildDefaultBrowsePathV2(urn, false); - returnAspects.add(Pair.of(BROWSE_PATHS_V2_ASPECT_NAME, generatedBrowsePathV2)); - } catch (URISyntaxException e) { - log.error("Failed to parse urn: {}", urn); - } - } - - if (shouldCheckDataPlatform - && latestAspects.get(DATA_PLATFORM_INSTANCE_ASPECT_NAME) == null - && !includedAspects.containsKey(DATA_PLATFORM_INSTANCE_ASPECT_NAME)) { - RecordTemplate keyAspect = includedAspects.get(keyAspectName); - DataPlatformInstanceUtils.buildDataPlatformInstance(entityType, keyAspect) - .ifPresent( - aspect -> returnAspects.add(Pair.of(DATA_PLATFORM_INSTANCE_ASPECT_NAME, aspect))); - } - } - - return Pair.of(latestAspects.containsKey(keyAspectName), returnAspects); - } - - @Override - public List> generateDefaultAspectsIfMissing( - @Nonnull final Urn urn, Map includedAspects) { - - final String keyAspectName = getKeyAspectName(urn); - - if (includedAspects.containsKey(keyAspectName)) { - return generateDefaultAspectsOnFirstWrite(urn, includedAspects).getValue(); - } else { - // No key aspect being written, generate it and potentially suggest writing it later - HashMap includedWithKeyAspect = new HashMap<>(includedAspects); - Pair keyAspect = - Pair.of(keyAspectName, EntityUtils.buildKeyAspect(_entityRegistry, urn)); - includedWithKeyAspect.put(keyAspect.getKey(), keyAspect.getValue()); - - Pair>> returnAspects = - generateDefaultAspectsOnFirstWrite(urn, includedWithKeyAspect); - - // missing key aspect in database, add it - if (!returnAspects.getFirst()) { - returnAspects.getValue().add(keyAspect); - } - - return returnAspects.getValue(); - } - } - private void ingestSnapshotUnion( @Nonnull final Snapshot snapshotUnion, @Nonnull final AuditStamp auditStamp, @@ -1664,10 +1559,11 @@ private void ingestSnapshotUnion( log.info("INGEST urn {} with system metadata {}", urn.toString(), systemMetadata.toString()); aspectRecordsToIngest.addAll( - generateDefaultAspectsIfMissing( + DefaultAspectsUtil.generateDefaultAspects( + this, urn, - aspectRecordsToIngest.stream() - .collect(Collectors.toMap(Pair::getKey, Pair::getValue)))); + aspectRecordsToIngest.stream().map(Pair::getFirst).collect(Collectors.toSet()), + enableBrowseV2)); AspectsBatchImpl aspectsBatch = AspectsBatchImpl.builder() @@ -1678,7 +1574,7 @@ private void ingestSnapshotUnion( MCPUpsertBatchItem.builder() .urn(urn) .aspectName(pair.getKey()) - .aspect(pair.getValue()) + .recordTemplate(pair.getValue()) .auditStamp(auditStamp) .systemMetadata(systemMetadata) .build(this)) @@ -2397,79 +2293,6 @@ private UpdateAspectResult ingestAspectToLocalDB( .build(); } - /** - * Builds the default browse path aspects for a subset of well-supported entities. - * - *
<p>
This method currently supports datasets, charts, dashboards, data flows, data jobs, and - * glossary terms. - */ - @Nonnull - @Override - public BrowsePaths buildDefaultBrowsePath(final @Nonnull Urn urn) throws URISyntaxException { - Character dataPlatformDelimiter = getDataPlatformDelimiter(urn); - String defaultBrowsePath = - getDefaultBrowsePath(urn, this.getEntityRegistry(), dataPlatformDelimiter); - StringArray browsePaths = new StringArray(); - browsePaths.add(defaultBrowsePath); - BrowsePaths browsePathAspect = new BrowsePaths(); - browsePathAspect.setPaths(browsePaths); - return browsePathAspect; - } - - /** - * Builds the default browse path V2 aspects for all entities. - * - *
<p>
This method currently supports datasets, charts, dashboards, and data jobs best. Everything - * else will have a basic "Default" folder added to their browsePathV2. - */ - @Nonnull - @Override - public BrowsePathsV2 buildDefaultBrowsePathV2(final @Nonnull Urn urn, boolean useContainerPaths) - throws URISyntaxException { - Character dataPlatformDelimiter = getDataPlatformDelimiter(urn); - return BrowsePathV2Utils.getDefaultBrowsePathV2( - urn, this.getEntityRegistry(), dataPlatformDelimiter, this, useContainerPaths); - } - - /** Returns a delimiter on which the name of an asset may be split. */ - private Character getDataPlatformDelimiter(Urn urn) { - // Attempt to construct the appropriate Data Platform URN - Urn dataPlatformUrn = buildDataPlatformUrn(urn, this.getEntityRegistry()); - if (dataPlatformUrn != null) { - // Attempt to resolve the delimiter from Data Platform Info - DataPlatformInfo dataPlatformInfo = getDataPlatformInfo(dataPlatformUrn); - if (dataPlatformInfo != null && dataPlatformInfo.hasDatasetNameDelimiter()) { - return dataPlatformInfo.getDatasetNameDelimiter().charAt(0); - } - } - // Else, fallback to a default delimiter (period) if one cannot be resolved. - return '.'; - } - - @Nullable - private DataPlatformInfo getDataPlatformInfo(Urn urn) { - try { - final EntityResponse entityResponse = - getEntityV2( - Constants.DATA_PLATFORM_ENTITY_NAME, - urn, - ImmutableSet.of(Constants.DATA_PLATFORM_INFO_ASPECT_NAME)); - if (entityResponse != null - && entityResponse.hasAspects() - && entityResponse.getAspects().containsKey(Constants.DATA_PLATFORM_INFO_ASPECT_NAME)) { - return new DataPlatformInfo( - entityResponse - .getAspects() - .get(Constants.DATA_PLATFORM_INFO_ASPECT_NAME) - .getValue() - .data()); - } - } catch (Exception e) { - log.warn(String.format("Failed to find Data Platform Info for urn %s", urn)); - } - return null; - } - private static boolean shouldAspectEmitChangeLog(@Nonnull final AspectSpec aspectSpec) { final List relationshipFieldSpecs = aspectSpec.getRelationshipFieldSpecs(); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java index 176a99d8d3a498..3342d4632f642e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java @@ -5,14 +5,20 @@ import com.codahale.metrics.MetricRegistry; import com.datahub.util.exception.ModelConversionException; import com.datahub.util.exception.RetryLimitReached; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.metadata.aspect.batch.AspectsBatch; +import com.linkedin.metadata.aspect.batch.MCPBatchItem; +import com.linkedin.metadata.config.EbeanConfiguration; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.AspectMigrationsDao; import com.linkedin.metadata.entity.EntityAspect; import com.linkedin.metadata.entity.EntityAspectIdentifier; import com.linkedin.metadata.entity.ListResult; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; @@ -21,6 +27,7 @@ import 
com.linkedin.metadata.query.ListResultMetadata; import com.linkedin.metadata.search.utils.QueryUtils; import com.linkedin.metadata.utils.metrics.MetricUtils; +import com.linkedin.util.Pair; import io.ebean.Database; import io.ebean.DuplicateKeyException; import io.ebean.ExpressionList; @@ -39,9 +46,14 @@ import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -69,8 +81,29 @@ public class EbeanAspectDao implements AspectDao, AspectMigrationsDao { // more testing. private int _queryKeysCount = 375; // 0 means no pagination on keys - public EbeanAspectDao(@Nonnull final Database server) { + /** + * Used to control write concurrency when an entity key aspect is present. If a batch contains an + * entity key aspect, only allow a single execution per URN + */ + private final LoadingCache locks; + + public EbeanAspectDao(@Nonnull final Database server, EbeanConfiguration ebeanConfiguration) { _server = server; + if (ebeanConfiguration.getLocking().isEnabled()) { + this.locks = + CacheBuilder.newBuilder() + .maximumSize(ebeanConfiguration.getLocking().getMaximumLocks()) + .expireAfterWrite( + ebeanConfiguration.getLocking().getDurationSeconds(), TimeUnit.SECONDS) + .build( + new CacheLoader<>() { + public Lock load(String key) { + return new ReentrantLock(true); + } + }); + } else { + this.locks = null; + } } @Override @@ -588,15 +621,70 @@ public ListResult listLatestAspectMetadata( @Nonnull public T runInTransactionWithRetry( @Nonnull final Function block, final int maxTransactionRetry) { - return runInTransactionWithRetry(block, null, maxTransactionRetry); + return runInTransactionWithRetry(block, null, maxTransactionRetry).get(0); } @Override @Nonnull - public T runInTransactionWithRetry( + public List runInTransactionWithRetry( @Nonnull final Function block, @Nullable AspectsBatch batch, final int maxTransactionRetry) { + + LinkedList result = new LinkedList<>(); + + if (locks != null && batch != null) { + Set urnsWithKeyAspects = + batch.getMCPItems().stream() + .filter(i -> i.getEntitySpec().getKeyAspectSpec().equals(i.getAspectSpec())) + .map(MCPBatchItem::getUrn) + .collect(Collectors.toSet()); + + if (!urnsWithKeyAspects.isEmpty()) { + + // Split into batches by urn with key aspect, remaining aspects in the pair's second + Pair, AspectsBatch> splitBatches = splitByUrn(batch, urnsWithKeyAspects); + + // Run non-key aspect `other` batch per normal + if (!splitBatches.getSecond().getItems().isEmpty()) { + result.add( + runInTransactionWithRetryUnlocked( + block, splitBatches.getSecond(), maxTransactionRetry)); + } + + // For each key aspect batch + for (AspectsBatch splitBatch : splitBatches.getFirst()) { + try { + Lock lock = + locks.get(splitBatch.getMCPItems().stream().findFirst().get().getUrn().toString()); + lock.lock(); + try { + result.add(runInTransactionWithRetryUnlocked(block, splitBatch, maxTransactionRetry)); + } finally { + lock.unlock(); + } + } catch (ExecutionException e) { + throw new RuntimeException(e); + } + } + } else { + // No key aspects found, run per normal + result.add(runInTransactionWithRetryUnlocked(block, batch, maxTransactionRetry)); + } + } else { + // 
locks disabled or null batch + result.add(runInTransactionWithRetryUnlocked(block, batch, maxTransactionRetry)); + } + + return result; + } + + @Nonnull + public T runInTransactionWithRetryUnlocked( + @Nonnull final Function block, + @Nullable AspectsBatch batch, + final int maxTransactionRetry) { + validateConnection(); int retryCount = 0; Exception lastException = null; @@ -804,4 +892,35 @@ private static String buildMetricName( MetricUtils.DELIMITER, List.of(entitySpec.getName(), aspectSpec.getName(), status.toLowerCase())); } + + /** + * Split batches by the set of Urns, all remaining items go into an `other` batch in the second of + * the pair + * + * @param batch the input batch + * @param urns urns for batch + * @return separated batches + */ + private static Pair, AspectsBatch> splitByUrn( + AspectsBatch batch, Set urns) { + Map> itemsByUrn = + batch.getMCPItems().stream().collect(Collectors.groupingBy(MCPBatchItem::getUrn)); + + AspectsBatch other = + AspectsBatchImpl.builder() + .items( + itemsByUrn.entrySet().stream() + .filter(entry -> !urns.contains(entry.getKey())) + .flatMap(entry -> entry.getValue().stream()) + .collect(Collectors.toList())) + .build(); + + List nonEmptyBatches = + urns.stream() + .map(urn -> AspectsBatchImpl.builder().items(itemsByUrn.get(urn)).build()) + .filter(b -> !b.getItems().isEmpty()) + .collect(Collectors.toList()); + + return Pair.of(nonEmptyBatches, other); + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java index 80fb4e3e1b940e..1718bd835dc31f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java @@ -11,6 +11,7 @@ import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.SystemMetadata; import com.linkedin.util.Pair; +import java.util.Collection; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -26,7 +27,7 @@ @Builder(toBuilder = true) public class AspectsBatchImpl implements AspectsBatch { - private final List items; + private final Collection items; /** * Convert patches to upserts, apply hooks at the aspect and batch level. 
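The EbeanAspectDao hunk above serializes writes per URN whenever a batch carries an entity key aspect: the batch is split by URN and each key-aspect sub-batch runs while holding a fair lock obtained from an expiring Guava cache. Below is a minimal sketch of that locking pattern; the cache size and TTL are placeholders standing in for the EbeanConfiguration.getLocking() settings, and withUrnLock is a hypothetical helper rather than a method of the DAO.

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Supplier;

/** Minimal sketch of per-URN write serialization using an expiring cache of fair locks. */
public class PerUrnLockSketch {

  private final LoadingCache<String, Lock> locks =
      CacheBuilder.newBuilder()
          .maximumSize(10_000) // placeholder for locking.maximumLocks
          .expireAfterWrite(60, TimeUnit.SECONDS) // placeholder for locking.durationSeconds
          .build(
              new CacheLoader<String, Lock>() {
                @Override
                public Lock load(String key) {
                  return new ReentrantLock(true); // one fair lock per URN string
                }
              });

  /** Runs the given work while holding the lock associated with the URN. */
  public <T> T withUrnLock(String urn, Supplier<T> work) {
    try {
      Lock lock = locks.get(urn);
      lock.lock();
      try {
        return work.get();
      } finally {
        lock.unlock();
      }
    } catch (ExecutionException e) {
      throw new RuntimeException(e);
    }
  }
}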
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLBatchItemImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLBatchItemImpl.java index 6563765657d6d0..a2ed2eb18fe6a3 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLBatchItemImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLBatchItemImpl.java @@ -30,9 +30,9 @@ public class MCLBatchItemImpl implements MCLBatchItem { @Nonnull private final MetadataChangeLog metadataChangeLog; - @Nullable private final RecordTemplate aspect; + @Nullable private final RecordTemplate recordTemplate; - @Nullable private final RecordTemplate previousAspect; + @Nullable private final RecordTemplate previousRecordTemplate; // derived private final EntitySpec entitySpec; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPPatchBatchItem.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPPatchBatchItem.java index be333af2f75398..d0cb2a4cc59b8a 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPPatchBatchItem.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPPatchBatchItem.java @@ -31,6 +31,7 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.Builder; import lombok.Getter; import lombok.extern.slf4j.Slf4j; @@ -72,6 +73,12 @@ public ChangeType getChangeType() { return ChangeType.PATCH; } + @Nullable + @Override + public RecordTemplate getRecordTemplate() { + return null; + } + public MCPUpsertBatchItem applyPatch( RecordTemplate recordTemplate, AspectRetriever aspectRetriever) { MCPUpsertBatchItem.MCPUpsertBatchItemBuilder builder = @@ -100,7 +107,8 @@ public MCPUpsertBatchItem applyPatch( } try { - builder.aspect(aspectTemplateEngine.applyPatch(currentValue, getPatch(), getAspectSpec())); + builder.recordTemplate( + aspectTemplateEngine.applyPatch(currentValue, getPatch(), getAspectSpec())); } catch (JsonProcessingException | JsonPatchException e) { throw new RuntimeException(e); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPUpsertBatchItem.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPUpsertBatchItem.java index 89209c44f10c77..b9d5f24e7ce084 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPUpsertBatchItem.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPUpsertBatchItem.java @@ -58,7 +58,7 @@ public static MCPUpsertBatchItem fromPatch( recordTemplate != null ? 
recordTemplate : genericPatchTemplate.getDefault(); try { - builder.aspect(genericPatchTemplate.applyPatch(currentValue)); + builder.recordTemplate(genericPatchTemplate.applyPatch(currentValue)); } catch (JsonPatchException | IOException e) { throw new RuntimeException(e); } @@ -72,7 +72,7 @@ public static MCPUpsertBatchItem fromPatch( // aspectName name of the aspect being inserted @Nonnull private final String aspectName; - @Nonnull private final RecordTemplate aspect; + @Nonnull private final RecordTemplate recordTemplate; @Nonnull private final SystemMetadata systemMetadata; @@ -104,7 +104,7 @@ public void applyMutationHooks( entitySpec, aspectSpec, oldAspectValue, - aspect, + recordTemplate, oldSystemMetadata, systemMetadata, auditStamp, @@ -116,7 +116,7 @@ public void applyMutationHooks( public SystemAspect toLatestEntityAspect() { EntityAspect latest = new EntityAspect(); latest.setAspect(getAspectName()); - latest.setMetadata(EntityUtils.toJsonAspect(getAspect())); + latest.setMetadata(EntityUtils.toJsonAspect(getRecordTemplate())); latest.setUrn(getUrn().toString()); latest.setVersion(ASPECT_LATEST_VERSION); latest.setCreatedOn(new Timestamp(auditStamp.getTime())); @@ -135,7 +135,7 @@ public void validatePreCommit( .getAspectPayloadValidators( getChangeType(), entitySpec.getName(), aspectSpec.getName())) { validator.validatePreCommit( - getChangeType(), urn, getAspectSpec(), previous, this.aspect, aspectRetriever); + getChangeType(), urn, getAspectSpec(), previous, this.recordTemplate, aspectRetriever); } } @@ -167,13 +167,13 @@ public MCPUpsertBatchItem build(AspectRetriever aspectRetriever) { this.entitySpec, this.aspectSpec, this.urn, - this.aspect, + this.recordTemplate, aspectRetriever); return new MCPUpsertBatchItem( this.urn, this.aspectName, - this.aspect, + this.recordTemplate, SystemMetadataUtils.generateSystemMetadataIfEmpty(this.systemMetadata), this.auditStamp, this.metadataChangeProposal, @@ -213,7 +213,7 @@ public static MCPUpsertBatchItem build( SystemMetadataUtils.generateSystemMetadataIfEmpty(mcp.getSystemMetadata())) .metadataChangeProposal(mcp) .auditStamp(auditStamp) - .aspect(convertToRecordTemplate(mcp, aspectSpec)) + .recordTemplate(convertToRecordTemplate(mcp, aspectSpec)) .build(aspectRetriever); } @@ -258,12 +258,12 @@ public boolean equals(Object o) { return urn.equals(that.urn) && aspectName.equals(that.aspectName) && Objects.equals(systemMetadata, that.systemMetadata) - && aspect.equals(that.aspect); + && recordTemplate.equals(that.recordTemplate); } @Override public int hashCode() { - return Objects.hash(urn, aspectName, systemMetadata, aspect); + return Objects.hash(urn, aspectName, systemMetadata, recordTemplate); } @Override @@ -276,8 +276,8 @@ public String toString() { + '\'' + ", systemMetadata=" + systemMetadata - + ", aspect=" - + aspect + + ", recordTemplate=" + + recordTemplate + '}'; } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index 7de2770626ae34..76153a8d2adb3f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -22,9 +22,11 @@ import com.linkedin.metadata.search.FilterValueArray; import com.linkedin.metadata.search.ScrollResult; import com.linkedin.metadata.search.SearchResult; +import 
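MCPPatchBatchItem.applyPatch and MCPUpsertBatchItem.fromPatch above both resolve a current aspect value and apply a JSON Patch to it before building the upsert item. The snippet below shows raw RFC 6902 patch application in isolation, assuming the java-json-tools json-patch library (matching the JsonPatchException seen in the imports); it stands in for the AspectTemplateEngine/GenericPatchTemplate path rather than reproducing it.

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.github.fge.jsonpatch.JsonPatch;
import com.github.fge.jsonpatch.JsonPatchException;
import java.io.IOException;

/** Standalone sketch: apply a JSON Patch to a current aspect document. */
public class JsonPatchSketch {
  public static void main(String[] args) throws IOException, JsonPatchException {
    ObjectMapper mapper = new ObjectMapper();
    JsonNode current = mapper.readTree("{\"name\":\"old\",\"customProperties\":{}}");
    JsonNode patchNode =
        mapper.readTree(
            "[{\"op\":\"add\",\"path\":\"/name\",\"value\":\"new\"},"
                + "{\"op\":\"add\",\"path\":\"/customProperties/prop1\",\"value\":\"propVal1\"}]");
    JsonPatch patch = JsonPatch.fromJson(patchNode);
    JsonNode patched = patch.apply(current);
    // Prints {"name":"new","customProperties":{"prop1":"propVal1"}}
    System.out.println(patched);
  }
}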
com.linkedin.metadata.search.elasticsearch.query.request.AggregationQueryBuilder; import com.linkedin.metadata.search.elasticsearch.query.request.AutocompleteRequestHandler; import com.linkedin.metadata.search.elasticsearch.query.request.SearchAfterWrapper; import com.linkedin.metadata.search.elasticsearch.query.request.SearchRequestHandler; +import com.linkedin.metadata.search.utils.QueryUtils; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.metrics.MetricUtils; import io.opentelemetry.extension.annotations.WithSpan; @@ -317,7 +319,7 @@ public Map aggregateByValue( int limit) { List entitySpecs; if (entityNames == null || entityNames.isEmpty()) { - entitySpecs = new ArrayList<>(entityRegistry.getEntitySpecs().values()); + entitySpecs = QueryUtils.getQueryByDefaultEntitySpecs(entityRegistry); } else { entitySpecs = entityNames.stream().map(entityRegistry::getEntitySpec).collect(Collectors.toList()); @@ -341,7 +343,7 @@ public Map aggregateByValue( MetricUtils.timer(this.getClass(), "aggregateByValue_search").time()) { final SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); // extract results, validated against document model as well - return SearchRequestHandler.extractAggregationsFromResponse(searchResponse, field); + return AggregationQueryBuilder.extractAggregationsFromResponse(searchResponse, field); } catch (Exception e) { log.error("Aggregation query failed", e); throw new ESQueryException("Aggregation query failed:", e); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java index bdc0332b040df9..887d4b22f37e24 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java @@ -1,36 +1,71 @@ package com.linkedin.metadata.search.elasticsearch.query.request; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.search.utils.ESUtils.toFacetField; import static com.linkedin.metadata.utils.SearchUtil.*; +import com.linkedin.data.template.LongMap; import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.StructuredPropertyUtils; import com.linkedin.metadata.models.annotation.SearchableAnnotation; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.AggregationMetadata; +import com.linkedin.metadata.search.FilterValueArray; import com.linkedin.metadata.search.utils.ESUtils; +import com.linkedin.metadata.utils.SearchUtil; +import com.linkedin.util.Pair; +import io.opentelemetry.extension.annotations.WithSpan; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.function.BinaryOperator; import java.util.stream.Collectors; import javax.annotation.Nonnull; import 
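aggregateByValue above now delegates response parsing to AggregationQueryBuilder.extractAggregationsFromResponse. The sketch below is a simplified, single-level rendition of that extraction, flattening one terms aggregation from an OpenSearch response into a value-to-count map; the nested sub-aggregation and missing-bucket handling of the real method are intentionally omitted.

import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.search.aggregations.Aggregation;
import org.opensearch.search.aggregations.bucket.terms.ParsedTerms;
import org.opensearch.search.aggregations.bucket.terms.Terms;

/** Simplified sketch: flatten a top-level terms aggregation into value -> doc count. */
public final class TermsAggSketch {

  public static Map<String, Long> extract(SearchResponse response, String aggName) {
    if (response.getAggregations() == null) {
      return Collections.emptyMap();
    }
    Aggregation agg = response.getAggregations().get(aggName);
    if (!(agg instanceof ParsedTerms)) {
      return Collections.emptyMap();
    }
    Map<String, Long> counts = new LinkedHashMap<>();
    for (Terms.Bucket bucket : ((ParsedTerms) agg).getBuckets()) {
      counts.put(bucket.getKeyAsString(), bucket.getDocCount());
    }
    return counts;
  }
}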
javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang.StringUtils; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.search.aggregations.Aggregation; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.AggregationBuilders; +import org.opensearch.search.aggregations.Aggregations; +import org.opensearch.search.aggregations.bucket.missing.ParsedMissing; +import org.opensearch.search.aggregations.bucket.terms.ParsedTerms; +import org.opensearch.search.aggregations.bucket.terms.Terms; @Slf4j public class AggregationQueryBuilder { + private static final String URN_FILTER = "urn"; - private final SearchConfiguration _configs; - private final Set _defaultFacetFields; - private final Set _allFacetFields; + private final SearchConfiguration configs; + private final Set defaultFacetFields; + private final Set allFacetFields; + private final Map> entitySearchAnnotations; + + private Map filtersToDisplayName; public AggregationQueryBuilder( @Nonnull final SearchConfiguration configs, - @Nonnull final List annotations) { - this._configs = Objects.requireNonNull(configs, "configs must not be null"); - this._defaultFacetFields = getDefaultFacetFields(annotations); - this._allFacetFields = getAllFacetFields(annotations); + @Nonnull Map> entitySearchAnnotations) { + this.configs = Objects.requireNonNull(configs, "configs must not be null"); + this.entitySearchAnnotations = entitySearchAnnotations; + + List annotations = + this.entitySearchAnnotations.values().stream() + .flatMap(List::stream) + .collect(Collectors.toList()); + this.defaultFacetFields = getDefaultFacetFields(annotations); + this.allFacetFields = getAllFacetFields(annotations); } /** Get the set of default aggregations, across all facets. */ @@ -48,7 +83,7 @@ public List getAggregations(@Nullable List facets) { facetsToAggregate = facets.stream().filter(this::isValidAggregate).collect(Collectors.toSet()); } else { - facetsToAggregate = _defaultFacetFields; + facetsToAggregate = defaultFacetFields; } return facetsToAggregate.stream() .map(this::facetToAggregationBuilder) @@ -79,13 +114,13 @@ private boolean isValidAggregate(final String inputFacet) { !facets.isEmpty() && ((facets.size() == 1 && facets.get(0).startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD + ".")) - || _allFacetFields.containsAll(facets)); + || allFacetFields.containsAll(facets)); if (!isValid) { log.warn( String.format( "Requested facet for search filter aggregations that isn't part of the filters. " + "Provided: %s; Available: %s", - inputFacet, _allFacetFields)); + inputFacet, allFacetFields)); } return isValid; } @@ -122,11 +157,11 @@ private AggregationBuilder facetToAggregationBuilder(final String inputFacet) { facet.equalsIgnoreCase(INDEX_VIRTUAL_FIELD) ? 
AggregationBuilders.terms(inputFacet) .field(getAggregationField("_index")) - .size(_configs.getMaxTermBucketSize()) + .size(configs.getMaxTermBucketSize()) .minDocCount(0) : AggregationBuilders.terms(inputFacet) .field(getAggregationField(facet)) - .size(_configs.getMaxTermBucketSize()); + .size(configs.getMaxTermBucketSize()); } if (lastAggBuilder != null) { aggBuilder = aggBuilder.subAggregation(lastAggBuilder); @@ -173,4 +208,365 @@ List getAllFacetFieldsFromAnnotation(final SearchableAnnotation annotati } return facetsFromAnnotation; } + + private String computeDisplayName(String name) { + if (getFacetToDisplayNames().containsKey(name)) { + return getFacetToDisplayNames().get(name); + } else if (name.contains(AGGREGATION_SEPARATOR_CHAR)) { + return Arrays.stream(name.split(AGGREGATION_SEPARATOR_CHAR)) + .map(i -> getFacetToDisplayNames().get(i)) + .collect(Collectors.joining(AGGREGATION_SEPARATOR_CHAR)); + } + return name; + } + + List extractAggregationMetadata( + @Nonnull SearchResponse searchResponse, @Nullable Filter filter) { + final List aggregationMetadataList = new ArrayList<>(); + if (searchResponse.getAggregations() == null) { + return addFiltersToAggregationMetadata(aggregationMetadataList, filter); + } + for (Map.Entry entry : + searchResponse.getAggregations().getAsMap().entrySet()) { + if (entry.getValue() instanceof ParsedTerms) { + processTermAggregations(entry, aggregationMetadataList); + } + if (entry.getValue() instanceof ParsedMissing) { + processMissingAggregations(entry, aggregationMetadataList); + } + } + return addFiltersToAggregationMetadata(aggregationMetadataList, filter); + } + + private void processTermAggregations( + final Map.Entry entry, + final List aggregationMetadataList) { + final Map oneTermAggResult = + extractTermAggregations( + (ParsedTerms) entry.getValue(), entry.getKey().equals(INDEX_VIRTUAL_FIELD)); + if (oneTermAggResult.isEmpty()) { + return; + } + final AggregationMetadata aggregationMetadata = + new AggregationMetadata() + .setName(entry.getKey()) + .setDisplayName(computeDisplayName(entry.getKey())) + .setAggregations(new LongMap(oneTermAggResult)) + .setFilterValues( + new FilterValueArray( + SearchUtil.convertToFilters(oneTermAggResult, Collections.emptySet()))); + aggregationMetadataList.add(aggregationMetadata); + } + + /** + * Adds nested sub-aggregation values to the aggregated results + * + * @param aggs The aggregations to traverse. 
Could be null (base case) + * @return A map from names to aggregation count values + */ + @Nonnull + private static Map recursivelyAddNestedSubAggs(@Nullable Aggregations aggs) { + final Map aggResult = new HashMap<>(); + + if (aggs != null) { + for (Map.Entry entry : aggs.getAsMap().entrySet()) { + if (entry.getValue() instanceof ParsedTerms) { + recurseTermsAgg((ParsedTerms) entry.getValue(), aggResult, false); + } else if (entry.getValue() instanceof ParsedMissing) { + recurseMissingAgg((ParsedMissing) entry.getValue(), aggResult); + } else { + throw new UnsupportedOperationException( + "Unsupported aggregation type: " + entry.getValue().getClass().getName()); + } + } + } + return aggResult; + } + + private static void recurseTermsAgg( + ParsedTerms terms, Map aggResult, boolean includeZeroes) { + List bucketList = terms.getBuckets(); + bucketList.forEach(bucket -> processTermBucket(bucket, aggResult, includeZeroes)); + } + + private static void processTermBucket( + Terms.Bucket bucket, Map aggResult, boolean includeZeroes) { + String key = bucket.getKeyAsString(); + // Gets filtered sub aggregation doc count if exist + Map subAggs = recursivelyAddNestedSubAggs(bucket.getAggregations()); + subAggs.forEach( + (entryKey, entryValue) -> + aggResult.put( + String.format("%s%s%s", key, AGGREGATION_SEPARATOR_CHAR, entryKey), entryValue)); + long docCount = bucket.getDocCount(); + if (includeZeroes || docCount > 0) { + aggResult.put(key, docCount); + } + } + + private static void recurseMissingAgg(ParsedMissing missing, Map aggResult) { + Map subAggs = recursivelyAddNestedSubAggs(missing.getAggregations()); + subAggs.forEach( + (key, value) -> + aggResult.put( + String.format("%s%s%s", missing.getName(), AGGREGATION_SEPARATOR_CHAR, key), + value)); + long docCount = missing.getDocCount(); + if (docCount > 0) { + aggResult.put(missing.getName(), docCount); + } + } + + /** + * Extracts term aggregations give a parsed term. + * + * @param terms an abstract parse term, input can be either ParsedStringTerms ParsedLongTerms + * @return a map with aggregation key and corresponding doc counts + */ + @Nonnull + private static Map extractTermAggregations( + @Nonnull ParsedTerms terms, boolean includeZeroes) { + + final Map aggResult = new HashMap<>(); + recurseTermsAgg(terms, aggResult, includeZeroes); + + return aggResult; + } + + /** Injects the missing conjunctive filters into the aggregations list. 
*/ + public List addFiltersToAggregationMetadata( + @Nonnull final List originalMetadata, @Nullable final Filter filter) { + if (filter == null) { + return originalMetadata; + } + if (filter.getOr() != null) { + addOrFiltersToAggregationMetadata(filter.getOr(), originalMetadata); + } else if (filter.getCriteria() != null) { + addCriteriaFiltersToAggregationMetadata(filter.getCriteria(), originalMetadata); + } + return originalMetadata; + } + + void addOrFiltersToAggregationMetadata( + @Nonnull final ConjunctiveCriterionArray or, + @Nonnull final List originalMetadata) { + for (ConjunctiveCriterion conjunction : or) { + // For each item in the conjunction, inject an empty aggregation if necessary + addCriteriaFiltersToAggregationMetadata(conjunction.getAnd(), originalMetadata); + } + } + + private void addCriteriaFiltersToAggregationMetadata( + @Nonnull final CriterionArray criteria, + @Nonnull final List originalMetadata) { + for (Criterion criterion : criteria) { + addCriterionFiltersToAggregationMetadata(criterion, originalMetadata); + } + } + + private void addCriterionFiltersToAggregationMetadata( + @Nonnull final Criterion criterion, + @Nonnull final List aggregationMetadata) { + + // We should never see duplicate aggregation for the same field in aggregation metadata list. + final Map aggregationMetadataMap = + aggregationMetadata.stream() + .collect(Collectors.toMap(AggregationMetadata::getName, agg -> agg)); + + // Map a filter criterion to a facet field (e.g. domains.keyword -> domains) + final String finalFacetField = toFacetField(criterion.getField()); + + if (finalFacetField == null) { + log.warn( + String.format( + "Found invalid filter field for entity search. Invalid or unrecognized facet %s", + criterion.getField())); + return; + } + + // We don't want to add urn filters to the aggregations we return as a sidecar to search + // results. + // They are automatically added by searchAcrossLineage and we dont need them to show up in the + // filter panel. + if (finalFacetField.equals(URN_FILTER)) { + return; + } + + if (aggregationMetadataMap.containsKey(finalFacetField)) { + /* + * If we already have aggregations for the facet field, simply inject any missing values counts into the set. + * If there are no results for a particular facet value, it will NOT be in the original aggregation set returned by + * Elasticsearch. + */ + AggregationMetadata originalAggMetadata = aggregationMetadataMap.get(finalFacetField); + if (criterion.hasValues()) { + criterion + .getValues() + .forEach( + value -> + addMissingAggregationValueToAggregationMetadata(value, originalAggMetadata)); + } else { + addMissingAggregationValueToAggregationMetadata(criterion.getValue(), originalAggMetadata); + } + } else { + /* + * If we do not have ANY aggregation for the facet field, then inject a new aggregation metadata object for the + * facet field. + * If there are no results for a particular facet, it will NOT be in the original aggregation set returned by + * Elasticsearch. 
+ */ + aggregationMetadata.add( + buildAggregationMetadata( + finalFacetField, + getFacetToDisplayNames().getOrDefault(finalFacetField, finalFacetField), + new LongMap( + criterion.getValues().stream().collect(Collectors.toMap(i -> i, i -> 0L))), + new FilterValueArray( + criterion.getValues().stream() + .map(value -> createFilterValue(value, 0L, true)) + .collect(Collectors.toList())))); + } + } + + private void addMissingAggregationValueToAggregationMetadata( + @Nonnull final String value, @Nonnull final AggregationMetadata originalMetadata) { + if (originalMetadata.getAggregations().entrySet().stream() + .noneMatch(entry -> value.equals(entry.getKey())) + || originalMetadata.getFilterValues().stream() + .noneMatch(entry -> entry.getValue().equals(value))) { + // No aggregation found for filtered value -- inject one! + originalMetadata.getAggregations().put(value, 0L); + originalMetadata.getFilterValues().add(createFilterValue(value, 0L, true)); + } + } + + private AggregationMetadata buildAggregationMetadata( + @Nonnull final String facetField, + @Nonnull final String displayName, + @Nonnull final LongMap aggValues, + @Nonnull final FilterValueArray filterValues) { + return new AggregationMetadata() + .setName(facetField) + .setDisplayName(displayName) + .setAggregations(aggValues) + .setFilterValues(filterValues); + } + + private List>> getFacetFieldDisplayNameFromAnnotation( + @Nonnull EntitySpec entitySpec, @Nonnull final SearchableAnnotation annotation) { + final List>> facetsFromAnnotation = new ArrayList<>(); + // Case 1: Default Keyword field + if (annotation.isAddToFilters()) { + facetsFromAnnotation.add( + Pair.of( + annotation.getFieldName(), + Pair.of(entitySpec.getName(), annotation.getFilterName()))); + } + // Case 2: HasX boolean field + if (annotation.isAddHasValuesToFilters() && annotation.getHasValuesFieldName().isPresent()) { + facetsFromAnnotation.add( + Pair.of( + annotation.getHasValuesFieldName().get(), + Pair.of(entitySpec.getName(), annotation.getHasValuesFilterName()))); + } + return facetsFromAnnotation; + } + + @WithSpan + public static Map extractAggregationsFromResponse( + @Nonnull SearchResponse searchResponse, @Nonnull String aggregationName) { + if (searchResponse.getAggregations() == null) { + return Collections.emptyMap(); + } + + Aggregation aggregation = searchResponse.getAggregations().get(aggregationName); + if (aggregation == null) { + return Collections.emptyMap(); + } + if (aggregation instanceof ParsedTerms) { + return extractTermAggregations( + (ParsedTerms) aggregation, aggregationName.equals("_entityType")); + } else if (aggregation instanceof ParsedMissing) { + return Collections.singletonMap( + aggregation.getName(), ((ParsedMissing) aggregation).getDocCount()); + } + throw new UnsupportedOperationException( + "Unsupported aggregation type: " + aggregation.getClass().getName()); + } + + /** + * Only used in aggregation queries, lazy load + * + * @return map of field name to facet display names + */ + private Map getFacetToDisplayNames() { + if (filtersToDisplayName == null) { + // Validate field names + Map>>> validateFieldMap = + entitySearchAnnotations.entrySet().stream() + .flatMap( + entry -> + entry.getValue().stream() + .flatMap( + annotation -> + getFacetFieldDisplayNameFromAnnotation(entry.getKey(), annotation) + .stream())) + .collect(Collectors.groupingBy(Pair::getFirst, Collectors.toSet())); + for (Map.Entry>>> entry : + validateFieldMap.entrySet()) { + if (entry.getValue().stream().map(i -> 
i.getSecond().getSecond()).distinct().count() > 1) { + Map>> displayNameEntityMap = + entry.getValue().stream() + .map(Pair::getSecond) + .collect(Collectors.groupingBy(Pair::getSecond, Collectors.toSet())); + throw new IllegalStateException( + String.format( + "Facet field collision on field `%s`. Incompatible Display Name across entities. Multiple Display Names detected: %s", + entry.getKey(), displayNameEntityMap)); + } + } + + filtersToDisplayName = + entitySearchAnnotations.entrySet().stream() + .flatMap( + entry -> + entry.getValue().stream() + .flatMap( + annotation -> + getFacetFieldDisplayNameFromAnnotation(entry.getKey(), annotation) + .stream())) + .collect( + Collectors.toMap(Pair::getFirst, p -> p.getSecond().getSecond(), mapMerger())); + filtersToDisplayName.put(INDEX_VIRTUAL_FIELD, "Type"); + } + + return filtersToDisplayName; + } + + private void processMissingAggregations( + final Map.Entry entry, + final List aggregationMetadataList) { + ParsedMissing parsedMissing = (ParsedMissing) entry.getValue(); + Long docCount = parsedMissing.getDocCount(); + LongMap longMap = new LongMap(); + longMap.put(entry.getKey(), docCount); + final AggregationMetadata aggregationMetadata = + new AggregationMetadata() + .setName(entry.getKey()) + .setDisplayName(computeDisplayName(entry.getKey())) + .setAggregations(longMap) + .setFilterValues( + new FilterValueArray(SearchUtil.convertToFilters(longMap, Collections.emptySet()))); + aggregationMetadataList.add(aggregationMetadata); + } + + // If values are not equal, throw error + private BinaryOperator mapMerger() { + return (s1, s2) -> { + if (!StringUtils.equals(s1, s2)) { + throw new IllegalStateException(String.format("Unable to merge values %s and %s", s1, s2)); + } + return s1; + }; + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 277e15e1334d56..3ac05ed122cd70 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -1,7 +1,6 @@ package com.linkedin.metadata.search.elasticsearch.query.request; import static com.linkedin.metadata.search.utils.ESUtils.NAME_SUGGESTION; -import static com.linkedin.metadata.search.utils.ESUtils.toFacetField; import static com.linkedin.metadata.search.utils.SearchUtils.applyDefaultSearchFlags; import static com.linkedin.metadata.utils.SearchUtil.*; @@ -10,22 +9,16 @@ import com.google.common.collect.ImmutableMap; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.DoubleMap; -import com.linkedin.data.template.LongMap; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation; import com.linkedin.metadata.query.SearchFlags; -import com.linkedin.metadata.query.filter.ConjunctiveCriterion; -import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; -import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import 
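getFacetToDisplayNames and mapMerger above combine per-entity facet annotations into a single field-to-display-name map and fail fast when two entities declare different display names for the same field. Below is a small self-contained example of that merge strategy with Collectors.toMap; it uses plain equals instead of StringUtils.equals and hypothetical field names.

import java.util.List;
import java.util.Map;
import java.util.function.BinaryOperator;
import java.util.stream.Collectors;

/** Sketch: merge display names, rejecting conflicting values for the same field. */
public final class DisplayNameMergeSketch {

  // Merge function in the spirit of mapMerger(): equal values pass through, conflicts throw.
  static BinaryOperator<String> rejectConflicts() {
    return (a, b) -> {
      if (!a.equals(b)) {
        throw new IllegalStateException(String.format("Unable to merge values %s and %s", a, b));
      }
      return a;
    };
  }

  public static void main(String[] args) {
    // Hypothetical (field name, display name) pairs contributed by two entity specs.
    List<Map.Entry<String, String>> pairs =
        List.of(
            Map.entry("platform", "Platform"),
            Map.entry("platform", "Platform"), // duplicate with an identical display name: fine
            Map.entry("origin", "Environment"));

    Map<String, String> merged =
        pairs.stream()
            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, rejectConflicts()));
    System.out.println(merged); // two entries; a conflicting "platform" display name would throw
  }
}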
com.linkedin.metadata.query.filter.SortCriterion; import com.linkedin.metadata.search.AggregationMetadata; import com.linkedin.metadata.search.AggregationMetadataArray; -import com.linkedin.metadata.search.FilterValueArray; import com.linkedin.metadata.search.MatchedField; import com.linkedin.metadata.search.MatchedFieldArray; import com.linkedin.metadata.search.ScrollResult; @@ -37,13 +30,11 @@ import com.linkedin.metadata.search.SearchSuggestionArray; import com.linkedin.metadata.search.features.Features; import com.linkedin.metadata.search.utils.ESUtils; -import com.linkedin.metadata.utils.SearchUtil; import com.linkedin.util.Pair; import io.opentelemetry.extension.annotations.WithSpan; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -51,13 +42,11 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; -import java.util.function.BinaryOperator; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang.StringUtils; import org.opensearch.action.search.SearchRequest; import org.opensearch.action.search.SearchResponse; import org.opensearch.common.text.Text; @@ -66,12 +55,7 @@ import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; import org.opensearch.search.SearchHit; -import org.opensearch.search.aggregations.Aggregation; import org.opensearch.search.aggregations.AggregationBuilders; -import org.opensearch.search.aggregations.Aggregations; -import org.opensearch.search.aggregations.bucket.missing.ParsedMissing; -import org.opensearch.search.aggregations.bucket.terms.ParsedTerms; -import org.opensearch.search.aggregations.bucket.terms.Terms; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; import org.opensearch.search.fetch.subphase.highlight.HighlightField; @@ -88,11 +72,9 @@ public class SearchRequestHandler { .setSkipHighlighting(false); private static final Map, SearchRequestHandler> REQUEST_HANDLER_BY_ENTITY_NAME = new ConcurrentHashMap<>(); - private static final String URN_FILTER = "urn"; private final List _entitySpecs; private final Set _defaultQueryFieldNames; private final HighlightBuilder _highlights; - private final Map _filtersToDisplayName; private final SearchConfiguration _configs; private final SearchQueryBuilder _searchQueryBuilder; @@ -111,16 +93,16 @@ private SearchRequestHandler( @Nonnull SearchConfiguration configs, @Nullable CustomSearchConfiguration customSearchConfiguration) { _entitySpecs = entitySpecs; - List annotations = getSearchableAnnotations(); + Map> entitySearchAnnotations = + getSearchableAnnotations(); + List annotations = + entitySearchAnnotations.values().stream() + .flatMap(List::stream) + .collect(Collectors.toList()); _defaultQueryFieldNames = getDefaultQueryFieldNames(annotations); - _filtersToDisplayName = - annotations.stream() - .flatMap(annotation -> getFacetFieldDisplayNameFromAnnotation(annotation).stream()) - .collect(Collectors.toMap(Pair::getFirst, Pair::getSecond, mapMerger())); - _filtersToDisplayName.put(INDEX_VIRTUAL_FIELD, "Type"); _highlights = getHighlights(); _searchQueryBuilder = new SearchQueryBuilder(configs, customSearchConfiguration); - _aggregationQueryBuilder = new 
AggregationQueryBuilder(configs, annotations); + _aggregationQueryBuilder = new AggregationQueryBuilder(configs, entitySearchAnnotations); _configs = configs; searchableFieldTypes = _entitySpecs.stream() @@ -153,12 +135,16 @@ public static SearchRequestHandler getBuilder( k -> new SearchRequestHandler(entitySpecs, configs, customSearchConfiguration)); } - private List getSearchableAnnotations() { + private Map> getSearchableAnnotations() { return _entitySpecs.stream() - .map(EntitySpec::getSearchableFieldSpecs) - .flatMap(List::stream) - .map(SearchableFieldSpec::getSearchableAnnotation) - .collect(Collectors.toList()); + .map( + spec -> + Pair.of( + spec, + spec.getSearchableFieldSpecs().stream() + .map(SearchableFieldSpec::getSearchableAnnotation) + .collect(Collectors.toList()))) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); } @VisibleForTesting @@ -171,16 +157,6 @@ private Set getDefaultQueryFieldNames(List annotat .collect(Collectors.toSet()); } - // If values are not equal, throw error - private BinaryOperator mapMerger() { - return (s1, s2) -> { - if (!StringUtils.equals(s1, s2)) { - throw new IllegalStateException(String.format("Unable to merge values %s and %s", s1, s2)); - } - return s1; - }; - } - public BoolQueryBuilder getFilterQuery(@Nullable Filter filter) { return getFilterQuery(filter, searchableFieldTypes); } @@ -327,42 +303,6 @@ public SearchRequest getFilterRequest( return searchRequest; } - /** - * Returns a {@link SearchRequest} given filters to be applied to search query and sort criterion - * to be applied to search results. - * - *

TODO: Used in batch ingestion from ingestion scheduler - * - * @param filters {@link Filter} list of conditions with fields and values - * @param sortCriterion {@link SortCriterion} to be applied to the search results - * @param sort sort values from last result of previous request - * @param pitId the Point In Time Id of the previous request - * @param keepAlive string representation of time to keep point in time alive - * @param size the number of search hits to return - * @return {@link SearchRequest} that contains the filtered query - */ - @Nonnull - public SearchRequest getFilterRequest( - @Nullable Filter filters, - @Nullable SortCriterion sortCriterion, - @Nullable Object[] sort, - @Nullable String pitId, - @Nonnull String keepAlive, - int size) { - SearchRequest searchRequest = new SearchRequest(); - - BoolQueryBuilder filterQuery = getFilterQuery(filters); - final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); - searchSourceBuilder.query(filterQuery); - searchSourceBuilder.size(size); - - ESUtils.setSearchAfter(searchSourceBuilder, sort, pitId, keepAlive); - ESUtils.buildSortOrder(searchSourceBuilder, sortCriterion, _entitySpecs); - searchRequest.source(searchSourceBuilder); - - return searchRequest; - } - /** * Get search request to aggregate and get document counts per field value * @@ -558,7 +498,7 @@ private SearchResultMetadata extractSearchResultMetadata( new SearchResultMetadata().setAggregations(new AggregationMetadataArray()); final List aggregationMetadataList = - extractAggregationMetadata(searchResponse, filter); + _aggregationQueryBuilder.extractAggregationMetadata(searchResponse, filter); searchResultMetadata.setAggregations(new AggregationMetadataArray(aggregationMetadataList)); final List searchSuggestions = extractSearchSuggestions(searchResponse); @@ -588,301 +528,4 @@ private List extractSearchSuggestions(@Nonnull SearchResponse } return searchSuggestions; } - - private String computeDisplayName(String name) { - if (_filtersToDisplayName.containsKey(name)) { - return _filtersToDisplayName.get(name); - } else if (name.contains(AGGREGATION_SEPARATOR_CHAR)) { - return Arrays.stream(name.split(AGGREGATION_SEPARATOR_CHAR)) - .map(_filtersToDisplayName::get) - .collect(Collectors.joining(AGGREGATION_SEPARATOR_CHAR)); - } - return name; - } - - private List extractAggregationMetadata( - @Nonnull SearchResponse searchResponse, @Nullable Filter filter) { - final List aggregationMetadataList = new ArrayList<>(); - if (searchResponse.getAggregations() == null) { - return addFiltersToAggregationMetadata(aggregationMetadataList, filter); - } - for (Map.Entry entry : - searchResponse.getAggregations().getAsMap().entrySet()) { - if (entry.getValue() instanceof ParsedTerms) { - processTermAggregations(entry, aggregationMetadataList); - } - if (entry.getValue() instanceof ParsedMissing) { - processMissingAggregations(entry, aggregationMetadataList); - } - } - return addFiltersToAggregationMetadata(aggregationMetadataList, filter); - } - - private void processTermAggregations( - final Map.Entry entry, - final List aggregationMetadataList) { - final Map oneTermAggResult = - extractTermAggregations( - (ParsedTerms) entry.getValue(), entry.getKey().equals(INDEX_VIRTUAL_FIELD)); - if (oneTermAggResult.isEmpty()) { - return; - } - final AggregationMetadata aggregationMetadata = - new AggregationMetadata() - .setName(entry.getKey()) - .setDisplayName(computeDisplayName(entry.getKey())) - .setAggregations(new LongMap(oneTermAggResult)) - .setFilterValues( - new 
FilterValueArray( - SearchUtil.convertToFilters(oneTermAggResult, Collections.emptySet()))); - aggregationMetadataList.add(aggregationMetadata); - } - - private void processMissingAggregations( - final Map.Entry entry, - final List aggregationMetadataList) { - ParsedMissing parsedMissing = (ParsedMissing) entry.getValue(); - Long docCount = parsedMissing.getDocCount(); - LongMap longMap = new LongMap(); - longMap.put(entry.getKey(), docCount); - final AggregationMetadata aggregationMetadata = - new AggregationMetadata() - .setName(entry.getKey()) - .setDisplayName(computeDisplayName(entry.getKey())) - .setAggregations(longMap) - .setFilterValues( - new FilterValueArray(SearchUtil.convertToFilters(longMap, Collections.emptySet()))); - aggregationMetadataList.add(aggregationMetadata); - } - - @WithSpan - public static Map extractAggregationsFromResponse( - @Nonnull SearchResponse searchResponse, @Nonnull String aggregationName) { - if (searchResponse.getAggregations() == null) { - return Collections.emptyMap(); - } - - Aggregation aggregation = searchResponse.getAggregations().get(aggregationName); - if (aggregation == null) { - return Collections.emptyMap(); - } - if (aggregation instanceof ParsedTerms) { - return extractTermAggregations( - (ParsedTerms) aggregation, aggregationName.equals("_entityType")); - } else if (aggregation instanceof ParsedMissing) { - return Collections.singletonMap( - aggregation.getName(), ((ParsedMissing) aggregation).getDocCount()); - } - throw new UnsupportedOperationException( - "Unsupported aggregation type: " + aggregation.getClass().getName()); - } - - /** - * Adds nested sub-aggregation values to the aggregated results - * - * @param aggs The aggregations to traverse. Could be null (base case) - * @return A map from names to aggregation count values - */ - @Nonnull - private static Map recursivelyAddNestedSubAggs(@Nullable Aggregations aggs) { - final Map aggResult = new HashMap<>(); - - if (aggs != null) { - for (Map.Entry entry : aggs.getAsMap().entrySet()) { - if (entry.getValue() instanceof ParsedTerms) { - recurseTermsAgg((ParsedTerms) entry.getValue(), aggResult, false); - } else if (entry.getValue() instanceof ParsedMissing) { - recurseMissingAgg((ParsedMissing) entry.getValue(), aggResult); - } else { - throw new UnsupportedOperationException( - "Unsupported aggregation type: " + entry.getValue().getClass().getName()); - } - } - } - return aggResult; - } - - private static void recurseTermsAgg( - ParsedTerms terms, Map aggResult, boolean includeZeroes) { - List bucketList = terms.getBuckets(); - bucketList.forEach(bucket -> processTermBucket(bucket, aggResult, includeZeroes)); - } - - private static void processTermBucket( - Terms.Bucket bucket, Map aggResult, boolean includeZeroes) { - String key = bucket.getKeyAsString(); - // Gets filtered sub aggregation doc count if exist - Map subAggs = recursivelyAddNestedSubAggs(bucket.getAggregations()); - subAggs.forEach( - (entryKey, entryValue) -> - aggResult.put( - String.format("%s%s%s", key, AGGREGATION_SEPARATOR_CHAR, entryKey), entryValue)); - long docCount = bucket.getDocCount(); - if (includeZeroes || docCount > 0) { - aggResult.put(key, docCount); - } - } - - private static void recurseMissingAgg(ParsedMissing missing, Map aggResult) { - Map subAggs = recursivelyAddNestedSubAggs(missing.getAggregations()); - subAggs.forEach( - (key, value) -> - aggResult.put( - String.format("%s%s%s", missing.getName(), AGGREGATION_SEPARATOR_CHAR, key), - value)); - long docCount = missing.getDocCount(); - if 
(docCount > 0) { - aggResult.put(missing.getName(), docCount); - } - } - - /** - * Extracts term aggregations give a parsed term. - * - * @param terms an abstract parse term, input can be either ParsedStringTerms ParsedLongTerms - * @return a map with aggregation key and corresponding doc counts - */ - @Nonnull - private static Map extractTermAggregations( - @Nonnull ParsedTerms terms, boolean includeZeroes) { - - final Map aggResult = new HashMap<>(); - recurseTermsAgg(terms, aggResult, includeZeroes); - - return aggResult; - } - - /** Injects the missing conjunctive filters into the aggregations list. */ - public List addFiltersToAggregationMetadata( - @Nonnull final List originalMetadata, @Nullable final Filter filter) { - if (filter == null) { - return originalMetadata; - } - if (filter.getOr() != null) { - addOrFiltersToAggregationMetadata(filter.getOr(), originalMetadata); - } else if (filter.getCriteria() != null) { - addCriteriaFiltersToAggregationMetadata(filter.getCriteria(), originalMetadata); - } - return originalMetadata; - } - - void addOrFiltersToAggregationMetadata( - @Nonnull final ConjunctiveCriterionArray or, - @Nonnull final List originalMetadata) { - for (ConjunctiveCriterion conjunction : or) { - // For each item in the conjunction, inject an empty aggregation if necessary - addCriteriaFiltersToAggregationMetadata(conjunction.getAnd(), originalMetadata); - } - } - - private void addCriteriaFiltersToAggregationMetadata( - @Nonnull final CriterionArray criteria, - @Nonnull final List originalMetadata) { - for (Criterion criterion : criteria) { - addCriterionFiltersToAggregationMetadata(criterion, originalMetadata); - } - } - - private void addCriterionFiltersToAggregationMetadata( - @Nonnull final Criterion criterion, - @Nonnull final List aggregationMetadata) { - - // We should never see duplicate aggregation for the same field in aggregation metadata list. - final Map aggregationMetadataMap = - aggregationMetadata.stream() - .collect(Collectors.toMap(AggregationMetadata::getName, agg -> agg)); - - // Map a filter criterion to a facet field (e.g. domains.keyword -> domains) - final String finalFacetField = toFacetField(criterion.getField()); - - if (finalFacetField == null) { - log.warn( - String.format( - "Found invalid filter field for entity search. Invalid or unrecognized facet %s", - criterion.getField())); - return; - } - - // We don't want to add urn filters to the aggregations we return as a sidecar to search - // results. - // They are automatically added by searchAcrossLineage and we dont need them to show up in the - // filter panel. - if (finalFacetField.equals(URN_FILTER)) { - return; - } - - if (aggregationMetadataMap.containsKey(finalFacetField)) { - /* - * If we already have aggregations for the facet field, simply inject any missing values counts into the set. - * If there are no results for a particular facet value, it will NOT be in the original aggregation set returned by - * Elasticsearch. - */ - AggregationMetadata originalAggMetadata = aggregationMetadataMap.get(finalFacetField); - if (criterion.hasValues()) { - criterion - .getValues() - .forEach( - value -> - addMissingAggregationValueToAggregationMetadata(value, originalAggMetadata)); - } else { - addMissingAggregationValueToAggregationMetadata(criterion.getValue(), originalAggMetadata); - } - } else { - /* - * If we do not have ANY aggregation for the facet field, then inject a new aggregation metadata object for the - * facet field. 
- * If there are no results for a particular facet, it will NOT be in the original aggregation set returned by - * Elasticsearch. - */ - aggregationMetadata.add( - buildAggregationMetadata( - finalFacetField, - _filtersToDisplayName.getOrDefault(finalFacetField, finalFacetField), - new LongMap( - criterion.getValues().stream().collect(Collectors.toMap(i -> i, i -> 0L))), - new FilterValueArray( - criterion.getValues().stream() - .map(value -> createFilterValue(value, 0L, true)) - .collect(Collectors.toList())))); - } - } - - private void addMissingAggregationValueToAggregationMetadata( - @Nonnull final String value, @Nonnull final AggregationMetadata originalMetadata) { - if (originalMetadata.getAggregations().entrySet().stream() - .noneMatch(entry -> value.equals(entry.getKey())) - || originalMetadata.getFilterValues().stream() - .noneMatch(entry -> entry.getValue().equals(value))) { - // No aggregation found for filtered value -- inject one! - originalMetadata.getAggregations().put(value, 0L); - originalMetadata.getFilterValues().add(createFilterValue(value, 0L, true)); - } - } - - private AggregationMetadata buildAggregationMetadata( - @Nonnull final String facetField, - @Nonnull final String displayName, - @Nonnull final LongMap aggValues, - @Nonnull final FilterValueArray filterValues) { - return new AggregationMetadata() - .setName(facetField) - .setDisplayName(displayName) - .setAggregations(aggValues) - .setFilterValues(filterValues); - } - - private List> getFacetFieldDisplayNameFromAnnotation( - @Nonnull final SearchableAnnotation annotation) { - final List> facetsFromAnnotation = new ArrayList<>(); - // Case 1: Default Keyword field - if (annotation.isAddToFilters()) { - facetsFromAnnotation.add(Pair.of(annotation.getFieldName(), annotation.getFilterName())); - } - // Case 2: HasX boolean field - if (annotation.isAddHasValuesToFilters() && annotation.getHasValuesFieldName().isPresent()) { - facetsFromAnnotation.add( - Pair.of(annotation.getHasValuesFieldName().get(), annotation.getHasValuesFilterName())); - } - return facetsFromAnnotation; - } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/BrowsePathUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/BrowsePathUtils.java index af0f537de86292..4152122c381dab 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/BrowsePathUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/BrowsePathUtils.java @@ -29,8 +29,7 @@ public class BrowsePathUtils { public static String getDefaultBrowsePath( @Nonnull Urn urn, @Nonnull EntityRegistry entityRegistry, - @Nonnull Character dataPlatformDelimiter) - throws URISyntaxException { + @Nonnull Character dataPlatformDelimiter) { switch (urn.getEntityType()) { case Constants.DATASET_ENTITY_NAME: diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/BrowsePathV2Utils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/BrowsePathV2Utils.java index 961167663e11f7..a531c268ed7d29 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/BrowsePathV2Utils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/BrowsePathV2Utils.java @@ -16,7 +16,6 @@ import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.utils.EntityKeyUtils; -import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -46,9 +45,8 @@ 
public static BrowsePathsV2 getDefaultBrowsePathV2( @Nonnull Urn urn, @Nonnull EntityRegistry entityRegistry, @Nonnull Character dataPlatformDelimiter, - @Nonnull EntityService entityService, - boolean useContainerPaths) - throws URISyntaxException { + @Nonnull EntityService entityService, + boolean useContainerPaths) { BrowsePathsV2 result = new BrowsePathsV2(); BrowsePathEntryArray browsePathEntries = new BrowsePathEntryArray(); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java index ed633b063afb21..3c73d1acab5c25 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java @@ -160,8 +160,8 @@ private void handleUpdateChangeEvent(@Nonnull final MCLBatchItem event) throws I final AspectSpec aspectSpec = event.getAspectSpec(); final Urn urn = event.getUrn(); - RecordTemplate aspect = event.getAspect(); - RecordTemplate previousAspect = event.getPreviousAspect(); + RecordTemplate aspect = event.getRecordTemplate(); + RecordTemplate previousAspect = event.getPreviousRecordTemplate(); // Step 0. If the aspect is timeseries, add to its timeseries index. if (aspectSpec.isTimeseries()) { @@ -264,7 +264,7 @@ private void handleDeleteChangeEvent(@Nonnull final MCLBatchItem event) { urn.getEntityType(), event.getAspectName())); } - RecordTemplate aspect = event.getAspect(); + RecordTemplate aspect = event.getRecordTemplate(); Boolean isDeletingKey = event.getAspectName().equals(entitySpec.getKeyAspectName()); if (!aspectSpec.isTimeseries()) { diff --git a/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java b/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java index 451b7327224986..72bbc794171ff9 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java @@ -38,7 +38,7 @@ public static Map ingestCorpUserKeyAspects( MCPUpsertBatchItem.builder() .urn(urn) .aspectName(aspectName) - .aspect(aspect) + .recordTemplate(aspect) .auditStamp(AspectGenerationUtils.createAuditStamp()) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .build(entityService)); @@ -68,7 +68,7 @@ public static Map ingestCorpUserInfoAspects( MCPUpsertBatchItem.builder() .urn(urn) .aspectName(aspectName) - .aspect(aspect) + .recordTemplate(aspect) .auditStamp(AspectGenerationUtils.createAuditStamp()) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .build(entityService)); @@ -99,7 +99,7 @@ public static Map ingestChartInfoAspects( MCPUpsertBatchItem.builder() .urn(urn) .aspectName(aspectName) - .aspect(aspect) + .recordTemplate(aspect) .auditStamp(AspectGenerationUtils.createAuditStamp()) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) .build(entityService)); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/AspectUtilsTest.java b/metadata-io/src/test/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtilTest.java similarity index 56% rename from metadata-io/src/test/java/com/linkedin/metadata/AspectUtilsTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtilTest.java index 258b40cac63715..308832a9c63ef0 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/AspectUtilsTest.java +++ 
b/metadata-io/src/test/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtilTest.java @@ -1,31 +1,35 @@ -package com.linkedin.metadata; +package com.linkedin.metadata.aspect.utils; import static org.mockito.Mockito.*; +import com.linkedin.common.AuditStamp; import com.linkedin.common.FabricType; import com.linkedin.common.urn.DataPlatformUrn; import com.linkedin.common.urn.DatasetUrn; -import com.linkedin.dataset.DatasetProperties; import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.EbeanTestUtils; +import com.linkedin.metadata.aspect.batch.MCPBatchItem; +import com.linkedin.metadata.aspect.patch.builder.DatasetPropertiesPatchBuilder; +import com.linkedin.metadata.config.EbeanConfiguration; import com.linkedin.metadata.config.PreProcessHooks; -import com.linkedin.metadata.entity.AspectUtils; import com.linkedin.metadata.entity.EntityServiceImpl; import com.linkedin.metadata.entity.TestEntityRegistry; import com.linkedin.metadata.entity.ebean.EbeanAspectDao; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistryException; import com.linkedin.metadata.models.registry.MergedEntityRegistry; import com.linkedin.metadata.snapshot.Snapshot; -import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.MetadataChangeProposal; import io.ebean.Database; import java.util.List; +import java.util.stream.Collectors; import org.testng.Assert; import org.testng.annotations.Test; -public class AspectUtilsTest { +public class DefaultAspectsUtilTest { protected final EntityRegistry _snapshotEntityRegistry = new TestEntityRegistry(); protected final EntityRegistry _configEntityRegistry = @@ -34,31 +38,41 @@ public class AspectUtilsTest { protected final EntityRegistry _testEntityRegistry = new MergedEntityRegistry(_snapshotEntityRegistry).apply(_configEntityRegistry); - public AspectUtilsTest() throws EntityRegistryException {} + public DefaultAspectsUtilTest() throws EntityRegistryException {} @Test public void testAdditionalChanges() { - Database server = EbeanTestUtils.createTestServer(AspectUtilsTest.class.getSimpleName()); - EbeanAspectDao aspectDao = new EbeanAspectDao(server); + Database server = EbeanTestUtils.createTestServer(DefaultAspectsUtilTest.class.getSimpleName()); + EbeanAspectDao aspectDao = new EbeanAspectDao(server, EbeanConfiguration.testDefault); aspectDao.setConnectionValidated(true); EventProducer mockProducer = mock(EventProducer.class); PreProcessHooks preProcessHooks = new PreProcessHooks(); preProcessHooks.setUiEnabled(true); EntityServiceImpl entityServiceImpl = new EntityServiceImpl( - aspectDao, mockProducer, _testEntityRegistry, true, null, preProcessHooks); + aspectDao, mockProducer, _testEntityRegistry, true, null, preProcessHooks, false); - MetadataChangeProposal proposal1 = new MetadataChangeProposal(); - proposal1.setEntityUrn( - new DatasetUrn(new DataPlatformUrn("platform"), "name", FabricType.PROD)); - proposal1.setAspectName("datasetProperties"); - DatasetProperties datasetProperties = new DatasetProperties().setName("name"); - proposal1.setAspect(GenericRecordUtils.serializeAspect(datasetProperties)); - proposal1.setEntityType("dataset"); - proposal1.setChangeType(ChangeType.PATCH); + MetadataChangeProposal proposal1 = + new DatasetPropertiesPatchBuilder() + .urn(new 
DatasetUrn(new DataPlatformUrn("platform"), "name", FabricType.PROD)) + .setDescription("something") + .setName("name") + .addCustomProperty("prop1", "propVal1") + .addCustomProperty("prop2", "propVal2") + .build(); + + Assert.assertEquals(proposal1.getChangeType(), ChangeType.PATCH); List proposalList = - AspectUtils.getAdditionalChanges(proposal1, entityServiceImpl); + DefaultAspectsUtil.getAdditionalChanges( + AspectsBatchImpl.builder() + .mcps(List.of(proposal1), new AuditStamp(), entityServiceImpl) + .build(), + entityServiceImpl, + false) + .stream() + .map(MCPBatchItem::getMetadataChangeProposal) + .collect(Collectors.toList()); // proposals for key aspect, browsePath, browsePathV2, dataPlatformInstance Assert.assertEquals(proposalList.size(), 4); Assert.assertEquals(proposalList.get(0).getChangeType(), ChangeType.UPSERT); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraAspectMigrationsDaoTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraAspectMigrationsDaoTest.java index d94de604bf44de..d191ea2b9fa971 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraAspectMigrationsDaoTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraAspectMigrationsDaoTest.java @@ -54,7 +54,8 @@ private void configureComponents() { _testEntityRegistry, true, _mockUpdateIndicesService, - preProcessHooks); + preProcessHooks, + true); _retentionService = new CassandraRetentionService(_entityServiceImpl, session, 1000); _entityServiceImpl.setRetentionService(_retentionService); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java index bad47f9acf507c..8d30fb02915c70 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java @@ -77,7 +77,8 @@ private void configureComponents() { _testEntityRegistry, false, _mockUpdateIndicesService, - preProcessHooks); + preProcessHooks, + true); _retentionService = new CassandraRetentionService(_entityServiceImpl, session, 1000); _entityServiceImpl.setRetentionService(_retentionService); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java index 496744770dba8a..42fa2acb542375 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java @@ -59,7 +59,8 @@ public DeleteEntityServiceTest() { _entityRegistry, true, _mockUpdateIndicesService, - preProcessHooks); + preProcessHooks, + true); _deleteEntityService = new DeleteEntityService(_entityServiceImpl, _graphService); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java index 2430ebb1f94bec..d241fb3b9581b4 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java @@ -7,6 +7,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.metadata.AspectIngestionUtils; import com.linkedin.metadata.EbeanTestUtils; +import 
com.linkedin.metadata.config.EbeanConfiguration; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.ebean.EbeanAspectDao; import com.linkedin.metadata.entity.ebean.EbeanRetentionService; @@ -32,7 +33,7 @@ public void setupTest() { Database server = EbeanTestUtils.createTestServer(EbeanAspectMigrationsDaoTest.class.getSimpleName()); _mockProducer = mock(EventProducer.class); - EbeanAspectDao dao = new EbeanAspectDao(server); + EbeanAspectDao dao = new EbeanAspectDao(server, EbeanConfiguration.testDefault); dao.setConnectionValidated(true); _mockUpdateIndicesService = mock(UpdateIndicesService.class); PreProcessHooks preProcessHooks = new PreProcessHooks(); @@ -44,7 +45,8 @@ public void setupTest() { _testEntityRegistry, true, _mockUpdateIndicesService, - preProcessHooks); + preProcessHooks, + true); _retentionService = new EbeanRetentionService(_entityServiceImpl, server, 1000); _entityServiceImpl.setRetentionService(_retentionService); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java index c45306e5f022bb..1e2cf4d4255d2e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java @@ -13,6 +13,7 @@ import com.linkedin.metadata.AspectGenerationUtils; import com.linkedin.metadata.Constants; import com.linkedin.metadata.EbeanTestUtils; +import com.linkedin.metadata.config.EbeanConfiguration; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.ebean.EbeanAspectDao; import com.linkedin.metadata.entity.ebean.EbeanRetentionService; @@ -63,7 +64,7 @@ public void setupTest() { Database server = EbeanTestUtils.createTestServer(EbeanEntityServiceTest.class.getSimpleName()); _mockProducer = mock(EventProducer.class); - _aspectDao = new EbeanAspectDao(server); + _aspectDao = new EbeanAspectDao(server, EbeanConfiguration.testDefault); _mockUpdateIndicesService = mock(UpdateIndicesService.class); PreProcessHooks preProcessHooks = new PreProcessHooks(); @@ -75,7 +76,8 @@ public void setupTest() { _testEntityRegistry, false, _mockUpdateIndicesService, - preProcessHooks); + preProcessHooks, + true); _retentionService = new EbeanRetentionService(_entityServiceImpl, server, 1000); _entityServiceImpl.setRetentionService(_retentionService); } @@ -121,21 +123,21 @@ public void testIngestListLatestAspects() throws AssertionError { MCPUpsertBatchItem.builder() .urn(entityUrn1) .aspectName(aspectName) - .aspect(writeAspect1) + .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl), MCPUpsertBatchItem.builder() .urn(entityUrn2) .aspectName(aspectName) - .aspect(writeAspect2) + .recordTemplate(writeAspect2) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl), MCPUpsertBatchItem.builder() .urn(entityUrn3) .aspectName(aspectName) - .aspect(writeAspect3) + .recordTemplate(writeAspect3) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl)); @@ -190,21 +192,21 @@ public void testIngestListUrns() throws AssertionError { MCPUpsertBatchItem.builder() .urn(entityUrn1) .aspectName(aspectName) - .aspect(writeAspect1) + .recordTemplate(writeAspect1) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl), MCPUpsertBatchItem.builder() .urn(entityUrn2) 
.aspectName(aspectName) - .aspect(writeAspect2) + .recordTemplate(writeAspect2) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl), MCPUpsertBatchItem.builder() .urn(entityUrn3) .aspectName(aspectName) - .aspect(writeAspect3) + .recordTemplate(writeAspect3) .systemMetadata(metadata1) .auditStamp(TEST_AUDIT_STAMP) .build(_entityServiceImpl)); @@ -311,6 +313,12 @@ public void multiThreadingTest() { Set> additions = actualAspectIds.stream() .filter(id -> !generatedAspectIds.contains(id)) + // Exclude default aspects + .filter( + id -> + !Set.of("browsePaths", "browsePathsV2", "dataPlatformInstance") + .contains(id.getMiddle())) + .filter(id -> !id.getMiddle().endsWith("Key")) .collect(Collectors.toSet()); assertEquals( additions.size(), 0, String.format("Expected no additional aspects. Found: %s", additions)); @@ -361,6 +369,12 @@ public void singleThreadingTest() { Set> additions = actualAspectIds.stream() .filter(id -> !generatedAspectIds.contains(id)) + // Exclude default aspects + .filter( + id -> + !Set.of("browsePaths", "browsePathsV2", "dataPlatformInstance") + .contains(id.getMiddle())) + .filter(id -> !id.getMiddle().endsWith("Key")) .collect(Collectors.toSet()); assertEquals( additions.size(), 0, String.format("Expected no additional aspects. Found: %s", additions)); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index db749f3575a064..ea4e97d264bca3 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -108,6 +108,8 @@ public abstract class EntityServiceTest captor = ArgumentCaptor.forClass(MetadataChangeLog.class); - verify(_mockProducer, times(1)) - .produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), captor.capture()); + ArgumentCaptor aspectSpecCaptor = ArgumentCaptor.forClass(AspectSpec.class); + verify(_mockProducer, times(4)) + .produceMetadataChangeLog( + Mockito.eq(entityUrn), aspectSpecCaptor.capture(), captor.capture()); assertEquals(UI_SOURCE, captor.getValue().getSystemMetadata().getProperties().get(APP_SOURCE)); + assertEquals( + aspectSpecCaptor.getAllValues().stream() + .map(AspectSpec::getName) + .collect(Collectors.toSet()), + Set.of( + "browsePathsV2", + "editableDatasetProperties", + // "browsePaths", + "dataPlatformInstance", + "datasetKey")); } @Test @@ -1673,12 +1688,17 @@ public void testStructuredPropertyIngestProposal() throws Exception { genericAspect.setContentType("application/json"); gmce.setAspect(genericAspect); _entityServiceImpl.ingestProposal(gmce, TEST_AUDIT_STAMP, false); + ArgumentCaptor captor = ArgumentCaptor.forClass(MetadataChangeLog.class); verify(_mockProducer, times(1)) - .produceMetadataChangeLog(Mockito.eq(firstPropertyUrn), Mockito.any(), captor.capture()); + .produceMetadataChangeLog( + Mockito.eq(firstPropertyUrn), + Mockito.eq(structuredPropertiesDefinitionAspect), + captor.capture()); assertEquals( _entityServiceImpl.getAspect(firstPropertyUrn, definitionAspectName, 0), structuredPropertyDefinition); + Urn secondPropertyUrn = UrnUtils.getUrn("urn:li:structuredProperty:secondStructuredProperty"); assertNull(_entityServiceImpl.getAspect(secondPropertyUrn, definitionAspectName, 0)); assertEquals( @@ -1752,7 +1772,9 @@ public void testStructuredPropertyIngestProposal() throws Exception { ArgumentCaptor.forClass(MetadataChangeLog.class); 
verify(_mockProducer, times(1)) .produceMetadataChangeLog( - Mockito.eq(secondPropertyUrn), Mockito.any(), secondCaptor.capture()); + Mockito.eq(secondPropertyUrn), + Mockito.eq(structuredPropertiesDefinitionAspect), + secondCaptor.capture()); assertEquals( _entityServiceImpl.getAspect(firstPropertyUrn, definitionAspectName, 0), structuredPropertyDefinition); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationCandidateSourceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationCandidateSourceTest.java index dcc59d06329544..2d60f3202b69f5 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationCandidateSourceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationCandidateSourceTest.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.recommendation.candidatesource; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.eq; import static org.testng.Assert.assertEquals; @@ -11,6 +12,7 @@ import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.TestEntityUrn; import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.recommendation.RecommendationContent; import com.linkedin.metadata.recommendation.RecommendationParams; @@ -29,6 +31,7 @@ public class EntitySearchAggregationCandidateSourceTest { private EntitySearchService _entitySearchService = Mockito.mock(EntitySearchService.class); + private EntityRegistry entityRegistry = Mockito.mock(EntityRegistry.class); private EntitySearchAggregationSource _valueBasedCandidateSource; private EntitySearchAggregationSource _urnBasedCandidateSource; @@ -45,7 +48,7 @@ public void setup() { private EntitySearchAggregationSource buildCandidateSource( String identifier, boolean isValueUrn) { - return new EntitySearchAggregationSource(_entitySearchService) { + return new EntitySearchAggregationSource(_entitySearchService, entityRegistry) { @Override protected String getSearchFieldName() { return identifier; @@ -98,8 +101,7 @@ public void testWhenSearchServiceReturnsEmpty() { @Test public void testWhenSearchServiceReturnsValueResults() { // One result - Mockito.when( - _entitySearchService.aggregateByValue(eq(null), eq("testValue"), eq(null), anyInt())) + Mockito.when(_entitySearchService.aggregateByValue(any(), eq("testValue"), eq(null), anyInt())) .thenReturn(ImmutableMap.of("value1", 1L)); List candidates = _valueBasedCandidateSource.getRecommendations(USER, CONTEXT); @@ -120,8 +122,7 @@ public void testWhenSearchServiceReturnsValueResults() { assertTrue(_valueBasedCandidateSource.getRecommendationModule(USER, CONTEXT).isPresent()); // Multiple result - Mockito.when( - _entitySearchService.aggregateByValue(eq(null), eq("testValue"), eq(null), anyInt())) + Mockito.when(_entitySearchService.aggregateByValue(any(), eq("testValue"), eq(null), anyInt())) .thenReturn(ImmutableMap.of("value1", 1L, "value2", 2L, "value3", 3L)); candidates = _valueBasedCandidateSource.getRecommendations(USER, CONTEXT); assertEquals(candidates.size(), 2); @@ -160,7 +161,7 @@ public void testWhenSearchServiceReturnsUrnResults() { Urn testUrn1 = new TestEntityUrn("testUrn1", "testUrn1", "testUrn1"); Urn testUrn2 = new 
TestEntityUrn("testUrn2", "testUrn2", "testUrn2"); Urn testUrn3 = new TestEntityUrn("testUrn3", "testUrn3", "testUrn3"); - Mockito.when(_entitySearchService.aggregateByValue(eq(null), eq("testUrn"), eq(null), anyInt())) + Mockito.when(_entitySearchService.aggregateByValue(any(), eq("testUrn"), eq(null), anyInt())) .thenReturn(ImmutableMap.of(testUrn1.toString(), 1L)); List candidates = _urnBasedCandidateSource.getRecommendations(USER, CONTEXT); @@ -181,7 +182,7 @@ public void testWhenSearchServiceReturnsUrnResults() { assertTrue(_urnBasedCandidateSource.getRecommendationModule(USER, CONTEXT).isPresent()); // Multiple result - Mockito.when(_entitySearchService.aggregateByValue(eq(null), eq("testUrn"), eq(null), anyInt())) + Mockito.when(_entitySearchService.aggregateByValue(any(), eq("testUrn"), eq(null), anyInt())) .thenReturn( ImmutableMap.of( testUrn1.toString(), 1L, testUrn2.toString(), 2L, testUrn3.toString(), 3L)); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java index 9e8855622ced4b..ed4c9db5db6430 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AggregationQueryBuilderTest.java @@ -1,10 +1,13 @@ package com.linkedin.metadata.search.query.request; import static com.linkedin.metadata.utils.SearchUtil.*; +import static org.mockito.Mockito.mock; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation; import com.linkedin.metadata.search.elasticsearch.query.request.AggregationQueryBuilder; import java.util.Collections; @@ -42,7 +45,8 @@ public void testGetDefaultAggregationsHasFields() { config.setMaxTermBucketSize(25); AggregationQueryBuilder builder = - new AggregationQueryBuilder(config, ImmutableList.of(annotation)); + new AggregationQueryBuilder( + config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation))); List aggs = builder.getAggregations(); @@ -73,7 +77,8 @@ public void testGetDefaultAggregationsFields() { config.setMaxTermBucketSize(25); AggregationQueryBuilder builder = - new AggregationQueryBuilder(config, ImmutableList.of(annotation)); + new AggregationQueryBuilder( + config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation))); List aggs = builder.getAggregations(); @@ -120,7 +125,9 @@ public void testGetSpecificAggregationsHasFields() { config.setMaxTermBucketSize(25); AggregationQueryBuilder builder = - new AggregationQueryBuilder(config, ImmutableList.of(annotation1, annotation2)); + new AggregationQueryBuilder( + config, + ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation1, annotation2))); // Case 1: Ask for fields that should exist. 
List aggs = @@ -139,7 +146,9 @@ public void testAggregateOverStructuredProperty() { SearchConfiguration config = new SearchConfiguration(); config.setMaxTermBucketSize(25); - AggregationQueryBuilder builder = new AggregationQueryBuilder(config, List.of()); + AggregationQueryBuilder builder = + new AggregationQueryBuilder( + config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of())); List aggs = builder.getAggregations(List.of("structuredProperties.ab.fgh.ten")); @@ -202,7 +211,9 @@ public void testAggregateOverFieldsAndStructProp() { config.setMaxTermBucketSize(25); AggregationQueryBuilder builder = - new AggregationQueryBuilder(config, ImmutableList.of(annotation1, annotation2)); + new AggregationQueryBuilder( + config, + ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation1, annotation2))); // Aggregate over fields and structured properties List aggs = @@ -252,7 +263,8 @@ public void testMissingAggregation() { config.setMaxTermBucketSize(25); AggregationQueryBuilder builder = - new AggregationQueryBuilder(config, ImmutableList.of(annotation)); + new AggregationQueryBuilder( + config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of(annotation))); List aggs = builder.getAggregations(); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeline/CassandraTimelineServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeline/CassandraTimelineServiceTest.java index 921fbac12df854..552cb0b52994f9 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeline/CassandraTimelineServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeline/CassandraTimelineServiceTest.java @@ -61,7 +61,8 @@ private void configureComponents() { _testEntityRegistry, true, _mockUpdateIndicesService, - preProcessHooks); + preProcessHooks, + true); } /** diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeline/EbeanTimelineServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeline/EbeanTimelineServiceTest.java index 4e47e596dddc26..5d7137a52eb21e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeline/EbeanTimelineServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeline/EbeanTimelineServiceTest.java @@ -3,6 +3,7 @@ import static org.mockito.Mockito.mock; import com.linkedin.metadata.EbeanTestUtils; +import com.linkedin.metadata.config.EbeanConfiguration; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.EntityServiceImpl; import com.linkedin.metadata.entity.ebean.EbeanAspectDao; @@ -29,7 +30,7 @@ public EbeanTimelineServiceTest() throws EntityRegistryException {} public void setupTest() { Database server = EbeanTestUtils.createTestServer(EbeanTimelineServiceTest.class.getSimpleName()); - _aspectDao = new EbeanAspectDao(server); + _aspectDao = new EbeanAspectDao(server, EbeanConfiguration.testDefault); _aspectDao.setConnectionValidated(true); _entityTimelineService = new TimelineServiceImpl(_aspectDao, _testEntityRegistry); _mockProducer = mock(EventProducer.class); @@ -42,7 +43,8 @@ public void setupTest() { _testEntityRegistry, true, _mockUpdateIndicesService, - preProcessHooks); + preProcessHooks, + true); } /** diff --git a/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java b/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java index 29c64abdc4d0d0..eb4c85209ce422 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java +++ 
b/metadata-io/src/test/java/io/datahubproject/test/DataGenerator.java @@ -1,5 +1,6 @@ package io.datahubproject.test; +import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.Mockito.mock; import com.linkedin.common.AuditStamp; @@ -10,15 +11,18 @@ import com.linkedin.common.urn.GlossaryTermUrn; import com.linkedin.common.urn.TagUrn; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.RecordTemplate; import com.linkedin.events.metadata.ChangeType; import com.linkedin.glossary.GlossaryTermInfo; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.batch.MCPBatchItem; +import com.linkedin.metadata.aspect.utils.DefaultAspectsUtil; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.AspectDao; -import com.linkedin.metadata.entity.AspectUtils; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityServiceImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; @@ -51,11 +55,17 @@ public class DataGenerator { private static final Faker FAKER = new Faker(); private final EntityRegistry entityRegistry; - private final EntityService entityService; + private final EntityService entityService; + private final boolean generateDefaultAspects; - public DataGenerator(EntityService entityService) { + public DataGenerator(EntityService entityService) { + this(entityService, false); + } + + public DataGenerator(EntityService entityService, Boolean generateDefaultAspects) { this.entityService = entityService; this.entityRegistry = entityService.getEntityRegistry(); + this.generateDefaultAspects = generateDefaultAspects != null ? 
generateDefaultAspects : false; } public static DataGenerator build(EntityRegistry entityRegistry) { @@ -66,7 +76,8 @@ public static DataGenerator build(EntityRegistry entityRegistry) { entityRegistry, false, mock(UpdateIndicesService.class), - mock(PreProcessHooks.class)); + mock(PreProcessHooks.class), + anyBoolean()); return new DataGenerator(mockEntityServiceImpl); } @@ -81,10 +92,15 @@ public List generateTags(long count) { public Stream> generateMCPs( String entityName, long count, List aspects) { EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); + AuditStamp auditStamp = + new AuditStamp() + .setActor(UrnUtils.getUrn(Constants.DATAHUB_ACTOR)) + .setTime(System.currentTimeMillis()); // Prevent duplicate tags and terms generated as secondary entities Set secondaryUrns = new HashSet<>(); + // Expand with default aspects per normal return LongStream.range(0, count) .mapToObj( idx -> { @@ -145,11 +161,22 @@ public Stream> generateMCPs( }) .map( mcp -> { - // Expand with default aspects per normal - return Stream.concat( - Stream.of(mcp), - AspectUtils.getAdditionalChanges(mcp, entityService, true).stream()) - .collect(Collectors.toList()); + if (generateDefaultAspects) { + // Expand with default aspects instead of relying on default generation + return Stream.concat( + Stream.of(mcp), + DefaultAspectsUtil.getAdditionalChanges( + AspectsBatchImpl.builder() + .mcps(List.of(mcp), auditStamp, entityService) + .build(), + entityService, + true) + .stream() + .map(MCPBatchItem::getMetadataChangeProposal)) + .collect(Collectors.toList()); + } else { + return List.of(mcp); + } }); } diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java index 84433a2b439f41..b42cd89131f51f 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java @@ -296,7 +296,8 @@ private EntityClient entityClientHelper( PreProcessHooks preProcessHooks = new PreProcessHooks(); preProcessHooks.setUiEnabled(true); return new JavaEntityClient( - new EntityServiceImpl(mockAspectDao, null, entityRegistry, true, null, preProcessHooks), + new EntityServiceImpl( + mockAspectDao, null, entityRegistry, true, null, preProcessHooks, true), null, entitySearchService, cachingEntitySearchService, diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java index 978471b53faada..07d27245222b9e 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java @@ -234,7 +234,7 @@ protected EntityClient entityClient( PreProcessHooks preProcessHooks = new PreProcessHooks(); preProcessHooks.setUiEnabled(true); return new JavaEntityClient( - new EntityServiceImpl(null, null, entityRegistry, true, null, preProcessHooks), + new EntityServiceImpl(null, null, entityRegistry, true, null, preProcessHooks, true), null, entitySearchService, cachingEntitySearchService, diff --git a/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/restli/RestliServletConfig.java 
b/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/restli/RestliServletConfig.java index b41e6bc75af19c..269b9a41a89a9b 100644 --- a/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/restli/RestliServletConfig.java +++ b/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/restli/RestliServletConfig.java @@ -3,6 +3,7 @@ import com.datahub.auth.authentication.filter.AuthenticationFilter; import com.linkedin.gms.factory.auth.SystemAuthenticationFactory; import com.linkedin.restli.server.RestliHandlerServlet; +import java.util.Collections; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.web.servlet.FilterRegistrationBean; @@ -41,19 +42,19 @@ public RestliHandlerServlet restliHandlerServlet() { @Bean public FilterRegistrationBean authenticationFilterRegistrationBean( - @Qualifier("restliServletRegistration") - ServletRegistrationBean servlet) { + @Qualifier("restliServletRegistration") ServletRegistrationBean servlet, + AuthenticationFilter authenticationFilter) { FilterRegistrationBean registrationBean = new FilterRegistrationBean<>(); - registrationBean.addServletRegistrationBeans(servlet); + registrationBean.setServletRegistrationBeans(Collections.singletonList(servlet)); + registrationBean.setUrlPatterns(Collections.singletonList("/gms/*")); + registrationBean.setServletNames(Collections.singletonList(servlet.getServletName())); registrationBean.setOrder(1); + registrationBean.setFilter(authenticationFilter); return registrationBean; } @Bean - public AuthenticationFilter authenticationFilter( - FilterRegistrationBean filterReg) { - AuthenticationFilter filter = new AuthenticationFilter(); - filterReg.setFilter(filter); - return filter; + public AuthenticationFilter authenticationFilter() { + return new AuthenticationFilter(); } } diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java index d2041c443503ed..c21b64c8a4fc00 100644 --- a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java @@ -24,7 +24,7 @@ protected Stream applyMCPSideEffect( MCPUpsertBatchItem.builder() .urn(mirror) .aspectName(input.getAspectName()) - .aspect(input.getAspect()) + .recordTemplate(input.getRecordTemplate()) .auditStamp(input.getAuditStamp()) .systemMetadata(input.getSystemMetadata()) .build(aspectRetriever)); diff --git a/metadata-models/src/main/pegasus/com/linkedin/form/FormActorAssignment.pdl b/metadata-models/src/main/pegasus/com/linkedin/form/FormActorAssignment.pdl index e58eb4c7c56a81..f0ea0f4988298b 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/form/FormActorAssignment.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/form/FormActorAssignment.pdl @@ -7,15 +7,31 @@ record FormActorAssignment { * Whether the form should be assigned to the owners of assets that it is applied to. * This is the default. */ + @Searchable = { + "fieldName": "isOwnershipForm", + "fieldType": "BOOLEAN", + } owners: boolean = true /** * Optional: Specific set of groups that are targeted by this form assignment. 
*/ + @Searchable = { + "/*": { + "fieldName": "assignedGroups", + "fieldType": "URN" + } + } groups: optional array[Urn] /** * Optional: Specific set of users that are targeted by this form assignment. */ + @Searchable = { + "/*": { + "fieldName": "assignedUsers", + "fieldType": "URN" + } + } users: optional array[Urn] } \ No newline at end of file diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/HealthStatusAuthenticator.java b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/HealthStatusAuthenticator.java index 65581f1d5b6352..017ab25bc7b7c3 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/HealthStatusAuthenticator.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/authenticator/HealthStatusAuthenticator.java @@ -24,7 +24,8 @@ */ @Slf4j public class HealthStatusAuthenticator implements Authenticator { - private static final Set HEALTH_ENDPOINTS = Set.of("/openapi/check/", "/openapi/up/"); + private static final Set HEALTH_ENDPOINTS = + Set.of("/openapi/check/", "/openapi/up/", "/actuator/health", "/health"); private String systemClientId; @Override diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java index e072a59ae77ffd..0d1da4a7687bae 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java @@ -10,7 +10,6 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; -import com.linkedin.metadata.entity.AspectUtils; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.key.DataHubAccessTokenKey; @@ -20,12 +19,11 @@ import java.util.Base64; import java.util.Date; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Objects; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; -import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; @@ -146,15 +144,8 @@ public String generateAccessToken( final AuditStamp auditStamp = AuditStampUtils.createDefaultAuditStamp().setActor(UrnUtils.getUrn(actorUrn)); - Stream proposalStream = - Stream.concat( - Stream.of(proposal), - AspectUtils.getAdditionalChanges(proposal, _entityService).stream()); - _entityService.ingestProposal( - AspectsBatchImpl.builder() - .mcps(proposalStream.collect(Collectors.toList()), auditStamp, _entityService) - .build(), + AspectsBatchImpl.builder().mcps(List.of(proposal), auditStamp, _entityService).build(), false); return accessToken; diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/EbeanConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/EbeanConfiguration.java new file mode 100644 index 00000000000000..47b406e695a3fb --- /dev/null +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/EbeanConfiguration.java @@ -0,0 +1,46 @@ +package com.linkedin.metadata.config; + +import lombok.AllArgsConstructor; +import 
lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@AllArgsConstructor +@NoArgsConstructor +public class EbeanConfiguration { + private String username; + private String password; + private String url; + private String driver; + private long minConnections; + private long maxConnections; + private long maxInactiveTimeSeconds; + private long maxAgeMinutes; + private long leakTimeMinutes; + private long waitTimeoutMillis; + private boolean autoCreateDdl; + private boolean postgresUseIamAuth; + private LockingConfiguration locking; + + public static final EbeanConfiguration testDefault = + EbeanConfiguration.builder().locking(LockingConfiguration.testDefault).build(); + + @Data + @Builder + @AllArgsConstructor + @NoArgsConstructor + public static class LockingConfiguration { + private boolean enabled; + private long durationSeconds; + private long maximumLocks; + + public static final LockingConfiguration testDefault = + LockingConfiguration.builder() + .enabled(true) + .durationSeconds(60) + .maximumLocks(10000) + .build(); + } +} diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml index a7222f2adc3c60..d4c11d4aa53bd0 100644 --- a/metadata-service/configuration/src/main/resources/application.yml +++ b/metadata-service/configuration/src/main/resources/application.yml @@ -141,6 +141,10 @@ ebean: waitTimeoutMillis: ${EBEAN_WAIT_TIMEOUT_MILLIS:1000} autoCreateDdl: ${EBEAN_AUTOCREATE:false} postgresUseIamAuth: ${EBEAN_POSTGRES_USE_AWS_IAM_AUTH:false} + locking: + enabled: ${EBEAN_LOCKING_ENABLED:true} + durationSeconds: ${EBEAN_LOCKING_DURATION_SECONDS:60} + maximumLocks: ${EBEAN_LOCKING_MAXIMUM_LOCKS:20000} # Only required if entityService.impl is cassandra cassandra: diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java index e969793fac1ef4..62bfcfa2cbf93d 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java @@ -4,6 +4,7 @@ import com.datahub.authorization.AuthorizationConfiguration; import com.linkedin.datahub.graphql.featureflags.FeatureFlags; import com.linkedin.metadata.config.DataHubConfiguration; +import com.linkedin.metadata.config.EbeanConfiguration; import com.linkedin.metadata.config.IngestionConfiguration; import com.linkedin.metadata.config.SystemUpdateConfiguration; import com.linkedin.metadata.config.TestsConfiguration; @@ -71,4 +72,7 @@ public class ConfigurationProvider { /** Structured properties related configurations */ private StructuredPropertiesConfiguration structuredProperties; + + /** Ebean related configuration */ + private EbeanConfiguration ebean; } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityAspectDaoFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityAspectDaoFactory.java index 94aebb2a39efa3..22eced4fd5acf0 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityAspectDaoFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityAspectDaoFactory.java @@ -1,6 +1,7 @@ package com.linkedin.gms.factory.entity; import 
com.datastax.oss.driver.api.core.CqlSession; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.cassandra.CassandraAspectDao; import com.linkedin.metadata.entity.ebean.EbeanAspectDao; @@ -18,8 +19,9 @@ public class EntityAspectDaoFactory { @DependsOn({"gmsEbeanServiceConfig"}) @ConditionalOnProperty(name = "entityService.impl", havingValue = "ebean", matchIfMissing = true) @Nonnull - protected AspectDao createEbeanInstance(Database server) { - return new EbeanAspectDao(server); + protected AspectDao createEbeanInstance( + Database server, final ConfigurationProvider configurationProvider) { + return new EbeanAspectDao(server, configurationProvider.getEbean()); } @Bean(name = "entityAspectDao") diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityAspectMigrationsDaoFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityAspectMigrationsDaoFactory.java index 9123714de5bc8d..2bf9804030b49c 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityAspectMigrationsDaoFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityAspectMigrationsDaoFactory.java @@ -1,6 +1,7 @@ package com.linkedin.gms.factory.entity; import com.datastax.oss.driver.api.core.CqlSession; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.entity.AspectMigrationsDao; import com.linkedin.metadata.entity.cassandra.CassandraAspectDao; import com.linkedin.metadata.entity.ebean.EbeanAspectDao; @@ -18,8 +19,9 @@ public class EntityAspectMigrationsDaoFactory { @DependsOn({"gmsEbeanServiceConfig"}) @ConditionalOnProperty(name = "entityService.impl", havingValue = "ebean", matchIfMissing = true) @Nonnull - protected AspectMigrationsDao createEbeanInstance(Database server) { - return new EbeanAspectDao(server); + protected AspectMigrationsDao createEbeanInstance( + Database server, final ConfigurationProvider configurationProvider) { + return new EbeanAspectDao(server, configurationProvider.getEbean()); } @Bean(name = "entityAspectMigrationsDao") diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java index 5fd64b02d08a8d..871f16d97be33c 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java @@ -43,7 +43,8 @@ protected EntityService createInstance( @Qualifier("entityAspectDao") AspectDao aspectDao, EntityRegistry entityRegistry, ConfigurationProvider configurationProvider, - UpdateIndicesService updateIndicesService) { + UpdateIndicesService updateIndicesService, + @Value("${featureFlags.showBrowseV2}") final boolean enableBrowsePathV2) { final KafkaEventProducer eventProducer = new KafkaEventProducer(producer, convention, kafkaHealthChecker); @@ -56,6 +57,7 @@ protected EntityService createInstance( featureFlags.isAlwaysEmitChangeLog(), updateIndicesService, featureFlags.getPreProcessHooks(), - _ebeanMaxTransactionRetry); + _ebeanMaxTransactionRetry, + enableBrowsePathV2); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/DomainsCandidateSourceFactory.java 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/DomainsCandidateSourceFactory.java index fbfd80f85ff4d2..a7c2dde8b7d25e 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/DomainsCandidateSourceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/DomainsCandidateSourceFactory.java @@ -1,6 +1,7 @@ package com.linkedin.gms.factory.recommendation.candidatesource; import com.linkedin.gms.factory.search.EntitySearchServiceFactory; +import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.recommendation.candidatesource.DomainsCandidateSource; import com.linkedin.metadata.search.EntitySearchService; import javax.annotation.Nonnull; @@ -20,7 +21,7 @@ public class DomainsCandidateSourceFactory { @Bean(name = "domainsCandidateSource") @Nonnull - protected DomainsCandidateSource getInstance() { - return new DomainsCandidateSource(entitySearchService); + protected DomainsCandidateSource getInstance(final EntityRegistry entityRegistry) { + return new DomainsCandidateSource(entitySearchService, entityRegistry); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/TopTagsCandidateSourceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/TopTagsCandidateSourceFactory.java index fe5c2d03d19071..bc2520c2b4617d 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/TopTagsCandidateSourceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/TopTagsCandidateSourceFactory.java @@ -1,6 +1,7 @@ package com.linkedin.gms.factory.recommendation.candidatesource; import com.linkedin.gms.factory.search.EntitySearchServiceFactory; +import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.recommendation.candidatesource.TopTagsSource; import com.linkedin.metadata.search.EntitySearchService; import javax.annotation.Nonnull; @@ -20,7 +21,7 @@ public class TopTagsCandidateSourceFactory { @Bean(name = "topTagsCandidateSource") @Nonnull - protected TopTagsSource getInstance() { - return new TopTagsSource(entitySearchService); + protected TopTagsSource getInstance(final EntityService entityService) { + return new TopTagsSource(entitySearchService, entityService); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/TopTermsCandidateSourceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/TopTermsCandidateSourceFactory.java index 36c53936094ff5..c8ad276eb3d862 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/TopTermsCandidateSourceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/TopTermsCandidateSourceFactory.java @@ -1,6 +1,7 @@ package com.linkedin.gms.factory.recommendation.candidatesource; import com.linkedin.gms.factory.search.EntitySearchServiceFactory; +import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.recommendation.candidatesource.TopTermsSource; import com.linkedin.metadata.search.EntitySearchService; import javax.annotation.Nonnull; @@ -20,7 +21,7 @@ public class 
TopTermsCandidateSourceFactory { @Bean(name = "topTermsCandidateSource") @Nonnull - protected TopTermsSource getInstance() { - return new TopTermsSource(entitySearchService); + protected TopTermsSource getInstance(final EntityService entityService) { + return new TopTermsSource(entitySearchService, entityService); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2Step.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2Step.java index 80e139dcd5c65b..49a86406c1ecd5 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2Step.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2Step.java @@ -9,6 +9,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.utils.DefaultAspectsUtil; import com.linkedin.metadata.boot.UpgradeStep; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.query.filter.Condition; @@ -128,7 +129,8 @@ private String backfillBrowsePathsV2(String entityType, AuditStamp auditStamp, S } private void ingestBrowsePathsV2(Urn urn, AuditStamp auditStamp) throws Exception { - BrowsePathsV2 browsePathsV2 = _entityService.buildDefaultBrowsePathV2(urn, true); + BrowsePathsV2 browsePathsV2 = + DefaultAspectsUtil.buildDefaultBrowsePathV2(urn, true, _entityService); log.debug(String.format("Adding browse path v2 for urn %s with value %s", urn, browsePathsV2)); MetadataChangeProposal proposal = new MetadataChangeProposal(); proposal.setEntityUrn(urn); diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java index 716ae292338ed2..19efa5e9c4de20 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java @@ -79,7 +79,7 @@ public void execute() throws Exception { MCPUpsertBatchItem.builder() .urn(urn) .aspectName(DATA_PLATFORM_INSTANCE_ASPECT_NAME) - .aspect(dataPlatformInstance.get()) + .recordTemplate(dataPlatformInstance.get()) .auditStamp(aspectAuditStamp) .build(_entityService)); } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformsStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformsStep.java index 89ed493e162ccf..d2bb61ad7ade5d 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformsStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformsStep.java @@ -86,7 +86,7 @@ public void execute() throws IOException, URISyntaxException { return MCPUpsertBatchItem.builder() .urn(urn) .aspectName(PLATFORM_ASPECT_NAME) - .aspect(info) + .recordTemplate(info) .auditStamp( new AuditStamp() .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)) diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataTypesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataTypesStep.java index 6f3a415b521e41..1487257d6d830e 
100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataTypesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataTypesStep.java @@ -11,12 +11,12 @@ import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.boot.BootstrapStep; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.models.AspectSpec; -import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; -import com.linkedin.mxe.GenericAspect; import com.linkedin.mxe.MetadataChangeProposal; +import java.util.HashMap; +import java.util.Map; import java.util.Objects; +import java.util.Set; import javax.annotation.Nonnull; import lombok.extern.slf4j.Slf4j; import org.springframework.core.io.ClassPathResource; @@ -62,30 +62,29 @@ public void execute() throws Exception { log.info("Ingesting {} data types types", dataTypesObj.size()); int numIngested = 0; + + Map urnDataTypesMap = new HashMap<>(); for (final JsonNode roleObj : dataTypesObj) { final Urn urn = Urn.createFromString(roleObj.get("urn").asText()); - final DataTypeInfo info = - RecordUtils.toRecordTemplate(DataTypeInfo.class, roleObj.get("info").toString()); - log.info(String.format("Ingesting default data type with urn %s", urn)); - ingestDataType(urn, info); - numIngested++; + urnDataTypesMap.put(urn, roleObj); + } + + Set existingUrns = _entityService.exists(urnDataTypesMap.keySet()); + + for (final Map.Entry entry : urnDataTypesMap.entrySet()) { + if (!existingUrns.contains(entry.getKey())) { + final DataTypeInfo info = + RecordUtils.toRecordTemplate( + DataTypeInfo.class, entry.getValue().get("info").toString()); + log.info(String.format("Ingesting default data type with urn %s", entry.getKey())); + ingestDataType(entry.getKey(), info); + numIngested++; + } } log.info("Ingested {} new data types", numIngested); } private void ingestDataType(final Urn dataTypeUrn, final DataTypeInfo info) throws Exception { - // Write key - final MetadataChangeProposal keyAspectProposal = new MetadataChangeProposal(); - final AspectSpec keyAspectSpec = _entityService.getKeyAspectSpec(dataTypeUrn.getEntityType()); - GenericAspect keyAspect = - GenericRecordUtils.serializeAspect( - EntityKeyUtils.convertUrnToEntityKey(dataTypeUrn, keyAspectSpec)); - keyAspectProposal.setAspect(keyAspect); - keyAspectProposal.setAspectName(keyAspectSpec.getName()); - keyAspectProposal.setEntityType(DATA_TYPE_ENTITY_NAME); - keyAspectProposal.setChangeType(ChangeType.UPSERT); - keyAspectProposal.setEntityUrn(dataTypeUrn); - final MetadataChangeProposal proposal = new MetadataChangeProposal(); proposal.setEntityUrn(dataTypeUrn); proposal.setEntityType(DATA_TYPE_ENTITY_NAME); diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestEntityTypesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestEntityTypesStep.java index b2213eda71cae1..4067b4f34fb361 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestEntityTypesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestEntityTypesStep.java @@ -9,13 +9,14 @@ import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.boot.BootstrapStep; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; -import 
com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; -import com.linkedin.mxe.GenericAspect; import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.util.Pair; +import java.util.Map; import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import lombok.extern.slf4j.Slf4j; @@ -42,34 +43,39 @@ public void execute() throws Exception { log.info( "Ingesting {} entity types", _entityService.getEntityRegistry().getEntitySpecs().size()); int numIngested = 0; - for (final EntitySpec spec : _entityService.getEntityRegistry().getEntitySpecs().values()) { - final Urn entityTypeUrn = - UrnUtils.getUrn( - String.format("urn:li:entityType:%s.%s", DATAHUB_NAMESPACE, spec.getName())); - final EntityTypeInfo info = - new EntityTypeInfo() - .setDisplayName(spec.getName()) // TODO: Support display name in the entity registry. - .setQualifiedName(entityTypeUrn.getId()); - log.info(String.format("Ingesting entity type with urn %s", entityTypeUrn)); - ingestEntityType(entityTypeUrn, info); - numIngested++; + + Map urnEntitySpecMap = + _entityService.getEntityRegistry().getEntitySpecs().values().stream() + .map( + spec -> + Pair.of( + UrnUtils.getUrn( + String.format( + "urn:li:entityType:%s.%s", DATAHUB_NAMESPACE, spec.getName())), + spec)) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); + + Set existingUrns = _entityService.exists(urnEntitySpecMap.keySet()); + + for (final Map.Entry entry : urnEntitySpecMap.entrySet()) { + if (!existingUrns.contains(entry.getKey())) { + final EntityTypeInfo info = + new EntityTypeInfo() + .setDisplayName( + entry + .getValue() + .getName()) // TODO: Support display name in the entity registry. + .setQualifiedName(entry.getKey().getId()); + log.info(String.format("Ingesting entity type with urn %s", entry.getKey())); + ingestEntityType(entry.getKey(), info); + numIngested++; + } } log.info("Ingested {} new entity types", numIngested); } private void ingestEntityType(final Urn entityTypeUrn, final EntityTypeInfo info) throws Exception { - // Write key - final MetadataChangeProposal keyAspectProposal = new MetadataChangeProposal(); - final AspectSpec keyAspectSpec = _entityService.getKeyAspectSpec(entityTypeUrn.getEntityType()); - GenericAspect keyAspect = - GenericRecordUtils.serializeAspect( - EntityKeyUtils.convertUrnToEntityKey(entityTypeUrn, keyAspectSpec)); - keyAspectProposal.setAspect(keyAspect); - keyAspectProposal.setAspectName(keyAspectSpec.getName()); - keyAspectProposal.setEntityType(ENTITY_TYPE_ENTITY_NAME); - keyAspectProposal.setChangeType(ChangeType.UPSERT); - keyAspectProposal.setEntityUrn(entityTypeUrn); final MetadataChangeProposal proposal = new MetadataChangeProposal(); proposal.setEntityUrn(entityTypeUrn); diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStep.java index 3eedbb48aaecaf..f28e9ad4e9ed8c 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStep.java @@ -9,6 +9,7 @@ import com.linkedin.data.template.RecordTemplate; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.utils.DefaultAspectsUtil; import 
com.linkedin.metadata.boot.UpgradeStep; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ListResult; @@ -126,7 +127,7 @@ private int getAndMigrateBrowsePaths(String entityType, int start, AuditStamp au } private void migrateBrowsePath(Urn urn, AuditStamp auditStamp) throws Exception { - BrowsePaths newPaths = _entityService.buildDefaultBrowsePath(urn); + BrowsePaths newPaths = DefaultAspectsUtil.buildDefaultBrowsePath(urn, _entityService); log.debug(String.format("Updating browse path for urn %s to value %s", urn, newPaths)); MetadataChangeProposal proposal = new MetadataChangeProposal(); proposal.setEntityUrn(urn); diff --git a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2StepTest.java b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2StepTest.java index 06571415620893..0858736e39021a 100644 --- a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2StepTest.java +++ b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2StepTest.java @@ -1,12 +1,10 @@ package com.linkedin.metadata.boot.steps; import static com.linkedin.metadata.Constants.CONTAINER_ASPECT_NAME; +import static org.mockito.ArgumentMatchers.any; import com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; -import com.linkedin.common.BrowsePathEntry; -import com.linkedin.common.BrowsePathEntryArray; -import com.linkedin.common.BrowsePathsV2; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.entity.Aspect; @@ -93,9 +91,9 @@ public void testExecuteNoExistingBrowsePaths() throws Exception { Mockito.verify(mockSearchService, Mockito.times(9)) .scrollAcrossEntities( - Mockito.any(), + any(), Mockito.eq("*"), - Mockito.any(Filter.class), + any(Filter.class), Mockito.eq(null), Mockito.eq(null), Mockito.eq("5m"), @@ -104,8 +102,7 @@ public void testExecuteNoExistingBrowsePaths() throws Exception { // Verify that 11 aspects are ingested, 2 for the upgrade request / result, 9 for ingesting 1 of // each entity type Mockito.verify(mockService, Mockito.times(11)) - .ingestProposal( - Mockito.any(MetadataChangeProposal.class), Mockito.any(), Mockito.eq(false)); + .ingestProposal(any(MetadataChangeProposal.class), any(), Mockito.eq(false)); } @Test @@ -135,9 +132,7 @@ public void testDoesNotRunWhenAlreadyExecuted() throws Exception { Mockito.verify(mockService, Mockito.times(0)) .ingestProposal( - Mockito.any(MetadataChangeProposal.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + any(MetadataChangeProposal.class), any(AuditStamp.class), Mockito.anyBoolean()); } private EntityService initMockService() throws URISyntaxException { @@ -146,16 +141,9 @@ private EntityService initMockService() throws URISyntaxException { Mockito.when(mockService.getEntityRegistry()).thenReturn(registry); for (int i = 0; i < ENTITY_TYPES.size(); i++) { - Mockito.when( - mockService.buildDefaultBrowsePathV2( - Mockito.eq(ENTITY_URNS.get(i)), Mockito.eq(true))) - .thenReturn( - new BrowsePathsV2() - .setPath(new BrowsePathEntryArray(new BrowsePathEntry().setId("test")))); - Mockito.when( mockService.getEntityV2( - Mockito.any(), + any(), Mockito.eq(ENTITY_URNS.get(i)), Mockito.eq(Collections.singleton(CONTAINER_ASPECT_NAME)))) .thenReturn(null); @@ -172,7 +160,7 @@ private SearchService initMockSearchService() { mockSearchService.scrollAcrossEntities( 
Mockito.eq(ImmutableList.of(ENTITY_TYPES.get(i))), Mockito.eq("*"), - Mockito.any(Filter.class), + any(Filter.class), Mockito.eq(null), Mockito.eq(null), Mockito.eq("5m"), diff --git a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java index 1ac0f2f4f914a6..5617d7e9714b08 100644 --- a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java +++ b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java @@ -122,7 +122,7 @@ public void testExecuteWhenSomeEntitiesShouldReceiveDataPlatformInstance() throw item.getUrn().getEntityType().equals("chart") && item.getAspectName() .equals(DATA_PLATFORM_INSTANCE_ASPECT_NAME) - && ((MCPUpsertBatchItem) item).getAspect() + && ((MCPUpsertBatchItem) item).getRecordTemplate() instanceof DataPlatformInstance)), anyBoolean(), anyBoolean()); @@ -136,7 +136,7 @@ public void testExecuteWhenSomeEntitiesShouldReceiveDataPlatformInstance() throw item.getUrn().getEntityType().equals("chart") && item.getAspectName() .equals(DATA_PLATFORM_INSTANCE_ASPECT_NAME) - && ((MCPUpsertBatchItem) item).getAspect() + && ((MCPUpsertBatchItem) item).getRecordTemplate() instanceof DataPlatformInstance)), anyBoolean(), anyBoolean()); diff --git a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataTypesStepTest.java b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataTypesStepTest.java index 2bbd06c8a61a43..c5539b001e9e35 100644 --- a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataTypesStepTest.java +++ b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataTypesStepTest.java @@ -13,6 +13,8 @@ import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.MetadataChangeProposal; +import java.util.Collection; +import java.util.Set; import org.jetbrains.annotations.NotNull; import org.mockito.Mockito; import org.testng.Assert; @@ -51,14 +53,17 @@ public void testExecuteValidDataTypesNoExistingDataTypes() throws Exception { @Test public void testExecuteInvalidJson() throws Exception { final EntityService entityService = mock(EntityService.class); + when(entityService.exists(any(Collection.class))).thenAnswer(args -> Set.of()); final IngestDataTypesStep step = new IngestDataTypesStep(entityService, "./boot/test_data_types_invalid.json"); Assert.assertThrows(RuntimeException.class, step::execute); - // Verify no interactions - verifyNoInteractions(entityService); + verify(entityService, times(1)).exists(any()); + + // Verify no additional interactions + verifyNoMoreInteractions(entityService); } private static MetadataChangeProposal buildUpdateDataTypeProposal(final DataTypeInfo info) { diff --git a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStepTest.java b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStepTest.java index 024ad7b16a8447..605d9d1c5e5d89 100644 --- a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStepTest.java +++ 
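Note on the boot-step refactor above: rather than unconditionally ingesting every entity type, the step now collects all candidate urns, asks the entity service for the already-persisted subset in one batched call, and only writes the missing ones; the updated IngestDataTypesStepTest stubs exists() to match. A minimal sketch of that pattern, with the generic types (elided in the hunk) reconstructed as Urn, EntitySpec and EntityTypeInfo, assuming exists(Collection) returns the urns that already have a key aspect:

// Sketch only: batch the existence check, then ingest just the new entity types.
int numIngested = 0;
Map<Urn, EntityTypeInfo> candidates = new HashMap<>();
for (EntitySpec spec : _entityService.getEntityRegistry().getEntitySpecs().values()) {
  Urn urn =
      UrnUtils.getUrn(
          String.format("urn:li:entityType:%s.%s", DATAHUB_NAMESPACE, spec.getName()));
  candidates.put(
      urn,
      new EntityTypeInfo()
          .setDisplayName(spec.getName()) // TODO (from the source): display names in the registry
          .setQualifiedName(urn.getId()));
}
Set<Urn> existing = _entityService.exists(candidates.keySet());
for (Map.Entry<Urn, EntityTypeInfo> entry : candidates.entrySet()) {
  if (!existing.contains(entry.getKey())) {
    // Only new entity types trigger a write, so re-running the step is a cheap no-op.
    ingestEntityType(entry.getKey(), entry.getValue());
    numIngested++;
  }
}
log.info("Ingested {} new entity types", numIngested);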
b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStepTest.java @@ -107,10 +107,6 @@ public void testExecuteFirstTime() throws Exception { final EntityService mockService = Mockito.mock(EntityService.class); final EntityRegistry registry = new TestEntityRegistry(); Mockito.when(mockService.getEntityRegistry()).thenReturn(registry); - Mockito.when(mockService.buildDefaultBrowsePath(Mockito.eq(testUrn1))) - .thenReturn(new BrowsePaths().setPaths(new StringArray(ImmutableList.of("/prod/kafka")))); - Mockito.when(mockService.buildDefaultBrowsePath(Mockito.eq(testUrn2))) - .thenReturn(new BrowsePaths().setPaths(new StringArray(ImmutableList.of("/prod/kafka")))); final Urn upgradeEntityUrn = Urn.createFromString(UPGRADE_URN); Mockito.when( diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java index a7e88966e4f874..13d2e501abf09f 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java @@ -26,7 +26,6 @@ import com.linkedin.entity.Aspect; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.batch.AspectsBatch; -import com.linkedin.metadata.entity.AspectUtils; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.IngestResult; import com.linkedin.metadata.entity.RollbackRunResult; @@ -452,20 +451,12 @@ public static Pair ingestProposal( .setTime(System.currentTimeMillis()) .setActor(UrnUtils.getUrn(actorUrn)); - final List additionalChanges = - AspectUtils.getAdditionalChanges(serviceProposal, entityService); - log.info("Proposal: {}", serviceProposal); Throwable exceptionally = null; try { - Stream proposalStream = - Stream.concat( - Stream.of(serviceProposal), - AspectUtils.getAdditionalChanges(serviceProposal, entityService).stream()); - AspectsBatch batch = AspectsBatchImpl.builder() - .mcps(proposalStream.collect(Collectors.toList()), auditStamp, entityService) + .mcps(List.of(serviceProposal), auditStamp, entityService) .build(); Set proposalResult = entityService.ingestProposal(batch, async); diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java index 503330fdc8a2e5..44202c20ca6db7 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -481,7 +481,7 @@ private UpsertItem toUpsertItem( .urn(entityUrn) .aspectName(aspectSpec.getName()) .auditStamp(AuditStampUtils.createAuditStamp(actor.toUrnStr())) - .aspect( + .recordTemplate( GenericRecordUtils.deserializeAspect( ByteString.copyString(jsonAspect, StandardCharsets.UTF_8), GenericRecordUtils.JSON, diff --git a/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java b/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java index 17be5a60816d30..20862bbc7f000d 100644 --- a/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java +++ 
b/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java @@ -11,6 +11,7 @@ import com.datahub.authorization.AuthorizationResult; import com.datahub.authorization.AuthorizerChain; import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.UpdateAspectResult; @@ -67,13 +68,15 @@ public void setup() IllegalAccessException { EntityRegistry mockEntityRegistry = new MockEntityRegistry(); AspectDao aspectDao = Mockito.mock(AspectDao.class); - Mockito.when( - aspectDao.runInTransactionWithRetry( - ArgumentMatchers.>any(), any(), anyInt())) + when(aspectDao.runInTransactionWithRetry( + ArgumentMatchers.>>any(), + any(AspectsBatch.class), + anyInt())) .thenAnswer( i -> - ((Function) i.getArgument(0)) - .apply(Mockito.mock(Transaction.class))); + List.of( + ((Function>) i.getArgument(0)) + .apply(Mockito.mock(Transaction.class)))); EventProducer mockEntityEventProducer = Mockito.mock(EventProducer.class); UpdateIndicesService mockUpdateIndicesService = mock(UpdateIndicesService.class); diff --git a/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java b/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java index b082a735e8084b..be5f99bed8e630 100644 --- a/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java +++ b/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java @@ -59,7 +59,7 @@ public MockEntityService( @Nonnull EntityRegistry entityRegistry, @Nonnull UpdateIndicesService updateIndicesService, PreProcessHooks preProcessHooks) { - super(aspectDao, producer, entityRegistry, true, updateIndicesService, preProcessHooks); + super(aspectDao, producer, entityRegistry, true, updateIndicesService, preProcessHooks, true); } @Override diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java index ffa3abe6806f99..21a9f47a13f738 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -248,20 +248,9 @@ public Task ingestProposal( return RestliUtil.toTask(() -> { log.debug("Proposal: {}", metadataChangeProposal); try { - final AspectsBatch batch; - if (asyncBool) { - // if async we'll expand the getAdditionalChanges later, no need to do this early - batch = AspectsBatchImpl.builder() - .mcps(List.of(metadataChangeProposal), auditStamp, _entityService) - .build(); - } else { - Stream proposalStream = Stream.concat(Stream.of(metadataChangeProposal), - AspectUtils.getAdditionalChanges(metadataChangeProposal, _entityService).stream()); - - batch = AspectsBatchImpl.builder() - .mcps(proposalStream.collect(Collectors.toList()), auditStamp, _entityService) - .build(); - } + final AspectsBatch batch = AspectsBatchImpl.builder() + .mcps(List.of(metadataChangeProposal), auditStamp, _entityService) + .build(); Set results = _entityService.ingestProposal(batch, asyncBool); diff --git a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java 
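Note on the MappingUtil and AspectResource changes above: the REST ingest paths no longer pre-expand additional default aspects through AspectUtils.getAdditionalChanges; each request now builds a batch around the single incoming proposal and leaves default-aspect generation to the ingest pipeline. A minimal sketch under that reading (the element type of the result Set is reconstructed as IngestResult, which the surrounding imports suggest):

// Sketch: one proposal in, defaults derived downstream rather than expanded up front.
AspectsBatch batch =
    AspectsBatchImpl.builder()
        .mcps(List.of(metadataChangeProposal), auditStamp, entityService)
        .build();
Set<IngestResult> results = entityService.ingestProposal(batch, /* async */ false);

Related, as the EntityController hunk above and the AspectResourceTest hunk below show, batch items built directly now carry their payload via recordTemplate(...) and getRecordTemplate() instead of aspect(...) and getAspect().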
b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java index d6130e05b77bd3..1678fe92ec70e3 100644 --- a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java +++ b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java @@ -53,7 +53,7 @@ public void setup() { _updateIndicesService = mock(UpdateIndicesService.class); _preProcessHooks = mock(PreProcessHooks.class); _entityService = new EntityServiceImpl(_aspectDao, _producer, _entityRegistry, false, - _updateIndicesService, _preProcessHooks); + _updateIndicesService, _preProcessHooks, true); _authorizer = mock(Authorizer.class); _aspectResource.setAuthorizer(_authorizer); _aspectResource.setEntityService(_entityService); @@ -84,13 +84,13 @@ public void testAsyncDefaultAspects() throws URISyntaxException { MCPUpsertBatchItem req = MCPUpsertBatchItem.builder() .urn(urn) .aspectName(mcp.getAspectName()) - .aspect(mcp.getAspect()) + .recordTemplate(mcp.getAspect()) .auditStamp(new AuditStamp()) .metadataChangeProposal(mcp) .build(_entityService); when(_aspectDao.runInTransactionWithRetry(any(), any(), anyInt())) .thenReturn( - List.of( + List.of(List.of( UpdateAspectResult.builder() .urn(urn) .newValue(new DatasetProperties().setName("name1")) @@ -120,7 +120,7 @@ public void testAsyncDefaultAspects() throws URISyntaxException { .newValue(new DatasetProperties().setName("name5")) .auditStamp(new AuditStamp()) .request(req) - .build())); + .build()))); _aspectResource.ingestProposal(mcp, "false"); verify(_producer, times(5)) .produceMetadataChangeLog(eq(urn), any(AspectSpec.class), any(MetadataChangeLog.class)); diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/AspectUtils.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/AspectUtils.java index 2c1596474fb21e..55373730e7b673 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/AspectUtils.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/AspectUtils.java @@ -1,7 +1,6 @@ package com.linkedin.metadata.entity; import com.datahub.authentication.Authentication; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; @@ -12,18 +11,12 @@ import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; -import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; -import com.linkedin.mxe.GenericAspect; import com.linkedin.mxe.MetadataChangeLog; import com.linkedin.mxe.MetadataChangeProposal; -import java.util.Collections; import java.util.HashMap; -import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.Set; -import java.util.stream.Collectors; import javax.annotation.Nonnull; import lombok.extern.slf4j.Slf4j; import org.joda.time.DateTimeUtils; @@ -33,66 +26,6 @@ public class AspectUtils { private AspectUtils() {} - public static final Set SUPPORTED_TYPES = - Set.of(ChangeType.UPSERT, ChangeType.CREATE, ChangeType.PATCH); - - public static List getAdditionalChanges( - @Nonnull MetadataChangeProposal metadataChangeProposal, - @Nonnull EntityService entityService, - boolean onPrimaryKeyInsertOnly) { - - // No additional changes for unsupported 
operations - if (!SUPPORTED_TYPES.contains(metadataChangeProposal.getChangeType())) { - return Collections.emptyList(); - } - - final Urn urn = - EntityKeyUtils.getUrnFromProposal( - metadataChangeProposal, - entityService.getKeyAspectSpec(metadataChangeProposal.getEntityType())); - - final Map includedAspects; - if (metadataChangeProposal.getChangeType() != ChangeType.PATCH) { - RecordTemplate aspectRecord = - GenericRecordUtils.deserializeAspect( - metadataChangeProposal.getAspect().getValue(), - metadataChangeProposal.getAspect().getContentType(), - entityService - .getEntityRegistry() - .getEntitySpec(urn.getEntityType()) - .getAspectSpec(metadataChangeProposal.getAspectName())); - includedAspects = ImmutableMap.of(metadataChangeProposal.getAspectName(), aspectRecord); - } else { - includedAspects = ImmutableMap.of(); - } - - if (onPrimaryKeyInsertOnly) { - return entityService - .generateDefaultAspectsOnFirstWrite(urn, includedAspects) - .getValue() - .stream() - .map( - entry -> - getProposalFromAspect(entry.getKey(), entry.getValue(), metadataChangeProposal)) - .filter(Objects::nonNull) - .collect(Collectors.toList()); - } else { - return entityService.generateDefaultAspectsIfMissing(urn, includedAspects).stream() - .map( - entry -> - getProposalFromAspect(entry.getKey(), entry.getValue(), metadataChangeProposal)) - .filter(Objects::nonNull) - .collect(Collectors.toList()); - } - } - - public static List getAdditionalChanges( - @Nonnull MetadataChangeProposal metadataChangeProposal, - @Nonnull EntityService entityService) { - - return getAdditionalChanges(metadataChangeProposal, entityService, false); - } - public static Map batchGetLatestAspect( String entity, Set urns, @@ -112,40 +45,6 @@ public static Map batchGetLatestAspect( return finalResult; } - private static MetadataChangeProposal getProposalFromAspect( - String aspectName, RecordTemplate aspect, MetadataChangeProposal original) { - MetadataChangeProposal proposal = new MetadataChangeProposal(); - GenericAspect genericAspect = GenericRecordUtils.serializeAspect(aspect); - // Set net new fields - proposal.setAspect(genericAspect); - proposal.setAspectName(aspectName); - - // Set fields determined from original - // Additional changes should never be set as PATCH, if a PATCH is coming across it should be an - // UPSERT - proposal.setChangeType(original.getChangeType()); - if (ChangeType.PATCH.equals(proposal.getChangeType())) { - proposal.setChangeType(ChangeType.UPSERT); - } - - if (original.getSystemMetadata() != null) { - proposal.setSystemMetadata(original.getSystemMetadata()); - } - if (original.getEntityUrn() != null) { - proposal.setEntityUrn(original.getEntityUrn()); - } - if (original.getEntityKeyAspect() != null) { - proposal.setEntityKeyAspect(original.getEntityKeyAspect()); - } - if (original.getAuditHeader() != null) { - proposal.setAuditHeader(original.getAuditHeader()); - } - - proposal.setEntityType(original.getEntityType()); - - return proposal; - } - public static MetadataChangeProposal buildMetadataChangeProposal( @Nonnull Urn urn, @Nonnull String aspectName, @Nonnull RecordTemplate aspect) { final MetadataChangeProposal proposal = new MetadataChangeProposal(); diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java index 94ab69e895920f..d9b0f4b73d5805 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java +++ 
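Note on the AspectUtils trim-down above: with getAdditionalChanges and getProposalFromAspect removed, callers that still need a default aspect obtain it from DefaultAspectsUtil (as UpgradeDefaultBrowsePathsStep now does) and wrap it with the retained buildMetadataChangeProposal helper. A minimal sketch, assuming the browsePaths aspect name literal; datasetUrn and entityService stand in for whatever the caller already holds:

// Sketch: default aspects are built by DefaultAspectsUtil, not by EntityService hooks.
BrowsePaths defaultPaths = DefaultAspectsUtil.buildDefaultBrowsePath(datasetUrn, entityService);
MetadataChangeProposal proposal =
    AspectUtils.buildMetadataChangeProposal(datasetUrn, "browsePaths", defaultPaths);
// The proposal can then go through the single-proposal batch path sketched earlier.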
b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java @@ -1,8 +1,6 @@ package com.linkedin.metadata.entity; import com.linkedin.common.AuditStamp; -import com.linkedin.common.BrowsePaths; -import com.linkedin.common.BrowsePathsV2; import com.linkedin.common.VersionedUrn; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; @@ -255,34 +253,6 @@ Optional getAspectSpec( String getKeyAspectName(@Nonnull final Urn urn); - /** - * Generate default aspects if not present in the database. - * - * @param urn entity urn - * @param includedAspects aspects being written - * @return additional aspects to be written - */ - List> generateDefaultAspectsIfMissing( - @Nonnull final Urn urn, Map includedAspects); - - /** - * Generate default aspects if the entity key aspect is NOT in the database **AND** the key aspect - * is being written, present in `includedAspects`. - * - *
<p>
Does not automatically create key aspects. - * - * @see EntityService#generateDefaultAspectsIfMissing if key aspects need autogeneration - *
<p>
This version is more efficient in that it only generates additional writes when a new - * entity is being minted for the first time. The drawback is that it will not automatically - * add key aspects, in case the producer is not bothering to ensure that the entity exists - * before writing non-key aspects. - * @param urn entity urn - * @param includedAspects aspects being written - * @return whether key aspect exists in database and the additional aspects to be written - */ - Pair>> generateDefaultAspectsOnFirstWrite( - @Nonnull final Urn urn, Map includedAspects); - AspectSpec getKeyAspectSpec(@Nonnull final String entityName); Set getEntityAspectNames(final String entityName); @@ -339,17 +309,5 @@ default boolean exists(@Nonnull Urn urn, boolean includeSoftDelete) { void setWritable(boolean canWrite); - BrowsePaths buildDefaultBrowsePath(final @Nonnull Urn urn) throws URISyntaxException; - - /** - * Builds the default browse path V2 aspects for all entities. - * - *
<p>
This method currently supports datasets, charts, dashboards, and data jobs best. Everything - * else will have a basic "Default" folder added to their browsePathV2. - */ - @Nonnull - BrowsePathsV2 buildDefaultBrowsePathV2(final @Nonnull Urn urn, boolean useContainerPaths) - throws URISyntaxException; - RecordTemplate getLatestAspect(@Nonnull final Urn urn, @Nonnull final String aspectName); } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/DomainsCandidateSource.java b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/DomainsCandidateSource.java index 9392f50b4749eb..e34fa8ff1bde57 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/DomainsCandidateSource.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/DomainsCandidateSource.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.recommendation.candidatesource; import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.recommendation.RecommendationRenderType; import com.linkedin.metadata.recommendation.RecommendationRequestContext; import com.linkedin.metadata.recommendation.ScenarioType; @@ -13,8 +14,9 @@ public class DomainsCandidateSource extends EntitySearchAggregationSource { private static final String DOMAINS = "domains"; - public DomainsCandidateSource(EntitySearchService entitySearchService) { - super(entitySearchService); + public DomainsCandidateSource( + EntitySearchService entitySearchService, EntityRegistry entityRegistry) { + super(entitySearchService, entityRegistry); } @Override diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java index a19909576d25ba..8d6ccb22660fb2 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java @@ -2,6 +2,8 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.recommendation.ContentParams; @@ -10,6 +12,7 @@ import com.linkedin.metadata.recommendation.RecommendationRequestContext; import com.linkedin.metadata.recommendation.SearchParams; import com.linkedin.metadata.search.EntitySearchService; +import com.linkedin.metadata.search.utils.QueryUtils; import io.opentelemetry.extension.annotations.WithSpan; import java.net.URISyntaxException; import java.util.Collections; @@ -35,7 +38,8 @@ @Slf4j @RequiredArgsConstructor public abstract class EntitySearchAggregationSource implements RecommendationSource { - private final EntitySearchService _entitySearchService; + private final EntitySearchService entitySearchService; + private final EntityRegistry entityRegistry; /** Field to aggregate on */ protected abstract String getSearchFieldName(); @@ -69,8 +73,8 @@ protected boolean isValidCandidate(T candidate) { public List 
getRecommendations( @Nonnull Urn userUrn, @Nullable RecommendationRequestContext requestContext) { Map aggregationResult = - _entitySearchService.aggregateByValue( - getEntityNames(), getSearchFieldName(), null, getMaxContent()); + entitySearchService.aggregateByValue( + getEntityNames(entityRegistry), getSearchFieldName(), null, getMaxContent()); if (aggregationResult.isEmpty()) { return Collections.emptyList(); @@ -110,9 +114,11 @@ public List getRecommendations( .collect(Collectors.toList()); } - protected List getEntityNames() { + protected List getEntityNames(EntityRegistry entityRegistry) { // By default, no list is applied which means searching across entities. - return null; + return QueryUtils.getQueryByDefaultEntitySpecs(entityRegistry).stream() + .map(EntitySpec::getName) + .collect(Collectors.toList()); } // Get top K entries with the most count diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopPlatformsSource.java b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopPlatformsSource.java index 3012e35baa607a..aecd9bbbf769c3 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopPlatformsSource.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopPlatformsSource.java @@ -37,11 +37,12 @@ public class TopPlatformsSource extends EntitySearchAggregationSource { Constants.CONTAINER_ENTITY_NAME, Constants.NOTEBOOK_ENTITY_NAME); - private final EntityService _entityService; + private final EntityService _entityService; private static final String PLATFORM = "platform"; - public TopPlatformsSource(EntityService entityService, EntitySearchService entitySearchService) { - super(entitySearchService); + public TopPlatformsSource( + EntityService entityService, EntitySearchService entitySearchService) { + super(entitySearchService, entityService.getEntityRegistry()); _entityService = entityService; } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopTagsSource.java b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopTagsSource.java index 317f956e1ca8ab..0897d441335fac 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopTagsSource.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopTagsSource.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.recommendation.candidatesource; import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.recommendation.RecommendationRenderType; import com.linkedin.metadata.recommendation.RecommendationRequestContext; import com.linkedin.metadata.recommendation.ScenarioType; @@ -13,8 +14,8 @@ public class TopTagsSource extends EntitySearchAggregationSource { private static final String TAGS = "tags"; - public TopTagsSource(EntitySearchService entitySearchService) { - super(entitySearchService); + public TopTagsSource(EntitySearchService entitySearchService, EntityService entityService) { + super(entitySearchService, entityService.getEntityRegistry()); } @Override diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopTermsSource.java 
b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopTermsSource.java index 6cdb5fdb659113..0fab9a28b51ea4 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopTermsSource.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopTermsSource.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.recommendation.candidatesource; import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.recommendation.RecommendationRenderType; import com.linkedin.metadata.recommendation.RecommendationRequestContext; import com.linkedin.metadata.recommendation.ScenarioType; @@ -13,8 +14,8 @@ public class TopTermsSource extends EntitySearchAggregationSource { private static final String TERMS = "glossaryTerms"; - public TopTermsSource(EntitySearchService entitySearchService) { - super(entitySearchService); + public TopTermsSource(EntitySearchService entitySearchService, EntityService entityService) { + super(entitySearchService, entityService.getEntityRegistry()); } @Override diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java index 842cc51e117775..a148a45b20e0c7 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java @@ -7,6 +7,10 @@ import com.linkedin.data.template.RecordTemplate; import com.linkedin.data.template.StringArray; import com.linkedin.metadata.aspect.AspectVersion; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.SearchableFieldSpec; +import com.linkedin.metadata.models.annotation.SearchableAnnotation; +import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -15,6 +19,7 @@ import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.RelationshipDirection; import com.linkedin.metadata.query.filter.RelationshipFilter; +import com.linkedin.util.Pair; import java.util.Collections; import java.util.List; import java.util.Map; @@ -174,4 +179,20 @@ public static Filter getFilterFromCriteria(List criteria) { new ConjunctiveCriterionArray( new ConjunctiveCriterion().setAnd(new CriterionArray(criteria)))); } + + public static List getQueryByDefaultEntitySpecs(EntityRegistry entityRegistry) { + return entityRegistry.getEntitySpecs().values().stream() + .map( + spec -> + Pair.of( + spec, + spec.getSearchableFieldSpecs().stream() + .map(SearchableFieldSpec::getSearchableAnnotation) + .collect(Collectors.toList()))) + .filter( + specPair -> + specPair.getSecond().stream().anyMatch(SearchableAnnotation::isQueryByDefault)) + .map(Pair::getFirst) + .collect(Collectors.toList()); + } } diff --git a/metadata-service/war/src/main/resources/logback.xml b/metadata-service/war/src/main/resources/logback.xml index 111ec627a6d430..2594f8c48ab535 100644 --- a/metadata-service/war/src/main/resources/logback.xml +++ b/metadata-service/war/src/main/resources/logback.xml @@ -47,7 +47,7 @@ 1 - DEBUG + ${logging.appender.debug_file.level:-DEBUG} ACCEPT DENY diff --git 
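Note on the recommendation-source changes above: each aggregation source now receives an EntityRegistry (directly, or via EntityService.getEntityRegistry()) so that, instead of passing null and aggregating across everything, it scopes the aggregation to entity types that are searchable by default. A minimal sketch of the call pattern; the field name and limit are illustrative, and the aggregation map is assumed to be keyed by field value with long counts:

// Sketch: derive the entity-name scope from the registry, then aggregate on a field.
List<String> entityNames =
    QueryUtils.getQueryByDefaultEntitySpecs(entityRegistry).stream()
        .map(EntitySpec::getName)
        .collect(Collectors.toList());
Map<String, Long> counts =
    entitySearchService.aggregateByValue(entityNames, "platform", null, 10);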
a/smoke-test/.gitignore b/smoke-test/.gitignore index 44d3f620a19372..b8af2eef535a0b 100644 --- a/smoke-test/.gitignore +++ b/smoke-test/.gitignore @@ -127,6 +127,7 @@ venv.bak/ .mypy_cache/ .dmypy.json dmypy.json +.ruff_cache/ # Pyre type checker .pyre/ diff --git a/smoke-test/build.gradle b/smoke-test/build.gradle index 1614a4b8527dc9..a6f3cd793ddd63 100644 --- a/smoke-test/build.gradle +++ b/smoke-test/build.gradle @@ -1,5 +1,10 @@ apply plugin: 'com.github.node-gradle.node' +ext { + python_executable = 'python3' + venv_name = 'venv' +} + node { // If true, it will download node using above parameters. @@ -38,4 +43,32 @@ task yarnInstall(type: YarnTask) { println "Root directory: ${project.rootDir}"; environment = ['NODE_OPTIONS': '--openssl-legacy-provider'] args = ['install', '--cwd', "${project.rootDir}/smoke-test/tests/cypress"] -} \ No newline at end of file +} + +task installDev(type: Exec) { + inputs.file file('pyproject.toml') + inputs.file file('requirements.txt') + outputs.file("${venv_name}/.build_install_dev_sentinel") + commandLine 'bash', '-x', '-c', + "${python_executable} -m venv ${venv_name} && " + + "${venv_name}/bin/pip install --upgrade pip wheel setuptools && " + + "${venv_name}/bin/pip install -r requirements.txt && " + + "touch ${venv_name}/.build_install_dev_sentinel" +} + +task lint(type: Exec, dependsOn: installDev) { + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "black --check --diff tests/ && " + + "isort --check --diff tests/ && " + + "ruff --statistics tests/ && " + + "mypy tests/" +} +task lintFix(type: Exec, dependsOn: installDev) { + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "black tests/ && " + + "isort tests/ && " + + "ruff --fix tests/ && " + + "mypy tests/" +} diff --git a/smoke-test/pyproject.toml b/smoke-test/pyproject.toml new file mode 100644 index 00000000000000..c7745d0e9a3640 --- /dev/null +++ b/smoke-test/pyproject.toml @@ -0,0 +1,46 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "smoke-test" +version = "0.0.0" +description = "" +authors = [ + { name="Acryl Data", email="eng@acryl.io" }, +] +requires-python = ">=3.9" + + +[tool.black] +extend-exclude = ''' +# A regex preceded with ^/ will apply only to files and directories +# in the root of the project. +tmp +venv +''' +include = '\.pyi?$' +target-version = ['py310'] + +[tool.isort] +profile = 'black' + +[tool.ruff] +ignore = [ + 'E501', # Ignore line length, since black handles that. + 'D203', # Ignore 1 blank line required before class docstring. 
+] + +[tool.mypy] +exclude = "^(venv/|build/|dist/)" +ignore_missing_imports = true +namespace_packages = false +check_untyped_defs = true +disallow_untyped_decorators = true +warn_unused_configs = true +# eventually we'd like to enable these +disallow_incomplete_defs = false +disallow_untyped_defs = false + +[tool.pyright] +extraPaths = ['tests'] diff --git a/smoke-test/requirements.txt b/smoke-test/requirements.txt index e37de9caddc696..c5d43163dff5d5 100644 --- a/smoke-test/requirements.txt +++ b/smoke-test/requirements.txt @@ -7,4 +7,12 @@ slack-sdk==3.18.1 aiohttp joblib pytest-xdist -networkx \ No newline at end of file +networkx +# libaries for linting below this +black==23.7.0 +isort==5.12.0 +mypy==1.5.1 +ruff==0.0.287 +# stub version are copied from metadata-ingestion/setup.py and that should be the source of truth +types-requests>=2.28.11.6,<=2.31.0.3 +types-PyYAML diff --git a/smoke-test/run-quickstart.sh b/smoke-test/run-quickstart.sh index cd747321ad6023..05c321566d54a6 100755 --- a/smoke-test/run-quickstart.sh +++ b/smoke-test/run-quickstart.sh @@ -4,10 +4,8 @@ set -euxo pipefail DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" cd "$DIR" -python3 -m venv venv +../gradlew :smoke-test:installDev source venv/bin/activate -pip install --upgrade pip wheel setuptools -pip install -r requirements.txt mkdir -p ~/.datahub/plugins/frontend/auth/ echo "test_user:test_pass" >> ~/.datahub/plugins/frontend/auth/user.props diff --git a/smoke-test/tests/aspect_generators/timeseries/dataset_profile_gen.py b/smoke-test/tests/aspect_generators/timeseries/dataset_profile_gen.py index bc22b74ed185cd..f808e7a58a3292 100644 --- a/smoke-test/tests/aspect_generators/timeseries/dataset_profile_gen.py +++ b/smoke-test/tests/aspect_generators/timeseries/dataset_profile_gen.py @@ -1,8 +1,10 @@ from typing import Iterable -from datahub.metadata.schema_classes import (DatasetFieldProfileClass, - DatasetProfileClass, - TimeWindowSizeClass) +from datahub.metadata.schema_classes import ( + DatasetFieldProfileClass, + DatasetProfileClass, + TimeWindowSizeClass, +) from tests.utils import get_timestampmillis_at_start_of_day diff --git a/smoke-test/tests/assertions/assertions_test.py b/smoke-test/tests/assertions/assertions_test.py index 48f3564e6cd971..78ba68a840f0d3 100644 --- a/smoke-test/tests/assertions/assertions_test.py +++ b/smoke-test/tests/assertions/assertions_test.py @@ -7,24 +7,30 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext, RecordEnvelope from datahub.ingestion.api.sink import NoopWriteCallback -from datahub.ingestion.sink.file import FileSink, FileSinkConfig -from datahub.metadata.com.linkedin.pegasus2avro.assertion import \ - AssertionStdAggregation -from datahub.metadata.schema_classes import (AssertionInfoClass, - AssertionResultClass, - AssertionResultTypeClass, - AssertionRunEventClass, - AssertionRunStatusClass, - AssertionStdOperatorClass, - AssertionTypeClass, - DatasetAssertionInfoClass, - DatasetAssertionScopeClass, - PartitionSpecClass, - PartitionTypeClass) +from datahub.ingestion.sink.file import FileSink +from datahub.metadata.com.linkedin.pegasus2avro.assertion import AssertionStdAggregation +from datahub.metadata.schema_classes import ( + AssertionInfoClass, + AssertionResultClass, + AssertionResultTypeClass, + AssertionRunEventClass, + AssertionRunStatusClass, + AssertionStdOperatorClass, + AssertionTypeClass, + DatasetAssertionInfoClass, + DatasetAssertionScopeClass, + 
PartitionSpecClass, + PartitionTypeClass, +) import requests_wrapper as requests -from tests.utils import (delete_urns_from_file, get_gms_url, get_sleep_info, - ingest_file_via_rest, wait_for_healthcheck_util) +from tests.utils import ( + delete_urns_from_file, + get_gms_url, + get_sleep_info, + ingest_file_via_rest, + wait_for_healthcheck_util, +) restli_default_headers = { "X-RestLi-Protocol-Version": "2.0.0", @@ -210,7 +216,7 @@ def create_test_data(test_file): ) fileSink: FileSink = FileSink.create( - FileSinkConfig(filename=test_file), ctx=PipelineContext(run_id="test-file") + {"filename": test_file}, ctx=PipelineContext(run_id="test-file") ) for mcp in [mcp1, mcp2, mcp3, mcp4, mcp5, mcp6, mcp7]: fileSink.write_record_async( diff --git a/smoke-test/tests/browse/browse_test.py b/smoke-test/tests/browse/browse_test.py index 550f0062d5a398..adeb6775a150d7 100644 --- a/smoke-test/tests/browse/browse_test.py +++ b/smoke-test/tests/browse/browse_test.py @@ -1,10 +1,6 @@ -import time - import pytest -import requests_wrapper as requests -from tests.utils import (delete_urns_from_file, get_frontend_url, - ingest_file_via_rest) +from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest TEST_DATASET_1_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,test-browse-1,PROD)" TEST_DATASET_2_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,test-browse-2,PROD)" @@ -29,7 +25,6 @@ def test_healthchecks(wait_for_healthchecks): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_get_browse_paths(frontend_session, ingest_cleanup_data): - # Iterate through each browse path, starting with the root get_browse_paths_query = """query browse($input: BrowseInput!) {\n diff --git a/smoke-test/tests/cli/datahub-cli.py b/smoke-test/tests/cli/datahub_cli.py similarity index 78% rename from smoke-test/tests/cli/datahub-cli.py rename to smoke-test/tests/cli/datahub_cli.py index c3db6028efceb8..d1620d03c88b24 100644 --- a/smoke-test/tests/cli/datahub-cli.py +++ b/smoke-test/tests/cli/datahub_cli.py @@ -1,10 +1,7 @@ import json -from time import sleep import pytest -from datahub.cli.cli_utils import (get_aspects_for_entity, guess_entity_type, - post_entity) -from datahub.cli.ingest_cli import get_session_and_host, rollback +from datahub.cli.cli_utils import get_aspects_for_entity, get_session_and_host from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync @@ -27,8 +24,8 @@ def test_setup(): session, gms_host = get_session_and_host() - assert "browsePaths" not in get_aspects_for_entity( - entity_urn=dataset_urn, aspects=["browsePaths"], typed=False + assert "browsePathsV2" not in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["browsePathsV2"], typed=False ) assert "editableDatasetProperties" not in get_aspects_for_entity( entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False @@ -39,8 +36,8 @@ def test_setup(): ).config.run_id print("Setup ingestion id: " + ingested_dataset_run_id) - assert "browsePaths" in get_aspects_for_entity( - entity_urn=dataset_urn, aspects=["browsePaths"], typed=False + assert "browsePathsV2" in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["browsePathsV2"], typed=False ) yield @@ -61,8 +58,8 @@ def test_setup(): ), ) - assert "browsePaths" not in get_aspects_for_entity( - entity_urn=dataset_urn, aspects=["browsePaths"], typed=False + assert "browsePathsV2" not in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["browsePathsV2"], typed=False ) assert "editableDatasetProperties" not in 
get_aspects_for_entity( entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False @@ -82,8 +79,8 @@ def test_rollback_editable(): print("Ingested dataset id:", ingested_dataset_run_id) # Assert that second data ingestion worked - assert "browsePaths" in get_aspects_for_entity( - entity_urn=dataset_urn, aspects=["browsePaths"], typed=False + assert "browsePathsV2" in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["browsePathsV2"], typed=False ) # Make editable change @@ -114,6 +111,6 @@ def test_rollback_editable(): entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False ) # But first ingestion aspects should not be present - assert "browsePaths" not in get_aspects_for_entity( - entity_urn=dataset_urn, typed=False + assert "browsePathsV2" not in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["browsePathsV2"], typed=False ) diff --git a/smoke-test/tests/cli/datahub_graph_test.py b/smoke-test/tests/cli/datahub_graph_test.py index 17c8924fb0998b..1e324477adb6b6 100644 --- a/smoke-test/tests/cli/datahub_graph_test.py +++ b/smoke-test/tests/cli/datahub_graph_test.py @@ -1,11 +1,14 @@ import pytest import tenacity from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph -from datahub.metadata.schema_classes import (KafkaSchemaClass, - SchemaMetadataClass) +from datahub.metadata.schema_classes import KafkaSchemaClass, SchemaMetadataClass -from tests.utils import (delete_urns_from_file, get_gms_url, get_sleep_info, - ingest_file_via_rest) +from tests.utils import ( + delete_urns_from_file, + get_gms_url, + get_sleep_info, + ingest_file_via_rest, +) sleep_sec, sleep_times = get_sleep_info() diff --git a/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py b/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py index 106da7cd8d71e5..cfbbacea1ed79e 100644 --- a/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py +++ b/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py @@ -2,7 +2,6 @@ import logging import sys import tempfile -import time from json import JSONDecodeError from typing import Any, Dict, List, Optional @@ -12,11 +11,8 @@ from datahub.entrypoints import datahub from datahub.metadata.schema_classes import DatasetProfileClass -import requests_wrapper as requests -from tests.aspect_generators.timeseries.dataset_profile_gen import \ - gen_dataset_profiles -from tests.utils import (get_strftime_from_timestamp_millis, - wait_for_writes_to_sync) +from tests.aspect_generators.timeseries.dataset_profile_gen import gen_dataset_profiles +from tests.utils import get_strftime_from_timestamp_millis, wait_for_writes_to_sync logger = logging.getLogger(__name__) diff --git a/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py b/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py index e962b1a5cafd6a..aa7c90cc6f988b 100644 --- a/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py +++ b/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py @@ -1,5 +1,4 @@ import json -import time from typing import Any, Dict, List, Optional import datahub.emitter.mce_builder as builder @@ -8,7 +7,6 @@ from datahub.entrypoints import datahub from datahub.metadata.schema_classes import DatasetProfileClass -import requests_wrapper as requests from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync runner = CliRunner(mix_stderr=False) diff --git a/smoke-test/tests/managed-ingestion/__init__.py b/smoke-test/tests/cli/user_groups_cmd/__init__.py similarity index 100% rename from 
smoke-test/tests/managed-ingestion/__init__.py rename to smoke-test/tests/cli/user_groups_cmd/__init__.py diff --git a/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py b/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py index 7b986d3be0444d..555687c98ed3e2 100644 --- a/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py +++ b/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py @@ -1,16 +1,15 @@ import json import sys import tempfile -import time from typing import Any, Dict, Iterable, List +import pytest import yaml from click.testing import CliRunner, Result from datahub.api.entities.corpgroup.corpgroup import CorpGroup from datahub.entrypoints import datahub from datahub.ingestion.graph.client import DataHubGraph, get_default_graph -import requests_wrapper as requests from tests.utils import wait_for_writes_to_sync runner = CliRunner(mix_stderr=False) @@ -43,7 +42,6 @@ def gen_datahub_groups(num_groups: int) -> Iterable[CorpGroup]: description=f"The Group {i}", picture_link=f"https://images.google.com/group{i}.jpg", slack=f"@group{i}", - admins=["user1"], members=["user2"], ) yield group @@ -65,7 +63,7 @@ def get_group_ownership(user_urn: str) -> List[str]: graph = get_default_graph() entities = graph.get_related_entities( entity_urn=user_urn, - relationship_types="OwnedBy", + relationship_types=["OwnedBy"], direction=DataHubGraph.RelationshipDirection.INCOMING, ) return [entity.urn for entity in entities] @@ -75,12 +73,13 @@ def get_group_membership(user_urn: str) -> List[str]: graph = get_default_graph() entities = graph.get_related_entities( entity_urn=user_urn, - relationship_types="IsMemberOfGroup", + relationship_types=["IsMemberOfGroup"], direction=DataHubGraph.RelationshipDirection.OUTGOING, ) return [entity.urn for entity in entities] +@pytest.mark.skip(reason="Functionality and test needs to be validated for correctness") def test_group_upsert(wait_for_healthchecks: Any) -> None: num_groups: int = 10 for i, datahub_group in enumerate(gen_datahub_groups(num_groups)): diff --git a/smoke-test/tests/consistency_utils.py b/smoke-test/tests/consistency_utils.py index 607835bf3649c0..1af9399c2dc9ae 100644 --- a/smoke-test/tests/consistency_utils.py +++ b/smoke-test/tests/consistency_utils.py @@ -30,7 +30,7 @@ def wait_for_writes_to_sync(max_timeout_in_sec: int = 120) -> None: result = str(completed_process.stdout) lines = result.splitlines() - lag_values = [int(l) for l in lines if l != ""] + lag_values = [int(line) for line in lines if line != ""] maximum_lag = max(lag_values) if maximum_lag == 0: lag_zero = True diff --git a/smoke-test/tests/containers/containers_test.py b/smoke-test/tests/containers/containers_test.py index 227645a87d30ad..4997102702e57d 100644 --- a/smoke-test/tests/containers/containers_test.py +++ b/smoke-test/tests/containers/containers_test.py @@ -1,7 +1,6 @@ import pytest -from tests.utils import (delete_urns_from_file, get_frontend_url, - ingest_file_via_rest) +from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest @pytest.fixture(scope="module", autouse=False) @@ -21,12 +20,10 @@ def test_healthchecks(wait_for_healthchecks): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_get_full_container(frontend_session, ingest_cleanup_data): - container_urn = "urn:li:container:SCHEMA" container_name = "datahub_schema" container_description = "The DataHub schema" editable_container_description = "custom description" - dataset_urn = "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)" # Get a 
full container get_container_json = { @@ -129,7 +126,6 @@ def test_get_full_container(frontend_session, ingest_cleanup_data): @pytest.mark.dependency(depends=["test_healthchecks", "test_get_full_container"]) def test_get_parent_container(frontend_session, ingest_cleanup_data): - dataset_urn = "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)" # Get count of existing secrets @@ -165,7 +161,6 @@ def test_get_parent_container(frontend_session, ingest_cleanup_data): @pytest.mark.dependency(depends=["test_healthchecks", "test_get_full_container"]) def test_update_container(frontend_session, ingest_cleanup_data): - container_urn = "urn:li:container:SCHEMA" new_tag = "urn:li:tag:Test" @@ -227,7 +222,7 @@ def test_update_container(frontend_session, ingest_cleanup_data): "ownerUrn": new_owner, "resourceUrn": container_urn, "ownerEntityType": "CORP_USER", - "ownershipTypeUrn": "urn:li:ownershipType:__system__technical_owner" + "ownershipTypeUrn": "urn:li:ownershipType:__system__technical_owner", } }, } diff --git a/smoke-test/tests/cypress/cypress/e2e/browse/browseV2.js b/smoke-test/tests/cypress/cypress/e2e/browse/browseV2.js index f45edc5fa04819..d951b15d4a5920 100644 --- a/smoke-test/tests/cypress/cypress/e2e/browse/browseV2.js +++ b/smoke-test/tests/cypress/cypress/e2e/browse/browseV2.js @@ -111,6 +111,7 @@ describe("search", () => { ); }); + /* Legacy Browse Path Disabled when showBrowseV2 = `true` it("should take you to the old browse experience when clicking on browse path from entity profile page when browse flag is off", () => { setBrowseFeatureFlag(false); cy.login(); @@ -122,6 +123,7 @@ describe("search", () => { }); cy.url().should("include", "/browse/dataset/prod/bigquery/cypress_project"); }); +*/ it("should take you to the unified search and browse experience when clicking entity type on home page with the browse flag on", () => { setBrowseFeatureFlag(true); diff --git a/smoke-test/tests/cypress/integration_test.py b/smoke-test/tests/cypress/integration_test.py index 4ad2bc53fa87d9..4124ced9994461 100644 --- a/smoke-test/tests/cypress/integration_test.py +++ b/smoke-test/tests/cypress/integration_test.py @@ -5,11 +5,17 @@ import pytest -from tests.setup.lineage.ingest_time_lineage import (get_time_lineage_urns, - ingest_time_lineage) -from tests.utils import (create_datahub_step_state_aspects, delete_urns, - delete_urns_from_file, get_admin_username, - ingest_file_via_rest) +from tests.setup.lineage.ingest_time_lineage import ( + get_time_lineage_urns, + ingest_time_lineage, +) +from tests.utils import ( + create_datahub_step_state_aspects, + delete_urns, + delete_urns_from_file, + get_admin_username, + ingest_file_via_rest, +) CYPRESS_TEST_DATA_DIR = "tests/cypress" @@ -178,8 +184,10 @@ def test_run_cypress(frontend_session, wait_for_healthchecks): print(f"test strategy is {test_strategy}") test_spec_arg = "" if test_strategy is not None: - specs = _get_spec_map(strategy_spec_map.get(test_strategy)) - test_spec_arg = f" --spec '{specs}' " + specs = strategy_spec_map.get(test_strategy) + assert specs is not None + specs_str = _get_spec_map(specs) + test_spec_arg = f" --spec '{specs_str}' " print("Running Cypress tests with command") command = f"NO_COLOR=1 npx cypress run {record_arg} {test_spec_arg} {tag_arg}" @@ -194,6 +202,8 @@ def test_run_cypress(frontend_session, wait_for_healthchecks): stderr=subprocess.PIPE, cwd=f"{CYPRESS_TEST_DATA_DIR}", ) + assert proc.stdout is not None + assert proc.stderr is not None stdout = proc.stdout.read() stderr = proc.stderr.read() 
return_code = proc.wait() diff --git a/smoke-test/tests/tags-and-terms/__init__.py b/smoke-test/tests/dataproduct/queries/__init__.py similarity index 100% rename from smoke-test/tests/tags-and-terms/__init__.py rename to smoke-test/tests/dataproduct/queries/__init__.py diff --git a/smoke-test/tests/dataproduct/test_dataproduct.py b/smoke-test/tests/dataproduct/test_dataproduct.py index baef1cb1cb3ba0..0d0141e9111c0d 100644 --- a/smoke-test/tests/dataproduct/test_dataproduct.py +++ b/smoke-test/tests/dataproduct/test_dataproduct.py @@ -1,8 +1,6 @@ import logging import os -import subprocess import tempfile -import time from random import randint from typing import List @@ -14,17 +12,24 @@ from datahub.ingestion.api.sink import NoopWriteCallback from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph from datahub.ingestion.sink.file import FileSink, FileSinkConfig -from datahub.metadata.schema_classes import (DataProductPropertiesClass, - DatasetPropertiesClass, - DomainPropertiesClass, - DomainsClass) +from datahub.metadata.schema_classes import ( + DataProductPropertiesClass, + DatasetPropertiesClass, + DomainPropertiesClass, + DomainsClass, +) from datahub.utilities.urns.urn import Urn +from tests.utils import ( + delete_urns_from_file, + get_gms_url, + get_sleep_info, + ingest_file_via_rest, + wait_for_writes_to_sync, +) + logger = logging.getLogger(__name__) -import requests_wrapper as requests -from tests.utils import (delete_urns_from_file, get_gms_url, get_sleep_info, - ingest_file_via_rest, wait_for_writes_to_sync) start_index = randint(10, 10000) dataset_urns = [ @@ -82,7 +87,6 @@ def create_test_data(filename: str): @pytest.fixture(scope="module", autouse=False) def ingest_cleanup_data(request): - new_file, filename = tempfile.mkstemp() try: create_test_data(filename) @@ -160,7 +164,6 @@ def validate_relationships( ) @pytest.mark.dependency(depends=["test_healthchecks"]) def test_create_data_product(ingest_cleanup_data): - domain_urn = Urn("domain", [datahub_guid({"name": "Marketing"})]) graph: DataHubGraph = DataHubGraph(config=DatahubClientConfig(server=get_gms_url())) result = graph.execute_graphql( @@ -191,6 +194,7 @@ def test_create_data_product(ingest_cleanup_data): assert result["batchSetDataProduct"] is True data_product_props = graph.get_aspect(data_product_urn, DataProductPropertiesClass) assert data_product_props is not None + assert data_product_props.assets is not None assert data_product_props.description == "Test Description" assert data_product_props.name == "Test Data Product" assert len(data_product_props.assets) == len(dataset_urns) diff --git a/smoke-test/tests/delete/delete_test.py b/smoke-test/tests/delete/delete_test.py index d920faaf3a89a4..3a80e05d0cc4b5 100644 --- a/smoke-test/tests/delete/delete_test.py +++ b/smoke-test/tests/delete/delete_test.py @@ -1,14 +1,16 @@ import json import os -from time import sleep import pytest -from datahub.cli.cli_utils import get_aspects_for_entity -from datahub.cli.ingest_cli import get_session_and_host +from datahub.cli.cli_utils import get_aspects_for_entity, get_session_and_host -from tests.utils import (delete_urns_from_file, get_datahub_graph, - ingest_file_via_rest, wait_for_healthcheck_util, - wait_for_writes_to_sync) +from tests.utils import ( + delete_urns_from_file, + get_datahub_graph, + ingest_file_via_rest, + wait_for_healthcheck_util, + wait_for_writes_to_sync, +) # Disable telemetry os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false" @@ -39,8 +41,8 @@ def test_setup(): session, gms_host 
= get_session_and_host() try: - assert "browsePaths" not in get_aspects_for_entity( - entity_urn=dataset_urn, aspects=["browsePaths"], typed=False + assert "institutionalMemory" not in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["institutionalMemory"], typed=False ) assert "editableDatasetProperties" not in get_aspects_for_entity( entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False @@ -53,8 +55,8 @@ def test_setup(): "tests/delete/cli_test_data.json" ).config.run_id - assert "browsePaths" in get_aspects_for_entity( - entity_urn=dataset_urn, aspects=["browsePaths"], typed=False + assert "institutionalMemory" in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["institutionalMemory"], typed=False ) yield @@ -68,8 +70,8 @@ def test_setup(): wait_for_writes_to_sync() - assert "browsePaths" not in get_aspects_for_entity( - entity_urn=dataset_urn, aspects=["browsePaths"], typed=False + assert "institutionalMemory" not in get_aspects_for_entity( + entity_urn=dataset_urn, aspects=["institutionalMemory"], typed=False ) assert "editableDatasetProperties" not in get_aspects_for_entity( entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False diff --git a/smoke-test/tests/deprecation/deprecation_test.py b/smoke-test/tests/deprecation/deprecation_test.py index a8969804d03d7b..ae3890aeda9561 100644 --- a/smoke-test/tests/deprecation/deprecation_test.py +++ b/smoke-test/tests/deprecation/deprecation_test.py @@ -1,7 +1,11 @@ import pytest -from tests.utils import (delete_urns_from_file, get_frontend_url, get_root_urn, - ingest_file_via_rest) +from tests.utils import ( + delete_urns_from_file, + get_frontend_url, + get_root_urn, + ingest_file_via_rest, +) @pytest.fixture(scope="module", autouse=True) diff --git a/smoke-test/tests/domains/domains_test.py b/smoke-test/tests/domains/domains_test.py index fa8c918e3cbe16..1d83b032d7a8fe 100644 --- a/smoke-test/tests/domains/domains_test.py +++ b/smoke-test/tests/domains/domains_test.py @@ -1,8 +1,13 @@ import pytest import tenacity -from tests.utils import (delete_urns_from_file, get_frontend_url, get_gms_url, - get_sleep_info, ingest_file_via_rest) +from tests.utils import ( + delete_urns_from_file, + get_frontend_url, + get_gms_url, + get_sleep_info, + ingest_file_via_rest, +) sleep_sec, sleep_times = get_sleep_info() @@ -26,7 +31,6 @@ def test_healthchecks(wait_for_healthchecks): stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) def _ensure_more_domains(frontend_session, list_domains_json, before_count): - # Get new count of Domains response = frontend_session.post( f"{get_frontend_url()}/api/v2/graphql", json=list_domains_json @@ -47,7 +51,6 @@ def _ensure_more_domains(frontend_session, list_domains_json, before_count): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_create_list_get_domain(frontend_session): - # Setup: Delete the domain (if exists) response = frontend_session.post( f"{get_gms_url()}/entities?action=delete", json={"urn": "urn:li:domain:test id"} @@ -167,7 +170,6 @@ def test_create_list_get_domain(frontend_session): @pytest.mark.dependency(depends=["test_healthchecks", "test_create_list_get_domain"]) def test_set_unset_domain(frontend_session, ingest_cleanup_data): - # Set and Unset a Domain for a dataset. Note that this doesn't test for adding domains to charts, dashboards, charts, & jobs. 
dataset_urn = ( "urn:li:dataset:(urn:li:dataPlatform:kafka,test-tags-terms-sample-kafka,PROD)" diff --git a/smoke-test/tests/lineage/test_lineage.py b/smoke-test/tests/lineage/test_lineage.py index 52d61d666c7d9d..9cd98d1245bbbb 100644 --- a/smoke-test/tests/lineage/test_lineage.py +++ b/smoke-test/tests/lineage/test_lineage.py @@ -49,6 +49,7 @@ from datahub.utilities.urns.dataset_urn import DatasetUrn from datahub.utilities.urns.urn import Urn from pydantic import BaseModel, validator + from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync logger = logging.getLogger(__name__) @@ -85,7 +86,6 @@ def ingest_tableau_cll_via_rest(wait_for_healthchecks) -> None: ingest_file_via_rest( "tests/lineage/tableau_cll_mcps.json", ) - yield def search_across_lineage( @@ -499,6 +499,7 @@ def get_lineage_mcps(self) -> Iterable[MetadataChangeProposalWrapper]: def get_lineage_mcps_for_hop( self, hop_index: int ) -> Iterable[MetadataChangeProposalWrapper]: + assert self.expectations is not None if self.lineage_style == Scenario.LineageStyle.DATASET_JOB_DATASET: fine_grained_lineage = FineGrainedLineage( upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, @@ -722,11 +723,9 @@ def cleanup(self, delete_agent: DeleteAgent) -> None: for urn in self._generated_urns: delete_agent.delete_entity(urn) - def generate_expectation(self, query: ImpactQuery) -> LineageExpectation: - return self.expectations.generate_query_expectation_pairs(query) - def test_expectation(self, graph: DataHubGraph) -> bool: print("Testing expectation...") + assert self.expectations is not None try: for hop_index in range(self.num_hops): for dataset_urn in self.get_upstream_dataset_urns(hop_index): diff --git a/smoke-test/tests/managed_ingestion/__init__.py b/smoke-test/tests/managed_ingestion/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/smoke-test/tests/managed-ingestion/managed_ingestion_test.py b/smoke-test/tests/managed_ingestion/managed_ingestion_test.py similarity index 98% rename from smoke-test/tests/managed-ingestion/managed_ingestion_test.py rename to smoke-test/tests/managed_ingestion/managed_ingestion_test.py index 6d95f731f32b1d..4a4bdca2e45926 100644 --- a/smoke-test/tests/managed-ingestion/managed_ingestion_test.py +++ b/smoke-test/tests/managed_ingestion/managed_ingestion_test.py @@ -3,8 +3,7 @@ import pytest import tenacity -from tests.utils import (get_frontend_url, get_sleep_info, - wait_for_healthcheck_util) +from tests.utils import get_frontend_url, get_sleep_info, wait_for_healthcheck_util sleep_sec, sleep_times = get_sleep_info() @@ -206,7 +205,6 @@ def _ensure_execution_request_present(frontend_session, execution_request_urn): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_create_list_get_remove_secret(frontend_session): - # Get count of existing secrets json_q = { "query": """query listSecrets($input: ListSecretsInput!) {\n @@ -265,7 +263,13 @@ def test_create_list_get_remove_secret(frontend_session): "query": """mutation updateSecret($input: UpdateSecretInput!) 
{\n updateSecret(input: $input) }""", - "variables": {"input": {"urn": secret_urn, "name": "SMOKE_TEST", "value": "mytestvalue.updated"}}, + "variables": { + "input": { + "urn": secret_urn, + "name": "SMOKE_TEST", + "value": "mytestvalue.updated", + } + }, } response = frontend_session.post( @@ -333,7 +337,6 @@ def test_create_list_get_remove_secret(frontend_session): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_create_list_get_remove_ingestion_source(frontend_session): - # Get count of existing ingestion sources res_data = _get_ingestionSources(frontend_session) diff --git a/smoke-test/tests/patch/common_patch_tests.py b/smoke-test/tests/patch/common_patch_tests.py index f1d6abf5da794c..9530edb760c137 100644 --- a/smoke-test/tests/patch/common_patch_tests.py +++ b/smoke-test/tests/patch/common_patch_tests.py @@ -2,21 +2,26 @@ import uuid from typing import Dict, Optional, Type -from datahub.emitter.mce_builder import (make_tag_urn, make_term_urn, - make_user_urn) +from datahub.emitter.mce_builder import make_tag_urn, make_term_urn, make_user_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_patch_builder import MetadataPatchProposal from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig -from datahub.metadata.schema_classes import (AuditStampClass, GlobalTagsClass, - GlossaryTermAssociationClass, - GlossaryTermsClass, OwnerClass, - OwnershipClass, - OwnershipTypeClass, - TagAssociationClass, _Aspect) +from datahub.metadata.schema_classes import ( + AuditStampClass, + GlobalTagsClass, + GlossaryTermAssociationClass, + GlossaryTermsClass, + OwnerClass, + OwnershipClass, + OwnershipTypeClass, + TagAssociationClass, + _Aspect, +) def helper_test_entity_terms_patch( - test_entity_urn: str, patch_builder_class: Type[MetadataPatchProposal] + test_entity_urn: str, + patch_builder_class: Type[MetadataPatchProposal], ): def get_terms(graph, entity_urn): return graph.get_aspect( @@ -57,9 +62,9 @@ def get_terms(graph, entity_urn): assert terms_read.terms[1].urn == new_term.urn assert terms_read.terms[1].context is None - for patch_mcp in ( - patch_builder_class(test_entity_urn).remove_term(term_urn).build() - ): + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "remove_term") + for patch_mcp in patch_builder.remove_term(term_urn).build(): graph.emit_mcp(patch_mcp) pass @@ -69,9 +74,9 @@ def get_terms(graph, entity_urn): def helper_test_dataset_tags_patch( - test_entity_urn: str, patch_builder_class: Type[MetadataPatchProposal] + test_entity_urn: str, + patch_builder_class: Type[MetadataPatchProposal], ): - tag_urn = make_tag_urn(tag=f"testTag-{uuid.uuid4()}") tag_association = TagAssociationClass(tag=tag_urn, context="test") @@ -80,10 +85,11 @@ def helper_test_dataset_tags_patch( with DataHubGraph(DataHubGraphConfig()) as graph: graph.emit_mcp(mcpw) - tags_read: GlobalTagsClass = graph.get_aspect( + tags_read = graph.get_aspect( entity_urn=test_entity_urn, aspect_type=GlobalTagsClass, ) + assert tags_read is not None assert tags_read.tags[0].tag == tag_urn assert tags_read.tags[0].context == "test" @@ -98,14 +104,15 @@ def helper_test_dataset_tags_patch( entity_urn=test_entity_urn, aspect_type=GlobalTagsClass, ) + assert tags_read is not None assert tags_read.tags[0].tag == tag_urn assert tags_read.tags[0].context == "test" assert tags_read.tags[1].tag == new_tag.tag assert tags_read.tags[1].context is None - for patch_mcp in ( - 
patch_builder_class(test_entity_urn).remove_tag(tag_urn).build() - ): + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "remove_tag") + for patch_mcp in patch_builder.remove_tag(tag_urn).build(): graph.emit_mcp(patch_mcp) pass @@ -113,12 +120,14 @@ def helper_test_dataset_tags_patch( entity_urn=test_entity_urn, aspect_type=GlobalTagsClass, ) + assert tags_read is not None assert len(tags_read.tags) == 1 assert tags_read.tags[0].tag == new_tag.tag def helper_test_ownership_patch( - test_entity_urn: str, patch_builder_class: Type[MetadataPatchProposal] + test_entity_urn: str, + patch_builder_class: Type[MetadataPatchProposal], ): owner_to_set = OwnerClass( owner=make_user_urn("jdoe"), type=OwnershipTypeClass.DATAOWNER @@ -133,27 +142,26 @@ def helper_test_ownership_patch( ) with DataHubGraph(DataHubGraphConfig()) as graph: graph.emit_mcp(mcpw) - owner: OwnershipClass = graph.get_aspect( - entity_urn=test_entity_urn, aspect_type=OwnershipClass - ) + owner = graph.get_aspect(entity_urn=test_entity_urn, aspect_type=OwnershipClass) + assert owner is not None assert owner.owners[0].owner == make_user_urn("jdoe") - for patch_mcp in ( - patch_builder_class(test_entity_urn).add_owner(owner_to_add).build() - ): + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "add_owner") + for patch_mcp in patch_builder.add_owner(owner_to_add).build(): graph.emit_mcp(patch_mcp) owner = graph.get_aspect(entity_urn=test_entity_urn, aspect_type=OwnershipClass) + assert owner is not None assert len(owner.owners) == 2 - for patch_mcp in ( - patch_builder_class(test_entity_urn) - .remove_owner(make_user_urn("gdoe")) - .build() - ): + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "remove_owner") + for patch_mcp in patch_builder.remove_owner(make_user_urn("gdoe")).build(): graph.emit_mcp(patch_mcp) owner = graph.get_aspect(entity_urn=test_entity_urn, aspect_type=OwnershipClass) + assert owner is not None assert len(owner.owners) == 1 assert owner.owners[0].owner == make_user_urn("jdoe") @@ -172,6 +180,7 @@ def get_custom_properties( aspect_type=custom_properties_aspect_class, ) assert custom_properties_aspect + assert hasattr(custom_properties_aspect, "customProperties") return custom_properties_aspect.customProperties base_property_map = {"base_property": "base_property_value"} @@ -195,6 +204,7 @@ def get_custom_properties( } entity_patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(entity_patch_builder, "add_custom_property") for k, v in new_properties.items(): entity_patch_builder.add_custom_property(k, v) @@ -212,11 +222,9 @@ def get_custom_properties( assert custom_properties[k] == v # Remove property - for patch_mcp in ( - patch_builder_class(test_entity_urn) - .remove_custom_property("test_property") - .build() - ): + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "remove_custom_property") + for patch_mcp in patch_builder.remove_custom_property("test_property").build(): graph.emit_mcp(patch_mcp) custom_properties = get_custom_properties(graph, test_entity_urn) @@ -230,11 +238,9 @@ def get_custom_properties( assert custom_properties[k] == v # Replace custom properties - for patch_mcp in ( - patch_builder_class(test_entity_urn) - .set_custom_properties(new_properties) - .build() - ): + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "set_custom_properties") + for patch_mcp in 
patch_builder.set_custom_properties(new_properties).build(): graph.emit_mcp(patch_mcp) custom_properties = get_custom_properties(graph, test_entity_urn) diff --git a/smoke-test/tests/patch/test_datajob_patches.py b/smoke-test/tests/patch/test_datajob_patches.py index 342d5d683228a7..ce63d4571d6c81 100644 --- a/smoke-test/tests/patch/test_datajob_patches.py +++ b/smoke-test/tests/patch/test_datajob_patches.py @@ -3,14 +3,19 @@ from datahub.emitter.mce_builder import make_data_job_urn, make_dataset_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig -from datahub.metadata.schema_classes import (DataJobInfoClass, - DataJobInputOutputClass, - EdgeClass) +from datahub.metadata.schema_classes import ( + DataJobInfoClass, + DataJobInputOutputClass, + EdgeClass, +) from datahub.specific.datajob import DataJobPatchBuilder from tests.patch.common_patch_tests import ( - helper_test_custom_properties_patch, helper_test_dataset_tags_patch, - helper_test_entity_terms_patch, helper_test_ownership_patch) + helper_test_custom_properties_patch, + helper_test_dataset_tags_patch, + helper_test_entity_terms_patch, + helper_test_ownership_patch, +) def _make_test_datajob_urn( @@ -76,10 +81,12 @@ def test_datajob_inputoutput_dataset_patch(wait_for_healthchecks): with DataHubGraph(DataHubGraphConfig()) as graph: graph.emit_mcp(mcpw) - inputoutput_lineage_read: DataJobInputOutputClass = graph.get_aspect( + inputoutput_lineage_read = graph.get_aspect( entity_urn=datajob_urn, aspect_type=DataJobInputOutputClass, ) + assert inputoutput_lineage_read is not None + assert inputoutput_lineage_read.inputDatasetEdges is not None assert ( inputoutput_lineage_read.inputDatasetEdges[0].destinationUrn == other_dataset_urn @@ -97,6 +104,8 @@ def test_datajob_inputoutput_dataset_patch(wait_for_healthchecks): entity_urn=datajob_urn, aspect_type=DataJobInputOutputClass, ) + assert inputoutput_lineage_read is not None + assert inputoutput_lineage_read.inputDatasetEdges is not None assert len(inputoutput_lineage_read.inputDatasetEdges) == 2 assert ( inputoutput_lineage_read.inputDatasetEdges[0].destinationUrn @@ -119,6 +128,8 @@ def test_datajob_inputoutput_dataset_patch(wait_for_healthchecks): entity_urn=datajob_urn, aspect_type=DataJobInputOutputClass, ) + assert inputoutput_lineage_read is not None + assert inputoutput_lineage_read.inputDatasetEdges is not None assert len(inputoutput_lineage_read.inputDatasetEdges) == 1 assert ( inputoutput_lineage_read.inputDatasetEdges[0].destinationUrn diff --git a/smoke-test/tests/patch/test_dataset_patches.py b/smoke-test/tests/patch/test_dataset_patches.py index 6704d19760fb9a..ec6b4a91fa6bed 100644 --- a/smoke-test/tests/patch/test_dataset_patches.py +++ b/smoke-test/tests/patch/test_dataset_patches.py @@ -1,29 +1,27 @@ -import time import uuid from typing import Dict, Optional -from datahub.emitter.mce_builder import (make_dataset_urn, make_tag_urn, - make_term_urn, make_user_urn) +from datahub.emitter.mce_builder import make_dataset_urn, make_tag_urn, make_term_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig -from datahub.metadata.schema_classes import (AuditStampClass, - DatasetLineageTypeClass, - DatasetPropertiesClass, - EditableSchemaFieldInfoClass, - EditableSchemaMetadataClass, - GlobalTagsClass, - GlossaryTermAssociationClass, - GlossaryTermsClass, OwnerClass, - OwnershipClass, - OwnershipTypeClass, 
- TagAssociationClass, - UpstreamClass, - UpstreamLineageClass) +from datahub.metadata.schema_classes import ( + DatasetLineageTypeClass, + DatasetPropertiesClass, + EditableSchemaFieldInfoClass, + EditableSchemaMetadataClass, + GlossaryTermAssociationClass, + TagAssociationClass, + UpstreamClass, + UpstreamLineageClass, +) from datahub.specific.dataset import DatasetPatchBuilder from tests.patch.common_patch_tests import ( - helper_test_custom_properties_patch, helper_test_dataset_tags_patch, - helper_test_entity_terms_patch, helper_test_ownership_patch) + helper_test_custom_properties_patch, + helper_test_dataset_tags_patch, + helper_test_entity_terms_patch, + helper_test_ownership_patch, +) # Common Aspect Patch Tests @@ -135,7 +133,6 @@ def get_field_info( def test_field_terms_patch(wait_for_healthchecks): - dataset_urn = make_dataset_urn( platform="hive", name=f"SampleHiveDataset-{uuid.uuid4()}", env="PROD" ) @@ -174,6 +171,7 @@ def test_field_terms_patch(wait_for_healthchecks): assert field_info assert field_info.description == "This is a test field" + assert field_info.glossaryTerms is not None assert len(field_info.glossaryTerms.terms) == 1 assert field_info.glossaryTerms.terms[0].urn == new_term.urn @@ -191,11 +189,11 @@ def test_field_terms_patch(wait_for_healthchecks): assert field_info assert field_info.description == "This is a test field" + assert field_info.glossaryTerms is not None assert len(field_info.glossaryTerms.terms) == 0 def test_field_tags_patch(wait_for_healthchecks): - dataset_urn = make_dataset_urn( platform="hive", name=f"SampleHiveDataset-{uuid.uuid4()}", env="PROD" ) @@ -235,6 +233,7 @@ def test_field_tags_patch(wait_for_healthchecks): assert field_info assert field_info.description == "This is a test field" + assert field_info.globalTags is not None assert len(field_info.globalTags.tags) == 1 assert field_info.globalTags.tags[0].tag == new_tag.tag @@ -253,6 +252,7 @@ def test_field_tags_patch(wait_for_healthchecks): assert field_info assert field_info.description == "This is a test field" + assert field_info.globalTags is not None assert len(field_info.globalTags.tags) == 1 assert field_info.globalTags.tags[0].tag == new_tag.tag @@ -270,6 +270,7 @@ def test_field_tags_patch(wait_for_healthchecks): assert field_info assert field_info.description == "This is a test field" + assert field_info.globalTags is not None assert len(field_info.globalTags.tags) == 0 @@ -285,7 +286,6 @@ def get_custom_properties( def test_custom_properties_patch(wait_for_healthchecks): - dataset_urn = make_dataset_urn( platform="hive", name=f"SampleHiveDataset-{uuid.uuid4()}", env="PROD" ) diff --git a/smoke-test/tests/policies/test_policies.py b/smoke-test/tests/policies/test_policies.py index 67142181d2b960..186550482190c0 100644 --- a/smoke-test/tests/policies/test_policies.py +++ b/smoke-test/tests/policies/test_policies.py @@ -1,8 +1,13 @@ import pytest import tenacity -from tests.utils import (get_frontend_session, get_frontend_url, get_root_urn, - get_sleep_info, wait_for_healthcheck_util) +from tests.utils import ( + get_frontend_session, + get_frontend_url, + get_root_urn, + get_sleep_info, + wait_for_healthcheck_util, +) TEST_POLICY_NAME = "Updated Platform Policy" @@ -90,7 +95,6 @@ def _ensure_policy_present(frontend_session, new_urn): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_frontend_policy_operations(frontend_session): - json = { "query": """mutation createPolicy($input: PolicyUpdateInput!) 
{\n createPolicy(input: $input) }""", diff --git a/smoke-test/tests/privileges/__init__.py b/smoke-test/tests/privileges/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/smoke-test/tests/privileges/test_privileges.py b/smoke-test/tests/privileges/test_privileges.py index e1cb848cccf8eb..a4c607dac89f2e 100644 --- a/smoke-test/tests/privileges/test_privileges.py +++ b/smoke-test/tests/privileges/test_privileges.py @@ -1,14 +1,34 @@ import pytest import tenacity -from tests.utils import (get_frontend_session, wait_for_writes_to_sync, wait_for_healthcheck_util, - get_frontend_url, get_admin_credentials,get_sleep_info) -from tests.privileges.utils import * +from tests.privileges.utils import ( + assign_role, + assign_user_to_group, + create_group, + create_user, + create_user_policy, + remove_group, + remove_policy, + remove_user, + set_base_platform_privileges_policy_status, + set_view_dataset_sensitive_info_policy_status, + set_view_entity_profile_privileges_policy_status, +) +from tests.utils import ( + get_admin_credentials, + get_frontend_session, + get_frontend_url, + get_sleep_info, + login_as, + wait_for_healthcheck_util, + wait_for_writes_to_sync, +) pytestmark = pytest.mark.no_cypress_suite1 sleep_sec, sleep_times = get_sleep_info() + @pytest.fixture(scope="session") def wait_for_healthchecks(): wait_for_healthcheck_util() @@ -37,7 +57,7 @@ def privileges_and_test_user_setup(admin_session): # Sleep for eventual consistency wait_for_writes_to_sync() - # Create a new user + # Create a new user admin_session = create_user(admin_session, "user", "user") yield @@ -57,15 +77,16 @@ def privileges_and_test_user_setup(admin_session): @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def _ensure_cant_perform_action(session, json,assertion_key): - action_response = session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json) +def _ensure_cant_perform_action(session, json, assertion_key): + action_response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) action_response.raise_for_status() action_data = action_response.json() - assert action_data["errors"][0]["extensions"]["code"] == 403, action_data["errors"][0] + assert action_data["errors"][0]["extensions"]["code"] == 403, action_data["errors"][ + 0 + ] assert action_data["errors"][0]["extensions"]["type"] == "UNAUTHORIZED" - assert action_data["data"][assertion_key] == None + assert action_data["data"][assertion_key] is None @tenacity.retry( @@ -73,7 +94,8 @@ def _ensure_cant_perform_action(session, json,assertion_key): ) def _ensure_can_create_secret(session, json, urn): create_secret_success = session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json) + f"{get_frontend_url()}/api/v2/graphql", json=json + ) create_secret_success.raise_for_status() secret_data = create_secret_success.json() @@ -81,14 +103,15 @@ def _ensure_can_create_secret(session, json, urn): assert secret_data["data"] assert secret_data["data"]["createSecret"] assert secret_data["data"]["createSecret"] == urn - + @tenacity.retry( stop=tenacity.stop_after_attempt(10), wait=tenacity.wait_fixed(sleep_sec) ) def _ensure_can_create_ingestion_source(session, json): create_ingestion_success = session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json) + f"{get_frontend_url()}/api/v2/graphql", json=json + ) create_ingestion_success.raise_for_status() ingestion_data = create_ingestion_success.json() @@ -98,14 +121,15 @@ def 
_ensure_can_create_ingestion_source(session, json): assert ingestion_data["data"]["createIngestionSource"] is not None return ingestion_data["data"]["createIngestionSource"] - + @tenacity.retry( stop=tenacity.stop_after_attempt(10), wait=tenacity.wait_fixed(sleep_sec) ) def _ensure_can_create_access_token(session, json): create_access_token_success = session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json) + f"{get_frontend_url()}/api/v2/graphql", json=json + ) create_access_token_success.raise_for_status() ingestion_data = create_access_token_success.json() @@ -128,50 +152,49 @@ def _ensure_can_create_user_policy(session, json): assert res_data["data"] assert res_data["data"]["createPolicy"] is not None - return res_data["data"]["createPolicy"] + return res_data["data"]["createPolicy"] @pytest.mark.dependency(depends=["test_healthchecks"]) def test_privilege_to_create_and_manage_secrets(): - (admin_user, admin_pass) = get_admin_credentials() admin_session = login_as(admin_user, admin_pass) user_session = login_as("user", "user") secret_urn = "urn:li:dataHubSecret:TestSecretName" # Verify new user can't create secrets - create_secret = { + create_secret = { "query": """mutation createSecret($input: CreateSecretInput!) {\n createSecret(input: $input)\n}""", "variables": { - "input":{ - "name":"TestSecretName", - "value":"Test Secret Value", - "description":"Test Secret Description" - } + "input": { + "name": "TestSecretName", + "value": "Test Secret Value", + "description": "Test Secret Description", + } }, } - _ensure_cant_perform_action(user_session, create_secret,"createSecret") - + _ensure_cant_perform_action(user_session, create_secret, "createSecret") # Assign privileges to the new user to manage secrets - policy_urn = create_user_policy("urn:li:corpuser:user", ["MANAGE_SECRETS"], admin_session) + policy_urn = create_user_policy( + "urn:li:corpuser:user", ["MANAGE_SECRETS"], admin_session + ) # Verify new user can create and manage secrets # Create a secret _ensure_can_create_secret(user_session, create_secret, secret_urn) - # Remove a secret - remove_secret = { + remove_secret = { "query": """mutation deleteSecret($urn: String!) {\n deleteSecret(urn: $urn)\n}""", - "variables": { - "urn": secret_urn - }, + "variables": {"urn": secret_urn}, } - remove_secret_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=remove_secret) + remove_secret_response = user_session.post( + f"{get_frontend_url()}/api/v2/graphql", json=remove_secret + ) remove_secret_response.raise_for_status() secret_data = remove_secret_response.json() @@ -180,28 +203,29 @@ def test_privilege_to_create_and_manage_secrets(): assert secret_data["data"]["deleteSecret"] assert secret_data["data"]["deleteSecret"] == secret_urn - # Remove the policy remove_policy(policy_urn, admin_session) # Ensure user can't create secret after policy is removed - _ensure_cant_perform_action(user_session, create_secret,"createSecret") + _ensure_cant_perform_action(user_session, create_secret, "createSecret") @pytest.mark.dependency(depends=["test_healthchecks"]) def test_privilege_to_create_and_manage_ingestion_source(): - (admin_user, admin_pass) = get_admin_credentials() admin_session = login_as(admin_user, admin_pass) user_session = login_as("user", "user") # Verify new user can't create ingestion source - create_ingestion_source = { + create_ingestion_source = { "query": """mutation createIngestionSource($input: UpdateIngestionSourceInput!) 
{\n createIngestionSource(input: $input)\n}""", - "variables": {"input":{"type":"snowflake","name":"test","config": - {"recipe": - """{\"source\":{\"type\":\"snowflake\",\"config\":{ + "variables": { + "input": { + "type": "snowflake", + "name": "test", + "config": { + "recipe": """{\"source\":{\"type\":\"snowflake\",\"config\":{ \"account_id\":null, \"include_table_lineage\":true, \"include_view_lineage\":true, @@ -209,25 +233,39 @@ def test_privilege_to_create_and_manage_ingestion_source(): \"include_views\":true, \"profiling\":{\"enabled\":true,\"profile_table_level_only\":true}, \"stateful_ingestion\":{\"enabled\":true}}}}""", - "executorId":"default","debugMode":False,"extraArgs":[]}}}, + "executorId": "default", + "debugMode": False, + "extraArgs": [], + }, + } + }, } - _ensure_cant_perform_action(user_session, create_ingestion_source, "createIngestionSource") + _ensure_cant_perform_action( + user_session, create_ingestion_source, "createIngestionSource" + ) + # Assign privileges to the new user to manage ingestion source + policy_urn = create_user_policy( + "urn:li:corpuser:user", ["MANAGE_INGESTION"], admin_session + ) - # Assign privileges to the new user to manage ingestion source - policy_urn = create_user_policy("urn:li:corpuser:user", ["MANAGE_INGESTION"], admin_session) - # Verify new user can create and manage ingestion source(edit, delete) - ingestion_source_urn = _ensure_can_create_ingestion_source(user_session, create_ingestion_source) + ingestion_source_urn = _ensure_can_create_ingestion_source( + user_session, create_ingestion_source + ) # Edit ingestion source - update_ingestion_source = { + update_ingestion_source = { "query": """mutation updateIngestionSource($urn: String!, $input: UpdateIngestionSourceInput!) {\n updateIngestionSource(urn: $urn, input: $input)\n}""", - "variables": {"urn":ingestion_source_urn, - "input":{"type":"snowflake","name":"test updated", - "config":{"recipe":"""{\"source\":{\"type\":\"snowflake\",\"config\":{ + "variables": { + "urn": ingestion_source_urn, + "input": { + "type": "snowflake", + "name": "test updated", + "config": { + "recipe": """{\"source\":{\"type\":\"snowflake\",\"config\":{ \"account_id\":null, \"include_table_lineage\":true, \"include_view_lineage\":true, @@ -235,11 +273,17 @@ def test_privilege_to_create_and_manage_ingestion_source(): \"include_views\":true, \"profiling\":{\"enabled\":true,\"profile_table_level_only\":true}, \"stateful_ingestion\":{\"enabled\":true}}}}""", - "executorId":"default","debugMode":False,"extraArgs":[]}}} + "executorId": "default", + "debugMode": False, + "extraArgs": [], + }, + }, + }, } update_ingestion_success = user_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=update_ingestion_source) + f"{get_frontend_url()}/api/v2/graphql", json=update_ingestion_source + ) update_ingestion_success.raise_for_status() ingestion_data = update_ingestion_success.json() @@ -248,17 +292,16 @@ def test_privilege_to_create_and_manage_ingestion_source(): assert ingestion_data["data"]["updateIngestionSource"] assert ingestion_data["data"]["updateIngestionSource"] == ingestion_source_urn - # Delete ingestion source - remove_ingestion_source = { + remove_ingestion_source = { "query": """mutation deleteIngestionSource($urn: String!) 
{\n deleteIngestionSource(urn: $urn)\n}""", - "variables": { - "urn": ingestion_source_urn - }, + "variables": {"urn": ingestion_source_urn}, } - remove_ingestion_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=remove_ingestion_source) + remove_ingestion_response = user_session.post( + f"{get_frontend_url()}/api/v2/graphql", json=remove_ingestion_source + ) remove_ingestion_response.raise_for_status() ingestion_data = remove_ingestion_response.json() @@ -271,75 +314,81 @@ def test_privilege_to_create_and_manage_ingestion_source(): remove_policy(policy_urn, admin_session) # Ensure that user can't create ingestion source after policy is removed - _ensure_cant_perform_action(user_session, create_ingestion_source, "createIngestionSource") + _ensure_cant_perform_action( + user_session, create_ingestion_source, "createIngestionSource" + ) +@pytest.mark.skip(reason="Functionality and test needs to be validated for correctness") @pytest.mark.dependency(depends=["test_healthchecks"]) def test_privilege_to_create_and_manage_access_tokens(): - (admin_user, admin_pass) = get_admin_credentials() admin_session = login_as(admin_user, admin_pass) user_session = login_as("user", "user") - # Verify new user can't create access token - create_access_token = { + create_access_token = { "query": """mutation createAccessToken($input: CreateAccessTokenInput!) {\n createAccessToken(input: $input) {\n accessToken\n __typename\n }\n}\n""", - "variables": {"input":{"actorUrn":"urn:li:corpuser:user", - "type":"PERSONAL", - "duration":"ONE_MONTH", - "name":"test", - "description":"test"}} + "variables": { + "input": { + "actorUrn": "urn:li:corpuser:user", + "type": "PERSONAL", + "duration": "ONE_MONTH", + "name": "test", + "description": "test", + } + }, } - _ensure_cant_perform_action(user_session, create_access_token,"createAccessToken") - + _ensure_cant_perform_action(user_session, create_access_token, "createAccessToken") # Assign privileges to the new user to create and manage access tokens - policy_urn = create_user_policy("urn:li:corpuser:user", ["MANAGE_ACCESS_TOKENS"], admin_session) - + policy_urn = create_user_policy( + "urn:li:corpuser:user", ["MANAGE_ACCESS_TOKENS"], admin_session + ) # Verify new user can create and manage access token(create, revoke) # Create a access token _ensure_can_create_access_token(user_session, create_access_token) - # List access tokens first to get token id - list_access_tokens = { + list_access_tokens = { "query": """query listAccessTokens($input: ListAccessTokenInput!) 
{\n listAccessTokens(input: $input) {\n start\n count\n total\n tokens {\n urn\n type\n id\n name\n description\n actorUrn\n ownerUrn\n createdAt\n expiresAt\n __typename\n }\n __typename\n }\n}\n""", "variables": { - "input":{ - "start":0,"count":10,"filters":[{ - "field":"ownerUrn", - "values":["urn:li:corpuser:user"]}]} - } + "input": { + "start": 0, + "count": 10, + "filters": [{"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}], + } + }, } - list_tokens_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=list_access_tokens) + list_tokens_response = user_session.post( + f"{get_frontend_url()}/api/v2/graphql", json=list_access_tokens + ) list_tokens_response.raise_for_status() list_tokens_data = list_tokens_response.json() assert list_tokens_data assert list_tokens_data["data"] assert list_tokens_data["data"]["listAccessTokens"]["tokens"][0]["id"] is not None - - access_token_id = list_tokens_data["data"]["listAccessTokens"]["tokens"][0]["id"] + access_token_id = list_tokens_data["data"]["listAccessTokens"]["tokens"][0]["id"] # Revoke access token - revoke_access_token = { + revoke_access_token = { "query": "mutation revokeAccessToken($tokenId: String!) {\n revokeAccessToken(tokenId: $tokenId)\n}\n", - "variables": { - "tokenId": access_token_id - }, + "variables": {"tokenId": access_token_id}, } - revoke_token_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=revoke_access_token) + revoke_token_response = user_session.post( + f"{get_frontend_url()}/api/v2/graphql", json=revoke_access_token + ) revoke_token_response.raise_for_status() revoke_token_data = revoke_token_response.json() @@ -348,22 +397,18 @@ def test_privilege_to_create_and_manage_access_tokens(): assert revoke_token_data["data"]["revokeAccessToken"] assert revoke_token_data["data"]["revokeAccessToken"] is True - # Remove the policy remove_policy(policy_urn, admin_session) - # Ensure that user can't create access token after policy is removed - _ensure_cant_perform_action(user_session, create_access_token,"createAccessToken") + _ensure_cant_perform_action(user_session, create_access_token, "createAccessToken") @pytest.mark.dependency(depends=["test_healthchecks"]) def test_privilege_to_create_and_manage_policies(): - (admin_user, admin_pass) = get_admin_credentials() admin_session = login_as(admin_user, admin_pass) user_session = login_as("user", "user") - # Verify new user can't create a policy create_policy = { @@ -376,7 +421,7 @@ def test_privilege_to_create_and_manage_policies(): "name": "Policy Name", "description": "Policy Description", "state": "ACTIVE", - "resources": {"filter":{"criteria":[]}}, + "resources": {"filter": {"criteria": []}}, "privileges": ["MANAGE_POLICIES"], "actors": { "users": [], @@ -388,19 +433,19 @@ def test_privilege_to_create_and_manage_policies(): }, } - _ensure_cant_perform_action(user_session, create_policy,"createPolicy") - + _ensure_cant_perform_action(user_session, create_policy, "createPolicy") # Assign privileges to the new user to create and manage policies - admin_policy_urn = create_user_policy("urn:li:corpuser:user", ["MANAGE_POLICIES"], admin_session) - + admin_policy_urn = create_user_policy( + "urn:li:corpuser:user", ["MANAGE_POLICIES"], admin_session + ) # Verify new user can create and manage policy(create, edit, delete) # Create a policy user_policy_urn = _ensure_can_create_user_policy(user_session, create_policy) # Edit a policy - edit_policy = { + edit_policy = { "query": """mutation updatePolicy($urn: String!, $input: 
PolicyUpdateInput!) {\n updatePolicy(urn: $urn, input: $input) }""", "variables": { @@ -422,7 +467,9 @@ def test_privilege_to_create_and_manage_policies(): }, }, } - edit_policy_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=edit_policy) + edit_policy_response = user_session.post( + f"{get_frontend_url()}/api/v2/graphql", json=edit_policy + ) edit_policy_response.raise_for_status() res_data = edit_policy_response.json() @@ -431,12 +478,14 @@ def test_privilege_to_create_and_manage_policies(): assert res_data["data"]["updatePolicy"] == user_policy_urn # Delete a policy - remove_user_policy = { + remove_user_policy = { "query": "mutation deletePolicy($urn: String!) {\n deletePolicy(urn: $urn)\n}\n", - "variables":{"urn":user_policy_urn} + "variables": {"urn": user_policy_urn}, } - remove_policy_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=remove_user_policy) + remove_policy_response = user_session.post( + f"{get_frontend_url()}/api/v2/graphql", json=remove_user_policy + ) remove_policy_response.raise_for_status() res_data = remove_policy_response.json() @@ -444,18 +493,16 @@ def test_privilege_to_create_and_manage_policies(): assert res_data["data"] assert res_data["data"]["deletePolicy"] == user_policy_urn - # Remove the user privilege by admin remove_policy(admin_policy_urn, admin_session) - # Ensure that user can't create a policy after privilege is removed by admin - _ensure_cant_perform_action(user_session, create_policy,"createPolicy") + _ensure_cant_perform_action(user_session, create_policy, "createPolicy") +@pytest.mark.skip(reason="Functionality and test needs to be validated for correctness") @pytest.mark.dependency(depends=["test_healthchecks"]) def test_privilege_from_group_role_can_create_and_manage_secret(): - (admin_user, admin_pass) = get_admin_credentials() admin_session = login_as(admin_user, admin_pass) user_session = login_as("user", "user") @@ -466,20 +513,20 @@ def test_privilege_from_group_role_can_create_and_manage_secret(): "query": """mutation createSecret($input: CreateSecretInput!) {\n createSecret(input: $input)\n}""", "variables": { - "input":{ - "name":"TestSecretName", - "value":"Test Secret Value", - "description":"Test Secret Description" + "input": { + "name": "TestSecretName", + "value": "Test Secret Value", + "description": "Test Secret Description", } }, } - _ensure_cant_perform_action(user_session, create_secret,"createSecret") + _ensure_cant_perform_action(user_session, create_secret, "createSecret") # Create group and grant it the admin role. group_urn = create_group(admin_session, "Test Group") # Assign admin role to group - assign_role(admin_session,"urn:li:dataHubRole:Admin", [group_urn]) + assign_role(admin_session, "urn:li:dataHubRole:Admin", [group_urn]) # Assign user to group assign_user_to_group(admin_session, group_urn, ["urn:li:corpuser:user"]) @@ -492,12 +539,12 @@ def test_privilege_from_group_role_can_create_and_manage_secret(): remove_secret = { "query": """mutation deleteSecret($urn: String!) 
{\n deleteSecret(urn: $urn)\n}""", - "variables": { - "urn": secret_urn - }, + "variables": {"urn": secret_urn}, } - remove_secret_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=remove_secret) + remove_secret_response = user_session.post( + f"{get_frontend_url()}/api/v2/graphql", json=remove_secret + ) remove_secret_response.raise_for_status() secret_data = remove_secret_response.json() @@ -510,4 +557,4 @@ def test_privilege_from_group_role_can_create_and_manage_secret(): remove_group(admin_session, group_urn) # Ensure user can't create secret after policy is removed - _ensure_cant_perform_action(user_session, create_secret,"createSecret") + _ensure_cant_perform_action(user_session, create_secret, "createSecret") diff --git a/smoke-test/tests/privileges/utils.py b/smoke-test/tests/privileges/utils.py index eeb385a243a901..1e58ec4085b703 100644 --- a/smoke-test/tests/privileges/utils.py +++ b/smoke-test/tests/privileges/utils.py @@ -1,10 +1,9 @@ -import requests_wrapper as requests from tests.consistency_utils import wait_for_writes_to_sync -from tests.utils import (get_frontend_url, wait_for_writes_to_sync, get_admin_credentials) +from tests.utils import get_admin_credentials, get_frontend_url, login_as def set_base_platform_privileges_policy_status(status, session): - base_platform_privileges = { + base_platform_privileges = { "query": """mutation updatePolicy($urn: String!, $input: PolicyUpdateInput!) {\n updatePolicy(urn: $urn, input: $input) }""", "variables": { @@ -14,18 +13,20 @@ def set_base_platform_privileges_policy_status(status, session): "state": status, "name": "All Users - Base Platform Privileges", "description": "Grants base platform privileges to ALL users of DataHub. Change this policy to alter that behavior.", - "privileges": ["MANAGE_INGESTION", - "MANAGE_SECRETS", - "MANAGE_USERS_AND_GROUPS", - "VIEW_ANALYTICS", - "GENERATE_PERSONAL_ACCESS_TOKENS", - "MANAGE_DOMAINS", - "MANAGE_GLOBAL_ANNOUNCEMENTS", - "MANAGE_TESTS", - "MANAGE_GLOSSARIES", - "MANAGE_TAGS", - "MANAGE_GLOBAL_VIEWS", - "MANAGE_GLOBAL_OWNERSHIP_TYPES"], + "privileges": [ + "MANAGE_INGESTION", + "MANAGE_SECRETS", + "MANAGE_USERS_AND_GROUPS", + "VIEW_ANALYTICS", + "GENERATE_PERSONAL_ACCESS_TOKENS", + "MANAGE_DOMAINS", + "MANAGE_GLOBAL_ANNOUNCEMENTS", + "MANAGE_TESTS", + "MANAGE_GLOSSARIES", + "MANAGE_TAGS", + "MANAGE_GLOBAL_VIEWS", + "MANAGE_GLOBAL_OWNERSHIP_TYPES", + ], "actors": { "users": [], "groups": None, @@ -38,13 +39,15 @@ def set_base_platform_privileges_policy_status(status, session): }, } base_privileges_response = session.post( - f"{get_frontend_url()}/api/v2/graphql", json=base_platform_privileges) + f"{get_frontend_url()}/api/v2/graphql", json=base_platform_privileges + ) base_privileges_response.raise_for_status() base_res_data = base_privileges_response.json() assert base_res_data["data"]["updatePolicy"] == "urn:li:dataHubPolicy:7" + def set_view_dataset_sensitive_info_policy_status(status, session): - dataset_sensitive_information = { + dataset_sensitive_information = { "query": """mutation updatePolicy($urn: String!, $input: PolicyUpdateInput!) 
{\n updatePolicy(urn: $urn, input: $input) }""", "variables": { @@ -54,7 +57,7 @@ def set_view_dataset_sensitive_info_policy_status(status, session): "state": status, "name": "All Users - View Dataset Sensitive Information", "description": "Grants viewing privileges of usage and profile information of all datasets for all users", - "privileges": ["VIEW_DATASET_USAGE","VIEW_DATASET_PROFILE"], + "privileges": ["VIEW_DATASET_USAGE", "VIEW_DATASET_PROFILE"], "actors": { "users": [], "groups": None, @@ -67,13 +70,18 @@ def set_view_dataset_sensitive_info_policy_status(status, session): }, } sensitive_info_response = session.post( - f"{get_frontend_url()}/api/v2/graphql", json=dataset_sensitive_information) + f"{get_frontend_url()}/api/v2/graphql", json=dataset_sensitive_information + ) sensitive_info_response.raise_for_status() sens_info_data = sensitive_info_response.json() - assert sens_info_data["data"]["updatePolicy"] == "urn:li:dataHubPolicy:view-dataset-sensitive" + assert ( + sens_info_data["data"]["updatePolicy"] + == "urn:li:dataHubPolicy:view-dataset-sensitive" + ) + def set_view_entity_profile_privileges_policy_status(status, session): - view_entity_page = { + view_entity_page = { "query": """mutation updatePolicy($urn: String!, $input: PolicyUpdateInput!) {\n updatePolicy(urn: $urn, input: $input) }""", "variables": { @@ -83,12 +91,14 @@ def set_view_entity_profile_privileges_policy_status(status, session): "state": status, "name": "All Users - View Entity Page", "description": "Grants entity view to all users", - "privileges": ["VIEW_ENTITY_PAGE", - "SEARCH_PRIVILEGE", - "GET_COUNTS_PRIVILEGE", - "GET_TIMESERIES_ASPECT_PRIVILEGE", - "GET_ENTITY_PRIVILEGE", - "GET_TIMELINE_PRIVILEGE"], + "privileges": [ + "VIEW_ENTITY_PAGE", + "SEARCH_PRIVILEGE", + "GET_COUNTS_PRIVILEGE", + "GET_TIMESERIES_ASPECT_PRIVILEGE", + "GET_ENTITY_PRIVILEGE", + "GET_TIMELINE_PRIVILEGE", + ], "actors": { "users": [], "groups": None, @@ -101,10 +111,15 @@ def set_view_entity_profile_privileges_policy_status(status, session): }, } view_entity_response = session.post( - f"{get_frontend_url()}/api/v2/graphql", json=view_entity_page) + f"{get_frontend_url()}/api/v2/graphql", json=view_entity_page + ) view_entity_response.raise_for_status() view_entity_data = view_entity_response.json() - assert view_entity_data["data"]["updatePolicy"] == "urn:li:dataHubPolicy:view-entity-page-all" + assert ( + view_entity_data["data"]["updatePolicy"] + == "urn:li:dataHubPolicy:view-entity-page-all" + ) + def create_user(session, email, password): # Remove user if exists @@ -136,9 +151,7 @@ def create_user(session, email, password): "title": "Data Engineer", "inviteToken": invite_token, } - sign_up_response = session.post( - f"{get_frontend_url()}/signUp", json=sign_up_json - ) + sign_up_response = session.post(f"{get_frontend_url()}/signUp", json=sign_up_json) sign_up_response.raise_for_status() assert sign_up_response assert "error" not in sign_up_response @@ -149,16 +162,6 @@ def create_user(session, email, password): return admin_session -def login_as(username, password): - session = requests.Session() - headers = { - "Content-Type": "application/json", - } - data = '{"username":"' + username + '", "password":"' + password + '"}' - response = session.post(f"{get_frontend_url()}/logIn", headers=headers, data=data) - response.raise_for_status() - return session - def remove_user(session, urn): json = { "query": """mutation removeUser($urn: String!) 
{\n @@ -170,6 +173,7 @@ def remove_user(session, urn): response.raise_for_status() return response.json() + def create_group(session, name): json = { "query": """mutation createGroup($input: CreateGroupInput!) {\n @@ -185,6 +189,7 @@ def create_group(session, name): assert res_data["data"]["createGroup"] return res_data["data"]["createGroup"] + def remove_group(session, urn): json = { "query": """mutation removeGroup($urn: String!) {\n @@ -200,6 +205,7 @@ def remove_group(session, urn): assert res_data["data"]["removeGroup"] return res_data["data"]["removeGroup"] + def assign_user_to_group(session, group_urn, user_urns): json = { "query": """mutation addGroupMembers($groupUrn: String!, $userUrns: [String!]!) {\n @@ -215,6 +221,7 @@ def assign_user_to_group(session, group_urn, user_urns): assert res_data["data"]["addGroupMembers"] return res_data["data"]["addGroupMembers"] + def assign_role(session, role_urn, actor_urns): json = { "query": """mutation batchAssignRole($input: BatchAssignRoleInput!) {\n @@ -231,6 +238,7 @@ def assign_role(session, role_urn, actor_urns): assert res_data["data"]["batchAssignRole"] return res_data["data"]["batchAssignRole"] + def create_user_policy(user_urn, privileges, session): policy = { "query": """mutation createPolicy($input: PolicyUpdateInput!) {\n @@ -241,7 +249,7 @@ def create_user_policy(user_urn, privileges, session): "name": "Policy Name", "description": "Policy Description", "state": "ACTIVE", - "resources": {"filter":{"criteria":[]}}, + "resources": {"filter": {"criteria": []}}, "privileges": privileges, "actors": { "users": [user_urn], @@ -262,6 +270,7 @@ def create_user_policy(user_urn, privileges, session): assert res_data["data"]["createPolicy"] return res_data["data"]["createPolicy"] + def remove_policy(urn, session): remove_policy_json = { "query": """mutation deletePolicy($urn: String!) {\n @@ -269,11 +278,13 @@ def remove_policy(urn, session): "variables": {"urn": urn}, } - response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=remove_policy_json) + response = session.post( + f"{get_frontend_url()}/api/v2/graphql", json=remove_policy_json + ) response.raise_for_status() res_data = response.json() assert res_data assert res_data["data"] assert res_data["data"]["deletePolicy"] - assert res_data["data"]["deletePolicy"] == urn \ No newline at end of file + assert res_data["data"]["deletePolicy"] == urn diff --git a/smoke-test/tests/read_only/test_services_up.py b/smoke-test/tests/read_only/test_services_up.py index 792a5063d3f8b0..4e00f910ceb73b 100644 --- a/smoke-test/tests/read_only/test_services_up.py +++ b/smoke-test/tests/read_only/test_services_up.py @@ -1,8 +1,8 @@ import os +import re import pytest import requests -import re from tests.utils import get_gms_url, wait_for_healthcheck_util @@ -14,9 +14,11 @@ def test_services_up(): wait_for_healthcheck_util() + def looks_like_a_short_sha(sha: str) -> bool: return len(sha) == 7 and re.match(r"[0-9a-f]{7}", sha) is not None + @pytest.mark.read_only def test_gms_config_accessible() -> None: gms_config = requests.get(f"{get_gms_url()}/config").json() @@ -33,4 +35,6 @@ def test_gms_config_accessible() -> None: default_cli_version: str = gms_config["managedIngestion"]["defaultCliVersion"] print(f"Default CLI version: {default_cli_version}") assert not default_cli_version.startswith("@") - assert "." in default_cli_version or looks_like_a_short_sha(default_cli_version), "Default CLI version does not look like a version string" + assert "." 
in default_cli_version or looks_like_a_short_sha( + default_cli_version + ), "Default CLI version does not look like a version string" diff --git a/smoke-test/tests/setup/lineage/ingest_data_job_change.py b/smoke-test/tests/setup/lineage/ingest_data_job_change.py index 588a1625419bc2..2746baf89600e2 100644 --- a/smoke-test/tests/setup/lineage/ingest_data_job_change.py +++ b/smoke-test/tests/setup/lineage/ingest_data_job_change.py @@ -1,20 +1,31 @@ from typing import List -from datahub.emitter.mce_builder import (make_data_flow_urn, - make_data_job_urn_with_flow, - make_dataset_urn) +from datahub.emitter.mce_builder import ( + make_data_flow_urn, + make_data_job_urn_with_flow, + make_dataset_urn, +) from datahub.emitter.rest_emitter import DatahubRestEmitter -from datahub.metadata.schema_classes import (DateTypeClass, NumberTypeClass, - SchemaFieldDataTypeClass, - StringTypeClass) +from datahub.metadata.schema_classes import ( + DateTypeClass, + NumberTypeClass, + SchemaFieldDataTypeClass, + StringTypeClass, +) -from tests.setup.lineage.constants import (AIRFLOW_DATA_PLATFORM, - SNOWFLAKE_DATA_PLATFORM, - TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, - TIMESTAMP_MILLIS_ONE_DAY_AGO) +from tests.setup.lineage.constants import ( + AIRFLOW_DATA_PLATFORM, + SNOWFLAKE_DATA_PLATFORM, + TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, + TIMESTAMP_MILLIS_ONE_DAY_AGO, +) from tests.setup.lineage.helper_classes import Dataset, Field, Pipeline, Task -from tests.setup.lineage.utils import (create_edge, create_node, - create_nodes_and_edges, emit_mcps) +from tests.setup.lineage.utils import ( + create_edge, + create_node, + create_nodes_and_edges, + emit_mcps, +) # Constants for Case 2 DAILY_TEMPERATURE_DATASET_ID = "climate.daily_temperature" diff --git a/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py b/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py index bb9f51b6b5e9b7..4a8da1fcf0588c 100644 --- a/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py +++ b/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py @@ -2,17 +2,26 @@ from datahub.emitter.mce_builder import make_dataset_urn from datahub.emitter.rest_emitter import DatahubRestEmitter -from datahub.metadata.schema_classes import (NumberTypeClass, - SchemaFieldDataTypeClass, - StringTypeClass, UpstreamClass) +from datahub.metadata.schema_classes import ( + NumberTypeClass, + SchemaFieldDataTypeClass, + StringTypeClass, + UpstreamClass, +) -from tests.setup.lineage.constants import (DATASET_ENTITY_TYPE, - SNOWFLAKE_DATA_PLATFORM, - TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, - TIMESTAMP_MILLIS_ONE_DAY_AGO) +from tests.setup.lineage.constants import ( + DATASET_ENTITY_TYPE, + SNOWFLAKE_DATA_PLATFORM, + TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, + TIMESTAMP_MILLIS_ONE_DAY_AGO, +) from tests.setup.lineage.helper_classes import Dataset, Field -from tests.setup.lineage.utils import (create_node, create_upstream_edge, - create_upstream_mcp, emit_mcps) +from tests.setup.lineage.utils import ( + create_node, + create_upstream_edge, + create_upstream_mcp, + emit_mcps, +) # Constants for Case 3 GDP_DATASET_ID = "economic_data.gdp" diff --git a/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py b/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py index 6079d7a3d2b63b..143c65c0826566 100644 --- a/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py +++ b/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py @@ -1,20 +1,30 @@ from typing import List -from datahub.emitter.mce_builder import (make_data_flow_urn, - 
make_data_job_urn_with_flow, - make_dataset_urn) +from datahub.emitter.mce_builder import ( + make_data_flow_urn, + make_data_job_urn_with_flow, + make_dataset_urn, +) from datahub.emitter.rest_emitter import DatahubRestEmitter -from datahub.metadata.schema_classes import (NumberTypeClass, - SchemaFieldDataTypeClass, - StringTypeClass) +from datahub.metadata.schema_classes import ( + NumberTypeClass, + SchemaFieldDataTypeClass, + StringTypeClass, +) -from tests.setup.lineage.constants import (AIRFLOW_DATA_PLATFORM, - BQ_DATA_PLATFORM, - TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, - TIMESTAMP_MILLIS_ONE_DAY_AGO) +from tests.setup.lineage.constants import ( + AIRFLOW_DATA_PLATFORM, + BQ_DATA_PLATFORM, + TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, + TIMESTAMP_MILLIS_ONE_DAY_AGO, +) from tests.setup.lineage.helper_classes import Dataset, Field, Pipeline, Task -from tests.setup.lineage.utils import (create_edge, create_node, - create_nodes_and_edges, emit_mcps) +from tests.setup.lineage.utils import ( + create_edge, + create_node, + create_nodes_and_edges, + emit_mcps, +) # Constants for Case 1 TRANSACTIONS_DATASET_ID = "transactions.transactions" diff --git a/smoke-test/tests/setup/lineage/ingest_time_lineage.py b/smoke-test/tests/setup/lineage/ingest_time_lineage.py index 3aec979707290d..116e6cd63dd9f5 100644 --- a/smoke-test/tests/setup/lineage/ingest_time_lineage.py +++ b/smoke-test/tests/setup/lineage/ingest_time_lineage.py @@ -4,11 +4,17 @@ from datahub.emitter.rest_emitter import DatahubRestEmitter from tests.setup.lineage.ingest_data_job_change import ( - get_data_job_change_urns, ingest_data_job_change) + get_data_job_change_urns, + ingest_data_job_change, +) from tests.setup.lineage.ingest_dataset_join_change import ( - get_dataset_join_change_urns, ingest_dataset_join_change) + get_dataset_join_change_urns, + ingest_dataset_join_change, +) from tests.setup.lineage.ingest_input_datasets_change import ( - get_input_datasets_change_urns, ingest_input_datasets_change) + get_input_datasets_change_urns, + ingest_input_datasets_change, +) SERVER = os.getenv("DATAHUB_SERVER") or "http://localhost:8080" TOKEN = os.getenv("DATAHUB_TOKEN") or "" diff --git a/smoke-test/tests/setup/lineage/utils.py b/smoke-test/tests/setup/lineage/utils.py index c72f6ccb89b7ad..d4c16ed3b7a211 100644 --- a/smoke-test/tests/setup/lineage/utils.py +++ b/smoke-test/tests/setup/lineage/utils.py @@ -1,29 +1,38 @@ -import datetime from typing import List -from datahub.emitter.mce_builder import (make_data_flow_urn, - make_data_job_urn_with_flow, - make_data_platform_urn, - make_dataset_urn) +from datahub.emitter.mce_builder import ( + make_data_flow_urn, + make_data_job_urn_with_flow, + make_data_platform_urn, + make_dataset_urn, +) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.rest_emitter import DatahubRestEmitter from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage -from datahub.metadata.schema_classes import (AuditStampClass, ChangeTypeClass, - DataFlowInfoClass, - DataJobInfoClass, - DataJobInputOutputClass, - DatasetLineageTypeClass, - DatasetPropertiesClass, EdgeClass, - MySqlDDLClass, SchemaFieldClass, - SchemaMetadataClass, - UpstreamClass) - -from tests.setup.lineage.constants import (DATA_FLOW_ENTITY_TYPE, - DATA_FLOW_INFO_ASPECT_NAME, - DATA_JOB_ENTITY_TYPE, - DATA_JOB_INFO_ASPECT_NAME, - DATA_JOB_INPUT_OUTPUT_ASPECT_NAME, - DATASET_ENTITY_TYPE) +from datahub.metadata.com.linkedin.pegasus2avro.mxe import SystemMetadata +from datahub.metadata.schema_classes import ( + 
AuditStampClass, + ChangeTypeClass, + DataFlowInfoClass, + DataJobInfoClass, + DataJobInputOutputClass, + DatasetLineageTypeClass, + DatasetPropertiesClass, + EdgeClass, + MySqlDDLClass, + SchemaFieldClass, + SchemaMetadataClass, + UpstreamClass, +) + +from tests.setup.lineage.constants import ( + DATA_FLOW_ENTITY_TYPE, + DATA_FLOW_INFO_ASPECT_NAME, + DATA_JOB_ENTITY_TYPE, + DATA_JOB_INFO_ASPECT_NAME, + DATA_JOB_INPUT_OUTPUT_ASPECT_NAME, + DATASET_ENTITY_TYPE, +) from tests.setup.lineage.helper_classes import Dataset, Pipeline @@ -176,17 +185,16 @@ def create_upstream_mcp( run_id: str = "", ) -> MetadataChangeProposalWrapper: print(f"Creating upstreamLineage aspect for {entity_urn}") - timestamp_millis: int = int(datetime.datetime.now().timestamp() * 1000) mcp = MetadataChangeProposalWrapper( entityType=entity_type, entityUrn=entity_urn, changeType=ChangeTypeClass.UPSERT, aspectName="upstreamLineage", aspect=UpstreamLineage(upstreams=upstreams), - systemMetadata={ - "lastObserved": timestamp_millis, - "runId": run_id, - }, + systemMetadata=SystemMetadata( + lastObserved=timestamp_millis, + runId=run_id, + ), ) return mcp diff --git a/smoke-test/tests/structured_properties/test_structured_properties.py b/smoke-test/tests/structured_properties/test_structured_properties.py index 83994776076b05..de85d2af95e034 100644 --- a/smoke-test/tests/structured_properties/test_structured_properties.py +++ b/smoke-test/tests/structured_properties/test_structured_properties.py @@ -1,31 +1,39 @@ import logging import os -from datahub.ingestion.graph.filters import SearchFilterRule -from tests.consistency_utils import wait_for_writes_to_sync import tempfile from random import randint -from tests.utilities.file_emitter import FileEmitter from typing import Iterable, List, Optional, Union import pytest + # import tenacity from datahub.api.entities.dataset.dataset import Dataset -from datahub.api.entities.structuredproperties.structuredproperties import \ - StructuredProperties +from datahub.api.entities.structuredproperties.structuredproperties import ( + StructuredProperties, +) from datahub.emitter.mce_builder import make_dataset_urn, make_schema_field_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph from datahub.metadata.schema_classes import ( - EntityTypeInfoClass, PropertyValueClass, StructuredPropertiesClass, - StructuredPropertyDefinitionClass, StructuredPropertyValueAssignmentClass) + EntityTypeInfoClass, + PropertyValueClass, + StructuredPropertiesClass, + StructuredPropertyDefinitionClass, + StructuredPropertyValueAssignmentClass, +) from datahub.specific.dataset import DatasetPatchBuilder -from datahub.utilities.urns.structured_properties_urn import \ - StructuredPropertyUrn +from datahub.utilities.urns.structured_properties_urn import StructuredPropertyUrn from datahub.utilities.urns.urn import Urn -from tests.utils import (delete_urns, delete_urns_from_file, get_gms_url, - get_sleep_info, ingest_file_via_rest, - wait_for_writes_to_sync) +from tests.consistency_utils import wait_for_writes_to_sync +from tests.utilities.file_emitter import FileEmitter +from tests.utils import ( + delete_urns, + delete_urns_from_file, + get_gms_url, + get_sleep_info, + ingest_file_via_rest, +) logger = logging.getLogger(__name__) @@ -36,8 +44,7 @@ ] schema_field_urns = [ - make_schema_field_urn(dataset_urn, "column_1") - for dataset_urn in dataset_urns + make_schema_field_urn(dataset_urn, "column_1") for dataset_urn in 
dataset_urns ] generated_urns = [d for d in dataset_urns] + [f for f in schema_field_urns] @@ -45,6 +52,7 @@ default_namespace = "io.acryl.privacy" + def create_logical_entity( entity_name: str, ) -> Iterable[MetadataChangeProposalWrapper]: @@ -66,14 +74,13 @@ def create_test_data(filename: str): file_emitter.close() wait_for_writes_to_sync() + sleep_sec, sleep_times = get_sleep_info() @pytest.fixture(scope="module", autouse=False) def graph() -> DataHubGraph: - graph: DataHubGraph = DataHubGraph( - config=DatahubClientConfig(server=get_gms_url()) - ) + graph: DataHubGraph = DataHubGraph(config=DatahubClientConfig(server=get_gms_url())) return graph @@ -132,7 +139,7 @@ def attach_property_to_entity( property_name: str, property_value: Union[str, float, List[str | float]], graph: DataHubGraph, - namespace: str = default_namespace + namespace: str = default_namespace, ): if isinstance(property_value, list): property_values: List[Union[str, float]] = property_value @@ -159,15 +166,12 @@ def get_property_from_entity( property_name: str, graph: DataHubGraph, ): - structured_properties: Optional[ - StructuredPropertiesClass - ] = graph.get_aspect(urn, StructuredPropertiesClass) + structured_properties: Optional[StructuredPropertiesClass] = graph.get_aspect( + urn, StructuredPropertiesClass + ) assert structured_properties is not None for property in structured_properties.properties: - if ( - property.propertyUrn - == f"urn:li:structuredProperty:{property_name}" - ): + if property.propertyUrn == f"urn:li:structuredProperty:{property_name}": return property.values return None @@ -181,16 +185,14 @@ def test_structured_property_string(ingest_cleanup_data, graph): property_name = "retentionPolicy" create_property_definition(property_name, graph) - generated_urns.append(f"urn:li:structuredProperty:{default_namespace}.retentionPolicy") - - attach_property_to_entity( - dataset_urns[0], property_name, ["30d"], graph=graph + generated_urns.append( + f"urn:li:structuredProperty:{default_namespace}.retentionPolicy" ) + attach_property_to_entity(dataset_urns[0], property_name, ["30d"], graph=graph) + try: - attach_property_to_entity( - dataset_urns[0], property_name, 200030, graph=graph - ) + attach_property_to_entity(dataset_urns[0], property_name, 200030, graph=graph) raise AssertionError( "Should not be able to attach a number to a string property" ) @@ -208,12 +210,12 @@ def test_structured_property_string(ingest_cleanup_data, graph): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_structured_property_double(ingest_cleanup_data, graph): property_name = "expiryTime" - generated_urns.append(f"urn:li:structuredProperty:{default_namespace}.{property_name}") + generated_urns.append( + f"urn:li:structuredProperty:{default_namespace}.{property_name}" + ) create_property_definition(property_name, graph, value_type="number") - attach_property_to_entity( - dataset_urns[0], property_name, 2000034, graph=graph - ) + attach_property_to_entity(dataset_urns[0], property_name, 2000034, graph=graph) try: attach_property_to_entity( @@ -232,9 +234,7 @@ def test_structured_property_double(ingest_cleanup_data, graph): attach_property_to_entity( dataset_urns[0], property_name, [2000034, 2000035], graph=graph ) - raise AssertionError( - "Should not be able to attach a list to a number property" - ) + raise AssertionError("Should not be able to attach a list to a number property") except Exception as e: if not isinstance(e, AssertionError): pass @@ -249,15 +249,15 @@ def 
test_structured_property_double(ingest_cleanup_data, graph): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_structured_property_double_multiple(ingest_cleanup_data, graph): property_name = "versions" - generated_urns.append(f"urn:li:structuredProperty:{default_namespace}.{property_name}") + generated_urns.append( + f"urn:li:structuredProperty:{default_namespace}.{property_name}" + ) create_property_definition( property_name, graph, value_type="number", cardinality="MULTIPLE" ) - attach_property_to_entity( - dataset_urns[0], property_name, [1.0, 2.0], graph=graph - ) + attach_property_to_entity(dataset_urns[0], property_name, [1.0, 2.0], graph=graph) # @tenacity.retry( @@ -265,11 +265,11 @@ def test_structured_property_double_multiple(ingest_cleanup_data, graph): # wait=tenacity.wait_fixed(sleep_sec), # ) @pytest.mark.dependency(depends=["test_healthchecks"]) -def test_structured_property_string_allowed_values( - ingest_cleanup_data, graph -): +def test_structured_property_string_allowed_values(ingest_cleanup_data, graph): property_name = "enumProperty" - generated_urns.append(f"urn:li:structuredProperty:{default_namespace}.{property_name}") + generated_urns.append( + f"urn:li:structuredProperty:{default_namespace}.{property_name}" + ) create_property_definition( property_name, @@ -301,9 +301,7 @@ def test_structured_property_string_allowed_values( @pytest.mark.dependency(depends=["test_healthchecks"]) -def test_structured_property_definition_evolution( - ingest_cleanup_data, graph -): +def test_structured_property_definition_evolution(ingest_cleanup_data, graph): property_name = "enumProperty1234" create_property_definition( @@ -316,7 +314,9 @@ def test_structured_property_definition_evolution( PropertyValueClass(value="bar"), ], ) - generated_urns.append(f"urn:li:structuredProperty:{default_namespace}.{property_name}") + generated_urns.append( + f"urn:li:structuredProperty:{default_namespace}.{property_name}" + ) try: create_property_definition( @@ -345,9 +345,7 @@ def test_structured_property_definition_evolution( # ) @pytest.mark.dependency(depends=["test_healthchecks"]) def test_structured_property_schema_field(ingest_cleanup_data, graph): - property_name = ( - f"deprecationDate{randint(10, 10000)}" - ) + property_name = f"deprecationDate{randint(10, 10000)}" create_property_definition( property_name, @@ -356,26 +354,31 @@ def test_structured_property_schema_field(ingest_cleanup_data, graph): value_type="date", entity_types=["schemaField"], ) - generated_urns.append(f"urn:li:structuredProperty:io.datahubproject.test.{property_name}") + generated_urns.append( + f"urn:li:structuredProperty:io.datahubproject.test.{property_name}" + ) attach_property_to_entity( - schema_field_urns[0], property_name, "2020-10-01", graph=graph, namespace="io.datahubproject.test" + schema_field_urns[0], + property_name, + "2020-10-01", + graph=graph, + namespace="io.datahubproject.test", ) - assert ( - get_property_from_entity( - schema_field_urns[0], f"io.datahubproject.test.{property_name}", graph=graph - ) - == ["2020-10-01"] - ) + assert get_property_from_entity( + schema_field_urns[0], f"io.datahubproject.test.{property_name}", graph=graph + ) == ["2020-10-01"] try: attach_property_to_entity( - schema_field_urns[0], property_name, 200030, graph=graph, namespace="io.datahubproject.test" - ) - raise AssertionError( - "Should not be able to attach a number to a DATE property" + schema_field_urns[0], + property_name, + 200030, + graph=graph, + namespace="io.datahubproject.test", ) + raise 
AssertionError("Should not be able to attach a number to a DATE property") except Exception as e: if not isinstance(e, AssertionError): pass @@ -388,49 +391,38 @@ def test_dataset_yaml_loader(ingest_cleanup_data, graph): "tests/structured_properties/test_structured_properties.yaml" ) - for dataset in Dataset.from_yaml( - "tests/structured_properties/test_dataset.yaml" - ): + for dataset in Dataset.from_yaml("tests/structured_properties/test_dataset.yaml"): for mcp in dataset.generate_mcp(): graph.emit(mcp) wait_for_writes_to_sync() property_name = "io.acryl.dataManagement.deprecationDate" - assert ( - get_property_from_entity( - make_schema_field_urn( - make_dataset_urn("hive", "user.clicks"), "ip" - ), - property_name, - graph=graph, - ) - == ["2023-01-01"] - ) + assert get_property_from_entity( + make_schema_field_urn(make_dataset_urn("hive", "user.clicks"), "ip"), + property_name, + graph=graph, + ) == ["2023-01-01"] dataset = Dataset.from_datahub( graph=graph, urn="urn:li:dataset:(urn:li:dataPlatform:hive,user.clicks,PROD)", ) field_name = "ip" + assert dataset.schema_metadata is not None + assert dataset.schema_metadata.fields is not None matching_fields = [ f for f in dataset.schema_metadata.fields - if Dataset._simplify_field_path(f.id) == field_name + if f.id is not None and Dataset._simplify_field_path(f.id) == field_name ] assert len(matching_fields) == 1 - assert ( - matching_fields[0].structured_properties[ - Urn.make_structured_property_urn( - "io.acryl.dataManagement.deprecationDate" - ) - ] - == ["2023-01-01"] - ) + assert matching_fields[0].structured_properties is not None + assert matching_fields[0].structured_properties[ + Urn.make_structured_property_urn("io.acryl.dataManagement.deprecationDate") + ] == ["2023-01-01"] -def test_dataset_structured_property_validation( - ingest_cleanup_data, graph, caplog -): +def test_dataset_structured_property_validation(ingest_cleanup_data, graph, caplog): from datahub.api.entities.dataset.dataset import Dataset property_name = "replicationSLA" @@ -440,7 +432,9 @@ def test_dataset_structured_property_validation( create_property_definition( property_name=property_name, graph=graph, value_type=value_type ) - generated_urns.append(f"urn:li:structuredProperty:{default_namespace}.replicationSLA") + generated_urns.append( + f"urn:li:structuredProperty:{default_namespace}.replicationSLA" + ) attach_property_to_entity( dataset_urns[0], property_name, [property_value], graph=graph @@ -453,21 +447,15 @@ def test_dataset_structured_property_validation( float(property_value), ) - assert ( - Dataset.validate_structured_property("testName", "testValue") is None - ) + assert Dataset.validate_structured_property("testName", "testValue") is None bad_property_value = "2023-09-20" assert ( - Dataset.validate_structured_property( - property_name, bad_property_value - ) - is None + Dataset.validate_structured_property(property_name, bad_property_value) is None ) - -def test_structured_property_search(ingest_cleanup_data, graph: DataHubGraph, caplog): +def test_structured_property_search(ingest_cleanup_data, graph: DataHubGraph, caplog): def to_es_name(property_name, namespace=default_namespace): namespace_field = namespace.replace(".", "_") return f"structuredProperties.{namespace_field}_{property_name}" @@ -478,88 +466,116 @@ def to_es_name(property_name, namespace=default_namespace): create_property_definition( namespace="io.datahubproject.test", property_name=field_property_name, - graph=graph, value_type="date", entity_types=["schemaField"] + 
graph=graph, + value_type="date", + entity_types=["schemaField"], + ) + generated_urns.append( + f"urn:li:structuredProperty:io.datahubproject.test.{field_property_name}" ) - generated_urns.append(f"urn:li:structuredProperty:io.datahubproject.test.{field_property_name}") attach_property_to_entity( - schema_field_urns[0], field_property_name, "2020-10-01", graph=graph, namespace="io.datahubproject.test" + schema_field_urns[0], + field_property_name, + "2020-10-01", + graph=graph, + namespace="io.datahubproject.test", ) dataset_property_name = "replicationSLA" property_value = 30 value_type = "number" - create_property_definition(property_name=dataset_property_name, graph=graph, value_type=value_type) - generated_urns.append(f"urn:li:structuredProperty:{default_namespace}.{dataset_property_name}") + create_property_definition( + property_name=dataset_property_name, graph=graph, value_type=value_type + ) + generated_urns.append( + f"urn:li:structuredProperty:{default_namespace}.{dataset_property_name}" + ) - attach_property_to_entity(dataset_urns[0], dataset_property_name, [property_value], graph=graph) + attach_property_to_entity( + dataset_urns[0], dataset_property_name, [property_value], graph=graph + ) # [] = default entities which includes datasets, does not include fields - entity_urns = list(graph.get_urns_by_filter(extraFilters=[ - { - "field": to_es_name(dataset_property_name), - "negated": "false", - "condition": "EXISTS", - } - ])) + entity_urns = list( + graph.get_urns_by_filter( + extraFilters=[ + { + "field": to_es_name(dataset_property_name), + "negated": "false", + "condition": "EXISTS", + } + ] + ) + ) assert len(entity_urns) == 1 assert entity_urns[0] == dataset_urns[0] # Search over schema field specifically - field_structured_prop = graph.get_aspect(entity_urn=schema_field_urns[0], aspect_type=StructuredPropertiesClass) + field_structured_prop = graph.get_aspect( + entity_urn=schema_field_urns[0], aspect_type=StructuredPropertiesClass + ) assert field_structured_prop == StructuredPropertiesClass( properties=[ StructuredPropertyValueAssignmentClass( propertyUrn=f"urn:li:structuredProperty:io.datahubproject.test.{field_property_name}", - values=["2020-10-01"] + values=["2020-10-01"], ) ] ) # Search over entities that do not include the field - field_urns = list(graph.get_urns_by_filter(entity_types=["tag"], - extraFilters=[ - { - "field": to_es_name(field_property_name, - namespace="io.datahubproject.test"), - "negated": "false", - "condition": "EXISTS", - } - ])) + field_urns = list( + graph.get_urns_by_filter( + entity_types=["tag"], + extraFilters=[ + { + "field": to_es_name( + field_property_name, namespace="io.datahubproject.test" + ), + "negated": "false", + "condition": "EXISTS", + } + ], + ) + ) assert len(field_urns) == 0 # OR the two properties together to return both results - field_urns = list(graph.get_urns_by_filter(entity_types=["dataset", "tag"], - extraFilters=[ - { - "field": to_es_name(dataset_property_name), - "negated": "false", - "condition": "EXISTS", - } - ])) + field_urns = list( + graph.get_urns_by_filter( + entity_types=["dataset", "tag"], + extraFilters=[ + { + "field": to_es_name(dataset_property_name), + "negated": "false", + "condition": "EXISTS", + } + ], + ) + ) assert len(field_urns) == 1 assert dataset_urns[0] in field_urns -def test_dataset_structured_property_patch( - ingest_cleanup_data, graph, caplog -): +@pytest.mark.skip(reason="Functionality and test needs to be validated for correctness") +def 
test_dataset_structured_property_patch(ingest_cleanup_data, graph, caplog): property_name = "replicationSLA" property_value = 30 value_type = "number" create_property_definition( - property_name=property_name, - graph=graph, - value_type=value_type + property_name=property_name, graph=graph, value_type=value_type ) - dataset_patcher: DatasetPatchBuilder = DatasetPatchBuilder( - urn=dataset_urns[0] - ) + dataset_patcher: DatasetPatchBuilder = DatasetPatchBuilder(urn=dataset_urns[0]) - dataset_patcher.set_structured_property(StructuredPropertyUrn.make_structured_property_urn( - f"{default_namespace}.{property_name}"), property_value) + dataset_patcher.set_structured_property( + StructuredPropertyUrn.make_structured_property_urn( + f"{default_namespace}.{property_name}" + ), + property_value, + ) for mcp in dataset_patcher.build(): graph.emit(mcp) @@ -567,11 +583,12 @@ def test_dataset_structured_property_patch( dataset = Dataset.from_datahub(graph=graph, urn=dataset_urns[0]) assert dataset.structured_properties is not None - assert ( - [int(float(k)) for k in dataset.structured_properties[ + assert isinstance(dataset.structured_properties, list) + assert [ + int(float(k)) + for k in dataset.structured_properties[ StructuredPropertyUrn.make_structured_property_urn( f"{default_namespace}.{property_name}" ) - ]] - == [property_value] - ) + ] + ] == [property_value] diff --git a/smoke-test/tests/tags_and_terms/__init__.py b/smoke-test/tests/tags_and_terms/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/smoke-test/tests/tags-and-terms/data.json b/smoke-test/tests/tags_and_terms/data.json similarity index 100% rename from smoke-test/tests/tags-and-terms/data.json rename to smoke-test/tests/tags_and_terms/data.json diff --git a/smoke-test/tests/tags-and-terms/tags_and_terms_test.py b/smoke-test/tests/tags_and_terms/tags_and_terms_test.py similarity index 98% rename from smoke-test/tests/tags-and-terms/tags_and_terms_test.py rename to smoke-test/tests/tags_and_terms/tags_and_terms_test.py index 6ac75765286f00..34404a1ddff594 100644 --- a/smoke-test/tests/tags-and-terms/tags_and_terms_test.py +++ b/smoke-test/tests/tags_and_terms/tags_and_terms_test.py @@ -1,16 +1,20 @@ import pytest -from tests.utils import (delete_urns_from_file, get_frontend_url, - ingest_file_via_rest, wait_for_healthcheck_util) +from tests.utils import ( + delete_urns_from_file, + get_frontend_url, + ingest_file_via_rest, + wait_for_healthcheck_util, +) @pytest.fixture(scope="module", autouse=True) def ingest_cleanup_data(request): print("ingesting test data") - ingest_file_via_rest("tests/tags-and-terms/data.json") + ingest_file_via_rest("tests/tags_and_terms/data.json") yield print("removing test data") - delete_urns_from_file("tests/tags-and-terms/data.json") + delete_urns_from_file("tests/tags_and_terms/data.json") @pytest.fixture(scope="session") diff --git a/smoke-test/tests/telemetry/telemetry_test.py b/smoke-test/tests/telemetry/telemetry_test.py index b7cd6fa0517df7..963d85baef3bb7 100644 --- a/smoke-test/tests/telemetry/telemetry_test.py +++ b/smoke-test/tests/telemetry/telemetry_test.py @@ -5,7 +5,9 @@ def test_no_client_id(): client_id_urn = "urn:li:telemetry:clientId" - aspect = ["clientId"] # this is checking for the removal of the invalid aspect RemoveClientIdAspectStep.java + aspect = [ + "clientId" + ] # this is checking for the removal of the invalid aspect RemoveClientIdAspectStep.java res_data = json.dumps( get_aspects_for_entity(entity_urn=client_id_urn, aspects=aspect, 
typed=False) diff --git a/smoke-test/tests/test_stateful_ingestion.py b/smoke-test/tests/test_stateful_ingestion.py index c6adb402e5d510..5eac25059ec62c 100644 --- a/smoke-test/tests/test_stateful_ingestion.py +++ b/smoke-test/tests/test_stateful_ingestion.py @@ -4,15 +4,19 @@ from datahub.ingestion.run.pipeline import Pipeline from datahub.ingestion.source.sql.mysql import MySQLConfig, MySQLSource from datahub.ingestion.source.state.checkpoint import Checkpoint -from datahub.ingestion.source.state.entity_removal_state import \ - GenericCheckpointState -from datahub.ingestion.source.state.stale_entity_removal_handler import \ - StaleEntityRemovalHandler +from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState +from datahub.ingestion.source.state.stale_entity_removal_handler import ( + StaleEntityRemovalHandler, +) from sqlalchemy import create_engine from sqlalchemy.sql import text -from tests.utils import (get_gms_url, get_mysql_password, get_mysql_url, - get_mysql_username) +from tests.utils import ( + get_gms_url, + get_mysql_password, + get_mysql_url, + get_mysql_username, +) def test_stateful_ingestion(wait_for_healthchecks): diff --git a/smoke-test/tests/tests/tests_test.py b/smoke-test/tests/tests/tests_test.py index 213a2ea087b7a1..28005c8397d0d1 100644 --- a/smoke-test/tests/tests/tests_test.py +++ b/smoke-test/tests/tests/tests_test.py @@ -1,9 +1,13 @@ import pytest import tenacity -from tests.utils import (delete_urns_from_file, get_frontend_url, - get_sleep_info, ingest_file_via_rest, - wait_for_healthcheck_util) +from tests.utils import ( + delete_urns_from_file, + get_frontend_url, + get_sleep_info, + ingest_file_via_rest, + wait_for_healthcheck_util, +) sleep_sec, sleep_times = get_sleep_info() @@ -37,7 +41,6 @@ def test_healthchecks(wait_for_healthchecks): def create_test(frontend_session): - # Create new Test create_test_json = { "query": """mutation createTest($input: CreateTestInput!) 
{\n @@ -84,7 +87,6 @@ def delete_test(frontend_session, test_urn): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_create_test(frontend_session, wait_for_healthchecks): - test_urn = create_test(frontend_session) # Get the test diff --git a/smoke-test/tests/timeline/timeline_test.py b/smoke-test/tests/timeline/timeline_test.py index c075d981487dbd..f8a0e425c37816 100644 --- a/smoke-test/tests/timeline/timeline_test.py +++ b/smoke-test/tests/timeline/timeline_test.py @@ -1,15 +1,14 @@ import json -import pytest -from time import sleep +import pytest from datahub.cli import timeline_cli from datahub.cli.cli_utils import guess_entity_type, post_entity -from tests.utils import (get_datahub_graph, ingest_file_via_rest, - wait_for_writes_to_sync) +from tests.utils import get_datahub_graph, ingest_file_via_rest, wait_for_writes_to_sync pytestmark = pytest.mark.no_cypress_suite1 + def test_all(): platform = "urn:li:dataPlatform:kafka" dataset_name = "test-timeline-sample-kafka" @@ -184,7 +183,7 @@ def put(urn: str, aspect: str, aspect_data: str) -> None: entity_type = guess_entity_type(urn) with open(aspect_data) as fp: aspect_obj = json.load(fp) - status = post_entity( + post_entity( urn=urn, aspect_name=aspect, entity_type=entity_type, diff --git a/smoke-test/tests/tokens/revokable_access_token_test.py b/smoke-test/tests/tokens/revokable_access_token_test.py index 55f3de594af4e2..10332b32b9cafc 100644 --- a/smoke-test/tests/tokens/revokable_access_token_test.py +++ b/smoke-test/tests/tokens/revokable_access_token_test.py @@ -1,11 +1,14 @@ import os -from time import sleep import pytest -import requests -from tests.utils import (get_admin_credentials, get_frontend_url, - wait_for_healthcheck_util, wait_for_writes_to_sync) +from tests.utils import ( + get_admin_credentials, + get_frontend_url, + login_as, + wait_for_healthcheck_util, + wait_for_writes_to_sync, +) # Disable telemetry os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false" @@ -29,7 +32,7 @@ def test_healthchecks(wait_for_healthchecks): @pytest.fixture(scope="class", autouse=True) def custom_user_setup(): """Fixture to execute setup before and tear down after all tests are run""" - admin_session = loginAs(admin_user, admin_pass) + admin_session = login_as(admin_user, admin_pass) res_data = removeUser(admin_session, "urn:li:corpuser:user") assert res_data @@ -77,7 +80,7 @@ def custom_user_setup(): # signUp will override the session cookie to the new user to be signed up. admin_session.cookies.clear() - admin_session = loginAs(admin_user, admin_pass) + admin_session = login_as(admin_user, admin_pass) # Make user created user is there. 
res_data = listUsers(admin_session) @@ -91,7 +94,7 @@ def custom_user_setup(): res_data = removeUser(admin_session, "urn:li:corpuser:user") assert res_data assert res_data["data"] - assert res_data["data"]["removeUser"] == True + assert res_data["data"]["removeUser"] is True # Sleep for eventual consistency wait_for_writes_to_sync() @@ -106,7 +109,7 @@ def custom_user_setup(): @pytest.fixture(autouse=True) def access_token_setup(): """Fixture to execute asserts before and after a test is run""" - admin_session = loginAs(admin_user, admin_pass) + admin_session = login_as(admin_user, admin_pass) res_data = listAccessTokens(admin_session) assert res_data @@ -127,7 +130,7 @@ def access_token_setup(): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_admin_can_create_list_and_revoke_tokens(wait_for_healthchecks): - admin_session = loginAs(admin_user, admin_pass) + admin_session = login_as(admin_user, admin_pass) # Using a super account, there should be no tokens res_data = listAccessTokens(admin_session) @@ -170,7 +173,7 @@ def test_admin_can_create_list_and_revoke_tokens(wait_for_healthchecks): assert res_data assert res_data["data"] assert res_data["data"]["revokeAccessToken"] - assert res_data["data"]["revokeAccessToken"] == True + assert res_data["data"]["revokeAccessToken"] is True # Sleep for eventual consistency wait_for_writes_to_sync() @@ -184,7 +187,7 @@ def test_admin_can_create_list_and_revoke_tokens(wait_for_healthchecks): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_admin_can_create_and_revoke_tokens_for_other_user(wait_for_healthchecks): - admin_session = loginAs(admin_user, admin_pass) + admin_session = login_as(admin_user, admin_pass) # Using a super account, there should be no tokens res_data = listAccessTokens(admin_session) @@ -227,7 +230,7 @@ def test_admin_can_create_and_revoke_tokens_for_other_user(wait_for_healthchecks assert res_data assert res_data["data"] assert res_data["data"]["revokeAccessToken"] - assert res_data["data"]["revokeAccessToken"] == True + assert res_data["data"]["revokeAccessToken"] is True # Sleep for eventual consistency wait_for_writes_to_sync() @@ -241,7 +244,7 @@ def test_admin_can_create_and_revoke_tokens_for_other_user(wait_for_healthchecks @pytest.mark.dependency(depends=["test_healthchecks"]) def test_non_admin_can_create_list_revoke_tokens(wait_for_healthchecks): - user_session = loginAs("user", "user") + user_session = login_as("user", "user") # Normal user should be able to generate token for himself. 
res_data = generateAccessToken_v2(user_session, "urn:li:corpuser:user") @@ -280,7 +283,7 @@ def test_non_admin_can_create_list_revoke_tokens(wait_for_healthchecks): assert res_data assert res_data["data"] assert res_data["data"]["revokeAccessToken"] - assert res_data["data"]["revokeAccessToken"] == True + assert res_data["data"]["revokeAccessToken"] is True # Sleep for eventual consistency wait_for_writes_to_sync() @@ -296,7 +299,7 @@ def test_non_admin_can_create_list_revoke_tokens(wait_for_healthchecks): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_admin_can_manage_tokens_generated_by_other_user(wait_for_healthchecks): - admin_session = loginAs(admin_user, admin_pass) + admin_session = login_as(admin_user, admin_pass) # Using a super account, there should be no tokens res_data = listAccessTokens(admin_session) @@ -306,7 +309,7 @@ def test_admin_can_manage_tokens_generated_by_other_user(wait_for_healthchecks): assert len(res_data["data"]["listAccessTokens"]["tokens"]) == 0 admin_session.cookies.clear() - user_session = loginAs("user", "user") + user_session = login_as("user", "user") res_data = generateAccessToken_v2(user_session, "urn:li:corpuser:user") assert res_data assert res_data["data"] @@ -326,7 +329,7 @@ def test_admin_can_manage_tokens_generated_by_other_user(wait_for_healthchecks): # Admin should be able to list other tokens user_session.cookies.clear() - admin_session = loginAs(admin_user, admin_pass) + admin_session = login_as(admin_user, admin_pass) res_data = listAccessTokens( admin_session, [{"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}] ) @@ -346,18 +349,18 @@ def test_admin_can_manage_tokens_generated_by_other_user(wait_for_healthchecks): # Admin can delete token created by someone else. admin_session.cookies.clear() - admin_session = loginAs(admin_user, admin_pass) + admin_session = login_as(admin_user, admin_pass) res_data = revokeAccessToken(admin_session, user_tokenId) assert res_data assert res_data["data"] assert res_data["data"]["revokeAccessToken"] - assert res_data["data"]["revokeAccessToken"] == True + assert res_data["data"]["revokeAccessToken"] is True # Sleep for eventual consistency wait_for_writes_to_sync() # Using a normal account, check that all its tokens where removed. user_session.cookies.clear() - user_session = loginAs("user", "user") + user_session = login_as("user", "user") res_data = listAccessTokens( user_session, [{"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}] ) @@ -367,7 +370,7 @@ def test_admin_can_manage_tokens_generated_by_other_user(wait_for_healthchecks): assert len(res_data["data"]["listAccessTokens"]["tokens"]) == 0 # Using the super account, check that all tokens where removed. 
- admin_session = loginAs(admin_user, admin_pass) + admin_session = login_as(admin_user, admin_pass) res_data = listAccessTokens( admin_session, [{"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}] ) @@ -379,7 +382,7 @@ def test_admin_can_manage_tokens_generated_by_other_user(wait_for_healthchecks): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_non_admin_can_not_generate_tokens_for_others(wait_for_healthchecks): - user_session = loginAs("user", "user") + user_session = login_as("user", "user") # Normal user should not be able to generate token for another user res_data = generateAccessToken_v2(user_session, f"urn:li:corpuser:{admin_user}") assert res_data @@ -467,19 +470,6 @@ def revokeAccessToken(session, tokenId): return response.json() -def loginAs(username, password): - session = requests.Session() - - headers = { - "Content-Type": "application/json", - } - data = '{"username":"' + username + '", "password":"' + password + '"}' - response = session.post(f"{get_frontend_url()}/logIn", headers=headers, data=data) - response.raise_for_status() - - return session - - def removeUser(session, urn): # Remove user json = { diff --git a/smoke-test/tests/utilities/file_emitter.py b/smoke-test/tests/utilities/file_emitter.py index 27a91c360af8a8..ddbcff8db31d8b 100644 --- a/smoke-test/tests/utilities/file_emitter.py +++ b/smoke-test/tests/utilities/file_emitter.py @@ -1,11 +1,14 @@ -from datahub.ingestion.sink.file import FileSink, FileSinkConfig +import time + from datahub.ingestion.api.common import PipelineContext, RecordEnvelope from datahub.ingestion.api.sink import NoopWriteCallback -import time +from datahub.ingestion.sink.file import FileSink, FileSinkConfig class FileEmitter: - def __init__(self, filename: str, run_id: str = f"test_{int(time.time()*1000.0)}") -> None: + def __init__( + self, filename: str, run_id: str = f"test_{int(time.time()*1000.0)}" + ) -> None: self.sink: FileSink = FileSink( ctx=PipelineContext(run_id=run_id), config=FileSinkConfig(filename=filename), @@ -18,4 +21,4 @@ def emit(self, event): ) def close(self): - self.sink.close() \ No newline at end of file + self.sink.close() diff --git a/smoke-test/tests/utils.py b/smoke-test/tests/utils.py index bd75b13d1910f6..29b956bde9ab89 100644 --- a/smoke-test/tests/utils.py +++ b/smoke-test/tests/utils.py @@ -2,14 +2,10 @@ import json import logging import os -import subprocess -import time from datetime import datetime, timedelta, timezone -from time import sleep from typing import Any, Dict, List, Tuple -from datahub.cli import cli_utils -from datahub.cli.cli_utils import get_system_auth +from datahub.cli import cli_utils, env_utils from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph from datahub.ingestion.run.pipeline import Pipeline from joblib import Parallel, delayed @@ -22,23 +18,14 @@ def get_frontend_session(): - session = requests.Session() + username, password = get_admin_credentials() + return login_as(username, password) - headers = { - "Content-Type": "application/json", - } - system_auth = get_system_auth() - if system_auth is not None: - session.headers.update({"Authorization": system_auth}) - else: - username, password = get_admin_credentials() - data = '{"username":"' + username + '", "password":"' + password + '"}' - response = session.post( - f"{get_frontend_url()}/logIn", headers=headers, data=data - ) - response.raise_for_status() - return session +def login_as(username: str, password: str): + return cli_utils.get_session_login_as( + 
username=username, password=password, frontend_url=get_frontend_url() + ) def get_admin_username() -> str: @@ -146,7 +133,7 @@ def delete_urns(urns: List[str]) -> None: def delete_urns_from_file(filename: str, shared_data: bool = False) -> None: - if not cli_utils.get_boolean_env_variable("CLEANUP_DATA", True): + if not env_utils.get_boolean_env_variable("CLEANUP_DATA", True): print("Not cleaning data to save time") return session = requests.Session() @@ -223,7 +210,7 @@ def create_datahub_step_state_aspect( def create_datahub_step_state_aspects( - username: str, onboarding_ids: str, onboarding_filename + username: str, onboarding_ids: List[str], onboarding_filename: str ) -> None: """ For a specific user, creates dataHubStepState aspects for each onboarding id in the list diff --git a/smoke-test/tests/views/views_test.py b/smoke-test/tests/views/views_test.py index 685c3bd80b04d8..a99f1f0dbb2451 100644 --- a/smoke-test/tests/views/views_test.py +++ b/smoke-test/tests/views/views_test.py @@ -1,10 +1,7 @@ -import time - import pytest import tenacity -from tests.utils import (delete_urns_from_file, get_frontend_url, get_gms_url, - get_sleep_info, ingest_file_via_rest) +from tests.utils import get_frontend_url, get_sleep_info sleep_sec, sleep_times = get_sleep_info() @@ -19,7 +16,6 @@ def test_healthchecks(wait_for_healthchecks): stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) def _ensure_more_views(frontend_session, list_views_json, query_name, before_count): - # Get new count of Views response = frontend_session.post( f"{get_frontend_url()}/api/v2/graphql", json=list_views_json @@ -43,7 +39,6 @@ def _ensure_more_views(frontend_session, list_views_json, query_name, before_cou stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) def _ensure_less_views(frontend_session, list_views_json, query_name, before_count): - # Get new count of Views response = frontend_session.post( f"{get_frontend_url()}/api/v2/graphql", json=list_views_json @@ -64,7 +59,6 @@ def _ensure_less_views(frontend_session, list_views_json, query_name, before_cou @pytest.mark.dependency(depends=["test_healthchecks"]) def test_create_list_delete_global_view(frontend_session): - # Get count of existing views list_global_views_json = { "query": """query listGlobalViews($input: ListGlobalViewsInput!) {\n @@ -161,8 +155,6 @@ def test_create_list_delete_global_view(frontend_session): before_count=before_count, ) - delete_json = {"urn": view_urn} - # Delete the View delete_view_json = { "query": """mutation deleteView($urn: String!) {\n @@ -190,7 +182,6 @@ def test_create_list_delete_global_view(frontend_session): depends=["test_healthchecks", "test_create_list_delete_global_view"] ) def test_create_list_delete_personal_view(frontend_session): - # Get count of existing views list_my_views_json = { "query": """query listMyViews($input: ListMyViewsInput!) {\n @@ -314,7 +305,6 @@ def test_create_list_delete_personal_view(frontend_session): depends=["test_healthchecks", "test_create_list_delete_personal_view"] ) def test_update_global_view(frontend_session): - # First create a view new_view_name = "Test View" new_view_description = "Test Description"