diff --git a/.github/workflows/metadata-model.yml b/.github/workflows/metadata-model.yml index 4bae5ccc9a266d..eb098a327e4cb5 100644 --- a/.github/workflows/metadata-model.yml +++ b/.github/workflows/metadata-model.yml @@ -37,6 +37,19 @@ jobs: run: ./metadata-ingestion/scripts/install_deps.sh - name: Run model generation run: ./gradlew :metadata-models:build + - name: Generate metadata files + if: ${{ needs.setup.outputs.publish == 'true' }} + run: ./gradlew :metadata-ingestion:modelDocGen + - name: Configure AWS Credentials + if: ${{ needs.setup.outputs.publish == 'true' }} + uses: aws-actions/configure-aws-credentials@v3 + with: + aws-access-key-id: ${{ secrets.ACRYL_CI_ARTIFACTS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.ACRYL_CI_ARTIFACTS_ACCESS_KEY }} + aws-region: us-west-2 + - name: Upload metadata to S3 + if: ${{ needs.setup.outputs.publish == 'true' }} + run: aws s3 cp ./metadata-ingestion/generated/docs/metadata_model_mces.json s3://${{ secrets.ACRYL_CI_ARTIFACTS_BUCKET }}/datahub/demo/metadata/ - name: Upload metadata to DataHub if: ${{ needs.setup.outputs.publish == 'true' }} env: diff --git a/build.gradle b/build.gradle index 7c5deb47839434..c1278a6dab1a04 100644 --- a/build.gradle +++ b/build.gradle @@ -167,6 +167,7 @@ project.ext.externalDependency = [ 'parquetHadoop': 'org.apache.parquet:parquet-hadoop:1.13.1', 'picocli': 'info.picocli:picocli:4.5.0', 'playCache': "com.typesafe.play:play-cache_2.12:$playVersion", + 'playCaffeineCache': "com.typesafe.play:play-caffeine-cache_2.12:$playVersion", 'playWs': 'com.typesafe.play:play-ahc-ws-standalone_2.12:2.1.10', 'playDocs': "com.typesafe.play:play-docs_2.12:$playVersion", 'playGuice': "com.typesafe.play:play-guice_2.12:$playVersion", diff --git a/datahub-frontend/build.gradle b/datahub-frontend/build.gradle index eb81b317455361..9a5fb3210a3115 100644 --- a/datahub-frontend/build.gradle +++ b/datahub-frontend/build.gradle @@ -89,6 +89,22 @@ docker { buildx(true) load(true) push(false) + + // 
Add build args if they are defined (needed for some CI or enterprise environments) + def dockerBuildArgs = [:] + if (project.hasProperty('alpineApkRepositoryUrl')) { + dockerBuildArgs.ALPINE_REPO_URL = project.getProperty('alpineApkRepositoryUrl') + } + if (project.hasProperty('githubMirrorUrl')) { + dockerBuildArgs.GITHUB_REPO_URL = project.getProperty('githubMirrorUrl') + } + if (project.hasProperty('mavenCentralRepositoryUrl')) { + dockerBuildArgs.MAVEN_CENTRAL_REPO_URL = project.getProperty('mavenCentralRepositoryUrl') + } + + if (dockerBuildArgs.size() > 0) { + buildArgs(dockerBuildArgs) + } } task unversionZip(type: Copy, dependsOn: [':datahub-web-react:build', dist]) { @@ -104,4 +120,4 @@ task cleanLocalDockerImages { rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } -dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file +dockerClean.finalizedBy(cleanLocalDockerImages) diff --git a/datahub-frontend/play.gradle b/datahub-frontend/play.gradle index daecba16cbf723..dd1ceee411f746 100644 --- a/datahub-frontend/play.gradle +++ b/datahub-frontend/play.gradle @@ -58,6 +58,7 @@ dependencies { implementation externalDependency.shiroCore implementation externalDependency.playCache + implementation externalDependency.playCaffeineCache implementation externalDependency.playWs implementation externalDependency.playServer implementation externalDependency.playAkkaHttpServer diff --git a/datahub-upgrade/build.gradle b/datahub-upgrade/build.gradle index 3356445cda7e1c..71baa8af99468a 100644 --- a/datahub-upgrade/build.gradle +++ b/datahub-upgrade/build.gradle @@ -108,6 +108,22 @@ docker { buildx(true) load(true) push(false) + + // Add build args if they are defined (needed for some CI or enterprise environments) + def dockerBuildArgs = [:] + if (project.hasProperty('alpineApkRepositoryUrl')) { + dockerBuildArgs.ALPINE_REPO_URL = project.getProperty('alpineApkRepositoryUrl') + } + if 
(project.hasProperty('githubMirrorUrl')) { + dockerBuildArgs.GITHUB_REPO_URL = project.getProperty('githubMirrorUrl') + } + if (project.hasProperty('mavenCentralRepositoryUrl')) { + dockerBuildArgs.MAVEN_CENTRAL_REPO_URL = project.getProperty('mavenCentralRepositoryUrl') + } + + if (dockerBuildArgs.size() > 0) { + buildArgs(dockerBuildArgs) + } } tasks.getByPath(":datahub-upgrade:docker").dependsOn([bootJar]) diff --git a/datahub-web-react/package.json b/datahub-web-react/package.json index 583a17ce69ef50..cd69780814422a 100644 --- a/datahub-web-react/package.json +++ b/datahub-web-react/package.json @@ -150,6 +150,8 @@ "@ant-design/colors": "6.0.0", "refractor": "3.3.1", "json-schema": "0.4.0", - "@babel/traverse": ">=7.23.2" + "@babel/traverse": ">=7.23.2", + "prismjs": "^1.27.0", + "nth-check": "^2.0.1" } } diff --git a/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx b/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx index b8e1505fceaeca..ac73a1f5ece7ce 100644 --- a/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx +++ b/datahub-web-react/src/app/permissions/policy/PolicyPrivilegeForm.tsx @@ -319,7 +319,7 @@ export default function PolicyPrivilegeForm({ .filter((privs) => privs.resourceType !== 'all') .map((resPrivs) => { return ( - + {resPrivs.resourceTypeDisplayName} ); @@ -355,7 +355,9 @@ export default function PolicyPrivilegeForm({ )} > {resourceSearchResults?.map((result) => ( - {renderSearchResult(result)} + + {renderSearchResult(result)} + ))} @@ -389,7 +391,9 @@ export default function PolicyPrivilegeForm({ dropdownStyle={isShowingDomainNavigator ? 
{ display: 'none' } : {}} > {domainSearchResults?.map((result) => ( - {renderSearchResult(result)} + + {renderSearchResult(result)} + ))} @@ -412,9 +416,14 @@ export default function PolicyPrivilegeForm({ )} > - {privilegeOptions.map((priv) => ( - {priv.displayName} - ))} + {privilegeOptions.map((priv, index) => { + const key = `${priv.type}-${index}`; + return ( + + {priv.displayName} + + ); + })} All Privileges diff --git a/datahub-web-react/src/app/search/sidebar/BrowseSidebar.tsx b/datahub-web-react/src/app/search/sidebar/BrowseSidebar.tsx index 0d3d40c4a71af8..822e75b65febc3 100644 --- a/datahub-web-react/src/app/search/sidebar/BrowseSidebar.tsx +++ b/datahub-web-react/src/app/search/sidebar/BrowseSidebar.tsx @@ -26,12 +26,12 @@ const SidebarHeader = styled.div` white-space: nowrap; `; -const SidebarBody = styled.div` +const SidebarBody = styled.div<{ visible: boolean }>` height: calc(100% - 47px); padding-left: 16px; padding-right: 12px; padding-bottom: 200px; - overflow: auto; + overflow: ${(props) => (props.visible ? 'auto' : 'hidden')}; white-space: nowrap; `; @@ -50,7 +50,7 @@ const BrowseSidebar = ({ visible, width }: Props) => { Navigate - + {entityAggregations && !entityAggregations.length &&
No results found
} {entityAggregations?.map((entityAggregation) => ( diff --git a/datahub-web-react/src/app/shared/LogoCountCard.tsx b/datahub-web-react/src/app/shared/LogoCountCard.tsx index ebf0d9cd4f54ee..e67898520e7b85 100644 --- a/datahub-web-react/src/app/shared/LogoCountCard.tsx +++ b/datahub-web-react/src/app/shared/LogoCountCard.tsx @@ -7,6 +7,7 @@ import { HomePageButton } from './components'; const PlatformLogo = styled(Image)` max-height: 32px; + height: 32px; width: auto; object-fit: contain; background-color: transparent; diff --git a/datahub-web-react/yarn.lock b/datahub-web-react/yarn.lock index d46ee303d5d571..bdbf2f940f90ab 100644 --- a/datahub-web-react/yarn.lock +++ b/datahub-web-react/yarn.lock @@ -6063,7 +6063,7 @@ bonjour-service@^1.0.11: fast-deep-equal "^3.1.3" multicast-dns "^7.2.5" -boolbase@^1.0.0, boolbase@~1.0.0: +boolbase@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/boolbase/-/boolbase-1.0.0.tgz#68dff5fbe60c51eb37725ea9e3ed310dcc1e776e" integrity sha1-aN/1++YMUes3cl6p4+0xDcwed24= @@ -6502,15 +6502,6 @@ cli-width@^3.0.0: resolved "https://registry.yarnpkg.com/cli-width/-/cli-width-3.0.0.tgz#a2f48437a2caa9a22436e794bf071ec9e61cedf6" integrity sha512-FxqpkPPwu1HjuN93Omfm4h8uIanXofW0RxVEW3k5RKx+mJJYSthzNhp32Kzxxy3YAEZ/Dc/EWN1vZRY0+kOhbw== -clipboard@^2.0.0: - version "2.0.8" - resolved "https://registry.yarnpkg.com/clipboard/-/clipboard-2.0.8.tgz#ffc6c103dd2967a83005f3f61976aa4655a4cdba" - integrity sha512-Y6WO0unAIQp5bLmk1zdThRhgJt/x3ks6f30s3oE3H1mgIEU33XyQjEf8gsf6DxC7NPX8Y1SsNWjUjL/ywLnnbQ== - dependencies: - good-listener "^1.2.2" - select "^1.1.2" - tiny-emitter "^2.0.0" - cliui@^6.0.0: version "6.0.0" resolved "https://registry.yarnpkg.com/cliui/-/cliui-6.0.0.tgz#511d702c0c4e41ca156d7d0e96021f23e13225b1" @@ -7454,11 +7445,6 @@ delayed-stream@~1.0.0: resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619" integrity sha1-3zrhmayt+31ECqrgsp4icrJOxhk= -delegate@^3.1.2: - 
version "3.2.0" - resolved "https://registry.yarnpkg.com/delegate/-/delegate-3.2.0.tgz#b66b71c3158522e8ab5744f720d8ca0c2af59166" - integrity sha512-IofjkYBZaZivn0V8nnsMJGBr4jVLxHDheKSW88PyxS5QC4Vo9ZbZVvhzlSxY87fVq3STR6r+4cGepyHkcWOQSw== - depd@2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/depd/-/depd-2.0.0.tgz#b696163cc757560d09cf22cc8fad1571b79e76df" @@ -9032,13 +9018,6 @@ globby@^13.1.1: merge2 "^1.4.1" slash "^4.0.0" -good-listener@^1.2.2: - version "1.2.2" - resolved "https://registry.yarnpkg.com/good-listener/-/good-listener-1.2.2.tgz#d53b30cdf9313dffb7dc9a0d477096aa6d145c50" - integrity sha1-1TswzfkxPf+33JoNR3CWqm0UXFA= - dependencies: - delegate "^3.1.2" - got@^9.6.0: version "9.6.0" resolved "https://registry.yarnpkg.com/got/-/got-9.6.0.tgz#edf45e7d67f99545705de1f7bbeeeb121765ed85" @@ -12182,14 +12161,7 @@ npm-run-path@^4.0.1: dependencies: path-key "^3.0.0" -nth-check@^1.0.2: - version "1.0.2" - resolved "https://registry.yarnpkg.com/nth-check/-/nth-check-1.0.2.tgz#b2bd295c37e3dd58a3bf0700376663ba4d9cf05c" - integrity sha512-WeBOdju8SnzPN5vTUJYxYUxLeXpCaVP5i5e0LF8fg7WORF2Wd7wFX/pk0tYZk7s8T+J7VLy0Da6J1+wCT0AtHg== - dependencies: - boolbase "~1.0.0" - -nth-check@^2.0.1: +nth-check@^1.0.2, nth-check@^2.0.1: version "2.1.1" resolved "https://registry.yarnpkg.com/nth-check/-/nth-check-2.1.1.tgz#c9eab428effce36cd6b92c924bdb000ef1f1ed1d" integrity sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w== @@ -13322,17 +13294,10 @@ pretty-format@^28.1.3: ansi-styles "^5.0.0" react-is "^18.0.0" -prismjs@^1.22.0: - version "1.24.1" - resolved "https://registry.yarnpkg.com/prismjs/-/prismjs-1.24.1.tgz#c4d7895c4d6500289482fa8936d9cdd192684036" - integrity sha512-mNPsedLuk90RVJioIky8ANZEwYm5w9LcvCXrxHlwf4fNVSn8jEipMybMkWUyyF0JhnC+C4VcOVSBuHRKs1L5Ow== - -prismjs@~1.23.0: - version "1.23.0" - resolved "https://registry.yarnpkg.com/prismjs/-/prismjs-1.23.0.tgz#d3b3967f7d72440690497652a9d40ff046067f33" - integrity 
sha512-c29LVsqOaLbBHuIbsTxaKENh1N2EQBOHaWv7gkHN4dgRbxSREqDnDbtFJYdpPauS4YCplMSNCABQ6Eeor69bAA== - optionalDependencies: - clipboard "^2.0.0" +prismjs@^1.22.0, prismjs@^1.27.0, prismjs@~1.23.0: + version "1.29.0" + resolved "https://registry.yarnpkg.com/prismjs/-/prismjs-1.29.0.tgz#f113555a8fa9b57c35e637bba27509dcf802dd12" + integrity sha512-Kx/1w86q/epKcmte75LNrEoT+lX8pBpavuAbvJWRXar7Hz8jrtF+e3vY751p0R8H9HdArwaCTNDDzHg/ScJK1Q== process-nextick-args@~2.0.0: version "2.0.1" @@ -15099,11 +15064,6 @@ select-hose@^2.0.0: resolved "https://registry.yarnpkg.com/select-hose/-/select-hose-2.0.0.tgz#625d8658f865af43ec962bfc376a37359a4994ca" integrity sha1-Yl2GWPhlr0Psliv8N2o3NZpJlMo= -select@^1.1.2: - version "1.1.2" - resolved "https://registry.yarnpkg.com/select/-/select-1.1.2.tgz#0e7350acdec80b1108528786ec1d4418d11b396d" - integrity sha1-DnNQrN7ICxEIUoeG7B1EGNEbOW0= - selfsigned@^2.1.1: version "2.1.1" resolved "https://registry.yarnpkg.com/selfsigned/-/selfsigned-2.1.1.tgz#18a7613d714c0cd3385c48af0075abf3f266af61" @@ -16127,11 +16087,6 @@ thunky@^1.0.2: resolved "https://registry.yarnpkg.com/thunky/-/thunky-1.1.0.tgz#5abaf714a9405db0504732bbccd2cedd9ef9537d" integrity sha512-eHY7nBftgThBqOyHGVN+l8gF0BucP09fMo0oO/Lb0w1OF80dJv+lDVpXG60WMQvkcxAkNybKsrEIE3ZtKGmPrA== -tiny-emitter@^2.0.0: - version "2.1.0" - resolved "https://registry.yarnpkg.com/tiny-emitter/-/tiny-emitter-2.1.0.tgz#1d1a56edfc51c43e863cbb5382a72330e3555423" - integrity sha512-NB6Dk1A9xgQPMoGqC5CVXn123gWyte215ONT5Pp5a0yt4nlEoO1ZWeCwpncaekPHXO60i47ihFnZPiRPjRMq4Q== - tiny-invariant@^1.0.2: version "1.1.0" resolved "https://registry.yarnpkg.com/tiny-invariant/-/tiny-invariant-1.1.0.tgz#634c5f8efdc27714b7f386c35e6760991d230875" diff --git a/docker/build.gradle b/docker/build.gradle index 56634a5fe0c675..c7f783af6c9978 100644 --- a/docker/build.gradle +++ b/docker/build.gradle @@ -15,6 +15,7 @@ ext { ':metadata-service:war', ':datahub-frontend', ] + debug_modules = quickstart_modules - 
[':metadata-jobs:mce-consumer-job', ':metadata-jobs:mae-consumer-job'] debug_compose_args = [ @@ -27,6 +28,13 @@ ext { 'datahub-gms', 'datahub-frontend-react' ] + + // Postgres + pg_quickstart_modules = quickstart_modules - [':docker:mysql-setup'] + [':docker:postgres-setup'] + pg_compose_args = [ + '-f', 'docker-compose-without-neo4j.yml', + '-f', 'docker-compose-without-neo4j.postgres.override.yml' + ] } task quickstart(type: Exec, dependsOn: ':metadata-ingestion:install') { @@ -125,3 +133,29 @@ task debugReload(type: Exec) { def cmd = ['docker compose -p datahub'] + debug_compose_args + ['restart'] + debug_reloadable commandLine 'bash', '-c', cmd.join(" ") } + +task quickstartPg(type: Exec, dependsOn: ':metadata-ingestion:install') { + dependsOn(pg_quickstart_modules.collect { it + ':dockerTag' }) + shouldRunAfter ':metadata-ingestion:clean', 'quickstartNuke' + + environment "DATAHUB_TELEMETRY_ENABLED", "false" + environment "DOCKER_COMPOSE_BASE", "file://${rootProject.projectDir}" + environment "DATAHUB_POSTGRES_VERSION", "15.5" + + // OpenSearch + environment "DATAHUB_SEARCH_IMAGE", 'opensearchproject/opensearch' + environment "DATAHUB_SEARCH_TAG", '2.9.0' + environment "XPACK_SECURITY_ENABLED", 'plugins.security.disabled=true' + environment "USE_AWS_ELASTICSEARCH", 'true' + + def cmd = [ + 'source ../metadata-ingestion/venv/bin/activate && ', + 'datahub docker quickstart', + '--no-pull-images', + '--standalone_consumers', + '--version', "v${version}", + '--dump-logs-on-failure' + ] + pg_compose_args + + commandLine 'bash', '-c', cmd.join(" ") +} diff --git a/docker/datahub-frontend/Dockerfile b/docker/datahub-frontend/Dockerfile index aaace5ae38ca36..9c26d73f4f40bd 100644 --- a/docker/datahub-frontend/Dockerfile +++ b/docker/datahub-frontend/Dockerfile @@ -3,14 +3,22 @@ ARG APP_ENV=prod FROM alpine:3 AS base +# Configurable repositories +ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine +ARG GITHUB_REPO_URL=https://github.com +ARG 
MAVEN_CENTRAL_REPO_URL=https://repo1.maven.org/maven2 + RUN addgroup -S datahub && adduser -S datahub -G datahub +# Optionally set corporate mirror for apk +RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi + # Upgrade Alpine and base packages # PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk --no-cache --update-cache --available upgrade \ && apk --no-cache add curl sqlite libc6-compat java-snappy \ - && apk --no-cache add openjdk11-jre-headless --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community \ - && apk --no-cache add jattach --repository http://dl-cdn.alpinelinux.org/alpine/edge/community/ + && apk --no-cache add openjdk11-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \ + && apk --no-cache add jattach --repository ${ALPINE_REPO_URL}/edge/community/ ENV LD_LIBRARY_PATH="/lib:/lib64" @@ -22,8 +30,8 @@ COPY ./docker/monitoring/client-prometheus-config.yaml /datahub-frontend/ RUN chown -R datahub:datahub /datahub-frontend && chmod 755 /datahub-frontend ENV JMX_VERSION=0.18.0 -RUN wget https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar -O opentelemetry-javaagent.jar \ - && wget https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar +RUN wget ${GITHUB_REPO_URL}/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar -O opentelemetry-javaagent.jar \ + && wget ${MAVEN_CENTRAL_REPO_URL}/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar FROM base as dev-install # Dummy stage for development. 
Assumes code is built on your machine and mounted to this image. diff --git a/docker/datahub-gms/Dockerfile b/docker/datahub-gms/Dockerfile index c5696bbd2d1d2c..1e13fa492c7f06 100644 --- a/docker/datahub-gms/Dockerfile +++ b/docker/datahub-gms/Dockerfile @@ -1,11 +1,22 @@ # Defining environment ARG APP_ENV=prod +# Defining custom repo urls for use in enterprise environments. Re-used between stages below. +ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine +ARG GITHUB_REPO_URL=https://github.com +ARG MAVEN_CENTRAL_REPO_URL=https://repo1.maven.org/maven2 + FROM golang:1-alpine3.18 AS binary +# Re-declaring arg from above to make it available in this stage (will inherit default value) +ARG ALPINE_REPO_URL + ENV DOCKERIZE_VERSION v0.6.1 WORKDIR /go/src/github.com/jwilder +# Optionally set corporate mirror for apk +RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi + RUN apk --no-cache --update add openssl git tar curl WORKDIR /go/src/github.com/jwilder/dockerize @@ -16,16 +27,25 @@ FROM alpine:3 AS base # Upgrade Alpine and base packages ENV JMX_VERSION=0.18.0 + +# Re-declaring args from above to make them available in this stage (will inherit default values) +ARG ALPINE_REPO_URL +ARG GITHUB_REPO_URL +ARG MAVEN_CENTRAL_REPO_URL + +# Optionally set corporate mirror for apk +RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi + # PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk --no-cache --update-cache --available upgrade \ && apk --no-cache add curl bash coreutils gcompat sqlite libc6-compat java-snappy \ - && apk --no-cache add openjdk11-jre-headless --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community
\ - && apk --no-cache add jattach --repository http://dl-cdn.alpinelinux.org/alpine/edge/community/ \ - && curl -sS https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-runner/9.4.46.v20220331/jetty-runner-9.4.46.v20220331.jar --output jetty-runner.jar \ - && curl -sS https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-jmx/9.4.46.v20220331/jetty-jmx-9.4.46.v20220331.jar --output jetty-jmx.jar \ - && curl -sS https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-util/9.4.46.v20220331/jetty-util-9.4.46.v20220331.jar --output jetty-util.jar \ - && wget --no-verbose https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \ - && wget --no-verbose https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar \ + && apk --no-cache add openjdk11-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \ + && apk --no-cache add jattach --repository ${ALPINE_REPO_URL}/edge/community/ \ + && curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-runner/9.4.46.v20220331/jetty-runner-9.4.46.v20220331.jar --output jetty-runner.jar \ + && curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-jmx/9.4.46.v20220331/jetty-jmx-9.4.46.v20220331.jar --output jetty-jmx.jar \ + && curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-util/9.4.46.v20220331/jetty-util-9.4.46.v20220331.jar --output jetty-util.jar \ + && wget --no-verbose ${GITHUB_REPO_URL}/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \ + && wget --no-verbose ${MAVEN_CENTRAL_REPO_URL}/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar \ && cp /usr/lib/jvm/java-11-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks COPY --from=binary /go/bin/dockerize /usr/local/bin diff --git 
a/docker/datahub-ingestion-base/Dockerfile b/docker/datahub-ingestion-base/Dockerfile index 25afe9b8b3dce0..e0f9fdc997071c 100644 --- a/docker/datahub-ingestion-base/Dockerfile +++ b/docker/datahub-ingestion-base/Dockerfile @@ -1,11 +1,23 @@ ARG APP_ENV=full ARG BASE_IMAGE=base +# Defining custom repo urls for use in enterprise environments. Re-used between stages below. +ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine +ARG GITHUB_REPO_URL=https://github.com +ARG DEBIAN_REPO_URL=http://deb.debian.org/debian +ARG PIP_MIRROR_URL=null + FROM golang:1-alpine3.18 AS dockerize-binary +# Re-declaring arg from above to make it available in this stage (will inherit default value) +ARG ALPINE_REPO_URL + ENV DOCKERIZE_VERSION v0.6.1 WORKDIR /go/src/github.com/jwilder +# Optionally set corporate mirror for apk +RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi + RUN apk --no-cache --update add openssl git tar curl WORKDIR /go/src/github.com/jwilder/dockerize @@ -14,11 +26,19 @@ RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION FROM python:3.10 as base +ARG DEBIAN_REPO_URL +ARG PIP_MIRROR_URL +ARG GITHUB_REPO_URL + ENV LIBRDKAFKA_VERSION=1.6.2 ENV CONFLUENT_KAFKA_VERSION=1.6.1 ENV DEBIAN_FRONTEND noninteractive +# Optionally set corporate mirror for apt and pip +RUN if [ "${DEBIAN_REPO_URL}" != "http://deb.debian.org/debian" ] ; then sed -i "s#http.*://deb.debian.org/debian#${DEBIAN_REPO_URL}#g" /etc/apt/sources.list.d/debian.sources ; fi +RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi + RUN apt-get update && apt-get install -y -qq \ make \ python3-ldap \ @@ -33,7 +53,7 @@ RUN apt-get update && apt-get install -y -qq \ unzip \ ldap-utils \ && python -m pip install --no-cache --upgrade pip wheel setuptools \ - && wget -q
https://github.com/edenhill/librdkafka/archive/v${LIBRDKAFKA_VERSION}.tar.gz -O - | \ + && wget -q ${GITHUB_REPO_URL}/edenhill/librdkafka/archive/v${LIBRDKAFKA_VERSION}.tar.gz -O - | \ tar -xz -C /root \ && cd /root/librdkafka-${LIBRDKAFKA_VERSION} \ && ./configure --prefix /usr && make && make install && cd .. && rm -rf /root/librdkafka-${LIBRDKAFKA_VERSION} \ @@ -84,4 +104,4 @@ FROM ${BASE_IMAGE} as slim-install FROM ${APP_ENV}-install USER datahub -ENV PATH="/datahub-ingestion/.local/bin:$PATH" \ No newline at end of file +ENV PATH="/datahub-ingestion/.local/bin:$PATH" diff --git a/docker/datahub-ingestion-base/build.gradle b/docker/datahub-ingestion-base/build.gradle index c4d8a962dcd325..e0168290c48f86 100644 --- a/docker/datahub-ingestion-base/build.gradle +++ b/docker/datahub-ingestion-base/build.gradle @@ -25,7 +25,24 @@ docker { }.exclude { i -> (!i.file.name.endsWith(".dockerignore") && i.file.isHidden()) } - buildArgs([APP_ENV: docker_target]) + + def dockerBuildArgs = [APP_ENV: docker_target] + + // Add build args if they are defined (needed for some CI or enterprise environments) + if (project.hasProperty('alpineApkRepositoryUrl')) { + dockerBuildArgs.ALPINE_REPO_URL = project.getProperty('alpineApkRepositoryUrl') + } + if (project.hasProperty('githubMirrorUrl')) { + dockerBuildArgs.GITHUB_REPO_URL = project.getProperty('githubMirrorUrl') + } + if (project.hasProperty('debianAptRepositoryUrl')) { + dockerBuildArgs.DEBIAN_REPO_URL = project.getProperty('debianAptRepositoryUrl') + } + if (project.hasProperty('pipMirrorUrl')) { + dockerBuildArgs.PIP_MIRROR_URL = project.getProperty('pipMirrorUrl') + } + + buildArgs(dockerBuildArgs) } tasks.getByName('docker').dependsOn('build') @@ -42,4 +59,4 @@ task cleanLocalDockerImages { rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } -dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file +dockerClean.finalizedBy(cleanLocalDockerImages) diff --git 
a/docker/datahub-ingestion/Dockerfile b/docker/datahub-ingestion/Dockerfile index 1aee79a428a98a..9516c31a19e21b 100644 --- a/docker/datahub-ingestion/Dockerfile +++ b/docker/datahub-ingestion/Dockerfile @@ -2,6 +2,8 @@ ARG APP_ENV=full ARG BASE_IMAGE=acryldata/datahub-ingestion-base ARG DOCKER_VERSION=head +ARG PIP_MIRROR_URL=null +ARG DEBIAN_REPO_URL=http://deb.debian.org/debian FROM $BASE_IMAGE:$DOCKER_VERSION as base USER 0 @@ -20,16 +22,23 @@ USER datahub ENV PATH="/datahub-ingestion/.local/bin:$PATH" FROM base as slim-install +ARG PIP_MIRROR_URL + +RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi RUN pip install --no-cache --user ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]" FROM base as full-install-build +ARG PIP_MIRROR_URL +ARG DEBIAN_REPO_URL USER 0 +RUN if [ "${DEBIAN_REPO_URL}" != "http://deb.debian.org/debian" ] ; then sed -i "s#http.*://deb.debian.org/debian#${DEBIAN_REPO_URL}#g" /etc/apt/sources.list.d/debian.sources ; fi RUN apt-get update && apt-get install -y -qq maven USER datahub COPY ./docker/datahub-ingestion/pyspark_jars.sh . 
+RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi RUN pip install --no-cache --user ".[base]" && \ pip install --no-cache --user "./airflow-plugin[acryl-datahub-airflow-plugin]" && \ pip install --no-cache --user ".[all]" diff --git a/docker/datahub-ingestion/Dockerfile-slim-only b/docker/datahub-ingestion/Dockerfile-slim-only index cb8c27ab463c48..4112f470c25bee 100644 --- a/docker/datahub-ingestion/Dockerfile-slim-only +++ b/docker/datahub-ingestion/Dockerfile-slim-only @@ -1,6 +1,7 @@ # Defining environment ARG BASE_IMAGE=acryldata/datahub-ingestion-base ARG DOCKER_VERSION=head-slim +ARG PIP_MIRROR_URL=null FROM $BASE_IMAGE:$DOCKER_VERSION as base USER 0 @@ -17,6 +18,10 @@ USER datahub ENV PATH="/datahub-ingestion/.local/bin:$PATH" FROM base as slim-install + +ARG PIP_MIRROR_URL + +RUN if [ "${PIP_MIRROR_URL}" != "null" ] ; then pip config set global.index-url ${PIP_MIRROR_URL} ; fi RUN pip install --no-cache --user ".[base,datahub-rest,datahub-kafka,snowflake,bigquery,redshift,mysql,postgres,hive,clickhouse,glue,dbt,looker,lookml,tableau,powerbi,superset,datahub-business-glossary]" FROM slim-install as final diff --git a/docker/datahub-ingestion/build.gradle b/docker/datahub-ingestion/build.gradle index 247b896d6955cb..52db594e2ef852 100644 --- a/docker/datahub-ingestion/build.gradle +++ b/docker/datahub-ingestion/build.gradle @@ -32,8 +32,18 @@ docker { }.exclude { i -> (!i.file.name.endsWith(".dockerignore") && i.file.isHidden()) } - buildArgs([DOCKER_VERSION: version, - RELEASE_VERSION: version.replace('-SNAPSHOT', '').replace('v', '').replace("-slim", '')]) + + def dockerBuildArgs = [DOCKER_VERSION: version, RELEASE_VERSION: version.replace('-SNAPSHOT', '').replace('v', '').replace("-slim", '')] + + // Add build args if they are defined (needed for some CI or enterprise environments) + if (project.hasProperty('pipMirrorUrl')) { + dockerBuildArgs.PIP_MIRROR_URL = project.getProperty('pipMirrorUrl') + } + 
if (project.hasProperty('debianAptRepositoryUrl')) { + dockerBuildArgs.DEBIAN_REPO_URL = project.getProperty('debianAptRepositoryUrl') + } + + buildArgs(dockerBuildArgs) } tasks.getByName('docker').dependsOn(['build', ':docker:datahub-ingestion-base:docker', @@ -51,4 +61,4 @@ task cleanLocalDockerImages { rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } -dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file +dockerClean.finalizedBy(cleanLocalDockerImages) diff --git a/docker/datahub-mae-consumer/Dockerfile b/docker/datahub-mae-consumer/Dockerfile index 07af7c66a77837..3bacd3b2dc81ae 100644 --- a/docker/datahub-mae-consumer/Dockerfile +++ b/docker/datahub-mae-consumer/Dockerfile @@ -1,11 +1,22 @@ # Defining environment ARG APP_ENV=prod +# Defining custom repo urls for use in enterprise environments. Re-used between stages below. +ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine +ARG GITHUB_REPO_URL=https://github.com +ARG MAVEN_CENTRAL_REPO_URL=https://repo1.maven.org/maven2 + FROM golang:1-alpine3.18 AS binary +# Re-declaring arg from above to make it available in this stage (will inherit default value) +ARG ALPINE_REPO_URL + ENV DOCKERIZE_VERSION v0.6.1 WORKDIR /go/src/github.com/jwilder +# Optionally set corporate mirror for apk +RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi + RUN apk --no-cache --update add openssl git tar curl WORKDIR /go/src/github.com/jwilder/dockerize @@ -14,15 +25,23 @@ RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION FROM alpine:3 AS base +# Re-declaring args from above to make them available in this stage (will inherit default values) +ARG ALPINE_REPO_URL +ARG GITHUB_REPO_URL +ARG MAVEN_CENTRAL_REPO_URL + +# Optionally set corporate mirror for apk +RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then 
sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi + # Upgrade Alpine and base packages ENV JMX_VERSION=0.18.0 # PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk --no-cache --update-cache --available upgrade \ && apk --no-cache add curl bash coreutils sqlite libc6-compat java-snappy \ - && apk --no-cache add openjdk11-jre-headless --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community \ - && apk --no-cache add jattach --repository http://dl-cdn.alpinelinux.org/alpine/edge/community/ \ - && wget --no-verbose https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \ - && wget --no-verbose https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar \ + && apk --no-cache add openjdk11-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \ + && apk --no-cache add jattach --repository ${ALPINE_REPO_URL}/edge/community/ \ + && wget --no-verbose ${GITHUB_REPO_URL}/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \ + && wget --no-verbose ${MAVEN_CENTRAL_REPO_URL}/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar \ && cp /usr/lib/jvm/java-11-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks COPY --from=binary /go/bin/dockerize /usr/local/bin diff --git a/docker/datahub-mce-consumer/Dockerfile b/docker/datahub-mce-consumer/Dockerfile index 97861d6be31419..bb22ab82f44021 100644 --- a/docker/datahub-mce-consumer/Dockerfile +++ b/docker/datahub-mce-consumer/Dockerfile @@ -1,11 +1,22 @@ # Defining environment ARG APP_ENV=prod +# Defining custom repo urls for use in enterprise environments. Re-used between stages below. 
+ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine +ARG GITHUB_REPO_URL=https://github.com +ARG MAVEN_CENTRAL_REPO_URL=https://repo1.maven.org/maven2 + FROM golang:1-alpine3.18 AS binary +# Re-declaring arg from above to make it available in this stage (will inherit default value) +ARG ALPINE_REPO_URL + ENV DOCKERIZE_VERSION v0.6.1 WORKDIR /go/src/github.com/jwilder +# Optionally set corporate mirror for apk +RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi + RUN apk --no-cache --update add openssl git tar curl WORKDIR /go/src/github.com/jwilder/dockerize @@ -14,15 +25,23 @@ RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION FROM alpine:3 AS base +# Re-declaring args from above to make them available in this stage (will inherit default values) +ARG ALPINE_REPO_URL +ARG GITHUB_REPO_URL +ARG MAVEN_CENTRAL_REPO_URL + +# Optionally set corporate mirror for apk +RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi + # Upgrade Alpine and base packages ENV JMX_VERSION=0.18.0 # PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk --no-cache --update-cache --available upgrade \ && apk --no-cache add curl bash sqlite libc6-compat java-snappy \ - && apk --no-cache add openjdk11-jre-headless --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community \ - && apk --no-cache add jattach --repository http://dl-cdn.alpinelinux.org/alpine/edge/community/ \ - && wget --no-verbose https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \ - && wget --no-verbose 
https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar \ + && apk --no-cache add openjdk11-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \ + && apk --no-cache add jattach --repository ${ALPINE_REPO_URL}/edge/community/ \ + && wget --no-verbose ${GITHUB_REPO_URL}/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \ + && wget --no-verbose ${MAVEN_CENTRAL_REPO_URL}/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar \ && cp /usr/lib/jvm/java-11-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks COPY --from=binary /go/bin/dockerize /usr/local/bin diff --git a/docker/datahub-upgrade/Dockerfile b/docker/datahub-upgrade/Dockerfile index fa8e65009662ba..551d61f41b979e 100644 --- a/docker/datahub-upgrade/Dockerfile +++ b/docker/datahub-upgrade/Dockerfile @@ -1,11 +1,22 @@ # Defining environment ARG APP_ENV=prod +# Defining custom repo urls for use in enterprise environments. Re-used between stages below. 
+ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine +ARG GITHUB_REPO_URL=https://github.com +ARG MAVEN_CENTRAL_REPO_URL=https://repo1.maven.org/maven2 + FROM golang:1-alpine3.18 AS binary +# Re-declaring arg from above to make it available in this stage (will inherit default value) +ARG ALPINE_REPO_URL + ENV DOCKERIZE_VERSION v0.6.1 WORKDIR /go/src/github.com/jwilder +# Optionally set corporate mirror for apk +RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi + RUN apk --no-cache --update add openssl git tar curl WORKDIR /go/src/github.com/jwilder/dockerize @@ -14,17 +25,25 @@ RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION FROM alpine:3 AS base +# Re-declaring args from above to make them available in this stage (will inherit default values) +ARG ALPINE_REPO_URL +ARG GITHUB_REPO_URL +ARG MAVEN_CENTRAL_REPO_URL + +# Optionally set corporate mirror for apk +RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi + # Upgrade Alpine and base packages ENV JMX_VERSION=0.18.0 # PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk --no-cache --update-cache --available upgrade \ && apk --no-cache add curl bash coreutils gcompat sqlite libc6-compat java-snappy \ - && apk --no-cache add openjdk11-jre-headless --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community \ - && curl -sS https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-runner/9.4.46.v20220331/jetty-runner-9.4.46.v20220331.jar --output jetty-runner.jar \ - && curl -sS https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-jmx/9.4.46.v20220331/jetty-jmx-9.4.46.v20220331.jar --output jetty-jmx.jar \ - && curl -sS 
https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-util/9.4.46.v20220331/jetty-util-9.4.46.v20220331.jar --output jetty-util.jar \ - && wget --no-verbose https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \ - && wget --no-verbose https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar \ + && apk --no-cache add openjdk11-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \ + && curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-runner/9.4.46.v20220331/jetty-runner-9.4.46.v20220331.jar --output jetty-runner.jar \ + && curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-jmx/9.4.46.v20220331/jetty-jmx-9.4.46.v20220331.jar --output jetty-jmx.jar \ + && curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-util/9.4.46.v20220331/jetty-util-9.4.46.v20220331.jar --output jetty-util.jar \ + && wget --no-verbose ${GITHUB_REPO_URL}/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \ + && wget --no-verbose ${MAVEN_CENTRAL_REPO_URL}/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar \ && cp /usr/lib/jvm/java-11-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks COPY --from=binary /go/bin/dockerize /usr/local/bin diff --git a/docker/docker-compose-with-cassandra.yml b/docker/docker-compose-with-cassandra.yml index 39f43416005729..48239fcd87831e 100644 --- a/docker/docker-compose-with-cassandra.yml +++ b/docker/docker-compose-with-cassandra.yml @@ -43,7 +43,7 @@ services: dockerfile: docker/datahub-gms/Dockerfile env_file: ./datahub-gms/env/docker.cassandra.env healthcheck: - test: curl -sS --fail http://datahub-gms:${DATAHUB_MAPPED_GMS_PORT:-8080}/health + test: curl -sS --fail 
http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health start_period: 20s interval: 1s retries: 20 diff --git a/docker/docker-compose-without-neo4j.postgres.override.yml b/docker/docker-compose-without-neo4j.postgres.override.yml index e4c754b30afd79..369b5a155fc36b 100644 --- a/docker/docker-compose-without-neo4j.postgres.override.yml +++ b/docker/docker-compose-without-neo4j.postgres.override.yml @@ -53,7 +53,7 @@ services: postgres: container_name: postgres hostname: postgres - image: postgres:12.3 + image: postgres:${DATAHUB_POSTGRES_VERSION:-12.3} env_file: postgres/env/docker.env ports: - '5432:5432' diff --git a/docker/docker-compose-without-neo4j.yml b/docker/docker-compose-without-neo4j.yml index 235e89e340551b..6191994eaa1ea5 100644 --- a/docker/docker-compose-without-neo4j.yml +++ b/docker/docker-compose-without-neo4j.yml @@ -44,7 +44,7 @@ services: dockerfile: docker/datahub-gms/Dockerfile env_file: datahub-gms/env/docker-without-neo4j.env healthcheck: - test: curl -sS --fail http://datahub-gms:${DATAHUB_MAPPED_GMS_PORT:-8080}/health + test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health start_period: 90s interval: 1s retries: 3 @@ -119,7 +119,7 @@ services: limits: memory: 1G healthcheck: - test: curl -sS --fail http://elasticsearch:$${DATAHUB_MAPPED_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s + test: curl -sS --fail http://elasticsearch:$${DATAHUB_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s start_period: 20s interval: 1s retries: 3 @@ -134,7 +134,7 @@ services: - ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081 env_file: schema-registry/env/docker.env healthcheck: - test: nc -z schema-registry ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081} + test: nc -z schema-registry ${DATAHUB_SCHEMA_REGISTRY_PORT:-8081} start_period: 60s interval: 1s retries: 3 @@ -150,7 +150,7 @@ services: - ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092 env_file: broker/env/docker.env healthcheck: - test: nc -z 
broker $${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092} + test: nc -z broker $${DATAHUB_KAFKA_BROKER_PORT:-9092} start_period: 60s interval: 1s retries: 5 @@ -168,7 +168,7 @@ services: - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181 env_file: zookeeper/env/docker.env healthcheck: - test: echo srvr | nc zookeeper $${DATAHUB_MAPPED_ZK_PORT:-2181} + test: echo srvr | nc zookeeper $${DATAHUB_ZK_PORT:-2181} start_period: 30s interval: 5s retries: 3 diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 46da8c6fdbd2ae..95f56fe47e3cca 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -42,7 +42,7 @@ services: context: ../ dockerfile: docker/datahub-gms/Dockerfile healthcheck: - test: curl -sS --fail http://datahub-gms:${DATAHUB_MAPPED_GMS_PORT:-8080}/health + test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health start_period: 90s interval: 1s retries: 3 @@ -124,7 +124,7 @@ services: limits: memory: 1G healthcheck: - test: curl -sS --fail http://elasticsearch:$${DATAHUB_MAPPED_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s + test: curl -sS --fail http://elasticsearch:$${DATAHUB_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s start_period: 20s interval: 1s retries: 3 @@ -140,7 +140,7 @@ services: - ${DATAHUB_MAPPED_NEO4J_BOLT_PORT:-7687}:7687 env_file: neo4j/env/docker.env healthcheck: - test: wget http://neo4j:$${DATAHUB_MAPPED_NEO4J_HTTP_PORT:-7474} + test: wget http://neo4j:$${DATAHUB_NEO4J_HTTP_PORT:-7474} start_period: 5s interval: 1s retries: 5 @@ -155,7 +155,7 @@ services: - ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081 env_file: schema-registry/env/docker.env healthcheck: - test: nc -z schema-registry ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081} + test: nc -z schema-registry ${DATAHUB_SCHEMA_REGISTRY_PORT:-8081} start_period: 60s interval: 1s retries: 3 @@ -171,7 +171,7 @@ services: - ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092 env_file: broker/env/docker.env 
healthcheck: - test: nc -z broker $${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092} + test: nc -z broker $${DATAHUB_KAFKA_BROKER_PORT:-9092} start_period: 60s interval: 1s retries: 5 @@ -189,7 +189,7 @@ services: - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181 env_file: zookeeper/env/docker.env healthcheck: - test: echo srvr | nc zookeeper $${DATAHUB_MAPPED_ZK_PORT:-2181} + test: echo srvr | nc zookeeper $${DATAHUB_ZK_PORT:-2181} start_period: 10s interval: 5s retries: 3 diff --git a/docker/elasticsearch-setup/Dockerfile b/docker/elasticsearch-setup/Dockerfile index c8fb2eba911b8a..f4dd1cb9b018e3 100644 --- a/docker/elasticsearch-setup/Dockerfile +++ b/docker/elasticsearch-setup/Dockerfile @@ -3,11 +3,19 @@ # Defining environment ARG APP_ENV=prod +# Defining custom repo urls for use in enterprise environments. Re-used between stages below. +ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine + FROM golang:1-alpine3.18 AS binary +ARG ALPINE_REPO_URL + ENV DOCKERIZE_VERSION v0.6.1 WORKDIR /go/src/github.com/jwilder +# Optionally set corporate mirror for apk +RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi + # PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk --no-cache --update add openssl git tar curl sqlite @@ -16,6 +24,12 @@ WORKDIR /go/src/github.com/jwilder/dockerize RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION FROM alpine:3 AS base + +ARG ALPINE_REPO_URL + +# Optionally set corporate mirror for apk +RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi + RUN apk add --no-cache curl jq bash coreutils COPY --from=binary /go/bin/dockerize /usr/local/bin diff --git a/docker/elasticsearch-setup/build.gradle 
b/docker/elasticsearch-setup/build.gradle index ac935ca42fd12a..f9dff3032b56db 100644 --- a/docker/elasticsearch-setup/build.gradle +++ b/docker/elasticsearch-setup/build.gradle @@ -27,6 +27,16 @@ docker { buildx(true) load(true) push(false) + + // Add build args if they are defined (needed for some CI or enterprise environments) + def dockerBuildArgs = [:] + if (project.hasProperty('alpineApkRepositoryUrl')) { + dockerBuildArgs.ALPINE_REPO_URL = project.getProperty('alpineApkRepositoryUrl') + } + + if (dockerBuildArgs.size() > 0) { + buildArgs(dockerBuildArgs) + } } tasks.getByName('docker').dependsOn('build') @@ -42,4 +52,4 @@ task cleanLocalDockerImages { rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } -dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file +dockerClean.finalizedBy(cleanLocalDockerImages) diff --git a/docker/kafka-setup/Dockerfile b/docker/kafka-setup/Dockerfile index e7f084739a5767..f6a4b62a793562 100644 --- a/docker/kafka-setup/Dockerfile +++ b/docker/kafka-setup/Dockerfile @@ -1,28 +1,41 @@ ARG KAFKA_DOCKER_VERSION=7.4.1 +# Defining custom repo urls for use in enterprise environments. Re-used between stages below. 
+ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine +ARG GITHUB_REPO_URL=https://github.com +ARG MAVEN_CENTRAL_REPO_URL=https://repo1.maven.org/maven2 +ARG APACHE_DOWNLOAD_URL=null + # Using as a base image because to get the needed jars for confluent utils FROM confluentinc/cp-base-new:$KAFKA_DOCKER_VERSION as confluent_base -ARG MAVEN_REPO="https://repo1.maven.org/maven2" +ARG MAVEN_CENTRAL_REPO_URL ARG SNAKEYAML_VERSION="2.0" RUN rm /usr/share/java/cp-base-new/snakeyaml-*.jar \ - && wget -P /usr/share/java/cp-base-new $MAVEN_REPO/org/yaml/snakeyaml/$SNAKEYAML_VERSION/snakeyaml-$SNAKEYAML_VERSION.jar + && wget -P /usr/share/java/cp-base-new $MAVEN_CENTRAL_REPO_URL/org/yaml/snakeyaml/$SNAKEYAML_VERSION/snakeyaml-$SNAKEYAML_VERSION.jar # Based on https://github.com/blacktop's alpine kafka build FROM python:3-alpine +ARG ALPINE_REPO_URL +ARG APACHE_DOWNLOAD_URL +ARG GITHUB_REPO_URL + ENV KAFKA_VERSION 3.4.1 ENV SCALA_VERSION 2.13 LABEL name="kafka" version=${KAFKA_VERSION} +# Optionally set corporate mirror for apk +RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi + RUN apk add --no-cache bash coreutils -RUN apk --no-cache add openjdk11-jre-headless --repository=http://dl-cdn.alpinelinux.org/alpine/edge/community +RUN apk --no-cache add openjdk11-jre-headless --repository=${ALPINE_REPO_URL}/edge/community RUN apk add --no-cache -t .build-deps git curl ca-certificates jq gcc musl-dev libffi-dev zip RUN mkdir -p /opt \ - && mirror=$(curl --stderr /dev/null https://www.apache.org/dyn/closer.cgi\?as_json\=1 | jq -r '.preferred') \ + && if [ "${APACHE_DOWNLOAD_URL}" != "null" ] ; then mirror="${APACHE_DOWNLOAD_URL}/" ; else mirror=$(curl --stderr /dev/null https://www.apache.org/dyn/closer.cgi\?as_json\=1 | jq -r '.preferred'); fi \ && curl -sSL "${mirror}kafka/${KAFKA_VERSION}/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz" \ | tar -xzf - 
-C /opt \ && mv /opt/kafka_${SCALA_VERSION}-${KAFKA_VERSION} /opt/kafka \ @@ -39,8 +52,8 @@ RUN ls -la COPY --from=confluent_base /usr/share/java/cp-base-new/ /usr/share/java/cp-base-new/ COPY --from=confluent_base /etc/cp-base-new/log4j.properties /etc/cp-base-new/log4j.properties -ADD --chown=kafka:kafka https://github.com/aws/aws-msk-iam-auth/releases/download/v1.1.6/aws-msk-iam-auth-1.1.6-all.jar /usr/share/java/cp-base-new -ADD --chown=kafka:kafka https://github.com/aws/aws-msk-iam-auth/releases/download/v1.1.6/aws-msk-iam-auth-1.1.6-all.jar /opt/kafka/libs +ADD --chown=kafka:kafka ${GITHUB_REPO_URL}/aws/aws-msk-iam-auth/releases/download/v1.1.6/aws-msk-iam-auth-1.1.6-all.jar /usr/share/java/cp-base-new +ADD --chown=kafka:kafka ${GITHUB_REPO_URL}/aws/aws-msk-iam-auth/releases/download/v1.1.6/aws-msk-iam-auth-1.1.6-all.jar /opt/kafka/libs ENV METADATA_AUDIT_EVENT_NAME="MetadataAuditEvent_v4" ENV METADATA_CHANGE_EVENT_NAME="MetadataChangeEvent_v4" diff --git a/docker/kafka-setup/build.gradle b/docker/kafka-setup/build.gradle index 25f9847190de3c..d7bc5c2d7d13f4 100644 --- a/docker/kafka-setup/build.gradle +++ b/docker/kafka-setup/build.gradle @@ -26,6 +26,25 @@ docker { buildx(true) load(true) push(false) + + // Add build args if they are defined (needed for some CI or enterprise environments) + def dockerBuildArgs = [:] + if (project.hasProperty('alpineApkRepositoryUrl')) { + dockerBuildArgs.ALPINE_REPO_URL = project.getProperty('alpineApkRepositoryUrl') + } + if (project.hasProperty('githubMirrorUrl')) { + dockerBuildArgs.GITHUB_REPO_URL = project.getProperty('githubMirrorUrl') + } + if (project.hasProperty('mavenCentralRepositoryUrl')) { + dockerBuildArgs.MAVEN_CENTRAL_REPO_URL = project.getProperty('mavenCentralRepositoryUrl') + } + if (project.hasProperty('apacheDownloadUrl')) { + dockerBuildArgs.APACHE_DOWNLOAD_URL = project.getProperty('apacheDownloadUrl') + } + + if (dockerBuildArgs.size() > 0) { + buildArgs(dockerBuildArgs) + } } 
tasks.getByName('docker').dependsOn('build') diff --git a/docker/mysql-setup/Dockerfile b/docker/mysql-setup/Dockerfile index 56bab611804892..8b7ca704c32cd9 100644 --- a/docker/mysql-setup/Dockerfile +++ b/docker/mysql-setup/Dockerfile @@ -1,8 +1,16 @@ +# Defining custom repo urls for use in enterprise environments. Re-used between stages below. +ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine + FROM golang:1-alpine3.18 AS binary +ARG ALPINE_REPO_URL + ENV DOCKERIZE_VERSION v0.6.1 WORKDIR /go/src/github.com/jwilder +# Optionally set corporate mirror for apk +RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi + RUN apk --no-cache --update add openssl git tar curl WORKDIR /go/src/github.com/jwilder/dockerize @@ -12,6 +20,11 @@ RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION FROM alpine:3 COPY --from=binary /go/bin/dockerize /usr/local/bin +ARG ALPINE_REPO_URL + +# Optionally set corporate mirror for apk +RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi + # PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk add --no-cache mysql-client bash mariadb-connector-c sqlite diff --git a/docker/mysql-setup/build.gradle b/docker/mysql-setup/build.gradle index 1598866914c0ee..5c70a2f0d9a2dc 100644 --- a/docker/mysql-setup/build.gradle +++ b/docker/mysql-setup/build.gradle @@ -27,6 +27,16 @@ docker { buildx(true) load(true) push(false) + + // Add build args if they are defined (needed for some CI or enterprise environments) + def dockerBuildArgs = [:] + if (project.hasProperty('alpineApkRepositoryUrl')) { + dockerBuildArgs.ALPINE_REPO_URL = project.getProperty('alpineApkRepositoryUrl') + } + + if (dockerBuildArgs.size() > 0) { + 
buildArgs(dockerBuildArgs) + } } tasks.getByName('docker').dependsOn('build') diff --git a/docker/postgres-setup/Dockerfile b/docker/postgres-setup/Dockerfile index 7f4d53ae044d46..e10f70571501ea 100644 --- a/docker/postgres-setup/Dockerfile +++ b/docker/postgres-setup/Dockerfile @@ -1,8 +1,16 @@ +# Defining custom repo urls for use in enterprise environments. Re-used between stages below. +ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine + FROM golang:1-alpine3.18 AS binary +ARG ALPINE_REPO_URL + ENV DOCKERIZE_VERSION v0.6.1 WORKDIR /go/src/github.com/jwilder +# Optionally set corporate mirror for apk +RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi + RUN apk --no-cache --update add openssl git tar curl WORKDIR /go/src/github.com/jwilder/dockerize @@ -12,6 +20,11 @@ RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION FROM alpine:3 COPY --from=binary /go/bin/dockerize /usr/local/bin +ARG ALPINE_REPO_URL + +# Optionally set corporate mirror for apk +RUN if [ "${ALPINE_REPO_URL}" != "http://dl-cdn.alpinelinux.org/alpine" ] ; then sed -i "s#http.*://dl-cdn.alpinelinux.org/alpine#${ALPINE_REPO_URL}#g" /etc/apk/repositories ; fi + # PFP-260: Upgrade Sqlite to >=3.28.0-r0 to fix https://security.snyk.io/vuln/SNYK-ALPINE39-SQLITE-449762 RUN apk add --no-cache postgresql-client sqlite diff --git a/docker/postgres-setup/build.gradle b/docker/postgres-setup/build.gradle index e24e206c99145c..5c42a002f45bef 100644 --- a/docker/postgres-setup/build.gradle +++ b/docker/postgres-setup/build.gradle @@ -27,6 +27,16 @@ docker { buildx(true) load(true) push(false) + + // Add build args if they are defined (needed for some CI or enterprise environments) + def dockerBuildArgs = [:] + if (project.hasProperty('alpineApkRepositoryUrl')) { + dockerBuildArgs.ALPINE_REPO_URL = project.getProperty('alpineApkRepositoryUrl') + } + + if 
(dockerBuildArgs.size() > 0) { + buildArgs(dockerBuildArgs) + } } tasks.getByName('docker').dependsOn('build') diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-m1.quickstart.yml index 4df32395cf82d5..7b7ca4052f3245 100644 --- a/docker/quickstart/docker-compose-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-m1.quickstart.yml @@ -22,7 +22,7 @@ services: interval: 1s retries: 5 start_period: 60s - test: nc -z broker $${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092} + test: nc -z broker $${DATAHUB_KAFKA_BROKER_PORT:-9092} timeout: 5s hostname: broker image: confluentinc/cp-kafka:7.4.0 @@ -111,7 +111,7 @@ services: interval: 1s retries: 3 start_period: 90s - test: curl -sS --fail http://datahub-gms:${DATAHUB_MAPPED_GMS_PORT:-8080}/health + test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health timeout: 5s hostname: datahub-gms image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} @@ -171,7 +171,7 @@ services: interval: 1s retries: 3 start_period: 20s - test: curl -sS --fail http://elasticsearch:$${DATAHUB_MAPPED_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s + test: curl -sS --fail http://elasticsearch:$${DATAHUB_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s timeout: 5s hostname: elasticsearch image: ${DATAHUB_SEARCH_IMAGE:-elasticsearch}:${DATAHUB_SEARCH_TAG:-7.10.1} @@ -258,7 +258,7 @@ services: interval: 1s retries: 5 start_period: 5s - test: wget http://neo4j:$${DATAHUB_MAPPED_NEO4J_HTTP_PORT:-7474} + test: wget http://neo4j:$${DATAHUB_NEO4J_HTTP_PORT:-7474} timeout: 5s hostname: neo4j image: neo4j/neo4j-arm64-experimental:4.0.6-arm64 @@ -280,7 +280,7 @@ services: interval: 1s retries: 3 start_period: 60s - test: nc -z schema-registry ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081} + test: nc -z schema-registry ${DATAHUB_SCHEMA_REGISTRY_PORT:-8081} timeout: 5s hostname: schema-registry image: confluentinc/cp-schema-registry:7.4.0 
@@ -295,7 +295,7 @@ services: interval: 5s retries: 3 start_period: 10s - test: echo srvr | nc zookeeper $${DATAHUB_MAPPED_ZK_PORT:-2181} + test: echo srvr | nc zookeeper $${DATAHUB_ZK_PORT:-2181} timeout: 5s hostname: zookeeper image: confluentinc/cp-zookeeper:7.4.0 diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml index b1cb6c208a42d6..53dacaf6ef63b0 100644 --- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml @@ -22,7 +22,7 @@ services: interval: 1s retries: 5 start_period: 60s - test: nc -z broker $${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092} + test: nc -z broker $${DATAHUB_KAFKA_BROKER_PORT:-9092} timeout: 5s hostname: broker image: confluentinc/cp-kafka:7.4.0 @@ -106,7 +106,7 @@ services: interval: 1s retries: 3 start_period: 90s - test: curl -sS --fail http://datahub-gms:${DATAHUB_MAPPED_GMS_PORT:-8080}/health + test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health timeout: 5s hostname: datahub-gms image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} @@ -164,7 +164,7 @@ services: interval: 1s retries: 3 start_period: 20s - test: curl -sS --fail http://elasticsearch:$${DATAHUB_MAPPED_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s + test: curl -sS --fail http://elasticsearch:$${DATAHUB_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s timeout: 5s hostname: elasticsearch image: ${DATAHUB_SEARCH_IMAGE:-elasticsearch}:${DATAHUB_SEARCH_TAG:-7.10.1} @@ -253,7 +253,7 @@ services: interval: 1s retries: 3 start_period: 60s - test: nc -z schema-registry ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081} + test: nc -z schema-registry ${DATAHUB_SCHEMA_REGISTRY_PORT:-8081} timeout: 5s hostname: schema-registry image: confluentinc/cp-schema-registry:7.4.0 @@ -268,7 +268,7 @@ services: interval: 5s retries: 3 
start_period: 30s - test: echo srvr | nc zookeeper $${DATAHUB_MAPPED_ZK_PORT:-2181} + test: echo srvr | nc zookeeper $${DATAHUB_ZK_PORT:-2181} timeout: 5s hostname: zookeeper image: confluentinc/cp-zookeeper:7.4.0 diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml index 6eac53229e82ab..1ca91aa19206da 100644 --- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ -22,7 +22,7 @@ services: interval: 1s retries: 5 start_period: 60s - test: nc -z broker $${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092} + test: nc -z broker $${DATAHUB_KAFKA_BROKER_PORT:-9092} timeout: 5s hostname: broker image: confluentinc/cp-kafka:7.4.0 @@ -106,7 +106,7 @@ services: interval: 1s retries: 3 start_period: 90s - test: curl -sS --fail http://datahub-gms:${DATAHUB_MAPPED_GMS_PORT:-8080}/health + test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health timeout: 5s hostname: datahub-gms image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} @@ -164,7 +164,7 @@ services: interval: 1s retries: 3 start_period: 20s - test: curl -sS --fail http://elasticsearch:$${DATAHUB_MAPPED_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s + test: curl -sS --fail http://elasticsearch:$${DATAHUB_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s timeout: 5s hostname: elasticsearch image: ${DATAHUB_SEARCH_IMAGE:-elasticsearch}:${DATAHUB_SEARCH_TAG:-7.10.1} @@ -253,7 +253,7 @@ services: interval: 1s retries: 3 start_period: 60s - test: nc -z schema-registry ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081} + test: nc -z schema-registry ${DATAHUB_SCHEMA_REGISTRY_PORT:-8081} timeout: 5s hostname: schema-registry image: confluentinc/cp-schema-registry:7.4.0 @@ -268,7 +268,7 @@ services: interval: 5s retries: 3 start_period: 30s - test: echo srvr | nc zookeeper 
$${DATAHUB_MAPPED_ZK_PORT:-2181} + test: echo srvr | nc zookeeper $${DATAHUB_ZK_PORT:-2181} timeout: 5s hostname: zookeeper image: confluentinc/cp-zookeeper:7.4.0 diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index 86d70abd2b8157..c77b4418b6f366 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -22,7 +22,7 @@ services: interval: 1s retries: 5 start_period: 60s - test: nc -z broker $${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092} + test: nc -z broker $${DATAHUB_KAFKA_BROKER_PORT:-9092} timeout: 5s hostname: broker image: confluentinc/cp-kafka:7.4.0 @@ -111,7 +111,7 @@ services: interval: 1s retries: 3 start_period: 90s - test: curl -sS --fail http://datahub-gms:${DATAHUB_MAPPED_GMS_PORT:-8080}/health + test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health timeout: 5s hostname: datahub-gms image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head} @@ -171,7 +171,7 @@ services: interval: 1s retries: 3 start_period: 20s - test: curl -sS --fail http://elasticsearch:$${DATAHUB_MAPPED_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s + test: curl -sS --fail http://elasticsearch:$${DATAHUB_ELASTIC_PORT:-9200}/_cluster/health?wait_for_status=yellow&timeout=0s timeout: 5s hostname: elasticsearch image: ${DATAHUB_SEARCH_IMAGE:-elasticsearch}:${DATAHUB_SEARCH_TAG:-7.10.1} @@ -258,7 +258,7 @@ services: interval: 1s retries: 5 start_period: 5s - test: wget http://neo4j:$${DATAHUB_MAPPED_NEO4J_HTTP_PORT:-7474} + test: wget http://neo4j:$${DATAHUB_NEO4J_HTTP_PORT:-7474} timeout: 5s hostname: neo4j image: neo4j:4.4.9-community @@ -280,7 +280,7 @@ services: interval: 1s retries: 3 start_period: 60s - test: nc -z schema-registry ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081} + test: nc -z schema-registry ${DATAHUB_SCHEMA_REGISTRY_PORT:-8081} timeout: 5s hostname: schema-registry image: 
confluentinc/cp-schema-registry:7.4.0 @@ -295,7 +295,7 @@ services: interval: 5s retries: 3 start_period: 10s - test: echo srvr | nc zookeeper $${DATAHUB_MAPPED_ZK_PORT:-2181} + test: echo srvr | nc zookeeper $${DATAHUB_ZK_PORT:-2181} timeout: 5s hostname: zookeeper image: confluentinc/cp-zookeeper:7.4.0 diff --git a/docs-website/filterTagIndexes.json b/docs-website/filterTagIndexes.json index c154b586fe66e3..419f16e8d8a52d 100644 --- a/docs-website/filterTagIndexes.json +++ b/docs-website/filterTagIndexes.json @@ -605,6 +605,17 @@ "Features": "Notifications, Alerting" } }, + { + "Path": "docs/generated/ingestion/sources/teradata", + "imgPath": "img/logos/platforms/teradata.svg", + "Title": "Teradata", + "Description": "Teradata is a data warehousing and analytics tool that allows users to store, manage, and analyze large amounts of data in a scalable and cost-effective manner.", + "tags": { + "Platform Type": "BI Tool", + "Connection Type": "Pull", + "Features": "Stateful Ingestion, Column Level Lineage, UI Ingestion, Lower Casing, Status Aspect" + } + }, { "Path": "docs/generated/ingestion/sources/trino", "imgPath": "img/logos/platforms/trino.png", diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 21c4cef2e848b7..3263a9f7c15fb3 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -8,7 +8,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe - Updating MySQL version for quickstarts to 8.2, may cause quickstart issues for existing instances. - #9244: The `redshift-legacy` and `redshift-legacy-usage` sources, which have been deprecated for >6 months, have been removed. The new `redshift` source is a superset of the functionality provided by those legacy sources. - +- The `database_alias` config is no longer supported in the following SQL sources: Redshift, MySQL, Oracle, Postgres, Trino, Presto-on-hive. The config will automatically be ignored if it's present in your recipe. 
It has been deprecated since v0.9.6. ### Potential Downtime ### Deprecations diff --git a/entity-registry/custom-test-model/build.gradle b/entity-registry/custom-test-model/build.gradle index 778e2e42b95c44..8e17de0709188b 100644 --- a/entity-registry/custom-test-model/build.gradle +++ b/entity-registry/custom-test-model/build.gradle @@ -2,7 +2,11 @@ import org.yaml.snakeyaml.Yaml buildscript { repositories{ - mavenCentral() + if (project.hasProperty('apacheMavenRepositoryUrl')) { + maven { url project.getProperty('apacheMavenRepositoryUrl') } + } else { + mavenCentral() + } } dependencies { classpath("org.yaml:snakeyaml:1.33") diff --git a/metadata-ingestion/docs/dev_guides/add_stateful_ingestion_to_source.md b/metadata-ingestion/docs/dev_guides/add_stateful_ingestion_to_source.md index 9e39d24fb85782..a152697988c6ff 100644 --- a/metadata-ingestion/docs/dev_guides/add_stateful_ingestion_to_source.md +++ b/metadata-ingestion/docs/dev_guides/add_stateful_ingestion_to_source.md @@ -5,160 +5,75 @@ the [Redunant Run Elimination](./stateful.md#redundant-run-elimination) use-case capability available for the sources. This document describes how to add support for these two use-cases to new sources. ## Adding Stale Metadata Removal to a Source -Adding the stale metadata removal use-case to a new source involves -1. Defining the new checkpoint state that stores the list of entities emitted from a specific ingestion run. -2. Modifying the `SourceConfig` associated with the source to use a custom `stateful_ingestion` config param. -3. Modifying the `SourceReport` associated with the source to include soft-deleted entities in the report. -4. Modifying the `Source` to - 1. Instantiate the StaleEntityRemovalHandler object - 2. Add entities from the current run to the state object - 3. Emit stale metadata removal workunits + +Adding the stale metadata removal use-case to a new source involves modifying the source config, source report, and the source itself. 
+ +For a full example of all changes required: [Adding stale metadata removal to the MongoDB source](https://github.com/datahub-project/datahub/pull/9118). The [datahub.ingestion.source.state.stale_entity_removal_handler](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py) module provides the supporting infrastructure for all the steps described above and substantially simplifies the implementation on the source side. Below is a detailed explanation of each of these steps along with examples. -### 1. Defining the checkpoint state for the source. -The checkpoint state class is responsible for tracking the entities emitted from each ingestion run. If none of the existing states do not meet the needs of the new source, a new checkpoint state must be created. The state must -inherit from the `StaleEntityCheckpointStateBase` abstract class shown below, and implement each of the abstract methods. -```python -class StaleEntityCheckpointStateBase(CheckpointStateBase, ABC, Generic[Derived]): - """ - Defines the abstract interface for the checkpoint states that are used for stale entity removal. - Examples include sql_common state for tracking table and & view urns, - dbt that tracks node & assertion urns, kafka state tracking topic urns. - """ - - @classmethod - @abstractmethod - def get_supported_types(cls) -> List[str]: - pass - - @abstractmethod - def add_checkpoint_urn(self, type: str, urn: str) -> None: - """ - Adds an urn into the list used for tracking the type. - :param type: The type of the urn such as a 'table', 'view', - 'node', 'topic', 'assertion' that the concrete sub-class understands. - :param urn: The urn string - :return: None. - """ - pass - - @abstractmethod - def get_urns_not_in( - self, type: str, other_checkpoint_state: Derived - ) -> Iterable[str]: - """ - Gets the urns present in this checkpoint but not the other_checkpoint for the given type. 
- :param type: The type of the urn such as a 'table', 'view', - 'node', 'topic', 'assertion' that the concrete sub-class understands. - :param other_checkpoint_state: the checkpoint state to compute the urn set difference against. - :return: an iterable to the set of urns present in this checkpoing state but not in the other_checkpoint. - """ - pass -``` - -Examples: -* [BaseSQLAlchemyCheckpointState](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/state/sql_common_state.py#L17) - -### 2. Modifying the SourceConfig +### 1. Modify the source config The source's config must inherit from `StatefulIngestionConfigBase`, and should declare a field named `stateful_ingestion` of type `Optional[StatefulStaleMetadataRemovalConfig]`. -Examples: -- The `KafkaSourceConfig` +Example: + ```python -from typing import List, Optional -import pydantic -from datahub.ingestion.source.state.stale_entity_removal_handler import StatefulStaleMetadataRemovalConfig -from datahub.ingestion.source.state.stateful_ingestion_base import ( +from datahub.ingestion.source.state.stale_entity_removal_handler import ( + StatefulStaleMetadataRemovalConfig, StatefulIngestionConfigBase, ) -class KafkaSourceConfig(StatefulIngestionConfigBase): +class MySourceConfig(StatefulIngestionConfigBase): # ...... stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None ``` -### 3. Modifying the SourceReport -The report class of the source should inherit from `StaleEntityRemovalSourceReport` whose definition is shown below. -```python -from typing import List -from dataclasses import dataclass, field -from datahub.ingestion.source.state.stateful_ingestion_base import StatefulIngestionReport -@dataclass -class StaleEntityRemovalSourceReport(StatefulIngestionReport): - soft_deleted_stale_entities: List[str] = field(default_factory=list) +### 2. 
Modify the source report - def report_stale_entity_soft_deleted(self, urn: str) -> None: - self.soft_deleted_stale_entities.append(urn) -``` +The report class of the source should inherit from `StaleEntityRemovalSourceReport` instead of `SourceReport`. -Examples: -* The `KafkaSourceReport` ```python -from dataclasses import dataclass -from datahub.ingestion.source.state.stale_entity_removal_handler import StaleEntityRemovalSourceReport +from datahub.ingestion.source.state.stale_entity_removal_handler import ( + StaleEntityRemovalSourceReport, +) + @dataclass -class KafkaSourceReport(StaleEntityRemovalSourceReport): - # + pass ``` -### 4. Modifying the Source -The source must inherit from `StatefulIngestionSourceBase`. +### 3. Modify the source -#### 4.1 Instantiate StaleEntityRemovalHandler in the `__init__` method of the source. +1. The source must inherit from `StatefulIngestionSourceBase` instead of `Source`. +2. The source should contain a custom `get_workunit_processors` method. -Examples: -1. The `KafkaSource` ```python from datahub.ingestion.source.state.stateful_ingestion_base import StatefulIngestionSourceBase from datahub.ingestion.source.state.stale_entity_removal_handler import StaleEntityRemovalHandler -class KafkaSource(StatefulIngestionSourceBase): - def __init__(self, config: KafkaSourceConfig, ctx: PipelineContext): - # - # Create and register the stateful ingestion stale entity removal handler. - self.stale_entity_removal_handler = StaleEntityRemovalHandler( - source=self, - config=self.source_config, - state_type_class=KafkaCheckpointState, - pipeline_name=self.ctx.pipeline_name, - run_id=self.ctx.run_id, - ) -``` -#### 4.2 Adding entities from current run to the state object. -Use the `add_entity_to_state` method of the `StaleEntityRemovalHandler`. 
-Examples: -```python -# Kafka -self.stale_entity_removal_handler.add_entity_to_state( - type="topic", - urn=topic_urn,) - -# DBT -self.stale_entity_removal_handler.add_entity_to_state( - type="dataset", - urn=node_datahub_urn -) -self.stale_entity_removal_handler.add_entity_to_state( - type="assertion", - urn=node_datahub_urn, -) -``` +class MySource(StatefulIngestionSourceBase): + def __init__(self, config: MySourceConfig, ctx: PipelineContext): + super().__init__(config, ctx) -#### 4.3 Emitting soft-delete workunits associated with the stale entities. -```python -def get_workunits(self) -> Iterable[MetadataWorkUnit]: - # - # Emit the rest of the workunits for the source. - # NOTE: Populating the current state happens during the execution of this code. - # ... - - # Clean up stale entities at the end - yield from self.stale_entity_removal_handler.gen_removed_entity_workunits() + self.config = config + self.report = MySourceReport() + + # other initialization code here + + def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: + return [ + *super().get_workunit_processors(), + StaleEntityRemovalHandler.create( + self, self.config, self.ctx + ).workunit_processor, + ] + + # other methods here ``` ## Adding Redundant Run Elimination to a Source @@ -168,12 +83,13 @@ as snowflake usage, bigquery usage etc.). It typically involves expensive and lo run elimination to a new source to prevent the expensive reruns for the same time range(potentially due to a user error or a scheduler malfunction), the following steps are required. + 1. Update the `SourceConfig` 2. Update the `SourceReport` -3. Modify the `Source` to - 1. Instantiate the RedundantRunSkipHandler object. - 2. Check if the current run should be skipped. - 3. Update the state for the current run(start & end times). +3. Modify the `Source` to + 1. Instantiate the RedundantRunSkipHandler object. + 2. Check if the current run should be skipped. + 3. 
Update the state for the current run(start & end times). The [datahub.ingestion.source.state.redundant_run_skip_handler](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/state/redundant_run_skip_handler.py) modules provides the supporting infrastructure required for all the steps described above. @@ -181,11 +97,15 @@ modules provides the supporting infrastructure required for all the steps descri NOTE: The handler currently uses a simple state, the [BaseUsageCheckpointState](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/state/usage_common_state.py), across all sources it supports (unlike the StaleEntityRemovalHandler). + ### 1. Modifying the SourceConfig + The `SourceConfig` must inherit from the [StatefulRedundantRunSkipConfig](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/state/redundant_run_skip_handler.py#L23) class. Examples: + 1. Snowflake Usage + ```python from datahub.ingestion.source.state.redundant_run_skip_handler import ( StatefulRedundantRunSkipConfig, @@ -193,27 +113,36 @@ from datahub.ingestion.source.state.redundant_run_skip_handler import ( class SnowflakeStatefulIngestionConfig(StatefulRedundantRunSkipConfig): pass ``` + ### 2. Modifying the SourceReport + The `SourceReport` must inherit from the [StatefulIngestionReport](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py#L102) class. Examples: + 1. Snowflake Usage + ```python @dataclass class SnowflakeUsageReport(BaseSnowflakeReport, StatefulIngestionReport): # ``` + ### 3. Modifying the Source + The source must inherit from `StatefulIngestionSourceBase`. + #### 3.1 Instantiate RedundantRunSkipHandler in the `__init__` method of the source. + The source should instantiate an instance of the `RedundantRunSkipHandler` in its `__init__` method. 
Examples: Snowflake Usage + ```python from datahub.ingestion.source.state.redundant_run_skip_handler import ( RedundantRunSkipHandler, ) class SnowflakeUsageSource(StatefulIngestionSourceBase): - + def __init__(self, config: SnowflakeUsageConfig, ctx: PipelineContext): super(SnowflakeUsageSource, self).__init__(config, ctx) self.config: SnowflakeUsageConfig = config @@ -226,10 +155,13 @@ class SnowflakeUsageSource(StatefulIngestionSourceBase): run_id=self.ctx.run_id, ) ``` + #### 3.2 Checking if the current run should be skipped. + The sources can query if the current run should be skipped using `should_skip_this_run` method of `RedundantRunSkipHandler`. This should done from the `get_workunits` method, before doing any other work. Example code: + ```python def get_workunits(self) -> Iterable[MetadataWorkUnit]: # Skip a redundant run @@ -239,10 +171,13 @@ def get_workunits(self) -> Iterable[MetadataWorkUnit]: return # Generate the workunits. ``` + #### 3.3 Updating the state for the current run. + The source should use the `update_state` method of `RedundantRunSkipHandler` to update the current run's state if the run has not been skipped. This step can be performed in the `get_workunits` if the run has not been skipped. Example code: + ```python def get_workunits(self) -> Iterable[MetadataWorkUnit]: # Skip a redundant run @@ -250,7 +185,7 @@ Example code: cur_start_time_millis=self.config.start_time ): return - + # Generate the workunits. # # Update checkpoint state for this run. 
@@ -258,4 +193,4 @@ Example code: start_time_millis=self.config.start_time, end_time_millis=self.config.end_time, ) -``` \ No newline at end of file +``` diff --git a/metadata-ingestion/docs/sources/mssql/mssql_recipe.yml b/metadata-ingestion/docs/sources/mssql/mssql_recipe.yml index 5f1e24ce1e9561..93be7a86d72cce 100644 --- a/metadata-ingestion/docs/sources/mssql/mssql_recipe.yml +++ b/metadata-ingestion/docs/sources/mssql/mssql_recipe.yml @@ -9,6 +9,14 @@ source: username: user password: pass + # Options + # Uncomment if you need to use encryption with pytds + # See https://python-tds.readthedocs.io/en/latest/pytds.html#pytds.connect + # options: + # connect_args: + # cafile: server-ca.pem + # validate_host: true + sink: # sink configs diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 2b002164a49b95..4f5f09fb148fa8 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -214,7 +214,8 @@ # - 0.6.13 adds a small fix for Databricks # - 0.6.14 uses pure-sasl instead of sasl so it builds on Python 3.11 # - 0.6.15 adds support for thrift > 0.14 (cherry-picked from https://github.com/apache/thrift/pull/2491) - "acryl-pyhive[hive_pure_sasl]==0.6.15", + # - 0.6.16 fixes a regression in 0.6.15 (https://github.com/acryldata/PyHive/pull/9) + "acryl-pyhive[hive-pure-sasl]==0.6.16", # As per https://github.com/datahub-project/datahub/issues/8405 # and https://github.com/dropbox/PyHive/issues/417, version 0.14.0 # of thrift broke PyHive's hive+http transport. 
@@ -350,7 +351,7 @@ "mlflow": {"mlflow-skinny>=2.3.0"}, "mode": {"requests", "tenacity>=8.0.1"} | sqllineage_lib, "mongodb": {"pymongo[srv]>=3.11", "packaging"}, - "mssql": sql_common | {"sqlalchemy-pytds>=0.3"}, + "mssql": sql_common | {"sqlalchemy-pytds>=0.3", "pyOpenSSL"}, "mssql-odbc": sql_common | {"pyodbc"}, "mysql": mysql, # mariadb should have same dependency as mysql diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index 8940642f7008a7..a272b6e3cffcf2 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -33,6 +33,7 @@ auto_materialize_referenced_tags, auto_status_aspect, auto_workunit_reporter, + re_emit_browse_path_v2, ) from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent @@ -278,13 +279,14 @@ def _get_browse_path_processor(self, dry_run: bool) -> MetadataWorkUnitProcessor if isinstance(config, PlatformInstanceConfigMixin) and config.platform_instance: platform_instance = config.platform_instance - return partial( + browse_path_processor = partial( auto_browse_path_v2, platform=platform, platform_instance=platform_instance, drop_dirs=[s for s in browse_path_drop_dirs if s is not None], dry_run=dry_run, ) + return lambda stream: re_emit_browse_path_v2(browse_path_processor(stream)) class TestableSource(Source): diff --git a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py index fae260226195ce..66365ef0cdc45d 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source_helpers.py @@ -198,6 +198,21 @@ def auto_lowercase_urns( yield wu +def re_emit_browse_path_v2( + stream: Iterable[MetadataWorkUnit], +) -> Iterable[MetadataWorkUnit]: + """Re-emit browse paths v2 aspects, 
to avoid race condition where server overwrites with default.""" + browse_path_v2_workunits = [] + + for wu in stream: + yield wu + if wu.is_primary_source and wu.get_aspect_of_type(BrowsePathsV2Class): + browse_path_v2_workunits.append(wu) + + for wu in browse_path_v2_workunits: + yield wu + + def auto_browse_path_v2( stream: Iterable[MetadataWorkUnit], *, diff --git a/metadata-ingestion/src/datahub/ingestion/source/ldap.py b/metadata-ingestion/src/datahub/ingestion/source/ldap.py index e1d035a96d42fe..72985688273f60 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ldap.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ldap.py @@ -1,4 +1,5 @@ """LDAP Source""" +import contextlib import dataclasses from typing import Any, Dict, Iterable, List, Optional @@ -390,10 +391,10 @@ def build_corp_user_mce( country_code = get_attr_or_none( attrs, self.config.user_attrs_map["countryCode"] ) - if department_id_str: - department_id = int(department_id_str) - else: - department_id = None + department_id = None + with contextlib.suppress(ValueError): + if department_id_str: + department_id = int(department_id_str) custom_props_map = {} if self.config.custom_props_list: diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py index 24145d60210ff0..9f09a4322bb5df 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metabase.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py @@ -54,6 +54,8 @@ class MetabaseConfig(DatasetLineageProviderConfigBase): password: Optional[pydantic.SecretStr] = Field( default=None, description="Metabase password." ) + # TODO: Check and remove this if no longer needed. + # Config database_alias is removed from sql sources. 
database_alias_map: Optional[dict] = Field( default=None, description="Database name map to use when constructing dataset URN.", diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/common.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/common.py deleted file mode 100644 index 80657c69f88fac..00000000000000 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/common.py +++ /dev/null @@ -1,12 +0,0 @@ -from datahub.ingestion.source.redshift.config import RedshiftConfig - -redshift_datetime_format = "%Y-%m-%d %H:%M:%S" - - -def get_db_name(config: RedshiftConfig) -> str: - db_name = config.database - db_alias = config.database_alias - - db_name = db_alias or db_name - assert db_name is not None, "database name or alias must be specified" - return db_name diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py index 9cbf1823db9395..95038ef2c6212d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/config.py @@ -8,7 +8,7 @@ from datahub.configuration import ConfigModel from datahub.configuration.common import AllowDenyPattern from datahub.configuration.source_common import DatasetLineageProviderConfigBase -from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated +from datahub.configuration.validate_field_removal import pydantic_removed_field from datahub.ingestion.source.data_lake_common.path_spec import PathSpec from datahub.ingestion.source.sql.postgres import BasePostgresConfig from datahub.ingestion.source.state.stateful_ingestion_base import ( @@ -87,10 +87,7 @@ class RedshiftConfig( hidden_from_schema=True, ) - _database_alias_deprecation = pydantic_field_deprecated( - "database_alias", - message="database_alias is deprecated. 
Use platform_instance instead.", - ) + _database_alias_removed = pydantic_removed_field("database_alias") default_schema: str = Field( default="public", @@ -151,10 +148,8 @@ def check_email_is_set_on_usage(cls, values): return values @root_validator(skip_on_failure=True) - def check_database_or_database_alias_set(cls, values): - assert values.get("database") or values.get( - "database_alias" - ), "either database or database_alias must be set" + def check_database_is_set(cls, values): + assert values.get("database"), "database must be set" return values @root_validator(skip_on_failure=True) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py index c9ddfbe92ab2ab..05011b2d7a7694 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py @@ -16,7 +16,6 @@ from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.source.aws.s3_util import strip_s3_prefix -from datahub.ingestion.source.redshift.common import get_db_name from datahub.ingestion.source.redshift.config import LineageMode, RedshiftConfig from datahub.ingestion.source.redshift.query import RedshiftQuery from datahub.ingestion.source.redshift.redshift_schema import ( @@ -266,7 +265,7 @@ def _populate_lineage_map( try: cll: Optional[List[sqlglot_l.ColumnLineageInfo]] = None raw_db_name = database - alias_db_name = get_db_name(self.config) + alias_db_name = self.config.database for lineage_row in RedshiftDataDictionary.get_lineage_rows( conn=connection, query=query diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index c7d01021773b12..04f0edf5045951 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -38,7 +38,6 @@ DatasetContainerSubTypes, DatasetSubTypes, ) -from datahub.ingestion.source.redshift.common import get_db_name from datahub.ingestion.source.redshift.config import RedshiftConfig from datahub.ingestion.source.redshift.lineage import RedshiftLineageExtractor from datahub.ingestion.source.redshift.profile import RedshiftProfiler @@ -115,6 +114,10 @@ @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration") @capability(SourceCapability.DESCRIPTIONS, "Enabled by default") @capability(SourceCapability.LINEAGE_COARSE, "Optionally enabled via configuration") +@capability( + SourceCapability.LINEAGE_FINE, + "Optionally enabled via configuration (`mixed` or `sql_based` lineage needs to be enabled)", +) @capability( SourceCapability.USAGE_STATS, "Enabled by default, can be disabled via configuration `include_usage_statistics`", @@ -393,8 +396,8 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]: connection = RedshiftSource.get_redshift_connection(self.config) - database = get_db_name(self.config) - logger.info(f"Processing db {self.config.database} with name {database}") + database = self.config.database + logger.info(f"Processing db {database}") self.report.report_ingestion_stage_start(METADATA_EXTRACTION) self.db_tables[database] = defaultdict() self.db_views[database] = defaultdict() @@ -628,7 +631,7 @@ def gen_view_dataset_workunits( ) -> Iterable[MetadataWorkUnit]: yield from self.gen_dataset_workunits( table=view, - database=get_db_name(self.config), + database=self.config.database, schema=schema, sub_type=DatasetSubTypes.VIEW, custom_properties={}, diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py 
b/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py index bbb1876102578c..c789e605b9c29f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/usage.py @@ -359,10 +359,6 @@ def _gen_access_events_from_history_query( self.report.num_usage_stat_skipped += 1 continue - # Replace database name with the alias name if one is provided in the config. - if self.config.database_alias: - access_event.database = self.config.database_alias - if not self._should_process_event(access_event, all_tables=all_tables): self.report.num_usage_stat_skipped += 1 continue diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py index 710825c8ba55da..2442df595d9677 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py @@ -48,6 +48,7 @@ ) from datahub.metadata.schema_classes import ( BooleanTypeClass, + NumberTypeClass, StringTypeClass, UnionTypeClass, ) @@ -55,6 +56,8 @@ logger: logging.Logger = logging.getLogger(__name__) register_custom_type(sqlalchemy.dialects.mssql.BIT, BooleanTypeClass) +register_custom_type(sqlalchemy.dialects.mssql.MONEY, NumberTypeClass) +register_custom_type(sqlalchemy.dialects.mssql.SMALLMONEY, NumberTypeClass) register_custom_type(sqlalchemy.dialects.mssql.SQL_VARIANT, UnionTypeClass) register_custom_type(sqlalchemy.dialects.mssql.UNIQUEIDENTIFIER, StringTypeClass) @@ -135,7 +138,7 @@ def host(self): @property def db(self): - return self.database_alias or self.database + return self.database @platform_name("Microsoft SQL Server", id="mssql") @@ -152,7 +155,7 @@ class SQLServerSource(SQLAlchemySource): - Metadata for databases, schemas, views and tables - Column types associated with each table/view - Table, row, and column statistics via optional SQL profiling - We have 
two options for the underlying library used to connect to SQL Server: (1) [python-tds](https://github.com/denisenkom/pytds) and (2) [pyodbc](https://github.com/mkleehammer/pyodbc). The TDS library is pure Python and hence easier to install, but only PyODBC supports encrypted connections. + We have two options for the underlying library used to connect to SQL Server: (1) [python-tds](https://github.com/denisenkom/pytds) and (2) [pyodbc](https://github.com/mkleehammer/pyodbc). The TDS library is pure Python and hence easier to install. """ def __init__(self, config: SQLServerConfig, ctx: PipelineContext): @@ -657,10 +660,7 @@ def get_identifier( regular = f"{schema}.{entity}" qualified_table_name = regular if self.config.database: - if self.config.database_alias: - qualified_table_name = f"{self.config.database_alias}.{regular}" - else: - qualified_table_name = f"{self.config.database}.{regular}" + qualified_table_name = f"{self.config.database}.{regular}" if self.current_database: qualified_table_name = f"{self.current_database}.{regular}" return ( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mysql.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mysql.py index 891b64066721bd..2126717f835a26 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mysql.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mysql.py @@ -54,11 +54,7 @@ class MySQLConnectionConfig(SQLAlchemyConnectionConfig): class MySQLConfig(MySQLConnectionConfig, TwoTierSQLAlchemyConfig): def get_identifier(self, *, schema: str, table: str) -> str: - regular = f"{schema}.{table}" - if self.database_alias: - return f"{self.database_alias}.{table}" - else: - return regular + return f"{schema}.{table}" @platform_name("MySQL") diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index f2e1fe00ec8a3d..7ee54200c6493c 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -88,8 +88,6 @@ def get_sql_alchemy_url(self): def get_identifier(self, schema: str, table: str) -> str: regular = f"{schema}.{table}" if self.add_database_name_to_urn: - if self.database_alias: - return f"{self.database_alias}.{regular}" if self.database: return f"{self.database}.{regular}" return regular diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py b/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py index c8418075928efa..5d1e37fbb68a37 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py @@ -139,7 +139,6 @@ class PostgresSource(SQLAlchemySource): - Metadata for databases, schemas, views, and tables - Column types associated with each table - Also supports PostGIS extensions - - database_alias (optional) can be used to change the name of database to be ingested - Table, row, and column statistics via optional SQL profiling """ @@ -271,8 +270,6 @@ def get_identifier( ) -> str: regular = f"{schema}.{entity}" if self.config.database: - if self.config.database_alias: - return f"{self.config.database_alias}.{regular}" return f"{self.config.database}.{regular}" current_database = self.get_db_name(inspector) return f"{current_database}.{regular}" diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py b/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py index ceb9ecacb25d21..9657fdab9e2e31 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py @@ -329,8 +329,6 @@ def __init__(self, config: PrestoOnHiveConfig, ctx: PipelineContext) -> None: ) def get_db_name(self, inspector: Inspector) -> str: - if self.config.database_alias: - return 
f"{self.config.database_alias}" if self.config.database: return f"{self.config.database}" else: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 80f828e9ea2fd1..67af6b2010c832 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -1054,7 +1054,7 @@ def _run_sql_parser( return view_definition_lineage_helper(raw_lineage, view_urn) def get_db_schema(self, dataset_identifier: str) -> Tuple[Optional[str], str]: - database, schema, _view = dataset_identifier.split(".") + database, schema, _view = dataset_identifier.split(".", 2) return database, schema def get_profiler_instance(self, inspector: Inspector) -> "DatahubGEProfiler": diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py index 6a76ae847218d5..54edab6f3b84ba 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_config.py @@ -11,7 +11,7 @@ DatasetSourceConfigMixin, LowerCaseDatasetUrnConfigMixin, ) -from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated +from datahub.configuration.validate_field_removal import pydantic_removed_field from datahub.ingestion.source.ge_profiling_config import GEProfilingConfig from datahub.ingestion.source.state.stale_entity_removal_handler import ( StatefulStaleMetadataRemovalConfig, @@ -129,10 +129,6 @@ class SQLAlchemyConnectionConfig(ConfigModel): host_port: str = Field(description="host URL") database: Optional[str] = Field(default=None, description="database (catalog)") - database_alias: Optional[str] = Field( - default=None, - description="[Deprecated] Alias to apply to database when ingesting.", - ) scheme: str = Field(description="scheme") sqlalchemy_uri: 
Optional[str] = Field( default=None, @@ -149,10 +145,7 @@ class SQLAlchemyConnectionConfig(ConfigModel): ), ) - _database_alias_deprecation = pydantic_field_deprecated( - "database_alias", - message="database_alias is deprecated. Use platform_instance instead.", - ) + _database_alias_removed = pydantic_removed_field("database_alias") def get_sql_alchemy_url( self, uri_opts: Optional[Dict[str, Any]] = None, database: Optional[str] = None diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py index 2b693d9d80d91a..cb2e05765bfff6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py @@ -136,12 +136,9 @@ class TrinoConfig(BasicSQLAlchemyConfig): scheme: str = Field(default="trino", description="", hidden_from_docs=True) def get_identifier(self: BasicSQLAlchemyConfig, schema: str, table: str) -> str: - regular = f"{schema}.{table}" - identifier = regular - if self.database_alias: - identifier = f"{self.database_alias}.{regular}" - elif self.database: - identifier = f"{self.database}.{regular}" + identifier = f"{schema}.{table}" + if self.database: # TODO: this should be required field + identifier = f"{self.database}.{identifier}" return ( f"{self.platform_instance}.{identifier}" if self.platform_instance @@ -173,8 +170,6 @@ def __init__( super().__init__(config, ctx, platform) def get_db_name(self, inspector: Inspector) -> str: - if self.config.database_alias: - return f"{self.config.database_alias}" if self.config.database: return f"{self.config.database}" else: diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py index 1ae971e4a82d0a..7f607666db3136 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/superset.py +++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py @@ -96,6 +96,8 @@ class 
SupersetConfig(StatefulIngestionConfigBase, ConfigModel): default=DEFAULT_ENV, description="Environment to use in namespace when constructing URNs", ) + # TODO: Check and remove this if no longer needed. + # Config database_alias is removed from sql sources. database_alias: Dict[str, str] = Field( default={}, description="Can be used to change mapping for database names in superset to what you have in datahub", diff --git a/metadata-ingestion/tests/integration/mysql/mysql_to_file_dbalias.yml b/metadata-ingestion/tests/integration/mysql/mysql_to_file_dbalias.yml index 1c324641fe1583..89b87505ab527e 100644 --- a/metadata-ingestion/tests/integration/mysql/mysql_to_file_dbalias.yml +++ b/metadata-ingestion/tests/integration/mysql/mysql_to_file_dbalias.yml @@ -6,7 +6,6 @@ source: username: root password: example database: metagalaxy - database_alias: foogalaxy host_port: localhost:53307 schema_pattern: allow: diff --git a/metadata-ingestion/tests/integration/mysql/test_mysql.py b/metadata-ingestion/tests/integration/mysql/test_mysql.py index 8c8626a2d2297c..23fd97ff2671ed 100644 --- a/metadata-ingestion/tests/integration/mysql/test_mysql.py +++ b/metadata-ingestion/tests/integration/mysql/test_mysql.py @@ -75,27 +75,3 @@ def test_mysql_ingest_no_db( output_path=tmp_path / "mysql_mces.json", golden_path=test_resources_dir / golden_file, ) - - -@freeze_time(FROZEN_TIME) -@pytest.mark.integration -def test_mysql_ingest_with_db_alias( - mysql_runner, pytestconfig, test_resources_dir, tmp_path, mock_time -): - # Run the metadata ingestion pipeline. - config_file = (test_resources_dir / "mysql_to_file_dbalias.yml").resolve() - run_datahub_cmd(["ingest", "-c", f"{config_file}"], tmp_path=tmp_path) - - # Verify the output. - # Assert that all events generated have instance specific urns - import re - - urn_pattern = "^" + re.escape( - "urn:li:dataset:(urn:li:dataPlatform:mysql,foogalaxy." 
- ) - mce_helpers.assert_mcp_entity_urn( - filter="ALL", - entity_type="dataset", - regex_pattern=urn_pattern, - file=tmp_path / "mysql_mces_dbalias.json", - ) diff --git a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_1.json b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_1.json index 45d13229b2d85e..5607075ed568f9 100644 --- a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_1.json +++ b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_1.json @@ -1,7 +1,7 @@ [ { "entityType": "container", - "entityUrn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "entityUrn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -9,19 +9,20 @@ "customProperties": { "platform": "hive", "env": "PROD", - "database": "hive" + "database": "metastore" }, - "name": "hive" + "name": "metastore" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "entityUrn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -31,12 +32,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "entityUrn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -46,12 +48,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { 
"entityType": "container", - "entityUrn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "entityUrn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -63,12 +66,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "entityUrn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -78,12 +82,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -91,7 +96,7 @@ "customProperties": { "platform": "hive", "env": "PROD", - "database": "hive", + "database": "metastore", "schema": "db1" }, "name": "db1" @@ -99,12 +104,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -114,12 +120,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", "changeType": "UPSERT", 
"aspectName": "dataPlatformInstance", "aspect": { @@ -129,12 +136,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -146,42 +154,45 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "container": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -191,12 +202,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + 
"container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -264,15 +276,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + "totalSize": "0", + "numRows": "0", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "transient_lastDdlTime": "1700805676", "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "transient_lastDdlTime": "1688395014", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "map_test", "tags": [] @@ -283,7 +295,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -300,7 +313,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -312,19 +326,20 @@ "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -334,12 +349,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" 
+ "container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -464,15 +480,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + "numRows": "0", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "transient_lastDdlTime": "1700805676", "rawDataSize": "0", - "numRows": "0", "totalSize": "0", - "transient_lastDdlTime": "1688395014", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "union_test", "tags": [] @@ -483,7 +499,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -500,7 +517,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -512,19 +530,20 @@ "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -534,12 +553,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": 
"urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -656,15 +676,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + "totalSize": "0", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "transient_lastDdlTime": "1700805676", "rawDataSize": "0", "numRows": "0", - "totalSize": "0", - "transient_lastDdlTime": "1688395014", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "nested_struct_test", "tags": [] @@ -675,7 +695,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -692,7 +713,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -704,19 +726,20 @@ "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -726,12 +749,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + 
"container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -832,17 +856,17 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "comment": "This table has array of structs", - "numFiles": "1", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1688395011", - "rawDataSize": "32", + "another.comment": "This table has no partitions", "numRows": "1", + "rawDataSize": "32", "totalSize": "33", - "another.comment": "This table has no partitions", + "numFiles": "1", + "transient_lastDdlTime": "1700805674", + "comment": "This table has array of structs", + "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "array_struct_test", "description": "This table has array of structs", @@ -854,7 +878,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -871,7 +896,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -883,19 +909,20 @@ "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": 
"urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -905,12 +932,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1005,15 +1033,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + "transient_lastDdlTime": "1700805671", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1688395008", "rawDataSize": "0", "numRows": "0", "totalSize": "0", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "struct_test", "tags": [] @@ -1024,7 +1052,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1041,7 +1070,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1053,19 +1083,20 @@ "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": 
"urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1075,12 +1106,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1145,15 +1177,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + "transient_lastDdlTime": "1700805671", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1688395008", "rawDataSize": "0", "numRows": "0", "totalSize": "0", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "_test_table_underscore", "tags": [] @@ -1164,7 +1196,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1181,7 +1214,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1193,19 +1227,20 @@ "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": 
"urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1215,12 +1250,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1299,10 +1335,10 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastDdlTime": "1688395005", + "transient_lastDdlTime": "1700805669", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-07-03", + "create_date": "2023-11-24", "partitioned_columns": "baz" }, "name": "pokes", @@ -1314,7 +1350,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1331,7 +1368,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1343,19 +1381,20 @@ "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": 
"presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1365,12 +1404,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1440,7 +1480,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1457,7 +1498,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1474,7 +1516,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1486,19 +1529,20 @@ "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1508,12 +1552,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 
1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1630,7 +1675,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1647,7 +1693,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1664,7 +1711,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1676,19 +1724,20 @@ "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_2.json b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_2.json index 4ec71eb8c39c67..45f78eb61c15b2 100644 --- a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_2.json +++ b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_2.json @@ -1,7 +1,7 @@ [ { "entityType": "container", - "entityUrn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", + "entityUrn": 
"urn:li:container:f4ec3d97ca6750de28020a0d393c289d", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -9,19 +9,20 @@ "customProperties": { "platform": "presto-on-hive", "env": "PROD", - "database": "hive" + "database": "metastore" }, - "name": "hive" + "name": "metastore" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", + "entityUrn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -31,12 +32,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", + "entityUrn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -46,12 +48,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", + "entityUrn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -63,12 +66,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", + "entityUrn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -78,12 +82,13 @@ }, "systemMetadata": { "lastObserved": 
1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", + "entityUrn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -91,7 +96,7 @@ "customProperties": { "platform": "presto-on-hive", "env": "PROD", - "database": "hive", + "database": "metastore", "schema": "db1" }, "name": "db1" @@ -99,12 +104,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", + "entityUrn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -114,12 +120,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", + "entityUrn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -129,12 +136,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", + "entityUrn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -146,42 +154,45 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": 
"urn:li:container:bb66ab4651750f727700446f9b3aa2df", + "entityUrn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "container": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", + "entityUrn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", - "urn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "id": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", + "urn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -191,12 +202,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "container": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -264,15 +276,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + "totalSize": "0", + "numRows": "0", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "transient_lastDdlTime": "1700805676", "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "transient_lastDdlTime": "1688395014", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - 
"create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "map_test", "tags": [] @@ -283,7 +295,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -300,7 +313,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -312,19 +326,20 @@ "json": { "path": [ { - "id": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", - "urn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "id": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", + "urn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" }, { - "id": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", - "urn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "id": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", + "urn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -334,12 +349,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "container": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -464,15 +480,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + "numRows": "0", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "transient_lastDdlTime": "1700805676", "rawDataSize": "0", - "numRows": "0", "totalSize": "0", - "transient_lastDdlTime": "1688395014", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": 
"2023-07-03" + "create_date": "2023-11-24" }, "name": "union_test", "tags": [] @@ -483,7 +499,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -500,7 +517,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -512,19 +530,20 @@ "json": { "path": [ { - "id": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", - "urn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "id": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", + "urn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" }, { - "id": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", - "urn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "id": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", + "urn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -534,12 +553,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "container": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -656,15 +676,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + "totalSize": "0", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "transient_lastDdlTime": "1700805676", "rawDataSize": "0", "numRows": "0", - "totalSize": "0", - "transient_lastDdlTime": "1688395014", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": 
"2023-07-03" + "create_date": "2023-11-24" }, "name": "nested_struct_test", "tags": [] @@ -675,7 +695,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -692,7 +713,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -704,19 +726,20 @@ "json": { "path": [ { - "id": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", - "urn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "id": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", + "urn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" }, { - "id": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", - "urn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "id": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", + "urn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -726,12 +749,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "container": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -832,17 +856,17 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "comment": "This table has array of structs", - "numFiles": "1", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1688395011", - "rawDataSize": "32", + "another.comment": "This table has no partitions", "numRows": "1", + "rawDataSize": "32", "totalSize": "33", - "another.comment": "This table has no partitions", + 
"numFiles": "1", + "transient_lastDdlTime": "1700805674", + "comment": "This table has array of structs", + "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "array_struct_test", "description": "This table has array of structs", @@ -854,7 +878,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -871,7 +896,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -883,19 +909,20 @@ "json": { "path": [ { - "id": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", - "urn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "id": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", + "urn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" }, { - "id": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", - "urn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "id": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", + "urn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -905,12 +932,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "container": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1005,15 +1033,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + 
"transient_lastDdlTime": "1700805671", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1688395008", "rawDataSize": "0", "numRows": "0", "totalSize": "0", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "struct_test", "tags": [] @@ -1024,7 +1052,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1041,7 +1070,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1053,19 +1083,20 @@ "json": { "path": [ { - "id": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", - "urn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "id": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", + "urn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" }, { - "id": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", - "urn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "id": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", + "urn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1075,12 +1106,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "container": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1145,15 +1177,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + 
"transient_lastDdlTime": "1700805671", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1688395008", "rawDataSize": "0", "numRows": "0", "totalSize": "0", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "_test_table_underscore", "tags": [] @@ -1164,7 +1196,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1181,7 +1214,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1193,19 +1227,20 @@ "json": { "path": [ { - "id": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", - "urn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "id": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", + "urn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" }, { - "id": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", - "urn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "id": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", + "urn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1215,12 +1250,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "container": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1299,10 +1335,10 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - 
"transient_lastDdlTime": "1688395005", + "transient_lastDdlTime": "1700805669", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-07-03", + "create_date": "2023-11-24", "partitioned_columns": "baz" }, "name": "pokes", @@ -1314,7 +1350,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1331,7 +1368,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1343,19 +1381,20 @@ "json": { "path": [ { - "id": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", - "urn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "id": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", + "urn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" }, { - "id": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", - "urn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "id": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", + "urn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1365,12 +1404,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "container": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1453,7 +1493,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1470,7 +1511,8 @@ }, "systemMetadata": { "lastObserved": 
1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1487,7 +1529,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1499,19 +1542,20 @@ "json": { "path": [ { - "id": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", - "urn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "id": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", + "urn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" }, { - "id": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", - "urn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "id": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", + "urn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_3.json b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_3.json index 824524782a8e3e..ad1e46eb8fbb02 100644 --- a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_3.json +++ b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_3.json @@ -1,7 +1,7 @@ [ { "entityType": "container", - "entityUrn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "entityUrn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -9,19 +9,20 @@ "customProperties": { "platform": "hive", "env": "PROD", - "database": "hive" + "database": "metastore" }, - "name": "hive" + "name": "metastore" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": 
"presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "entityUrn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -31,12 +32,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "entityUrn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -46,12 +48,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "entityUrn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -63,12 +66,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "entityUrn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -78,12 +82,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -91,7 +96,7 @@ "customProperties": { 
"platform": "hive", "env": "PROD", - "database": "hive", + "database": "metastore", "schema": "db1" }, "name": "db1" @@ -99,12 +104,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -114,12 +120,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -129,12 +136,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -146,63 +154,67 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "container": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": 
"presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.map_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.map_test,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.map_test,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.map_test,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -211,7 +223,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "hive.db1.map_test", + "schemaName": "metastore.db1.map_test", "platform": "urn:li:dataPlatform:hive", "version": 0, "created": { @@ -264,15 +276,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + "totalSize": "0", + "numRows": 
"0", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "transient_lastDdlTime": "1700805676", "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "transient_lastDdlTime": "1688395014", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "map_test", "tags": [] @@ -283,12 +295,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.map_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.map_test,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -300,52 +313,55 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.map_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.map_test,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { 
"entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.union_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.union_test,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.union_test,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.union_test,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -354,7 +370,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "hive.db1.union_test", + "schemaName": "metastore.db1.union_test", "platform": "urn:li:dataPlatform:hive", "version": 0, "created": { @@ -464,15 +480,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + "numRows": "0", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "transient_lastDdlTime": "1700805676", "rawDataSize": "0", - "numRows": "0", "totalSize": "0", - "transient_lastDdlTime": "1688395014", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "union_test", "tags": [] @@ -483,12 +499,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.union_test,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.union_test,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -500,52 +517,55 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.union_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.union_test,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.nested_struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.nested_struct_test,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": 
"urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.nested_struct_test,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.nested_struct_test,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -554,7 +574,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "hive.db1.nested_struct_test", + "schemaName": "metastore.db1.nested_struct_test", "platform": "urn:li:dataPlatform:hive", "version": 0, "created": { @@ -656,15 +676,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + "totalSize": "0", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "transient_lastDdlTime": "1700805676", "rawDataSize": "0", "numRows": "0", - "totalSize": "0", - "transient_lastDdlTime": "1688395014", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "nested_struct_test", "tags": [] @@ -675,12 +695,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.nested_struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.nested_struct_test,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -692,52 +713,55 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.nested_struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.nested_struct_test,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": 
"urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.array_struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.array_struct_test,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.array_struct_test,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.array_struct_test,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -746,7 +770,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "hive.db1.array_struct_test", + "schemaName": "metastore.db1.array_struct_test", "platform": "urn:li:dataPlatform:hive", "version": 0, "created": { @@ -832,17 +856,17 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "comment": "This table has array of structs", - "numFiles": "1", - 
"COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1688395011", - "rawDataSize": "32", + "another.comment": "This table has no partitions", "numRows": "1", + "rawDataSize": "32", "totalSize": "33", - "another.comment": "This table has no partitions", + "numFiles": "1", + "transient_lastDdlTime": "1700805674", + "comment": "This table has array of structs", + "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "array_struct_test", "description": "This table has array of structs", @@ -854,12 +878,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.array_struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.array_struct_test,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -871,52 +896,55 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.array_struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.array_struct_test,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": 
"urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.struct_test,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.struct_test,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.struct_test,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -925,7 +953,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "hive.db1.struct_test", + "schemaName": "metastore.db1.struct_test", "platform": "urn:li:dataPlatform:hive", "version": 0, "created": { @@ -1005,15 +1033,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + "transient_lastDdlTime": "1700805671", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1688395008", "rawDataSize": "0", "numRows": "0", "totalSize": "0", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "struct_test", "tags": [] @@ -1024,12 
+1052,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.struct_test,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1041,52 +1070,55 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.struct_test,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1._test_table_underscore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1._test_table_underscore,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": 
"urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1._test_table_underscore,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1._test_table_underscore,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1095,7 +1127,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "hive.db1._test_table_underscore", + "schemaName": "metastore.db1._test_table_underscore", "platform": "urn:li:dataPlatform:hive", "version": 0, "created": { @@ -1145,15 +1177,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + "transient_lastDdlTime": "1700805671", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1688395008", "rawDataSize": "0", "numRows": "0", "totalSize": "0", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "_test_table_underscore", "tags": [] @@ -1164,12 +1196,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1._test_table_underscore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1._test_table_underscore,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1181,52 +1214,55 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": 
"no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1._test_table_underscore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1._test_table_underscore,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.pokes,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.pokes,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.pokes,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.pokes,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1235,7 +1271,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "hive.db1.pokes", + "schemaName": 
"metastore.db1.pokes", "platform": "urn:li:dataPlatform:hive", "version": 0, "created": { @@ -1299,10 +1335,10 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastDdlTime": "1688395005", + "transient_lastDdlTime": "1700805669", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-07-03", + "create_date": "2023-11-24", "partitioned_columns": "baz" }, "name": "pokes", @@ -1314,12 +1350,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.pokes,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.pokes,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1331,52 +1368,55 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.pokes,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.pokes,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": 
"presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.array_struct_test_presto_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.array_struct_test_presto_view,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.array_struct_test_presto_view,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.array_struct_test_presto_view,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1385,7 +1425,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "hive.db1.array_struct_test_presto_view", + "schemaName": "metastore.db1.array_struct_test_presto_view", "platform": "urn:li:dataPlatform:hive", "version": 0, "created": { @@ -1440,12 +1480,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.array_struct_test_presto_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.array_struct_test_presto_view,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1457,12 +1498,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.array_struct_test_presto_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.array_struct_test_presto_view,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1474,52 +1516,55 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.array_struct_test_presto_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.array_struct_test_presto_view,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.array_struct_test_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.array_struct_test_view,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": 
"presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.array_struct_test_view,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.array_struct_test_view,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1528,7 +1573,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "hive.db1.array_struct_test_view", + "schemaName": "metastore.db1.array_struct_test_view", "platform": "urn:li:dataPlatform:hive", "version": 0, "created": { @@ -1630,12 +1675,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.array_struct_test_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.array_struct_test_view,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1647,12 +1693,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.array_struct_test_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.array_struct_test_view,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1664,31 +1711,33 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,hive.db1.array_struct_test_view,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.array_struct_test_view,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_4.json b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_4.json index 3f2980457daa41..007f45238e23f1 100644 --- a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_4.json +++ b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_4.json @@ -1,7 +1,7 @@ [ { "entityType": "container", - "entityUrn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", + "entityUrn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -9,19 +9,20 @@ "customProperties": { "platform": "presto-on-hive", "env": "PROD", - "database": "hive" + "database": "metastore" }, - "name": "hive" + "name": "metastore" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", + "entityUrn": 
"urn:li:container:f4ec3d97ca6750de28020a0d393c289d", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -31,12 +32,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", + "entityUrn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -46,12 +48,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", + "entityUrn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -63,12 +66,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", + "entityUrn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -78,12 +82,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", + "entityUrn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -91,7 +96,7 @@ "customProperties": { "platform": "presto-on-hive", "env": "PROD", - "database": "hive", + "database": "metastore", "schema": "db1" }, "name": "db1" @@ -99,12 +104,13 @@ }, "systemMetadata": { 
"lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", + "entityUrn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -114,12 +120,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", + "entityUrn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -129,12 +136,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", + "entityUrn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -146,63 +154,67 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", + "entityUrn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "container": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": 
"urn:li:container:bb66ab4651750f727700446f9b3aa2df", + "entityUrn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", - "urn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "id": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", + "urn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.map_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.map_test,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "container": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.map_test,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.map_test,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -211,7 +223,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "hive.db1.map_test", + "schemaName": "metastore.db1.map_test", "platform": "urn:li:dataPlatform:presto-on-hive", "version": 0, "created": { @@ -264,15 +276,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + "totalSize": "0", + "numRows": "0", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + 
"transient_lastDdlTime": "1700805676", "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "transient_lastDdlTime": "1688395014", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "map_test", "tags": [] @@ -283,12 +295,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.map_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.map_test,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -300,52 +313,55 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.map_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.map_test,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", - "urn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "id": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", + "urn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" }, { - "id": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", - "urn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "id": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", + "urn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.union_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.union_test,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "container": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.union_test,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.union_test,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -354,7 +370,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "hive.db1.union_test", + "schemaName": "metastore.db1.union_test", "platform": "urn:li:dataPlatform:presto-on-hive", "version": 0, "created": { @@ -464,15 +480,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + "numRows": "0", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "transient_lastDdlTime": "1700805676", "rawDataSize": "0", - "numRows": "0", "totalSize": "0", - "transient_lastDdlTime": "1688395014", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "union_test", "tags": [] @@ -483,12 +499,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.union_test,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.union_test,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -500,52 +517,55 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.union_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.union_test,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", - "urn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "id": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", + "urn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" }, { - "id": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", - "urn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "id": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", + "urn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.nested_struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.nested_struct_test,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "container": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": 
{ - "urn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.nested_struct_test,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.nested_struct_test,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -554,7 +574,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "hive.db1.nested_struct_test", + "schemaName": "metastore.db1.nested_struct_test", "platform": "urn:li:dataPlatform:presto-on-hive", "version": 0, "created": { @@ -656,15 +676,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + "totalSize": "0", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "transient_lastDdlTime": "1700805676", "rawDataSize": "0", "numRows": "0", - "totalSize": "0", - "transient_lastDdlTime": "1688395014", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "nested_struct_test", "tags": [] @@ -675,12 +695,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.nested_struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.nested_struct_test,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -692,52 +713,55 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.nested_struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.nested_struct_test,PROD)", "changeType": 
"UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", - "urn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "id": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", + "urn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" }, { - "id": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", - "urn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "id": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", + "urn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.array_struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.array_struct_test,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "container": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.array_struct_test,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.array_struct_test,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -746,7 +770,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "hive.db1.array_struct_test", + "schemaName": "metastore.db1.array_struct_test", "platform": "urn:li:dataPlatform:presto-on-hive", "version": 0, "created": { @@ -832,17 +856,17 @@ { 
"com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "comment": "This table has array of structs", - "numFiles": "1", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1688395011", - "rawDataSize": "32", + "another.comment": "This table has no partitions", "numRows": "1", + "rawDataSize": "32", "totalSize": "33", - "another.comment": "This table has no partitions", + "numFiles": "1", + "transient_lastDdlTime": "1700805674", + "comment": "This table has array of structs", + "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "array_struct_test", "description": "This table has array of structs", @@ -854,12 +878,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.array_struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.array_struct_test,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -871,52 +896,55 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.array_struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.array_struct_test,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", - "urn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "id": 
"urn:li:container:f4ec3d97ca6750de28020a0d393c289d", + "urn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" }, { - "id": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", - "urn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "id": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", + "urn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.struct_test,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "container": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.struct_test,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.struct_test,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -925,7 +953,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "hive.db1.struct_test", + "schemaName": "metastore.db1.struct_test", "platform": "urn:li:dataPlatform:presto-on-hive", "version": 0, "created": { @@ -1005,15 +1033,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + "transient_lastDdlTime": "1700805671", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1688395008", "rawDataSize": "0", "numRows": "0", 
"totalSize": "0", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "struct_test", "tags": [] @@ -1024,12 +1052,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.struct_test,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1041,52 +1070,55 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.struct_test,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.struct_test,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", - "urn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "id": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", + "urn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" }, { - "id": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", - "urn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "id": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", + "urn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1._test_table_underscore,PROD)", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1._test_table_underscore,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "container": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1._test_table_underscore,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1._test_table_underscore,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1095,7 +1127,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "hive.db1._test_table_underscore", + "schemaName": "metastore.db1._test_table_underscore", "platform": "urn:li:dataPlatform:presto-on-hive", "version": 0, "created": { @@ -1145,15 +1177,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { + "transient_lastDdlTime": "1700805671", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1688395008", "rawDataSize": "0", "numRows": "0", "totalSize": "0", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-07-03" + "create_date": "2023-11-24" }, "name": "_test_table_underscore", "tags": [] @@ -1164,12 +1196,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1._test_table_underscore,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1._test_table_underscore,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1181,52 +1214,55 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1._test_table_underscore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1._test_table_underscore,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", - "urn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "id": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", + "urn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" }, { - "id": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", - "urn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "id": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", + "urn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.pokes,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.pokes,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "container": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { 
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.pokes,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.pokes,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1235,7 +1271,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "hive.db1.pokes", + "schemaName": "metastore.db1.pokes", "platform": "urn:li:dataPlatform:presto-on-hive", "version": 0, "created": { @@ -1299,10 +1335,10 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastDdlTime": "1688395005", + "transient_lastDdlTime": "1700805669", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-07-03", + "create_date": "2023-11-24", "partitioned_columns": "baz" }, "name": "pokes", @@ -1314,12 +1350,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.pokes,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.pokes,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1331,52 +1368,55 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.pokes,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.pokes,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", - "urn": 
"urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "id": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", + "urn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" }, { - "id": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", - "urn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "id": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", + "urn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.array_struct_test_presto_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.array_struct_test_presto_view,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "container": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.array_struct_test_presto_view,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.array_struct_test_presto_view,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1385,7 +1425,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "hive.db1.array_struct_test_presto_view", + "schemaName": "metastore.db1.array_struct_test_presto_view", "platform": "urn:li:dataPlatform:presto-on-hive", "version": 0, "created": { @@ -1453,12 +1493,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": 
"presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.array_struct_test_presto_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.array_struct_test_presto_view,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1470,12 +1511,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.array_struct_test_presto_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.array_struct_test_presto_view,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { @@ -1487,31 +1529,33 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,hive.db1.array_struct_test_presto_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.array_struct_test_presto_view,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab", - "urn": "urn:li:container:e998a77f6edaa92d1326dec9d37c96ab" + "id": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d", + "urn": "urn:li:container:f4ec3d97ca6750de28020a0d393c289d" }, { - "id": "urn:li:container:bb66ab4651750f727700446f9b3aa2df", - "urn": "urn:li:container:bb66ab4651750f727700446f9b3aa2df" + "id": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f", + "urn": "urn:li:container:5bd3e4d159b00200dfe53d79a486ce7f" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": 
"presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_5.json b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_5.json index a0dd4ab82bf24f..111fc0038bdb8f 100644 --- a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_5.json +++ b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_mces_golden_5.json @@ -1,7 +1,7 @@ [ { "entityType": "container", - "entityUrn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "entityUrn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -9,19 +9,20 @@ "customProperties": { "platform": "hive", "env": "PROD", - "database": "hive" + "database": "metastore" }, - "name": "hive" + "name": "metastore" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "entityUrn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -31,12 +32,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "entityUrn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -46,12 +48,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": 
"urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "entityUrn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -63,12 +66,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", + "entityUrn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -78,12 +82,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { @@ -91,7 +96,7 @@ "customProperties": { "platform": "hive", "env": "PROD", - "database": "hive", + "database": "metastore", "schema": "db1" }, "name": "db1" @@ -99,12 +104,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -114,12 +120,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", 
"aspect": { @@ -129,12 +136,13 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -146,42 +154,45 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "container": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { "entityType": "container", - "entityUrn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", + "entityUrn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -191,12 +202,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": 
"urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -264,15 +276,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastDdlTime": "1690956983", + "totalSize": "0", + "numRows": "0", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "transient_lastDdlTime": "1700805676", "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - "create_date": "2023-08-02" + "create_date": "2023-11-24" }, "name": "map_test", "tags": [] @@ -283,7 +295,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -300,7 +313,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -312,19 +326,20 @@ "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -334,12 +349,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + 
"container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -464,15 +480,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastDdlTime": "1690956983", + "numRows": "0", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "transient_lastDdlTime": "1700805676", "rawDataSize": "0", - "numRows": "0", "totalSize": "0", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": "2023-08-02" + "create_date": "2023-11-24" }, "name": "union_test", "tags": [] @@ -483,7 +499,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -500,7 +517,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -512,19 +530,20 @@ "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -534,12 +553,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": 
"urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -656,15 +676,15 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastDdlTime": "1690956983", + "totalSize": "0", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", + "transient_lastDdlTime": "1700805676", "rawDataSize": "0", "numRows": "0", - "totalSize": "0", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": "2023-08-02" + "create_date": "2023-11-24" }, "name": "nested_struct_test", "tags": [] @@ -675,7 +695,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -692,7 +713,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -704,19 +726,20 @@ "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -726,12 +749,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + 
"container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -832,17 +856,17 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "comment": "This table has array of structs", - "transient_lastDdlTime": "1690956980", - "numFiles": "1", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "32", + "another.comment": "This table has no partitions", "numRows": "1", + "rawDataSize": "32", "totalSize": "33", - "another.comment": "This table has no partitions", + "numFiles": "1", + "transient_lastDdlTime": "1700805674", + "comment": "This table has array of structs", + "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-08-02" + "create_date": "2023-11-24" }, "name": "array_struct_test", "description": "This table has array of structs", @@ -854,7 +878,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -871,7 +896,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -883,19 +909,20 @@ "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": 
"urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -905,12 +932,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1005,7 +1033,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastDdlTime": "1690956977", + "transient_lastDdlTime": "1700805671", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", "rawDataSize": "0", @@ -1013,7 +1041,7 @@ "totalSize": "0", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-08-02" + "create_date": "2023-11-24" }, "name": "struct_test", "tags": [] @@ -1024,7 +1052,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1041,7 +1070,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1053,19 +1083,20 @@ "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": 
"urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1075,12 +1106,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1145,7 +1177,7 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastDdlTime": "1690956977", + "transient_lastDdlTime": "1700805671", "numFiles": "0", "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", "rawDataSize": "0", @@ -1153,7 +1185,7 @@ "totalSize": "0", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-08-02" + "create_date": "2023-11-24" }, "name": "_test_table_underscore", "tags": [] @@ -1164,7 +1196,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1181,7 +1214,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1193,19 +1227,20 @@ "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + 
"urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1215,12 +1250,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1299,10 +1335,10 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "transient_lastDdlTime": "1690956974", + "transient_lastDdlTime": "1700805669", "table_type": "MANAGED_TABLE", "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-08-02", + "create_date": "2023-11-24", "partitioned_columns": "baz" }, "name": "pokes", @@ -1314,7 +1350,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1331,7 +1368,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1343,19 +1381,20 @@ "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": 
"presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1365,12 +1404,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1440,7 +1480,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1457,7 +1498,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1474,7 +1516,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1486,19 +1529,20 @@ "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1508,12 +1552,13 @@ "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "container": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } }, "systemMetadata": { "lastObserved": 
1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1630,7 +1675,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1647,7 +1693,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1664,7 +1711,8 @@ }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1676,19 +1724,20 @@ "json": { "path": [ { - "id": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918", - "urn": "urn:li:container:939ecec0f01fb6bb1ca15fe6f0ead918" + "id": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589", + "urn": "urn:li:container:1cfce89b5a05e1da5092d88ad9eb4589" }, { - "id": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f", - "urn": "urn:li:container:f5e571e4a9acce86333e6b427ba1651f" + "id": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae", + "urn": "urn:li:container:9ba2e350c97c893a91bcaee4838cdcae" } ] } }, "systemMetadata": { "lastObserved": 1632398400000, - "runId": "presto-on-hive-test" + "runId": "presto-on-hive-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_to_file.yml b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_to_file.yml index d4df1364513c86..233fb7fa36057d 100644 --- a/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_to_file.yml +++ b/metadata-ingestion/tests/integration/presto-on-hive/presto_on_hive_to_file.yml @@ -5,7 +5,6 @@ source: config: host_port: localhost:5432 database: metastore - database_alias: hive username: postgres scheme: "postgresql+psycopg2" diff --git 
a/metadata-ingestion/tests/integration/presto-on-hive/test_presto_on_hive.py b/metadata-ingestion/tests/integration/presto-on-hive/test_presto_on_hive.py index 31d801ccf7dee8..23110ef12ae543 100644 --- a/metadata-ingestion/tests/integration/presto-on-hive/test_presto_on_hive.py +++ b/metadata-ingestion/tests/integration/presto-on-hive/test_presto_on_hive.py @@ -88,9 +88,8 @@ def test_presto_on_hive_ingest( "type": data_platform, "config": { "host_port": "localhost:5432", - "database": "db1", "metastore_db_name": "metastore", - "database_alias": "hive", + "database_pattern": {"allow": ["db1"]}, "username": "postgres", "scheme": "postgresql+psycopg2", "include_views": True, @@ -152,7 +151,6 @@ def test_presto_on_hive_instance_ingest( "config": { "host_port": "localhost:5432", "database": "metastore", - "database_alias": "hive", "username": "postgres", "scheme": "postgresql+psycopg2", "include_views": True, diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py index 4c00e48ede9fbd..1b58696e4014c9 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake.py @@ -142,6 +142,7 @@ def test_snowflake_basic(pytestconfig, tmp_path, mock_time, mock_datahub_graph): type="datahub", config=datahub_classifier_config ) ], + max_workers=1, ), profiling=GEProfilingConfig( enabled=True, diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json index 2fe7a76fd01ae6..66ef9b097c973e 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json @@ -112,11 +112,11 @@ "aspect": { "json": { "customProperties": { - 
"job_id": "1f2f14ba-db84-4fa1-910e-7df71bede642", + "job_id": "3565ea3e-9a3a-4cb0-acd5-213d740479a0", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2023-10-27 10:11:55.540000", - "date_modified": "2023-10-27 10:11:55.667000", + "date_created": "2023-11-27 23:08:29.350000", + "date_modified": "2023-11-27 23:08:29.833000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -1961,8 +1961,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2023-10-27 10:11:55.460000", - "date_modified": "2023-10-27 10:11:55.460000" + "date_created": "2023-11-27 23:08:29.077000", + "date_modified": "2023-11-27 23:08:29.077000" }, "externalUrl": "", "name": "demodata.Foo.Proc.With.SpecialChar", @@ -3575,6 +3575,18 @@ "nativeDataType": "NVARCHAR()", "recursive": false, "isPartOfKey": false + }, + { + "fieldPath": "Price", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "MONEY()", + "recursive": false, + "isPartOfKey": false } ] } @@ -3816,6 +3828,18 @@ "nativeDataType": "NVARCHAR()", "recursive": false, "isPartOfKey": false + }, + { + "fieldPath": "Price", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "SMALLMONEY()", + "recursive": false, + "isPartOfKey": false } ] } diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json index 804a8d74d0d512..9ce3664eff6a1f 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json @@ -112,11 +112,11 
@@ "aspect": { "json": { "customProperties": { - "job_id": "1f2f14ba-db84-4fa1-910e-7df71bede642", + "job_id": "3b767c17-c921-4331-93d9-eb0e006045a4", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2023-10-27 10:11:55.540000", - "date_modified": "2023-10-27 10:11:55.667000", + "date_created": "2023-11-23 11:04:47.927000", + "date_modified": "2023-11-23 11:04:48.090000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -1245,7 +1245,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.dbo.Products,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.dbo.Products,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1262,7 +1262,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.dbo.Products,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.dbo.Products,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1278,7 +1278,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "DemoDataAlias.dbo.Products", + "schemaName": "DemoData.dbo.Products", "platform": "urn:li:dataPlatform:mssql", "version": 0, "created": { @@ -1334,7 +1334,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.dbo.Products,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.dbo.Products,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1352,7 +1352,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.dbo.Products,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.dbo.Products,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1486,7 +1486,7 @@ }, { 
"entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.Items,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1503,7 +1503,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.Items,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1520,7 +1520,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "DemoDataAlias.Foo.Items", + "schemaName": "DemoData.Foo.Items", "platform": "urn:li:dataPlatform:mssql", "version": 0, "created": { @@ -1576,7 +1576,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.Items,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1594,7 +1594,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.Items,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1619,7 +1619,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.Persons,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Persons,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1636,7 +1636,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.Persons,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Persons,PROD)", "aspects": [ { 
"com.linkedin.pegasus2avro.common.Status": { @@ -1652,7 +1652,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "DemoDataAlias.Foo.Persons", + "schemaName": "DemoData.Foo.Persons", "platform": "urn:li:dataPlatform:mssql", "version": 0, "created": { @@ -1733,7 +1733,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.Persons,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Persons,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1751,7 +1751,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.Persons,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Persons,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1776,7 +1776,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.SalesReason,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1793,7 +1793,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.SalesReason,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1809,7 +1809,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "DemoDataAlias.Foo.SalesReason", + "schemaName": "DemoData.Foo.SalesReason", "platform": "urn:li:dataPlatform:mssql", "version": 0, "created": { @@ -1868,12 +1868,12 @@ { "name": "FK_TempSales_SalesReason", "foreignFields": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.Persons,PROD),ID)" + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Persons,PROD),ID)" ], "sourceFields": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.SalesReason,PROD),TempID)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD),TempID)" ], - "foreignDataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.Persons,PROD)" + "foreignDataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Persons,PROD)" } ] } @@ -1889,7 +1889,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.SalesReason,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1907,7 +1907,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoDataAlias.Foo.SalesReason,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.SalesReason,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1961,8 +1961,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2023-10-27 10:11:55.460000", - "date_modified": "2023-10-27 10:11:55.460000" + "date_created": "2023-11-23 11:04:47.857000", + "date_modified": "2023-11-23 11:04:47.857000" }, "externalUrl": "", "name": "demodata.Foo.Proc.With.SpecialChar", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json index 9d1b288057a160..037a341b7d66e8 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json +++ 
b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json @@ -112,11 +112,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "1f2f14ba-db84-4fa1-910e-7df71bede642", + "job_id": "3b767c17-c921-4331-93d9-eb0e006045a4", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2023-10-27 10:11:55.540000", - "date_modified": "2023-10-27 10:11:55.667000", + "date_created": "2023-11-23 11:04:47.927000", + "date_modified": "2023-11-23 11:04:48.090000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -1245,7 +1245,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodataalias.dbo.products,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.dbo.products,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1262,7 +1262,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodataalias.dbo.products,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.dbo.products,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1278,7 +1278,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "demodataalias.dbo.products", + "schemaName": "demodata.dbo.products", "platform": "urn:li:dataPlatform:mssql", "version": 0, "created": { @@ -1334,7 +1334,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodataalias.dbo.products,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.dbo.products,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1352,7 +1352,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodataalias.dbo.products,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.dbo.products,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1486,7 +1486,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodataalias.foo.items,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1503,7 +1503,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodataalias.foo.items,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1520,7 +1520,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "demodataalias.foo.items", + "schemaName": "demodata.foo.items", "platform": "urn:li:dataPlatform:mssql", "version": 0, "created": { @@ -1576,7 +1576,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodataalias.foo.items,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1594,7 +1594,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodataalias.foo.items,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1619,7 +1619,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodataalias.foo.persons,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1636,7 +1636,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": 
"urn:li:dataset:(urn:li:dataPlatform:mssql,demodataalias.foo.persons,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1652,7 +1652,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "demodataalias.foo.persons", + "schemaName": "demodata.foo.persons", "platform": "urn:li:dataPlatform:mssql", "version": 0, "created": { @@ -1733,7 +1733,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodataalias.foo.persons,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1751,7 +1751,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodataalias.foo.persons,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1776,7 +1776,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodataalias.foo.salesreason,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1793,7 +1793,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodataalias.foo.salesreason,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1809,7 +1809,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "demodataalias.foo.salesreason", + "schemaName": "demodata.foo.salesreason", "platform": "urn:li:dataPlatform:mssql", "version": 0, "created": { @@ -1868,12 +1868,12 @@ { "name": "FK_TempSales_SalesReason", 
"foreignFields": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodataalias.foo.persons,PROD),ID)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),ID)" ], "sourceFields": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodataalias.foo.salesreason,PROD),TempID)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),TempID)" ], - "foreignDataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodataalias.foo.persons,PROD)" + "foreignDataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)" } ] } @@ -1889,7 +1889,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodataalias.foo.salesreason,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1907,7 +1907,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodataalias.foo.salesreason,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1961,8 +1961,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2023-10-27 10:11:55.460000", - "date_modified": "2023-10-27 10:11:55.460000" + "date_created": "2023-11-23 11:04:47.857000", + "date_modified": "2023-11-23 11:04:47.857000" }, "externalUrl": "", "name": "demodata.Foo.Proc.With.SpecialChar", diff --git a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql index a17d52f9a39b1b..77ecabc5a3fffc 100644 --- a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql +++ 
b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql @@ -2,11 +2,11 @@ CREATE DATABASE NewData; GO USE NewData; GO -CREATE TABLE ProductsNew (ID int, ProductName nvarchar(max)); +CREATE TABLE ProductsNew (ID int, ProductName nvarchar(max), Price money); GO CREATE SCHEMA FooNew; GO -CREATE TABLE FooNew.ItemsNew (ID int, ItemName nvarchar(max)); +CREATE TABLE FooNew.ItemsNew (ID int, ItemName nvarchar(max), Price smallmoney); GO CREATE TABLE FooNew.PersonsNew ( ID int NOT NULL PRIMARY KEY, diff --git a/metadata-ingestion/tests/integration/sql_server/source_files/mssql_to_file.yml b/metadata-ingestion/tests/integration/sql_server/source_files/mssql_to_file.yml index d347422353d475..c53e3cf6b80452 100644 --- a/metadata-ingestion/tests/integration/sql_server/source_files/mssql_to_file.yml +++ b/metadata-ingestion/tests/integration/sql_server/source_files/mssql_to_file.yml @@ -7,7 +7,6 @@ source: password: test!Password database: DemoData host_port: localhost:51433 - database_alias: DemoDataAlias # use_odbc: True # uri_args: # driver: "ODBC Driver 17 for SQL Server" diff --git a/metadata-ingestion/tests/integration/sql_server/source_files/mssql_with_lower_case_urn.yml b/metadata-ingestion/tests/integration/sql_server/source_files/mssql_with_lower_case_urn.yml index 8d17c49163ca1d..4e96d137670ba9 100644 --- a/metadata-ingestion/tests/integration/sql_server/source_files/mssql_with_lower_case_urn.yml +++ b/metadata-ingestion/tests/integration/sql_server/source_files/mssql_with_lower_case_urn.yml @@ -7,7 +7,6 @@ source: password: test!Password database: DemoData host_port: localhost:51433 - database_alias: DemoDataAlias convert_urns_to_lowercase: true # use_odbc: True # uri_args: diff --git a/metadata-ingestion/tests/integration/trino/test_trino.py b/metadata-ingestion/tests/integration/trino/test_trino.py index 177c273c0d2424..8ab3ed8056e90f 100644 --- a/metadata-ingestion/tests/integration/trino/test_trino.py +++ 
b/metadata-ingestion/tests/integration/trino/test_trino.py @@ -70,7 +70,6 @@ def test_trino_ingest( "config": TrinoConfig( host_port="localhost:5300", database="postgresqldb", - database_alias="library_catalog", username="foo", schema_pattern=AllowDenyPattern(allow=["^librarydb"]), profile_pattern=AllowDenyPattern( diff --git a/metadata-ingestion/tests/unit/test_postgres_source.py b/metadata-ingestion/tests/unit/test_postgres_source.py index fac491cbaea040..91a62b603bb584 100644 --- a/metadata-ingestion/tests/unit/test_postgres_source.py +++ b/metadata-ingestion/tests/unit/test_postgres_source.py @@ -65,23 +65,6 @@ def tests_get_inspectors_with_sqlalchemy_uri_provided(create_engine_mock): assert create_engine_mock.call_args_list[0][0][0] == "custom_url" -def test_database_alias_takes_precendence(): - config = PostgresConfig.parse_obj( - { - **_base_config(), - "database_alias": "ops_database", - "database": "postgres", - } - ) - mock_inspector = mock.MagicMock() - assert ( - PostgresSource(config, PipelineContext(run_id="test")).get_identifier( - schema="superset", entity="logs", inspector=mock_inspector - ) - == "ops_database.superset.logs" - ) - - def test_database_in_identifier(): config = PostgresConfig.parse_obj({**_base_config(), "database": "postgres"}) mock_inspector = mock.MagicMock() diff --git a/metadata-ingestion/tests/unit/test_sql_common.py b/metadata-ingestion/tests/unit/test_sql_common.py index 808b38192411dc..e23d290b611f4c 100644 --- a/metadata-ingestion/tests/unit/test_sql_common.py +++ b/metadata-ingestion/tests/unit/test_sql_common.py @@ -102,3 +102,17 @@ def test_use_source_schema_for_foreign_key_if_not_specified(): def test_get_platform_from_sqlalchemy_uri(uri: str, expected_platform: str) -> None: platform: str = get_platform_from_sqlalchemy_uri(uri) assert platform == expected_platform + + +def test_get_db_schema_with_dots_in_view_name(): + config: SQLCommonConfig = _TestSQLAlchemyConfig() + ctx: PipelineContext = 
PipelineContext(run_id="test_ctx") + platform: str = "TEST" + source = _TestSQLAlchemySource(config=config, ctx=ctx, platform=platform) + + database, schema = source.get_db_schema( + dataset_identifier="database.schema.long.view.name1" + ) + + assert database == "database" + assert schema == "schema" diff --git a/metadata-integration/java/datahub-protobuf-example/build.gradle b/metadata-integration/java/datahub-protobuf-example/build.gradle index 71cbb67061887d..4e53d8ed763baa 100644 --- a/metadata-integration/java/datahub-protobuf-example/build.gradle +++ b/metadata-integration/java/datahub-protobuf-example/build.gradle @@ -4,7 +4,11 @@ plugins { } repositories { - mavenCentral() + if (project.hasProperty('apacheMavenRepositoryUrl')) { + maven { url project.getProperty('apacheMavenRepositoryUrl') } + } else { + mavenCentral() + } mavenLocal() } diff --git a/metadata-jobs/mae-consumer-job/build.gradle b/metadata-jobs/mae-consumer-job/build.gradle index 5e735e118493cd..a8920d50b068e9 100644 --- a/metadata-jobs/mae-consumer-job/build.gradle +++ b/metadata-jobs/mae-consumer-job/build.gradle @@ -58,6 +58,22 @@ docker { buildx(true) load(true) push(false) + + // Add build args if they are defined (needed for some CI or enterprise environments) + def dockerBuildArgs = [:] + if (project.hasProperty('alpineApkRepositoryUrl')) { + dockerBuildArgs.ALPINE_REPO_URL = project.getProperty('alpineApkRepositoryUrl') + } + if (project.hasProperty('githubMirrorUrl')) { + dockerBuildArgs.GITHUB_REPO_URL = project.getProperty('githubMirrorUrl') + } + if (project.hasProperty('mavenCentralRepositoryUrl')) { + dockerBuildArgs.MAVEN_CENTRAL_REPO_URL = project.getProperty('mavenCentralRepositoryUrl') + } + + if (dockerBuildArgs.size() > 0) { + buildArgs(dockerBuildArgs) + } } tasks.getByPath(":metadata-jobs:mae-consumer-job:docker").dependsOn([bootJar]) @@ -66,4 +82,4 @@ task cleanLocalDockerImages { rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } 
-dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file +dockerClean.finalizedBy(cleanLocalDockerImages) diff --git a/metadata-jobs/mce-consumer-job/build.gradle b/metadata-jobs/mce-consumer-job/build.gradle index ef042188bc3d83..2f60d1ae985fb5 100644 --- a/metadata-jobs/mce-consumer-job/build.gradle +++ b/metadata-jobs/mce-consumer-job/build.gradle @@ -69,6 +69,22 @@ docker { buildx(true) load(true) push(false) + + // Add build args if they are defined (needed for some CI or enterprise environments) + def dockerBuildArgs = [:] + if (project.hasProperty('alpineApkRepositoryUrl')) { + dockerBuildArgs.ALPINE_REPO_URL = project.getProperty('alpineApkRepositoryUrl') + } + if (project.hasProperty('githubMirrorUrl')) { + dockerBuildArgs.GITHUB_REPO_URL = project.getProperty('githubMirrorUrl') + } + if (project.hasProperty('mavenCentralRepositoryUrl')) { + dockerBuildArgs.MAVEN_CENTRAL_REPO_URL = project.getProperty('mavenCentralRepositoryUrl') + } + + if (dockerBuildArgs.size() > 0) { + buildArgs(dockerBuildArgs) + } } tasks.getByPath(":metadata-jobs:mce-consumer-job:docker").dependsOn([bootJar]) @@ -77,4 +93,4 @@ task cleanLocalDockerImages { rootProject.ext.cleanLocalDockerImages(docker_registry, docker_repo, "${version}") } } -dockerClean.finalizedBy(cleanLocalDockerImages) \ No newline at end of file +dockerClean.finalizedBy(cleanLocalDockerImages) diff --git a/metadata-models-custom/build.gradle b/metadata-models-custom/build.gradle index 95a00766039a84..71d3b0fd1f736c 100644 --- a/metadata-models-custom/build.gradle +++ b/metadata-models-custom/build.gradle @@ -2,7 +2,11 @@ import org.yaml.snakeyaml.Yaml buildscript { repositories{ - mavenCentral() + if (project.hasProperty('apacheMavenRepositoryUrl')) { + maven { url project.getProperty('apacheMavenRepositoryUrl') } + } else { + mavenCentral() + } } dependencies { classpath("org.yaml:snakeyaml:1.33") diff --git a/metadata-service/war/build.gradle b/metadata-service/war/build.gradle index 
35730ad6dfa9f3..fc29b0bb460920 100644 --- a/metadata-service/war/build.gradle +++ b/metadata-service/war/build.gradle @@ -83,6 +83,22 @@ docker { buildx(true) load(true) push(false) + + // Add build args if they are defined (needed for some CI or enterprise environments) + def dockerBuildArgs = [:] + if (project.hasProperty('alpineApkRepositoryUrl')) { + dockerBuildArgs.ALPINE_REPO_URL = project.getProperty('alpineApkRepositoryUrl') + } + if (project.hasProperty('githubMirrorUrl')) { + dockerBuildArgs.GITHUB_REPO_URL = project.getProperty('githubMirrorUrl') + } + if (project.hasProperty('mavenCentralRepositoryUrl')) { + dockerBuildArgs.MAVEN_CENTRAL_REPO_URL = project.getProperty('mavenCentralRepositoryUrl') + } + + if (dockerBuildArgs.size() > 0) { + buildArgs(dockerBuildArgs) + } } tasks.getByPath(":metadata-service:war:docker").dependsOn([build, war]) diff --git a/repositories.gradle b/repositories.gradle index 69eaea6ca12bcb..d82563c2659a07 100644 --- a/repositories.gradle +++ b/repositories.gradle @@ -1,15 +1,31 @@ repositories { gradlePluginPortal() mavenLocal() - mavenCentral() - maven { - url "https://packages.confluent.io/maven/" + + if (project.hasProperty('apacheMavenRepositoryUrl')) { + maven { url project.getProperty('apacheMavenRepositoryUrl') } + } else { + mavenCentral() } - maven { - url "https://plugins.gradle.org/m2/" + + if (project.hasProperty('confluentMavenRepositoryUrl')) { + maven { + url project.getProperty('confluentMavenRepositoryUrl') + } + } else { + maven { + url "https://packages.confluent.io/maven/" + } } - maven { - url "https://linkedin.jfrog.io/artifactory/open-source/" // GMA, pegasus + + if (project.hasProperty('linkedinOpenSourceRepositoryUrl')) { + maven { + url project.getProperty('linkedinOpenSourceRepositoryUrl') + } + } else { + maven { + url "https://linkedin.jfrog.io/artifactory/open-source/" // GMA, pegasus + } } }