diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000000000..0d08e261a2ae9d
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,11 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options:
+# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
+
+version: 2
+updates:
+  - package-ecosystem: "github-actions" # See documentation for possible values
+    directory: "/" # Location of package manifests
+    schedule:
+      interval: "weekly"
diff --git a/.github/pr-labeler-config.yml b/.github/pr-labeler-config.yml
index 2625cb1cfdff68..e3fcc15b637e22 100644
--- a/.github/pr-labeler-config.yml
+++ b/.github/pr-labeler-config.yml
@@ -1,22 +1,32 @@
 ingestion:
-  - "metadata-ingestion/**/*"
-  - "metadata-ingestion-modules/**/*"
-  - "metadata-integration/**/*"
+- changed-files:
+  - any-glob-to-any-file:
+    - "metadata-ingestion/**/*"
+    - "metadata-ingestion-modules/**/*"
+    - "metadata-integration/**/*"
 
 devops:
-  - "docker/**/*"
-  - ".github/**/*"
-  - "perf-test/**/*"
-  - "metadata-service/**/*"
+- changed-files:
+  - any-glob-to-any-file:
+    - "docker/**/*"
+    - ".github/**/*"
+    - "perf-test/**/*"
+    - "metadata-service/**/*"
 
 product:
-  - "datahub-web-react/**/*"
-  - "datahub-frontend/**/*"
-  - "datahub-graphql-core/**/*"
-  - "metadata-io/**/*"
+- changed-files:
+  - any-glob-to-any-file:
+    - "datahub-web-react/**/*"
+    - "datahub-frontend/**/*"
+    - "datahub-graphql-core/**/*"
+    - "metadata-io/**/*"
 
 docs:
-  - "docs/**/*"
+- changed-files:
+  - any-glob-to-any-file:
+    - "docs/**/*"
 
 smoke_test:
-  - "smoke-test/**/*"
+- changed-files:
+  - any-glob-to-any-file:
+    - "smoke-test/**/*"
diff --git a/.github/scripts/pre-commit-override.yaml b/.github/scripts/pre-commit-override.yaml
index 961134bebe2c98..ecd3c97ad61efa 100644
--- a/.github/scripts/pre-commit-override.yaml
+++ b/.github/scripts/pre-commit-override.yaml
@@ -5,5 +5,5 @@ repos:
       name: smoke-test cypress Lint Fix
       entry: ./gradlew :smoke-test:cypressLintFix
       language: system
-      files: ^smoke-test/tests/cypress/.*$
+      files: ^smoke-test/tests/cypress/.*\.tsx$
       pass_filenames: false
\ No newline at end of file
diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml
index e1e0fb0a85e977..31c77d754dc69d 100644
--- a/.github/workflows/airflow-plugin.yml
+++ b/.github/workflows/airflow-plugin.yml
@@ -56,7 +56,7 @@ jobs:
         with:
           distribution: "zulu"
           java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - uses: acryldata/sane-checkout-action@v3
      - uses: actions/setup-python@v5
        with:
diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
index 86545946d6afea..7f7dcb9c1f7548 100644
--- a/.github/workflows/build-and-test.yml
+++ b/.github/workflows/build-and-test.yml
@@ -63,7 +63,7 @@ jobs:
           sudo apt-get remove 'dotnet-*' azure-cli || true
           sudo rm -rf /usr/local/lib/android/ || true
           sudo docker image prune -a -f || true
-      - uses: szenius/set-timezone@v1.2
+      - uses: szenius/set-timezone@v2.0
        with:
          timezoneLinux: ${{ matrix.timezone }}
      - name: Check out the repo
@@ -83,7 +83,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - name: Gradle build (and test) for NOT metadata ingestion
        if: ${{ matrix.command == 'except_metadata_ingestion' && needs.setup.outputs.backend_change == 'true' }} # datahub-schematron:cli excluded due to dependency on metadata-ingestion
diff --git a/.github/workflows/check-datahub-jars.yml b/.github/workflows/check-datahub-jars.yml
index dc770f7fc83a61..1b284578483f30 100644
--- a/.github/workflows/check-datahub-jars.yml
+++ b/.github/workflows/check-datahub-jars.yml
@@ -43,7 +43,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - name: check ${{ matrix.command }} jar
        run: |
          ./gradlew :metadata-integration:java:${{ matrix.command }}:build --info
diff --git a/.github/workflows/dagster-plugin.yml b/.github/workflows/dagster-plugin.yml
index a2ac59d6989a9f..61fddaeb7770bb 100644
--- a/.github/workflows/dagster-plugin.yml
+++ b/.github/workflows/dagster-plugin.yml
@@ -44,7 +44,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - uses: acryldata/sane-checkout-action@v3
      - uses: actions/setup-python@v5
        with:
diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml
index e44e6b11c6d057..5f944c8e28769f 100644
--- a/.github/workflows/docker-unified.yml
+++ b/.github/workflows/docker-unified.yml
@@ -130,7 +130,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - name: Run lint on smoke test
        if: ${{ steps.ci-optimize.outputs.smoke-test-change == 'true' }}
        run: |
@@ -154,7 +154,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - name: Check out the repo
        uses: acryldata/sane-checkout-action@v3
      - name: Pre-build artifacts for docker image
@@ -191,7 +191,7 @@ jobs:
        with:
          image: ${{ env.DATAHUB_GMS_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
      - name: Run Trivy vulnerability scanner
-        uses: aquasecurity/trivy-action@0.26.0
+        uses: aquasecurity/trivy-action@0.29.0
        env:
          TRIVY_OFFLINE_SCAN: true
          TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
@@ -205,7 +205,7 @@ jobs:
          ignore-unfixed: true
          vuln-type: "os,library"
      - name: Upload Trivy scan results to GitHub Security tab
-        uses: github/codeql-action/upload-sarif@v2
+        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: "trivy-results.sarif"
@@ -225,7 +225,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - name: Check out the repo
        uses: acryldata/sane-checkout-action@v3
      - name: Pre-build artifacts for docker image
@@ -262,7 +262,7 @@ jobs:
        with:
          image: ${{ env.DATAHUB_MAE_CONSUMER_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
      - name: Run Trivy vulnerability scanner
-        uses: aquasecurity/trivy-action@0.26.0
+        uses: aquasecurity/trivy-action@0.29.0
        env:
          TRIVY_OFFLINE_SCAN: true
          TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
@@ -276,7 +276,7 @@ jobs:
          ignore-unfixed: true
          vuln-type: "os,library"
      - name: Upload Trivy scan results to GitHub Security tab
-        uses: github/codeql-action/upload-sarif@v2
+        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: "trivy-results.sarif"
@@ -296,7 +296,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - name: Check out the repo
        uses: acryldata/sane-checkout-action@v3
      - name: Pre-build artifacts for docker image
@@ -333,7 +333,7 @@ jobs:
        with:
          image: ${{ env.DATAHUB_MCE_CONSUMER_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
      - name: Run Trivy vulnerability scanner
-        uses: aquasecurity/trivy-action@0.26.0
+        uses: aquasecurity/trivy-action@0.29.0
        env:
          TRIVY_OFFLINE_SCAN: true
          TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
@@ -347,7 +347,7 @@ jobs:
          ignore-unfixed: true
          vuln-type: "os,library"
      - name: Upload Trivy scan results to GitHub Security tab
-        uses: github/codeql-action/upload-sarif@v2
+        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: "trivy-results.sarif"
@@ -367,7 +367,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - name: Check out the repo
        uses: acryldata/sane-checkout-action@v3
      - name: Pre-build artifacts for docker image
@@ -404,7 +404,7 @@ jobs:
        with:
          image: ${{ env.DATAHUB_UPGRADE_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
      - name: Run Trivy vulnerability scanner
-        uses: aquasecurity/trivy-action@0.26.0
+        uses: aquasecurity/trivy-action@0.29.0
        env:
          TRIVY_OFFLINE_SCAN: true
          TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
@@ -418,7 +418,7 @@ jobs:
          ignore-unfixed: true
          vuln-type: "os,library"
      - name: Upload Trivy scan results to GitHub Security tab
-        uses: github/codeql-action/upload-sarif@v2
+        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: "trivy-results.sarif"
@@ -438,7 +438,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - name: Check out the repo
        uses: acryldata/sane-checkout-action@v3
      - name: Pre-build artifacts for docker image
@@ -475,7 +475,7 @@ jobs:
        with:
          image: ${{ env.DATAHUB_FRONTEND_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
      - name: Run Trivy vulnerability scanner
-        uses: aquasecurity/trivy-action@0.26.0
+        uses: aquasecurity/trivy-action@0.29.0
        env:
          TRIVY_OFFLINE_SCAN: true
          TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
@@ -489,7 +489,7 @@ jobs:
          ignore-unfixed: true
          vuln-type: "os,library"
      - name: Upload Trivy scan results to GitHub Security tab
-        uses: github/codeql-action/upload-sarif@v2
+        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: "trivy-results.sarif"
@@ -536,7 +536,7 @@ jobs:
        with:
          image: ${{ env.DATAHUB_KAFKA_SETUP_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
      - name: Run Trivy vulnerability scanner
-        uses: aquasecurity/trivy-action@0.26.0
+        uses: aquasecurity/trivy-action@0.29.0
        env:
          TRIVY_OFFLINE_SCAN: true
          TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
@@ -550,7 +550,7 @@ jobs:
          ignore-unfixed: true
          vuln-type: "os,library"
      - name: Upload Trivy scan results to GitHub Security tab
-        uses: github/codeql-action/upload-sarif@v2
+        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: "trivy-results.sarif"
@@ -597,7 +597,7 @@ jobs:
        with:
          image: ${{ env.DATAHUB_MYSQL_SETUP_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
      - name: Run Trivy vulnerability scanner
-        uses: aquasecurity/trivy-action@0.26.0
+        uses: aquasecurity/trivy-action@0.29.0
        env:
          TRIVY_OFFLINE_SCAN: true
          TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
@@ -611,7 +611,7 @@ jobs:
          ignore-unfixed: true
          vuln-type: "os,library"
      - name: Upload Trivy scan results to GitHub Security tab
-        uses: github/codeql-action/upload-sarif@v2
+        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: "trivy-results.sarif"
@@ -658,7 +658,7 @@ jobs:
        with:
          image: ${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
      - name: Run Trivy vulnerability scanner
-        uses: aquasecurity/trivy-action@0.26.0
+        uses: aquasecurity/trivy-action@0.29.0
        env:
          TRIVY_OFFLINE_SCAN: true
          TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
@@ -672,7 +672,7 @@ jobs:
          ignore-unfixed: true
          vuln-type: "os,library"
      - name: Upload Trivy scan results to GitHub Security tab
-        uses: github/codeql-action/upload-sarif@v2
+        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: "trivy-results.sarif"
@@ -829,7 +829,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - name: Build codegen
        if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish =='true' }}
        run: ./gradlew :metadata-ingestion:codegen
@@ -886,7 +886,7 @@ jobs:
        with:
          image: ${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.datahub_ingestion_slim_build.outputs.tag }}
      - name: Run Trivy vulnerability scanner Slim Image
-        uses: aquasecurity/trivy-action@0.26.0
+        uses: aquasecurity/trivy-action@0.29.0
        env:
          TRIVY_OFFLINE_SCAN: true
          TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
@@ -901,7 +901,7 @@ jobs:
          vuln-type: "os,library"
          timeout: 15m
      - name: Upload Trivy scan results to GitHub Security tab
-        uses: github/codeql-action/upload-sarif@v2
+        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: "trivy-results.sarif"
@@ -930,7 +930,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - name: Build codegen
        if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
        run: ./gradlew :metadata-ingestion:codegen
@@ -985,7 +985,7 @@ jobs:
        with:
          image: ${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.datahub_ingestion_full_build.outputs.tag }}
      - name: Run Trivy vulnerability scanner Full Image
-        uses: aquasecurity/trivy-action@0.26.0
+        uses: aquasecurity/trivy-action@0.29.0
        env:
          TRIVY_OFFLINE_SCAN: true
          TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
@@ -1000,7 +1000,7 @@ jobs:
          vuln-type: "os,library"
          timeout: 15m
      - name: Upload Trivy scan results to GitHub Security tab
-        uses: github/codeql-action/upload-sarif@v2
+        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: "trivy-results.sarif"
@@ -1018,8 +1018,8 @@ jobs:
        # python_batch_count is used to split pytests in the smoke-test (batches of actual test functions)
        # cypress_batch_count is used to split the collection of cypress test specs into batches.
        run: |
-          echo "cypress_batch_count=11" >> "$GITHUB_OUTPUT"
-          echo "python_batch_count=5" >> "$GITHUB_OUTPUT"
+          echo "cypress_batch_count=5" >> "$GITHUB_OUTPUT"
+          echo "python_batch_count=3" >> "$GITHUB_OUTPUT"
      - id: set-matrix
        # For m batches for python and n batches for cypress, we need a test matrix of python x m + cypress x n.
@@ -1083,7 +1083,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - name: Login to DockerHub
        uses: docker/login-action@v3
        if: ${{ needs.setup.outputs.docker-login == 'true' }}
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index e6044badb1b41c..f9d459652086fb 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -5,6 +5,7 @@ on:
    branches:
      - "**"
    paths:
+      - ".github/workflows/documentation.yml"
      - "metadata-ingestion/**"
      - "metadata-models/**"
      - "docs/**"
@@ -13,6 +14,7 @@ on:
    branches:
      - master
    paths:
+      - ".github/workflows/documentation.yml"
      - "metadata-ingestion/**"
      - "metadata-models/**"
      - "docs/**"
@@ -40,7 +42,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.10"
@@ -57,8 +59,12 @@ jobs:
      - name: Deploy
        if: github.event_name == 'push' && github.repository == 'datahub-project/datahub'
-        uses: peaceiris/actions-gh-pages@v3
+        uses: peaceiris/actions-gh-pages@v4
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./docs-website/build
          cname: datahubproject.io
+          # The gh-pages branch stores the built docs site. We don't need to preserve
+          # the full history of the .html files, since they're generated from our
+          # source files. Doing so significantly reduces the size of the repo's .git dir.
+          force_orphan: true
diff --git a/.github/workflows/gx-plugin.yml b/.github/workflows/gx-plugin.yml
index c28bdbb30eb36d..68d7934d1d6e6f 100644
--- a/.github/workflows/gx-plugin.yml
+++ b/.github/workflows/gx-plugin.yml
@@ -48,7 +48,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - uses: acryldata/sane-checkout-action@v3
      - uses: actions/setup-python@v5
        with:
diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml
index be6026098ce420..ff4531ab0b25c1 100644
--- a/.github/workflows/metadata-ingestion.yml
+++ b/.github/workflows/metadata-ingestion.yml
@@ -54,7 +54,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - uses: acryldata/sane-checkout-action@v3
      - uses: actions/setup-python@v5
        with:
diff --git a/.github/workflows/metadata-io.yml b/.github/workflows/metadata-io.yml
index 80af03e77eef82..6514b9e7226b60 100644
--- a/.github/workflows/metadata-io.yml
+++ b/.github/workflows/metadata-io.yml
@@ -67,7 +67,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - name: Gradle build (and test)
        run: |
          ./gradlew :metadata-io:test
diff --git a/.github/workflows/metadata-model.yml b/.github/workflows/metadata-model.yml
index 6f62284afcc172..f9262d89f9ef00 100644
--- a/.github/workflows/metadata-model.yml
+++ b/.github/workflows/metadata-model.yml
@@ -32,7 +32,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - uses: acryldata/sane-checkout-action@v3
      - uses: actions/setup-python@v5
        with:
diff --git a/.github/workflows/pr-labeler.yml b/.github/workflows/pr-labeler.yml
index de7ad21b3e67bb..55f7fea875388a 100644
--- a/.github/workflows/pr-labeler.yml
+++ b/.github/workflows/pr-labeler.yml
@@ -3,6 +3,10 @@ on:
  pull_request_target:
    types: [opened, reopened]

+permissions:
+  contents: read
+  pull-requests: write
+
jobs:
  triage:
    permissions:
@@ -10,11 +14,11 @@ jobs:
      pull-requests: write
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/labeler@v4
+      - uses: actions/labeler@v5
        with:
          repo-token: "${{ secrets.GITHUB_TOKEN }}"
          configuration-path: ".github/pr-labeler-config.yml"
-      - uses: actions-ecosystem/action-add-labels@v1.1.0
+      - uses: actions-ecosystem/action-add-labels@v1.1.3
        # only add names of Acryl Data team members here
        if:
          ${{
@@ -25,7 +29,6 @@ jobs:
              "chriscollins3456",
              "david-leifker",
              "shirshanka",
-              "sid-acryl",
              "swaroopjagadish",
              "treff7es",
              "yoonhyejin",
@@ -46,7 +49,9 @@ jobs:
              "kevinkarchacryl",
              "sgomezvillamor",
              "acrylJonny",
-              "chakru-r"
+              "chakru-r",
+              "brock-acryl",
+              "mminichino"
            ]'),
            github.actor
          )
@@ -55,7 +60,7 @@ jobs:
          github_token: ${{ github.token }}
          labels: |
            community-contribution
-      - uses: actions-ecosystem/action-add-labels@v1.1.0
+      - uses: actions-ecosystem/action-add-labels@v1.1.3
        # only add names of champions here. Confirm with DevRel Team
        if:
          ${{
diff --git a/.github/workflows/prefect-plugin.yml b/.github/workflows/prefect-plugin.yml
index 401efa340ae8ca..f1b06f31a05224 100644
--- a/.github/workflows/prefect-plugin.yml
+++ b/.github/workflows/prefect-plugin.yml
@@ -39,7 +39,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - uses: acryldata/sane-checkout-action@v3
      - uses: actions/setup-python@v5
        with:
diff --git a/.github/workflows/publish-datahub-jars.yml b/.github/workflows/publish-datahub-jars.yml
index 393f9d993e2a2f..fa3554c05d374c 100644
--- a/.github/workflows/publish-datahub-jars.yml
+++ b/.github/workflows/publish-datahub-jars.yml
@@ -57,7 +57,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.10"
@@ -210,7 +210,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.10"
diff --git a/.github/workflows/python-build-pages.yml b/.github/workflows/python-build-pages.yml
index 8971722c374fb7..9e1a2563478039 100644
--- a/.github/workflows/python-build-pages.yml
+++ b/.github/workflows/python-build-pages.yml
@@ -37,7 +37,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - uses: acryldata/sane-checkout-action@v3
      - uses: actions/setup-python@v5
        with:
diff --git a/.github/workflows/spark-smoke-test.yml b/.github/workflows/spark-smoke-test.yml
index e6a6705a72879c..7a2080b7a9ddf3 100644
--- a/.github/workflows/spark-smoke-test.yml
+++ b/.github/workflows/spark-smoke-test.yml
@@ -35,7 +35,7 @@ jobs:
        with:
          distribution: "zulu"
          java-version: 17
-      - uses: gradle/actions/setup-gradle@v3
+      - uses: gradle/actions/setup-gradle@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.10"
diff --git a/.gitignore b/.gitignore
index 43c627f9ed244f..19909b25fefe7f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -85,6 +85,7 @@ metadata-service/plugin/src/test/resources/sample-plugins/**
 smoke-test/rollback-reports
 coverage*.xml
 .vercel
+.envrc

 # A long series of binary directories we should ignore
 datahub-frontend/bin/main/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3697efa37770e7..103e1680b8d105 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,4 +1,4 @@
-# Auto-generated by .github/scripts/generate_pre_commit.py at 2025-01-09 10:08:09 UTC
+# Auto-generated by .github/scripts/generate_pre_commit.py at 2025-01-17 16:43:31 UTC
 # Do not edit this file directly. Run the script to regenerate.
 # Add additional hooks in .github/scripts/pre-commit-override.yaml
 repos:
@@ -442,4 +442,5 @@ repos:
      name: smoke-test cypress Lint Fix
      entry: ./gradlew :smoke-test:cypressLintFix
      language: system
-      files: ^smoke-test/tests/cypress/.*$
+      files: ^smoke-test/tests/cypress/.*\.tsx$
+      pass_filenames: false
diff --git a/build.gradle b/build.gradle
index e4fd70a99e6434..2984812bda13b8 100644
--- a/build.gradle
+++ b/build.gradle
@@ -39,13 +39,13 @@ buildscript {
  ext.springBootVersion = '3.2.9'
  ext.springKafkaVersion = '3.1.6'
  ext.openTelemetryVersion = '1.18.0'
-  ext.neo4jVersion = '5.14.0'
-  ext.neo4jTestVersion = '5.14.0'
-  ext.neo4jApocVersion = '5.14.0'
+  ext.neo4jVersion = '5.20.0'
+  ext.neo4jTestVersion = '5.20.0'
+  ext.neo4jApocVersion = '5.20.0'
  ext.testContainersVersion = '1.17.4'
  ext.elasticsearchVersion = '2.11.1' // ES 7.10, Opensearch 1.x, 2.x
  ext.jacksonVersion = '2.15.3'
-  ext.jettyVersion = '11.0.21'
+  ext.jettyVersion = '12.0.16' // see also datahub-frontend/play.gradle
  ext.playVersion = '2.8.22'
  ext.playScalaVersion = '2.13'
@@ -136,7 +136,8 @@ project.ext.externalDependency = [
  'datastaxOssNativeProtocol': 'com.datastax.oss:native-protocol:1.5.1',
  'datastaxOssCore': 'com.datastax.oss:java-driver-core:4.14.1',
  'datastaxOssQueryBuilder': 'com.datastax.oss:java-driver-query-builder:4.14.1',
-  'dgraph4j' : 'io.dgraph:dgraph4j:21.12.0',
+  'dgraph4j' : 'io.dgraph:dgraph4j:24.1.1',
+  'dgraphNetty': 'io.grpc:grpc-netty-shaded:1.69.0',
  'dropwizardMetricsCore': 'io.dropwizard.metrics:metrics-core:4.2.3',
  'dropwizardMetricsJmx': 'io.dropwizard.metrics:metrics-jmx:4.2.3',
  'ebean': 'io.ebean:ebean:' + ebeanVersion,
@@ -176,8 +177,9 @@ project.ext.externalDependency = [
  'jakartaValidation': 'jakarta.validation:jakarta.validation-api:3.1.0-M2',
  'jerseyCore': 'org.glassfish.jersey.core:jersey-client:2.41',
  'jerseyGuava': 'org.glassfish.jersey.bundles.repackaged:jersey-guava:2.25.1',
-  'jettyJaas': "org.eclipse.jetty:jetty-jaas:$jettyVersion",
+  'jettySecurity': "org.eclipse.jetty:jetty-security:$jettyVersion",
  'jettyClient': "org.eclipse.jetty:jetty-client:$jettyVersion",
+  'jettyJmx': "org.eclipse.jetty:jetty-jmx:$jettyVersion",
  'jettison': 'org.codehaus.jettison:jettison:1.5.4',
  'jgrapht': 'org.jgrapht:jgrapht-core:1.5.1',
  'jna': 'net.java.dev.jna:jna:5.12.1',
@@ -193,7 +195,7 @@ project.ext.externalDependency = [
  'junitJupiterEngine': "org.junit.jupiter:junit-jupiter-engine:$junitJupiterVersion",
  // avro-serde includes dependencies for `kafka-avro-serializer` `kafka-schema-registry-client` and `avro`
  'kafkaAvroSerde': "io.confluent:kafka-streams-avro-serde:$kafkaVersion",
-  'kafkaAvroSerializer': 'io.confluent:kafka-avro-serializer:5.1.4',
+  'kafkaAvroSerializer': "io.confluent:kafka-avro-serializer:$kafkaVersion",
  'kafkaClients': "org.apache.kafka:kafka-clients:$kafkaVersion-ccs",
  'snappy': 'org.xerial.snappy:snappy-java:1.1.10.5',
  'logbackClassic': "ch.qos.logback:logback-classic:$logbackClassic",
@@ -380,6 +382,13 @@ configure(subprojects.findAll {! it.name.startsWith('spark-lineage')}) {
      resolutionStrategy.force externalDependency.antlr4Runtime
      resolutionStrategy.force externalDependency.antlr4
      resolutionStrategy.force 'org.apache.mina:mina-core:2.2.4'
+      resolutionStrategy {
+        force "org.eclipse.jetty:jetty-security:${jettyVersion}"
+        force "org.eclipse.jetty:jetty-server:${jettyVersion}"
+        force "org.eclipse.jetty:jetty-ee10-servlet:${jettyVersion}"
+        force "org.eclipse.jetty:jetty-ee10-webapp:${jettyVersion}"
+        force "org.eclipse.jetty:jetty-xml:${jettyVersion}"
+      }
    }
  }
@@ -407,7 +416,7 @@ subprojects {
    implementation externalDependency.annotationApi
    constraints {
      implementation("com.google.googlejavaformat:google-java-format:$googleJavaFormatVersion")
-      implementation('io.netty:netty-all:4.1.115.Final')
+      implementation('io.netty:netty-all:4.1.116.Final')
      implementation('org.apache.commons:commons-compress:1.27.1')
      implementation('org.apache.velocity:velocity-engine-core:2.4')
      implementation('org.hibernate:hibernate-validator:6.0.20.Final')
diff --git a/datahub-frontend/app/security/AuthenticationManager.java b/datahub-frontend/app/security/AuthenticationManager.java
index f46dc57c232bd2..8e7d51a0776c23 100644
--- a/datahub-frontend/app/security/AuthenticationManager.java
+++ b/datahub-frontend/app/security/AuthenticationManager.java
@@ -1,68 +1,33 @@
 package security;

 import com.google.common.base.Preconditions;
-import java.util.Collections;
 import javax.annotation.Nonnull;
 import javax.naming.AuthenticationException;
-import javax.security.auth.callback.Callback;
-import javax.security.auth.callback.CallbackHandler;
-import javax.security.auth.callback.NameCallback;
-import javax.security.auth.callback.PasswordCallback;
-import javax.security.auth.login.LoginContext;
-import javax.security.auth.login.LoginException;
 import org.apache.commons.lang3.StringUtils;
-import org.eclipse.jetty.jaas.JAASLoginService;
-import org.eclipse.jetty.jaas.PropertyUserStoreManager;
-import play.Logger;
+import org.eclipse.jetty.security.UserPrincipal;
+import org.eclipse.jetty.util.security.Credential;

 public class AuthenticationManager {
-
-  private AuthenticationManager(boolean verbose) {}
+  private AuthenticationManager() {} // Prevent instantiation

   public static void authenticateJaasUser(@Nonnull String userName, @Nonnull String password)
       throws Exception {
     Preconditions.checkArgument(!StringUtils.isAnyEmpty(userName), "Username cannot be empty");
-    JAASLoginService jaasLoginService = new JAASLoginService("WHZ-Authentication");
-    PropertyUserStoreManager propertyUserStoreManager = new PropertyUserStoreManager();
-    propertyUserStoreManager.start();
-    jaasLoginService.setBeans(Collections.singletonList(propertyUserStoreManager));
-    JAASLoginService.INSTANCE.set(jaasLoginService);
-    try {
-      LoginContext lc =
-          new LoginContext("WHZ-Authentication", new WHZCallbackHandler(userName, password));
-      lc.login();
-    } catch (LoginException le) {
-      AuthenticationException authenticationException =
-          new AuthenticationException(le.getMessage());
-      authenticationException.setRootCause(le);
-      throw authenticationException;
-    }
-  }

-  private static class WHZCallbackHandler implements CallbackHandler {
-    private String password;
-    private String username;
-
-    private WHZCallbackHandler(@Nonnull String username, @Nonnull String password) {
-      this.username = username;
-      this.password = password;
-    }
+    try {
+      // Create and configure credentials for authentication
+      UserPrincipal userPrincipal = new UserPrincipal(userName, Credential.getCredential(password));

-    @Override
-    public void handle(@Nonnull Callback[] callbacks) {
-      NameCallback nc = null;
-      PasswordCallback pc = null;
-      for (Callback callback : callbacks) {
-        Logger.debug(
-            "The submitted callback is of type: " + callback.getClass() + " : " + callback);
-        if (callback instanceof NameCallback) {
-          nc = (NameCallback) callback;
-          nc.setName(this.username);
-        } else if (callback instanceof PasswordCallback) {
-          pc = (PasswordCallback) callback;
-          pc.setPassword(this.password.toCharArray());
-        }
+      // Verify credentials
+      if (!userPrincipal.authenticate(password)) {
+        throw new AuthenticationException("Invalid credentials for user: " + userName);
       }
+
+    } catch (Exception e) {
+      AuthenticationException authenticationException =
+          new AuthenticationException("Authentication failed");
+      authenticationException.setRootCause(e);
+      throw authenticationException;
     }
   }
 }
diff --git a/datahub-frontend/play.gradle b/datahub-frontend/play.gradle
index d513c3c232d9a0..1a9ffeede56251 100644
--- a/datahub-frontend/play.gradle
+++ b/datahub-frontend/play.gradle
@@ -50,7 +50,7 @@ dependencies {
  implementation externalDependency.springBeans
  implementation externalDependency.springContext
  implementation externalDependency.springBootAutoconfigure
-  implementation externalDependency.jettyJaas
+  implementation externalDependency.jettySecurity
  implementation externalDependency.graphqlJava
  implementation externalDependency.antlr4Runtime
  implementation externalDependency.antlr4
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java
index 69306862a46ef7..aec5352dec1a64 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java
@@ -28,6 +28,7 @@ private Constants() {}
   public static final String INCIDENTS_SCHEMA_FILE = "incident.graphql";
   public static final String CONTRACTS_SCHEMA_FILE = "contract.graphql";
   public static final String CONNECTIONS_SCHEMA_FILE = "connection.graphql";
+  public static final String VERSION_SCHEMA_FILE = "versioning.graphql";
   public static final String BROWSE_PATH_DELIMITER = "/";
   public static final String BROWSE_PATH_V2_DELIMITER = "␟";
   public static final String VERSION_STAMP_FIELD_NAME = "versionStamp";
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java
index b15db80a8487ae..2c5e841322f45f 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java
@@ -124,6 +124,8 @@
 import com.linkedin.datahub.graphql.generated.TestResult;
 import com.linkedin.datahub.graphql.generated.TypeQualifier;
 import com.linkedin.datahub.graphql.generated.UserUsageCounts;
+import com.linkedin.datahub.graphql.generated.VersionProperties;
+import com.linkedin.datahub.graphql.generated.VersionSet;
 import com.linkedin.datahub.graphql.resolvers.MeResolver;
 import com.linkedin.datahub.graphql.resolvers.assertion.AssertionRunEventResolver;
 import com.linkedin.datahub.graphql.resolvers.assertion.DeleteAssertionResolver;
@@ -324,6 +326,7 @@
 import com.linkedin.datahub.graphql.resolvers.user.ListUsersResolver;
 import com.linkedin.datahub.graphql.resolvers.user.RemoveUserResolver;
 import com.linkedin.datahub.graphql.resolvers.user.UpdateUserStatusResolver;
+import com.linkedin.datahub.graphql.resolvers.versioning.VersionsSearchResolver;
 import com.linkedin.datahub.graphql.resolvers.view.CreateViewResolver;
 import com.linkedin.datahub.graphql.resolvers.view.DeleteViewResolver;
 import com.linkedin.datahub.graphql.resolvers.view.ListGlobalViewsResolver;
@@ -381,6 +384,7 @@
 import com.linkedin.datahub.graphql.types.structuredproperty.StructuredPropertyType;
 import com.linkedin.datahub.graphql.types.tag.TagType;
 import com.linkedin.datahub.graphql.types.test.TestType;
+import com.linkedin.datahub.graphql.types.versioning.VersionSetType;
 import com.linkedin.datahub.graphql.types.view.DataHubViewType;
 import com.linkedin.entity.client.EntityClient;
 import com.linkedin.entity.client.SystemEntityClient;
@@ -537,6 +541,7 @@ public class GmsGraphQLEngine {
   private final IncidentType incidentType;
   private final RestrictedType restrictedType;
   private final DataProcessInstanceType dataProcessInstanceType;
+  private final VersionSetType versionSetType;

   private final int graphQLQueryComplexityLimit;
   private final int graphQLQueryDepthLimit;
@@ -658,6 +663,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) {
     this.incidentType = new IncidentType(entityClient);
     this.restrictedType = new RestrictedType(entityClient, restrictedService);
     this.dataProcessInstanceType = new DataProcessInstanceType(entityClient, featureFlags);
+    this.versionSetType = new VersionSetType(entityClient);

     this.graphQLQueryComplexityLimit = args.graphQLQueryComplexityLimit;
     this.graphQLQueryDepthLimit = args.graphQLQueryDepthLimit;
@@ -707,6 +713,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) {
             entityTypeType,
             formType,
             incidentType,
+            versionSetType,
             restrictedType,
             businessAttributeType,
             dataProcessInstanceType));
@@ -809,6 +816,8 @@ public void configureRuntimeWiring(final RuntimeWiring.Builder builder) {
     configureConnectionResolvers(builder);
     configureDeprecationResolvers(builder);
     configureMetadataAttributionResolver(builder);
+    configureVersionPropertiesResolvers(builder);
+    configureVersionSetResolvers(builder);
   }

   private void configureOrganisationRoleResolvers(RuntimeWiring.Builder builder) {
@@ -863,7 +872,8 @@ public GraphQLEngine.Builder builder() {
         .addSchema(fileBasedSchema(ASSERTIONS_SCHEMA_FILE))
         .addSchema(fileBasedSchema(INCIDENTS_SCHEMA_FILE))
         .addSchema(fileBasedSchema(CONTRACTS_SCHEMA_FILE))
-        .addSchema(fileBasedSchema(COMMON_SCHEMA_FILE));
+        .addSchema(fileBasedSchema(COMMON_SCHEMA_FILE))
+        .addSchema(fileBasedSchema(VERSION_SCHEMA_FILE));

     for (GmsGraphQLPlugin plugin : this.graphQLPlugins) {
       List<String> pluginSchemaFiles = plugin.getSchemaFiles();
@@ -1050,6 +1060,7 @@ private void configureQueryResolvers(final RuntimeWiring.Builder builder) {
             .dataFetcher("form", getResolver(formType))
             .dataFetcher("view", getResolver(dataHubViewType))
             .dataFetcher("structuredProperty", getResolver(structuredPropertyType))
+            .dataFetcher("versionSet", getResolver(versionSetType))
             .dataFetcher("listPolicies", new ListPoliciesResolver(this.entityClient))
             .dataFetcher("getGrantedPrivileges", new GetGrantedPrivilegesResolver())
             .dataFetcher("listUsers", new ListUsersResolver(this.entityClient))
@@ -2295,7 +2306,15 @@ private void configureTypeResolvers(final RuntimeWiring.Builder builder) {
         .type(
             "TimeSeriesAspect",
             typeWiring -> typeWiring.typeResolver(new TimeSeriesAspectInterfaceTypeResolver()))
-        .type("ResultsType", typeWiring -> typeWiring.typeResolver(new ResultsTypeResolver()));
+        .type("ResultsType", typeWiring -> typeWiring.typeResolver(new ResultsTypeResolver()))
+        .type(
+            "SupportsVersions",
+            typeWiring ->
+                typeWiring.typeResolver(
+                    new EntityInterfaceTypeResolver(
+                        loadableTypes.stream()
+                            .map(graphType -> (EntityType<?, ?>) graphType)
+                            .collect(Collectors.toList()))));
   }

   /** Configures custom type extensions leveraged within our GraphQL schema. */
@@ -3089,16 +3108,6 @@ private void configureDataProcessInstanceResolvers(final RuntimeWiring.Builder b
                       ? dataProcessInstance.getDataPlatformInstance().getUrn()
                       : null;
                 }))
-        .dataFetcher(
-            "platform",
-            new LoadableTypeResolver<>(
-                dataPlatformType,
-                (env) -> {
-                  final DataProcessInstance dataProcessInstance = env.getSource();
-                  return dataProcessInstance.getPlatform() != null
-                      ? dataProcessInstance.getPlatform().getUrn()
-                      : null;
-                }))
         .dataFetcher("parentContainers", new ParentContainersResolver(entityClient))
         .dataFetcher(
             "container",
@@ -3322,4 +3331,34 @@ private void configureMetadataAttributionResolver(final RuntimeWiring.Builder bu
                 entityTypes, (env) -> ((MetadataAttribution) env.getSource()).getSource())));
   }
+
+  private void configureVersionPropertiesResolvers(final RuntimeWiring.Builder builder) {
+    builder.type(
+        "VersionProperties",
+        typeWiring ->
+            typeWiring.dataFetcher(
+                "versionSet",
+                new LoadableTypeResolver<>(
+                    versionSetType,
+                    (env) -> {
+                      final VersionProperties versionProperties = env.getSource();
+                      return versionProperties != null
+                          ? versionProperties.getVersionSet().getUrn()
+                          : null;
+                    })));
+  }
+
+  private void configureVersionSetResolvers(final RuntimeWiring.Builder builder) {
+    builder.type(
+        "VersionSet",
+        typeWiring ->
+            typeWiring
+                .dataFetcher(
+                    "latestVersion",
+                    new EntityTypeResolver(
+                        entityTypes, (env) -> ((VersionSet) env.getSource()).getLatestVersion()))
+                .dataFetcher(
+                    "versionsSearch",
+                    new VersionsSearchResolver(this.entityClient, this.viewService)));
+  }
 }
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java
index c25d6af75fe76d..29d1c02dacb416 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/authorization/AuthorizationUtils.java
@@ -232,6 +232,10 @@ public static <T> T restrictEntity(@Nonnull Object entity, Class<T> clazz) {
     try {
       Object[] args =
           allFields.stream()
+              // New versions of graphql.codegen generate serialVersionUID
+              // We need to filter serialVersionUID out because serialVersionUID is
+              // never part of the entity type constructor
+              .filter(field -> !field.getName().contains("serialVersionUID"))
               .map(
                   field -> {
                     // properties are often not required but only because
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java
index 3647eb55b2583a..8cdc13a14be87c 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java
@@ -189,6 +189,7 @@ public CompletableFuture<AppConfig> get(final DataFetchingEnvironment environmen
             .setEditableDatasetNameEnabled(_featureFlags.isEditableDatasetNameEnabled())
             .setShowSeparateSiblings(_featureFlags.isShowSeparateSiblings())
             .setShowManageStructuredProperties(_featureFlags.isShowManageStructuredProperties())
+            .setEntityVersioningEnabled(_featureFlags.isEntityVersioning())
             .build();

     appConfig.setFeatureFlags(featureFlagsConfig);
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java
index 69e049af1e87b7..f32fd03a384005 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java
@@ -12,7 +12,9 @@
 import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils;
 import com.linkedin.datahub.graphql.exception.AuthorizationException;
 import com.linkedin.datahub.graphql.featureflags.FeatureFlags;
+import com.linkedin.datahub.graphql.generated.EntityType;
 import com.linkedin.datahub.graphql.generated.LinkVersionInput;
+import com.linkedin.datahub.graphql.generated.VersionSet;
 import com.linkedin.metadata.entity.IngestResult;
 import com.linkedin.metadata.entity.versioning.EntityVersioningService;
 import com.linkedin.metadata.entity.versioning.VersionPropertiesInput;
@@ -21,24 +23,22 @@
 import io.datahubproject.metadata.context.OperationContext;
 import java.util.List;
 import java.util.concurrent.CompletableFuture;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.lang.StringUtils;

 /**
  * Currently only supports linking the latest version, but may be modified later to support inserts
  */
-public class LinkAssetVersionResolver implements DataFetcher<CompletableFuture<String>> {
+@Slf4j
+@RequiredArgsConstructor
+public class LinkAssetVersionResolver implements DataFetcher<CompletableFuture<VersionSet>> {

   private final EntityVersioningService entityVersioningService;
   private final FeatureFlags featureFlags;

-  public LinkAssetVersionResolver(
-      EntityVersioningService entityVersioningService, FeatureFlags featureFlags) {
-    this.entityVersioningService = entityVersioningService;
-    this.featureFlags = featureFlags;
-  }
-
   @Override
-  public CompletableFuture<String> get(DataFetchingEnvironment environment) throws Exception {
+  public CompletableFuture<VersionSet> get(DataFetchingEnvironment environment) throws Exception {
     final QueryContext context = environment.getContext();
     final LinkVersionInput input =
         bindArgument(environment.getArgument("input"), LinkVersionInput.class);
@@ -75,12 +75,22 @@ public CompletableFuture<String> get(DataFetchingEnvironment environment) throws
               entityVersioningService.linkLatestVersion(
                   opContext, versionSetUrn, entityUrn, versionPropertiesInput);

-          return linkResults.stream()
-              .filter(
-                  ingestResult -> input.getLinkedEntity().equals(ingestResult.getUrn().toString()))
-              .map(ingestResult -> ingestResult.getUrn().toString())
-              .findAny()
-              .orElse(StringUtils.EMPTY);
+          String successVersionSetUrn =
+              linkResults.stream()
+                  .filter(
+                      ingestResult ->
+                          input.getLinkedEntity().equals(ingestResult.getUrn().toString()))
+                  .map(ingestResult -> ingestResult.getUrn().toString())
+                  .findAny()
+                  .orElse(StringUtils.EMPTY);
+
+          if (StringUtils.isEmpty(successVersionSetUrn)) {
+            return null;
+          }
+          VersionSet versionSet = new VersionSet();
+          versionSet.setUrn(versionSetUrn.toString());
+          versionSet.setType(EntityType.VERSION_SET);
+          return versionSet;
         },
         this.getClass().getSimpleName(),
         "get");
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java
index 3d5027a0d668ac..33ab83a59c6771 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java
@@ -12,14 +12,18 @@
 import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils;
 import com.linkedin.datahub.graphql.exception.AuthorizationException;
 import com.linkedin.datahub.graphql.featureflags.FeatureFlags;
+import com.linkedin.datahub.graphql.generated.EntityType;
 import com.linkedin.datahub.graphql.generated.UnlinkVersionInput;
+import com.linkedin.datahub.graphql.generated.VersionSet;
+import com.linkedin.metadata.entity.RollbackResult;
 import com.linkedin.metadata.entity.versioning.EntityVersioningService;
 import graphql.schema.DataFetcher;
 import graphql.schema.DataFetchingEnvironment;
 import io.datahubproject.metadata.context.OperationContext;
+import java.util.List;
 import java.util.concurrent.CompletableFuture;

-public class UnlinkAssetVersionResolver implements DataFetcher<CompletableFuture<Boolean>> {
+public class UnlinkAssetVersionResolver implements DataFetcher<CompletableFuture<VersionSet>> {

   private final EntityVersioningService entityVersioningService;
   private final FeatureFlags featureFlags;
@@ -31,7 +35,7 @@ public UnlinkAssetVersionResolver(
   }

   @Override
-  public CompletableFuture<Boolean> get(DataFetchingEnvironment environment) throws Exception {
+  public CompletableFuture<VersionSet> get(DataFetchingEnvironment environment) throws Exception {
     if (!featureFlags.isEntityVersioning()) {
       throw new IllegalAccessError(
           "Entity Versioning is not configured, please enable before attempting to use this feature.");
@@ -58,8 +62,15 @@ public CompletableFuture<Boolean> get(DataFetchingEnvironment environment) throw
     }
     return GraphQLConcurrencyUtils.supplyAsync(
         () -> {
-          entityVersioningService.unlinkVersion(opContext, versionSetUrn, entityUrn);
-          return true;
+          List<RollbackResult> results =
+              entityVersioningService.unlinkVersion(opContext, versionSetUrn, entityUrn);
+          if (results.isEmpty() || results.stream().allMatch(RollbackResult::isNoOp)) {
+            return null;
+          }
+          VersionSet versionSet = new VersionSet();
+          versionSet.setUrn(versionSetUrn.toString());
+          versionSet.setType(EntityType.VERSION_SET);
+          return versionSet;
         },
         this.getClass().getSimpleName(),
         "get");
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java
index a01b3aaec9c982..f105a72a1273ee 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java
@@ -18,13 +18,18 @@
 import com.google.common.collect.ImmutableList;
 import com.linkedin.common.urn.Urn;
+import com.linkedin.common.urn.UrnUtils;
 import com.linkedin.datahub.graphql.QueryContext;
+import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils;
 import com.linkedin.datahub.graphql.generated.EntityType;
 import com.linkedin.datahub.graphql.generated.FacetFilterInput;
 import com.linkedin.datahub.graphql.generated.SearchResults;
 import com.linkedin.datahub.graphql.generated.SearchSortInput;
+import com.linkedin.datahub.graphql.resolvers.ResolverUtils;
 import com.linkedin.datahub.graphql.types.common.mappers.SearchFlagsInputMapper;
 import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper;
+import com.linkedin.datahub.graphql.types.mappers.UrnSearchResultsMapper;
+import com.linkedin.entity.client.EntityClient;
 import com.linkedin.metadata.query.SearchFlags;
 import com.linkedin.metadata.query.filter.ConjunctiveCriterion;
 import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray;
@@ -33,24 +38,32 @@
 import com.linkedin.metadata.query.filter.Filter;
 import com.linkedin.metadata.query.filter.SortCriterion;
 import com.linkedin.metadata.query.filter.SortOrder;
+import com.linkedin.metadata.search.SearchResult;
 import com.linkedin.metadata.service.ViewService;
 import com.linkedin.view.DataHubViewInfo;
 import io.datahubproject.metadata.context.OperationContext;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import java.util.Optional;
 import java.util.Set;
+import java.util.concurrent.CompletableFuture;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
 import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.StringUtils;
 import org.codehaus.plexus.util.CollectionUtils;

 @Slf4j
 public class SearchUtils {
   private SearchUtils() {}

+  private static final int DEFAULT_SEARCH_COUNT = 10;
+  private static final int DEFAULT_SCROLL_COUNT = 10;
+  private static final String DEFAULT_SCROLL_KEEP_ALIVE = "5m";
+
   /** Entities that are searched by default in Search Across Entities */
   public static final List<EntityType> SEARCHABLE_ENTITY_TYPES =
       ImmutableList.of(
@@ -348,4 +361,98 @@ public static List<SortCriterion> getSortCriteria(@Nullable final SearchSortInpu
     return sortCriteria;
   }
+
+  public static CompletableFuture<SearchResults> searchAcrossEntities(
+      QueryContext inputContext,
+      final EntityClient _entityClient,
+      final ViewService _viewService,
+      List<EntityType> inputEntityTypes,
+      String inputQuery,
+      Filter baseFilter,
+      String viewUrn,
+      List<SortCriterion> sortCriteria,
+      com.linkedin.datahub.graphql.generated.SearchFlags inputSearchFlags,
+      Integer inputCount,
+      Integer inputStart,
+      String className) {
+
+    final List<EntityType> entityTypes =
+        (inputEntityTypes == null || inputEntityTypes.isEmpty())
+            ? SEARCHABLE_ENTITY_TYPES
+            : inputEntityTypes;
+    final List<String> entityNames =
+        entityTypes.stream().map(EntityTypeMapper::getName).collect(Collectors.toList());
+
+    // escape forward slash since it is a reserved character in Elasticsearch, default to * if
+    // blank/empty
+    final String query =
+        StringUtils.isNotBlank(inputQuery) ? ResolverUtils.escapeForwardSlash(inputQuery) : "*";
+
+    final Optional<SearchFlags> searchFlags =
+        Optional.ofNullable(inputSearchFlags)
+            .map((flags) -> SearchFlagsInputMapper.map(inputContext, flags));
+    final OperationContext context =
+        inputContext.getOperationContext().withSearchFlags(searchFlags::orElse);
+
+    final int count = Optional.ofNullable(inputCount).orElse(DEFAULT_SEARCH_COUNT);
+    final int start = Optional.ofNullable(inputStart).orElse(0);
+
+    return GraphQLConcurrencyUtils.supplyAsync(
+        () -> {
+          final OperationContext baseContext = inputContext.getOperationContext();
+          final Optional<DataHubViewInfo> maybeResolvedView =
+              Optional.ofNullable(viewUrn)
+                  .map((urn) -> resolveView(baseContext, _viewService, UrnUtils.getUrn(urn)));
+
+          final List<String> finalEntityNames =
+              maybeResolvedView
+                  .map(
+                      (view) ->
+                          intersectEntityTypes(entityNames, view.getDefinition().getEntityTypes()))
+                  .orElse(entityNames);
+
+          final Filter finalFilters =
+              maybeResolvedView
+                  .map((view) -> combineFilters(baseFilter, view.getDefinition().getFilter()))
+                  .orElse(baseFilter);
+
+          log.debug(
+              "Executing search for multiple entities: entity types {}, query {}, filters: {}, start: {}, count: {}",
+              finalEntityNames,
+              query,
+              finalFilters,
+              start,
+              count);
+
+          try {
+            final SearchResult searchResult =
+                _entityClient.searchAcrossEntities(
+                    context,
+                    finalEntityNames,
+                    query,
+                    finalFilters,
+                    start,
+                    count,
+                    sortCriteria,
+                    null);
+            return UrnSearchResultsMapper.map(inputContext, searchResult);
+          } catch (Exception e) {
+            log.warn(
+                "Failed to execute search for multiple entities: entity types {}, query {}, filters: {}, start: {}, count: {}",
+                finalEntityNames,
+                query,
+                finalFilters,
+                start,
+                count);
+            throw new RuntimeException(
+                "Failed to execute search: "
+                    + String.format(
+                        "entity types %s, query %s, filters: %s, start: %s, count: %s",
+                        finalEntityNames, query, finalFilters, start, count),
+                e);
+          }
+        },
+        className,
+        "searchAcrossEntities");
+  }
 }
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolver.java
new file mode 100644
index 00000000000000..915e1cf00ebc6b
--- /dev/null
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolver.java
@@ -0,0 +1,87 @@
+package com.linkedin.datahub.graphql.resolvers.versioning;
+
+import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument;
+import static com.linkedin.metadata.Constants.*;
+
+import com.linkedin.datahub.graphql.QueryContext;
+import com.linkedin.datahub.graphql.generated.Entity;
+import com.linkedin.datahub.graphql.generated.SearchAcrossEntitiesInput;
+import com.linkedin.datahub.graphql.generated.SearchFlags;
+import com.linkedin.datahub.graphql.generated.SearchResults;
+import com.linkedin.datahub.graphql.resolvers.ResolverUtils;
+import com.linkedin.datahub.graphql.resolvers.search.SearchUtils;
+import com.linkedin.entity.client.EntityClient;
+import com.linkedin.metadata.query.filter.Condition;
+import com.linkedin.metadata.query.filter.ConjunctiveCriterion;
+import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray;
+import com.linkedin.metadata.query.filter.Criterion;
+import com.linkedin.metadata.query.filter.CriterionArray;
+import com.linkedin.metadata.query.filter.Filter;
+import com.linkedin.metadata.query.filter.SortCriterion;
+import com.linkedin.metadata.query.filter.SortOrder;
+import com.linkedin.metadata.service.ViewService;
+import com.linkedin.metadata.utils.CriterionUtils;
+import graphql.schema.DataFetcher;
+import graphql.schema.DataFetchingEnvironment;
+import java.util.List;
+import java.util.Optional;
+import java.util.concurrent.CompletableFuture;
+import java.util.stream.Stream;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+
+/** Resolver that executes a searchAcrossEntities only on a version set's versioned entities */
+@Slf4j
+@RequiredArgsConstructor
+public class VersionsSearchResolver implements DataFetcher<CompletableFuture<SearchResults>> {
+
+  private static final String VERSION_SET_FIELD_NAME = "versionSet";
+
+  private final EntityClient _entityClient;
+  private final ViewService _viewService;
+
+  @Override
+  public CompletableFuture<SearchResults> get(DataFetchingEnvironment environment) {
+    final Entity entity = environment.getSource();
+    final QueryContext context = environment.getContext();
+    final SearchAcrossEntitiesInput input =
+        bindArgument(environment.getArgument("input"), SearchAcrossEntitiesInput.class);
+
+    final Criterion versionSetFilter =
+        CriterionUtils.buildCriterion(VERSION_SET_FIELD_NAME, Condition.EQUAL, entity.getUrn());
+    final Filter baseFilter =
+        new Filter()
+            .setOr(
+                new ConjunctiveCriterionArray(
+                    new ConjunctiveCriterion().setAnd(new CriterionArray(versionSetFilter))));
+    final Filter inputFilter = ResolverUtils.buildFilter(null, input.getOrFilters());
+
+    final List<SortCriterion> initialSortCriteria =
+        SearchUtils.getSortCriteria(input.getSortInput());
+    final List<SortCriterion> sortCriteria =
+        Stream.concat(
+                initialSortCriteria.stream(),
+                Stream.of(
+                    new SortCriterion()
+                        .setField(VERSION_SORT_ID_FIELD_NAME)
+                        .setOrder(SortOrder.DESCENDING)))
+            .toList();
+
+    SearchFlags searchFlags = Optional.ofNullable(input.getSearchFlags()).orElse(new SearchFlags());
+    searchFlags.setFilterNonLatestVersions(false);
+
+    return SearchUtils.searchAcrossEntities(
+        context,
+        _entityClient,
+        _viewService,
+        input.getTypes(),
+        input.getQuery(),
+        SearchUtils.combineFilters(inputFilter, baseFilter),
+        input.getViewUrn(),
+        sortCriteria,
+        searchFlags,
+        input.getCount(),
+        input.getStart(),
+        this.getClass().getSimpleName());
+  }
+}
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java
index 9f5025ccf303a2..0b3a445175c4c1 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/SearchFlagsInputMapper.java
@@ -69,6 +69,9 @@ public com.linkedin.metadata.query.SearchFlags apply(
       result.setCustomHighlightingFields(
           new StringArray(searchFlags.getCustomHighlightingFields()));
     }
+    if (searchFlags.getFilterNonLatestVersions() != null) {
+      result.setFilterNonLatestVersions(searchFlags.getFilterNonLatestVersions());
+    }
     return result;
   }
 }
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java
index eae33e6da2e56d..b815c1b1c1dd9f 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java
@@ -41,6 +41,7 @@
 import com.linkedin.datahub.graphql.generated.StructuredPropertyEntity;
 import com.linkedin.datahub.graphql.generated.Tag;
 import com.linkedin.datahub.graphql.generated.Test;
+import com.linkedin.datahub.graphql.generated.VersionSet;
 import com.linkedin.datahub.graphql.types.mappers.ModelMapper;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
@@ -231,6 +232,11 @@ public Entity apply(@Nullable QueryContext context, Urn input) {
       ((DataProcessInstance) partialEntity).setUrn(input.toString());
       ((DataProcessInstance) partialEntity).setType(EntityType.DATA_PROCESS_INSTANCE);
     }
+    if (input.getEntityType().equals(VERSION_SET_ENTITY_NAME)) {
+      partialEntity = new VersionSet();
+      ((VersionSet) partialEntity).setUrn(input.toString());
+      ((VersionSet) partialEntity).setType(EntityType.VERSION_SET);
+    }
     return partialEntity;
   }
 }
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java
index 28c9c8936fdbfb..d721f5a5fb522d 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java
@@ -8,7 +8,6 @@
 import com.linkedin.data.DataMap;
 import com.linkedin.data.template.RecordTemplate;
 import com.linkedin.datahub.graphql.QueryContext;
-import com.linkedin.datahub.graphql.generated.DataPlatform;
 import com.linkedin.datahub.graphql.generated.DataProcessInstance;
 import com.linkedin.datahub.graphql.generated.EntityType;
 import com.linkedin.datahub.graphql.types.common.mappers.AuditStampMapper;
@@ -80,10 +79,6 @@ public DataProcessInstance apply(
           DataPlatformInstance dataPlatformInstance = new DataPlatformInstance(dataMap);
           dataProcessInstance.setDataPlatformInstance(
               DataPlatformInstanceAspectMapper.map(context, dataPlatformInstance));
-          DataPlatform dataPlatform = new DataPlatform();
-          dataPlatform.setUrn(dataPlatformInstance.getPlatform().toString());
-          dataPlatform.setType(EntityType.DATA_PLATFORM);
-          dataProcessInstance.setPlatform(dataPlatform);
         });
     mappingHelper.mapToResult(
         SUB_TYPES_ASPECT_NAME,
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java
index 6a3f9cb9b21f38..74ef4cf125cd24 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/DatasetType.java
@@ -89,7 +89,8 @@ public class DatasetType
           ACCESS_ASPECT_NAME,
           STRUCTURED_PROPERTIES_ASPECT_NAME,
           FORMS_ASPECT_NAME,
-          SUB_TYPES_ASPECT_NAME);
+          SUB_TYPES_ASPECT_NAME,
+          VERSION_PROPERTIES_ASPECT_NAME);
   private static final Set<String> FACET_FIELDS = ImmutableSet.of("origin", "platform");
   private static final String ENTITY_NAME = "dataset";
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java
index e411014c23c89b..aa7033b180e80e 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataset/mappers/DatasetMapper.java
@@ -17,6 +17,7 @@
 import com.linkedin.common.Status;
 import com.linkedin.common.SubTypes;
 import com.linkedin.common.TimeStamp;
+import com.linkedin.common.VersionProperties;
 import com.linkedin.common.urn.Urn;
 import com.linkedin.data.DataMap;
 import com.linkedin.datahub.graphql.QueryContext;
@@ -48,6 +49,7 @@
 import com.linkedin.datahub.graphql.types.rolemetadata.mappers.AccessMapper;
 import com.linkedin.datahub.graphql.types.structuredproperty.StructuredPropertiesMapper;
 import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper;
+import com.linkedin.datahub.graphql.types.versioning.VersionPropertiesMapper;
 import com.linkedin.dataset.DatasetDeprecation;
 import com.linkedin.dataset.DatasetProperties;
 import com.linkedin.dataset.EditableDatasetProperties;
@@ -183,6 +185,11 @@ public Dataset apply(
         SUB_TYPES_ASPECT_NAME,
         (dashboard, dataMap) ->
             dashboard.setSubTypes(SubTypesMapper.map(context, new SubTypes(dataMap))));
+    mappingHelper.mapToResult(
+        VERSION_PROPERTIES_ASPECT_NAME,
+        (entity, dataMap) ->
+            entity.setVersionProperties(
+                VersionPropertiesMapper.map(context, new VersionProperties(dataMap))));

     if (context != null && !canView(context.getOperationContext(), entityUrn)) {
       return AuthorizationUtils.restrictEntity(mappingHelper.getResult(), Dataset.class);
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java
index 5b72c2b3c11c5e..334faf753cb8b5 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java
@@ -77,9 +77,6 @@ public class EntityTypeUrnMapper {
           .put(
               Constants.BUSINESS_ATTRIBUTE_ENTITY_NAME,
               "urn:li:entityType:datahub.businessAttribute")
-          .put(
-              Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME,
-              "urn:li:entityType:datahub.dataProcessInstance")
           .build();

   private static final Map<String, String> ENTITY_TYPE_URN_TO_NAME =
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java
index 7102fd4aed9743..11e6b5180f8c1c 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelMapper.java
@@ -13,6 +13,7 @@
 import com.linkedin.common.InstitutionalMemory;
 import com.linkedin.common.Ownership;
 import com.linkedin.common.Status;
+import com.linkedin.common.VersionProperties;
 import com.linkedin.common.urn.Urn;
 import com.linkedin.data.DataMap;
 import com.linkedin.data.template.RecordTemplate;
@@ -38,6 +39,7 @@
 import com.linkedin.datahub.graphql.types.mappers.ModelMapper;
 import com.linkedin.datahub.graphql.types.structuredproperty.StructuredPropertiesMapper;
 import com.linkedin.datahub.graphql.types.tag.mappers.GlobalTagsMapper;
+import com.linkedin.datahub.graphql.types.versioning.VersionPropertiesMapper;
 import com.linkedin.domain.Domains;
 import com.linkedin.entity.EntityResponse;
 import com.linkedin.entity.EnvelopedAspectMap;
@@ -180,6 +182,11 @@ public MLModel apply(
         FORMS_ASPECT_NAME,
         ((entity, dataMap) ->
             entity.setForms(FormsMapper.map(new Forms(dataMap), entityUrn.toString()))));
+    mappingHelper.mapToResult(
+        VERSION_PROPERTIES_ASPECT_NAME,
+        (entity, dataMap) ->
+            entity.setVersionProperties(
+                VersionPropertiesMapper.map(context, new VersionProperties(dataMap))));

     if (context != null && !canView(context.getOperationContext(), entityUrn)) {
       return AuthorizationUtils.restrictEntity(mappingHelper.getResult(), MLModel.class);
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionPropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionPropertiesMapper.java
new file mode 100644
index 00000000000000..f89ebdc9f2b043
--- /dev/null
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionPropertiesMapper.java
@@ -0,0 +1,53 @@
+package com.linkedin.datahub.graphql.types.versioning;
+
+import com.linkedin.datahub.graphql.QueryContext;
+import com.linkedin.datahub.graphql.generated.EntityType;
+import com.linkedin.datahub.graphql.generated.VersionProperties;
+import com.linkedin.datahub.graphql.generated.VersionSet;
+import com.linkedin.datahub.graphql.types.mappers.MapperUtils;
+import com.linkedin.datahub.graphql.types.mappers.ModelMapper;
+import com.linkedin.datahub.graphql.types.mlmodel.mappers.VersionTagMapper;
+import java.util.stream.Collectors;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+
+public class VersionPropertiesMapper
+    implements ModelMapper<com.linkedin.common.VersionProperties, VersionProperties> {
+  public static final VersionPropertiesMapper INSTANCE = new VersionPropertiesMapper();
+
+  public static VersionProperties map(
+      @Nullable QueryContext context,
+      @Nonnull final com.linkedin.common.VersionProperties versionProperties) {
+    return INSTANCE.apply(context, versionProperties);
+  }
+
+  @Override
+  public VersionProperties apply(
+      @Nullable QueryContext context, @Nonnull com.linkedin.common.VersionProperties input) {
+    final VersionProperties result = new VersionProperties();
+
+    result.setVersionSet(
+        VersionSet.builder()
+            .setUrn(input.getVersionSet().toString())
+            .setType(EntityType.VERSION_SET)
+            .build());
+
+    result.setVersion(VersionTagMapper.map(context, input.getVersion()));
+    result.setAliases(
+        input.getAliases().stream()
+            .map(alias -> VersionTagMapper.map(context, alias))
+            .collect(Collectors.toList()));
+    result.setComment(input.getComment());
+    result.setIsLatest(Boolean.TRUE.equals(input.isIsLatest()));
+
+    if (input.getMetadataCreatedTimestamp() != null) {
+      result.setCreated(MapperUtils.createResolvedAuditStamp(input.getMetadataCreatedTimestamp()));
+    }
+    if (input.getSourceCreatedTimestamp() != null) {
+      result.setCreatedInSource(
+          MapperUtils.createResolvedAuditStamp(input.getSourceCreatedTimestamp()));
+    }
+
+    return result;
+  }
+}
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetMapper.java
new file mode 100644
index 00000000000000..3a07115ece5f6e
--- /dev/null
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetMapper.java
@@ -0,0 +1,47 @@
+package com.linkedin.datahub.graphql.types.versioning;
+
+import static com.linkedin.metadata.Constants.VERSION_SET_PROPERTIES_ASPECT_NAME;
+
+import com.linkedin.data.DataMap;
+import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.VersionSet; +import com.linkedin.datahub.graphql.types.common.mappers.UrnToEntityMapper; +import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; +import com.linkedin.datahub.graphql.types.mappers.ModelMapper; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspectMap; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +public class VersionSetMapper implements ModelMapper<EntityResponse, VersionSet> { + + public static final VersionSetMapper INSTANCE = new VersionSetMapper(); + + public static VersionSet map( + @Nullable QueryContext context, @Nonnull final EntityResponse entityResponse) { + return INSTANCE.apply(context, entityResponse); + } + + @Override + public VersionSet apply(@Nullable QueryContext context, @Nonnull EntityResponse entityResponse) { + final VersionSet result = new VersionSet(); + result.setUrn(entityResponse.getUrn().toString()); + result.setType(EntityType.VERSION_SET); + + EnvelopedAspectMap aspectMap = entityResponse.getAspects(); + MappingHelper<VersionSet> mappingHelper = new MappingHelper<>(aspectMap, result); + mappingHelper.mapToResult( + VERSION_SET_PROPERTIES_ASPECT_NAME, + (versionSet, dataMap) -> mapVersionSetProperties(context, versionSet, dataMap)); + + return result; + } + + private void mapVersionSetProperties( + @Nullable QueryContext context, @Nonnull VersionSet versionSet, @Nonnull DataMap dataMap) { + com.linkedin.versionset.VersionSetProperties versionProperties = + new com.linkedin.versionset.VersionSetProperties(dataMap); + versionSet.setLatestVersion(UrnToEntityMapper.map(context, versionProperties.getLatest())); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetType.java new file mode 100644 index 00000000000000..ed2beff4530949 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/versioning/VersionSetType.java @@ -0,0 +1,79 @@ +package com.linkedin.datahub.graphql.types.versioning; + +import static com.linkedin.metadata.Constants.*; + +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.Entity; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.VersionSet; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.client.EntityClient; +import graphql.execution.DataFetcherResult; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public class VersionSetType + implements com.linkedin.datahub.graphql.types.EntityType<VersionSet> { + + public static final Set<String> ASPECTS_TO_FETCH = + ImmutableSet.of(VERSION_SET_PROPERTIES_ASPECT_NAME); + private final EntityClient _entityClient; + + @Override + public EntityType type() { + return EntityType.VERSION_SET; + } + + @Override + public Function<Entity, String> getKeyProvider() { + return Entity::getUrn; + } + + @Override + public Class<VersionSet> objectClass() { + return
VersionSet.class; + } + + @Override + public List<DataFetcherResult<VersionSet>> batchLoad( + @Nonnull List<String> urns, @Nonnull QueryContext context) throws Exception { + final List<Urn> versionSetUrns = + urns.stream().map(UrnUtils::getUrn).collect(Collectors.toList()); + + try { + final Map<Urn, EntityResponse> entities = + _entityClient.batchGetV2( + context.getOperationContext(), + VERSION_SET_ENTITY_NAME, + new HashSet<>(versionSetUrns), + ASPECTS_TO_FETCH); + + final List<EntityResponse> gmsResults = new ArrayList<>(); + for (Urn urn : versionSetUrns) { + gmsResults.add(entities.getOrDefault(urn, null)); + } + return gmsResults.stream() + .map( + gmsResult -> + gmsResult == null + ? null + : DataFetcherResult.<VersionSet>newResult() + .data(VersionSetMapper.map(context, gmsResult)) + .build()) + .collect(Collectors.toList()); + } catch (Exception e) { + throw new RuntimeException("Failed to batch load Version Sets", e); + } + } +} diff --git a/datahub-graphql-core/src/main/resources/app.graphql b/datahub-graphql-core/src/main/resources/app.graphql index 28688903687235..ca7f89415f6b87 100644 --- a/datahub-graphql-core/src/main/resources/app.graphql +++ b/datahub-graphql-core/src/main/resources/app.graphql @@ -531,6 +531,11 @@ type FeatureFlagsConfig { If turned on, show the manage structured properties tab in the govern dropdown """ showManageStructuredProperties: Boolean! + + """ + If turned on, exposes the versioning feature by allowing users to link entities in the UI. + """ + entityVersioningEnabled: Boolean! } """ @@ -573,4 +578,4 @@ type DocPropagationSettings { The default doc propagation setting for the platform. """ docColumnPropagation: Boolean -} \ No newline at end of file +} diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index b47be7ae32b2c4..64c79b05745ded 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -956,16 +956,6 @@ type Mutation { Remove Business Attribute """ removeBusinessAttribute(input: AddBusinessAttributeInput!): Boolean - - """ - Link the latest versioned entity to a Version Set - """ - linkAssetVersion(input: LinkVersionInput!): String - - """ - Unlink a versioned entity from a Version Set - """ - unlinkAssetVersion(input: UnlinkVersionInput!): Boolean } """ @@ -1231,6 +1221,11 @@ enum EntityType { A Business Attribute """ BUSINESS_ATTRIBUTE + + """ + A set of versioned entities, representing a single source / logical entity over time + """ + VERSION_SET } """ @@ -12921,56 +12916,6 @@ input ListBusinessAttributesInput { query: String } -""" -Input for linking a versioned entity to a Version Set -""" -input LinkVersionInput { - """ - The target version set - """ - versionSet: String! - - """ - The target versioned entity to link - """ - linkedEntity: String! - - """ - Version Tag label for the version, should be unique within a Version Set - """ - version: String!
- - """ - Optional timestamp from the source system - """ - sourceTimestamp: Long - - """ - Optional creator from the source system, will be converted to an Urn - """ - sourceCreator: String - - """ - Optional comment about the version - """ - comment: String -} - -""" -Input for unlinking a versioned entity from a Version Set -""" -input UnlinkVersionInput { - """ - The target version set - """ - versionSet: String - - """ - The target versioned entity to unlink - """ - unlinkedEntity: String -} - """ The result obtained when listing Business Attribute """ @@ -13084,11 +13029,6 @@ extend type DataProcessInstance { """ container: Container - """ - Standardized platform urn where the data process instance is defined - """ - platform: DataPlatform! - """ Recursively get the lineage of containers for this entity """ diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index 82bfb9ee26fc42..d8f17faa3d11c2 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -172,6 +172,11 @@ input SearchFlags { Whether or not to fetch and request for structured property facets when doing a search """ includeStructuredPropertyFacets: Boolean + + """ + Determines whether to filter out any non-latest entity version if the entity is part of a Version Set; defaults to true + """ + filterNonLatestVersions: Boolean } """ @@ -1497,4 +1502,4 @@ input GroupingCriterion { """ groupingEntityType: EntityType! -} \ No newline at end of file +} diff --git a/datahub-graphql-core/src/main/resources/versioning.graphql b/datahub-graphql-core/src/main/resources/versioning.graphql new file mode 100644 index 00000000000000..4a63463509c84d --- /dev/null +++ b/datahub-graphql-core/src/main/resources/versioning.graphql @@ -0,0 +1,148 @@ +type VersionSet implements Entity { + """ + The primary key of the VersionSet + """ + urn: String! + + """ + The standard Entity Type + """ + type: EntityType! + + """ + Granular API for querying edges extending from this entity + """ + relationships(input: RelationshipsInput!): EntityRelationshipsResult + + """ + The latest versioned entity linked to in this version set + """ + latestVersion: Entity + + """ + Executes a search on all versioned entities linked to this version set. + By default sorts by sortId in descending order + """ + versionsSearch(input: SearchAcrossEntitiesInput!): SearchResults +} + +type VersionProperties { + """ + The linked Version Set entity that ties multiple versioned assets together + """ + versionSet: VersionSet! + + """ + Label for this versioned asset, should be unique within a version set (not enforced) + """ + version: VersionTag! + + """ + Additional version identifiers for this versioned asset. + """ + aliases: [VersionTag!]! + + """ + Comment documenting what this version was created for, changes, or represents + """ + comment: String + + """ + Whether this version is currently the latest in its version set + """ + isLatest: Boolean! + + """ + Timestamp reflecting when the metadata for this version was created in DataHub + """ + created: ResolvedAuditStamp + + """ + Timestamp reflecting when the metadata for this version was created in the source system + """ + createdInSource: ResolvedAuditStamp +} + +interface SupportsVersions { + """ + Indicates that this entity is versioned and provides information about the version.
+ """ + versionProperties: VersionProperties +} + +extend type Dataset implements SupportsVersions { + versionProperties: VersionProperties +} + +extend type MLModel implements SupportsVersions { + versionProperties: VersionProperties +} + +extend type Query { + """ + Fetch a Version Set by its URN + """ + versionSet(urn: String!): VersionSet +} + +""" +Input for linking a versioned entity to a Version Set +""" +input LinkVersionInput { + """ + The target version set + """ + versionSet: String! + + """ + The target versioned entity to link + """ + linkedEntity: String! + + """ + Version Tag label for the version, should be unique within a version set (not enforced) + """ + version: String! + + """ + Optional timestamp from the source system + """ + sourceTimestamp: Long + + """ + Optional creator from the source system, will be converted to an Urn + """ + sourceCreator: String + + """ + Optional comment about the version + """ + comment: String +} + +""" +Input for unlinking a versioned entity from a Version Set +""" +input UnlinkVersionInput { + """ + The target version set + """ + versionSet: String + + """ + The target versioned entity to unlink + """ + unlinkedEntity: String +} + +extend type Mutation { + """ + Link the latest versioned entity to a Version Set + """ + linkAssetVersion(input: LinkVersionInput!): VersionSet + + """ + Unlink a versioned entity from a Version Set + """ + unlinkAssetVersion(input: UnlinkVersionInput!): VersionSet +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java index 690856263fccc5..c2eb92f4d1cd4c 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java @@ -56,8 +56,7 @@ public void testGetSuccessful() throws Exception { Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); - String result = resolver.get(mockEnv).get(); - assertEquals(result, TEST_ENTITY_URN); + assertEquals(resolver.get(mockEnv).get().getUrn(), TEST_VERSION_SET_URN); } @Test diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java index 0000ad24a04537..e162ce96e627c6 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java @@ -48,7 +48,7 @@ public void testGetSuccessful() throws Exception { Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); Mockito.when(mockEnv.getContext()).thenReturn(mockContext); - assertTrue(resolver.get(mockEnv).get()); + assertEquals(resolver.get(mockEnv).get(), null); Mockito.verify(mockService) .unlinkVersion( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolverTest.java 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolverTest.java new file mode 100644 index 00000000000000..3554df074df698 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/versioning/VersionsSearchResolverTest.java @@ -0,0 +1,294 @@ +package com.linkedin.datahub.graphql.resolvers.versioning; + +import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; +import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.*; +import static org.mockito.ArgumentMatchers.*; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertThrows; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.template.StringArray; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.AndFilterInput; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.FacetFilterInput; +import com.linkedin.datahub.graphql.generated.SearchAcrossEntitiesInput; +import com.linkedin.datahub.graphql.generated.SearchFlags; +import com.linkedin.datahub.graphql.generated.SearchResults; +import com.linkedin.datahub.graphql.generated.SearchSortInput; +import com.linkedin.datahub.graphql.generated.SortCriterion; +import com.linkedin.datahub.graphql.generated.SortOrder; +import com.linkedin.datahub.graphql.generated.VersionSet; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.metadata.search.SearchResult; +import com.linkedin.metadata.search.SearchResultMetadata; +import com.linkedin.metadata.service.ViewService; +import com.linkedin.metadata.utils.CriterionUtils; +import com.linkedin.r2.RemoteInvocationException; +import com.linkedin.view.DataHubViewDefinition; +import com.linkedin.view.DataHubViewInfo; +import com.linkedin.view.DataHubViewType; +import graphql.schema.DataFetchingEnvironment; +import java.util.List; +import java.util.concurrent.CompletionException; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +public class VersionsSearchResolverTest { + + private static final String VERSION_SET_URN = "urn:li:versionSet:(my_version_set,dataset)"; + private static final Urn TEST_VIEW_URN = UrnUtils.getUrn("urn:li:dataHubView:test"); + private static final Urn TEST_USER_URN = UrnUtils.getUrn("urn:li:corpuser:test"); + + private static final SearchAcrossEntitiesInput BASIC_INPUT = + new SearchAcrossEntitiesInput( + List.of(EntityType.DATASET), "", 0, 10, null, null, null, null, null); + + private static final SearchAcrossEntitiesInput COMPLEX_INPUT = + new SearchAcrossEntitiesInput( + List.of(EntityType.CHART, EntityType.DATASET), + "query", + 2, + 5, + null, + List.of( + AndFilterInput.builder() + .setAnd( + List.of( + FacetFilterInput.builder() + .setField("field1") + .setValues(List.of("1", "2")) + .build(), + FacetFilterInput.builder() + .setField("field2") + .setValues(List.of("a")) + .build())) + .build(), + AndFilterInput.builder() + 
.setAnd( + List.of( + FacetFilterInput.builder() + .setField("field3") + .setValues(List.of("3", "4")) + .build(), + FacetFilterInput.builder() + .setField("field4") + .setValues(List.of("b")) + .build())) + .build()), + TEST_VIEW_URN.toString(), + SearchFlags.builder().setSkipCache(true).build(), + SearchSortInput.builder() + .setSortCriteria( + List.of( + SortCriterion.builder() + .setField("sortField1") + .setSortOrder(SortOrder.DESCENDING) + .build(), + SortCriterion.builder() + .setField("sortField2") + .setSortOrder(SortOrder.ASCENDING) + .build())) + .build()); + + @Test + public void testGetSuccessBasic() throws Exception { + EntityClient mockEntityClient = initMockEntityClient(); + ViewService mockViewService = Mockito.mock(ViewService.class); + VersionsSearchResolver resolver = new VersionsSearchResolver(mockEntityClient, mockViewService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(BASIC_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + VersionSet versionSet = new VersionSet(); + versionSet.setUrn(VERSION_SET_URN); + Mockito.when(mockEnv.getSource()).thenReturn(versionSet); + + SearchResults result = resolver.get(mockEnv).get(); + + // Validate the result + assertEquals(result.getSearchResults().size(), 0); + + // Validate that we called the search service correctly + Mockito.verify(mockEntityClient, Mockito.times(1)) + .searchAcrossEntities( + Mockito.argThat( + context -> + !context.getSearchContext().getSearchFlags().isFilterNonLatestVersions()), + Mockito.eq(List.of(Constants.DATASET_ENTITY_NAME)), + Mockito.eq("*"), + Mockito.eq( + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd( + new CriterionArray( + CriterionUtils.buildCriterion( + "versionSet", Condition.EQUAL, VERSION_SET_URN)))))), + Mockito.eq(0), + Mockito.eq(10), + Mockito.eq( + List.of( + new com.linkedin.metadata.query.filter.SortCriterion() + .setField(VERSION_SORT_ID_FIELD_NAME) + .setOrder(com.linkedin.metadata.query.filter.SortOrder.DESCENDING))), + any()); + } + + @Test + public void testGetSuccessComplex() throws Exception { + EntityClient mockEntityClient = initMockEntityClient(); + + Filter viewFilter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd( + new CriterionArray( + List.of(buildCriterion("viewField", Condition.EQUAL, "test")))))); + DataHubViewInfo viewInfo = + new DataHubViewInfo() + .setName("test") + .setType(DataHubViewType.GLOBAL) + .setCreated(new AuditStamp().setTime(0L).setActor(TEST_USER_URN)) + .setLastModified(new AuditStamp().setTime(0L).setActor(TEST_USER_URN)) + .setDefinition( + new DataHubViewDefinition() + .setEntityTypes( + new StringArray( + List.of( + Constants.DATASET_ENTITY_NAME, Constants.DASHBOARD_ENTITY_NAME))) + .setFilter(viewFilter)); + ViewService mockViewService = Mockito.mock(ViewService.class); + Mockito.when(mockViewService.getViewInfo(any(), Mockito.eq(TEST_VIEW_URN))) + .thenReturn(viewInfo); + + VersionsSearchResolver resolver = new VersionsSearchResolver(mockEntityClient, mockViewService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(COMPLEX_INPUT); + 
Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + VersionSet versionSet = new VersionSet(); + versionSet.setUrn(VERSION_SET_URN); + Mockito.when(mockEnv.getSource()).thenReturn(versionSet); + + SearchResults result = resolver.get(mockEnv).get(); + + // Validate the result + assertEquals(result.getSearchResults().size(), 0); + + // Validate that we called the search service correctly + Mockito.verify(mockEntityClient, Mockito.times(1)) + .searchAcrossEntities( + Mockito.argThat( + context -> + !context.getSearchContext().getSearchFlags().isFilterNonLatestVersions() + && context.getSearchContext().getSearchFlags().isSkipCache()), + Mockito.eq(List.of(Constants.DATASET_ENTITY_NAME)), + Mockito.eq("query"), + Mockito.eq( + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd( + new CriterionArray( + CriterionUtils.buildCriterion( + "field1", Condition.EQUAL, "1", "2"), + CriterionUtils.buildCriterion( + "field2", Condition.EQUAL, "a"), + CriterionUtils.buildCriterion( + "versionSet", Condition.EQUAL, VERSION_SET_URN), + CriterionUtils.buildCriterion( + "viewField", Condition.EQUAL, "test"))), + new ConjunctiveCriterion() + .setAnd( + new CriterionArray( + CriterionUtils.buildCriterion( + "field3", Condition.EQUAL, "3", "4"), + CriterionUtils.buildCriterion( + "field4", Condition.EQUAL, "b"), + CriterionUtils.buildCriterion( + "versionSet", Condition.EQUAL, VERSION_SET_URN), + CriterionUtils.buildCriterion( + "viewField", Condition.EQUAL, "test")))))), + Mockito.eq(2), + Mockito.eq(5), + Mockito.eq( + List.of( + new com.linkedin.metadata.query.filter.SortCriterion() + .setField("sortField1") + .setOrder(com.linkedin.metadata.query.filter.SortOrder.DESCENDING), + new com.linkedin.metadata.query.filter.SortCriterion() + .setField("sortField2") + .setOrder(com.linkedin.metadata.query.filter.SortOrder.ASCENDING), + new com.linkedin.metadata.query.filter.SortCriterion() + .setField(VERSION_SORT_ID_FIELD_NAME) + .setOrder(com.linkedin.metadata.query.filter.SortOrder.DESCENDING))), + any()); + } + + @Test + public void testThrowsError() throws Exception { + EntityClient mockEntityClient = initMockEntityClient(); + ViewService mockViewService = Mockito.mock(ViewService.class); + + Mockito.when( + mockEntityClient.searchAcrossEntities( + any(), any(), any(), any(), Mockito.anyInt(), Mockito.anyInt(), any(), any())) + .thenThrow(new RemoteInvocationException()); + + VersionsSearchResolver resolver = new VersionsSearchResolver(mockEntityClient, mockViewService); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(BASIC_INPUT); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + VersionSet versionSet = new VersionSet(); + versionSet.setUrn(VERSION_SET_URN); + Mockito.when(mockEnv.getSource()).thenReturn(versionSet); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + } + + private EntityClient initMockEntityClient() throws Exception { + EntityClient client = Mockito.mock(EntityClient.class); + + Mockito.when( + client.searchAcrossEntities( + any(), + any(), + Mockito.anyString(), + any(), + Mockito.anyInt(), + Mockito.anyInt(), + any(), + Mockito.eq(null))) + .thenReturn( + new SearchResult() + .setEntities(new SearchEntityArray()) + .setNumEntities(0) + .setFrom(0) + .setPageSize(0) + .setMetadata(new SearchResultMetadata())); + + return 
client; + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java index dc1ce935ad5ecd..cd9d58b54e6b3a 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java @@ -80,9 +80,10 @@ public void testMapPlatformInstance() throws Exception { DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse); assertNotNull(instance.getDataPlatformInstance()); - assertNotNull(instance.getPlatform()); - assertEquals(instance.getPlatform().getUrn(), TEST_PLATFORM_URN); - assertEquals(instance.getPlatform().getType(), EntityType.DATA_PLATFORM); + assertNotNull(instance.getDataPlatformInstance().getPlatform()); + assertEquals(instance.getDataPlatformInstance().getPlatform().getUrn(), TEST_PLATFORM_URN); + assertEquals( + instance.getDataPlatformInstance().getPlatform().getType(), EntityType.DATA_PLATFORM); } @Test diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeMapperTest.java new file mode 100644 index 00000000000000..79cc7725b1fc7f --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeMapperTest.java @@ -0,0 +1,20 @@ +package com.linkedin.datahub.graphql.types.entitytype; + +import static org.testng.Assert.*; + +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.metadata.Constants; +import org.testng.annotations.Test; + +public class EntityTypeMapperTest { + + @Test + public void testGetType() throws Exception { + assertEquals(EntityTypeMapper.getType(Constants.DATASET_ENTITY_NAME), EntityType.DATASET); + } + + @Test + public void testGetName() throws Exception { + assertEquals(EntityTypeMapper.getName(EntityType.DATASET), Constants.DATASET_ENTITY_NAME); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapperTest.java new file mode 100644 index 00000000000000..ed16226d0685ee --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapperTest.java @@ -0,0 +1,30 @@ +package com.linkedin.datahub.graphql.types.entitytype; + +import static org.testng.Assert.*; + +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.metadata.Constants; +import org.testng.annotations.Test; + +public class EntityTypeUrnMapperTest { + + @Test + public void testGetName() throws Exception { + assertEquals( + EntityTypeUrnMapper.getName("urn:li:entityType:datahub.dataset"), + Constants.DATASET_ENTITY_NAME); + } + + @Test + public void testGetEntityType() throws Exception { + assertEquals( + EntityTypeUrnMapper.getEntityType("urn:li:entityType:datahub.dataset"), EntityType.DATASET); + } + + @Test + public void testGetEntityTypeUrn() throws Exception { + assertEquals( + EntityTypeUrnMapper.getEntityTypeUrn(Constants.DATASET_ENTITY_NAME), + "urn:li:entityType:datahub.dataset"); + } 
+} diff --git a/datahub-web-react/package.json b/datahub-web-react/package.json index a608698d7602c4..0ff68de2481ed0 100644 --- a/datahub-web-react/package.json +++ b/datahub-web-react/package.json @@ -152,7 +152,7 @@ "prettier": "^2.8.8", "source-map-explorer": "^2.5.2", "storybook": "^8.1.11", - "vite": "^4.5.5", + "vite": "^4.5.6", "vite-plugin-babel-macros": "^1.0.6", "vite-plugin-static-copy": "^0.17.0", "vite-plugin-svgr": "^4.1.0", diff --git a/datahub-web-react/src/Mocks.tsx b/datahub-web-react/src/Mocks.tsx index 2da9e733eb4072..063b784920e234 100644 --- a/datahub-web-react/src/Mocks.tsx +++ b/datahub-web-react/src/Mocks.tsx @@ -645,6 +645,7 @@ export const dataset3 = { structuredProperties: null, forms: null, activeIncidents: null, + versionProperties: null, } as Dataset; export const dataset3WithSchema = { diff --git a/datahub-web-react/src/app/context/CustomUserContext.tsx b/datahub-web-react/src/app/context/CustomUserContext.tsx new file mode 100644 index 00000000000000..016bbe29684ea5 --- /dev/null +++ b/datahub-web-react/src/app/context/CustomUserContext.tsx @@ -0,0 +1,7 @@ +/** + * Custom User Context State - This is a custom user context state and can be overridden in a specific fork of DataHub. + * The below type can be customized with specific object properties as well if needed. + */ +export type CustomUserContextState = Record<string, any>; + +export const DEFAULT_CUSTOM_STATE: CustomUserContextState = {}; diff --git a/datahub-web-react/src/app/context/userContext.tsx b/datahub-web-react/src/app/context/userContext.tsx index c9b8adafd9722f..a728e01ddc29ae 100644 --- a/datahub-web-react/src/app/context/userContext.tsx +++ b/datahub-web-react/src/app/context/userContext.tsx @@ -1,5 +1,6 @@ import React from 'react'; import { CorpUser, PlatformPrivileges } from '../../types.generated'; +import { CustomUserContextState, DEFAULT_CUSTOM_STATE } from './CustomUserContext'; /** * Local State is persisted to local storage.
@@ -22,6 +23,7 @@ export type State = { loadedPersonalDefaultViewUrn: boolean; hasSetDefaultView: boolean; }; + customState?: CustomUserContextState; }; /** @@ -51,6 +53,7 @@ export const DEFAULT_STATE: State = { loadedPersonalDefaultViewUrn: false, hasSetDefaultView: false, }, + customState: DEFAULT_CUSTOM_STATE, }; export const DEFAULT_CONTEXT = { diff --git a/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx b/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx index 4834a026ad94a3..9bb9bd745d1ee6 100644 --- a/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx +++ b/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx @@ -161,6 +161,7 @@ export class DataProcessInstanceEntity implements Entity { return { name, externalUrl, + platform: processInstance?.dataPlatformInstance?.platform, }; }; @@ -174,9 +175,10 @@ export class DataProcessInstanceEntity implements Entity { subType={data.subTypes?.typeNames?.[0]} description="" platformName={ - data?.platform?.properties?.displayName || capitalizeFirstLetterOnly(data?.platform?.name) + data?.dataPlatformInstance?.platform?.properties?.displayName || + capitalizeFirstLetterOnly(data?.dataPlatformInstance?.platform?.name) } - platformLogo={data.platform.properties?.logoUrl} + platformLogo={data?.dataPlatformInstance?.platform?.properties?.logoUrl} owners={null} globalTags={null} // domain={data.domain?.domain} @@ -201,9 +203,10 @@ export class DataProcessInstanceEntity implements Entity { subType={data.subTypes?.typeNames?.[0]} description="" platformName={ - data?.platform?.properties?.displayName || capitalizeFirstLetterOnly(data?.platform?.name) + data?.dataPlatformInstance?.platform?.properties?.displayName || + capitalizeFirstLetterOnly(data?.dataPlatformInstance?.platform?.name) } - platformLogo={data.platform.properties?.logoUrl} + platformLogo={data.dataPlatformInstance?.platform?.properties?.logoUrl} platformInstanceId={data.dataPlatformInstance?.instanceId} owners={null} globalTags={null} @@ -231,8 +234,8 @@ export class DataProcessInstanceEntity implements Entity { name: this.displayName(entity), type: EntityType.DataProcessInstance, subtype: entity?.subTypes?.typeNames?.[0], - icon: entity?.platform?.properties?.logoUrl || undefined, - platform: entity?.platform, + icon: entity?.dataPlatformInstance?.platform?.properties?.logoUrl || undefined, + platform: entity?.dataPlatformInstance?.platform, container: entity?.container, // health: entity?.health || undefined, }; diff --git a/datahub-web-react/src/app/entity/domain/DataProductsTab/DataProductsTab.tsx b/datahub-web-react/src/app/entity/domain/DataProductsTab/DataProductsTab.tsx index 15cc99127f3500..39f89979dd95a0 100644 --- a/datahub-web-react/src/app/entity/domain/DataProductsTab/DataProductsTab.tsx +++ b/datahub-web-react/src/app/entity/domain/DataProductsTab/DataProductsTab.tsx @@ -68,6 +68,7 @@ export default function DataProductsTab() { searchFlags: { skipCache: true }, }, }, + fetchPolicy: 'no-cache', }); const totalResults = data?.searchAcrossEntities?.total || 0; const searchResults = data?.searchAcrossEntities?.searchResults?.map((r) => r.entity) || []; diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityName.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityName.tsx index 8976629d9ef0b1..549724bd1945d9 100644 --- 
a/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityName.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/header/EntityName.tsx @@ -48,9 +48,9 @@ function EntityName(props: Props) { setIsEditing(false); return; } - setUpdatedName(name); updateName({ variables: { input: { name, urn } } }) .then(() => { + setUpdatedName(name); setIsEditing(false); message.success({ content: 'Name Updated', duration: 2 }); refetch(); diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx index 62b967e8f7b30d..e57666471df1a6 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/EditOwnersModal.tsx @@ -78,10 +78,26 @@ export const EditOwnersModal = ({ const renderSearchResult = (entity: Entity) => { const avatarUrl = (entity.type === EntityType.CorpUser && (entity as CorpUser).editableProperties?.pictureLink) || undefined; + const corpUserDepartmentName = + (entity.type === EntityType.CorpUser && (entity as CorpUser).properties?.departmentName) || ''; + const corpUserId = (entity.type === EntityType.CorpUser && (entity as CorpUser).username) || ''; + const corpUserTitle = (entity.type === EntityType.CorpUser && (entity as CorpUser).properties?.title) || ''; const displayName = entityRegistry.getDisplayName(entity.type, entity); + return ( - - + } + > + ); }; @@ -381,6 +397,7 @@ export const EditOwnersModal = ({ value: owner.value.ownerUrn, label: owner.label, }))} + optionLabelProp="label" > {ownerSearchOptions} diff --git a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx index 898fbd6a6d9268..68e6c8d3436fb9 100644 --- a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx @@ -200,7 +200,7 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps) setVersion(event.target.value)} onBlur={(event) => handleBlur(event, setVersion)} diff --git a/datahub-web-react/src/app/shared/OwnerLabel.tsx b/datahub-web-react/src/app/shared/OwnerLabel.tsx index de3c03dea2ba4a..fb670aa56d7881 100644 --- a/datahub-web-react/src/app/shared/OwnerLabel.tsx +++ b/datahub-web-react/src/app/shared/OwnerLabel.tsx @@ -20,14 +20,22 @@ type Props = { name: string; avatarUrl: string | undefined; type: EntityType; + corpUserId?: string; + corpUserTitle?: string; + corpUserDepartmentName?: string; }; -export const OwnerLabel = ({ name, avatarUrl, type }: Props) => { +export const OwnerLabel = ({ name, avatarUrl, type, corpUserId, corpUserTitle, corpUserDepartmentName }: Props) => { + const subHeader = [corpUserId, corpUserTitle, corpUserDepartmentName].filter(Boolean).join(' - '); + return ( -
{name} + <div> + <div>{name}</div> + {subHeader && <div>{subHeader}</div>} + </div>
); diff --git a/datahub-web-react/src/appConfigContext.tsx b/datahub-web-react/src/appConfigContext.tsx index d7fef85db4b625..8ac18d0142b4e9 100644 --- a/datahub-web-react/src/appConfigContext.tsx +++ b/datahub-web-react/src/appConfigContext.tsx @@ -57,6 +57,7 @@ export const DEFAULT_APP_CONFIG = { editableDatasetNameEnabled: false, showSeparateSiblings: false, showManageStructuredProperties: false, + entityVersioningEnabled: false, }, }; diff --git a/datahub-web-react/src/graphql/app.graphql b/datahub-web-react/src/graphql/app.graphql index 0d1999f82f77cd..c1fe50d7620a3c 100644 --- a/datahub-web-react/src/graphql/app.graphql +++ b/datahub-web-react/src/graphql/app.graphql @@ -72,6 +72,7 @@ query appConfig { editableDatasetNameEnabled showSeparateSiblings showManageStructuredProperties + entityVersioningEnabled } } } diff --git a/datahub-web-react/src/graphql/dataProcessInstance.graphql b/datahub-web-react/src/graphql/dataProcessInstance.graphql index 8f55ca4903d527..442f8db0a933b2 100644 --- a/datahub-web-react/src/graphql/dataProcessInstance.graphql +++ b/datahub-web-react/src/graphql/dataProcessInstance.graphql @@ -67,9 +67,6 @@ fragment processInstanceRelationshipResults on EntityRelationshipsResult { fragment dataProcessInstanceFields on DataProcessInstance { urn type - platform { - ...platformFields - } parentContainers { ...parentContainersFields } @@ -125,9 +122,6 @@ query getDataProcessInstance($urn: String!) { dataProcessInstance(urn: $urn) { urn type - platform { - ...platformFields - } parentContainers { ...parentContainersFields } diff --git a/datahub-web-react/src/graphql/dataset.graphql b/datahub-web-react/src/graphql/dataset.graphql index fcca919f614235..8bbeb304aae2cc 100644 --- a/datahub-web-react/src/graphql/dataset.graphql +++ b/datahub-web-react/src/graphql/dataset.graphql @@ -172,6 +172,7 @@ fragment nonSiblingDatasetFields on Dataset { forms { ...formsFields } + ...entityProfileVersionProperties } query getRecentQueries($urn: String!) { diff --git a/datahub-web-react/src/graphql/mlModel.graphql b/datahub-web-react/src/graphql/mlModel.graphql index 2192888caef701..ad97c7c6f530a1 100644 --- a/datahub-web-react/src/graphql/mlModel.graphql +++ b/datahub-web-react/src/graphql/mlModel.graphql @@ -34,5 +34,6 @@ query getMLModel($urn: String!) { forms { ...formsFields } + ...entityProfileVersionProperties } } diff --git a/datahub-web-react/src/graphql/preview.graphql b/datahub-web-react/src/graphql/preview.graphql index 1bee614dd7adbe..8000f59f2bf258 100644 --- a/datahub-web-react/src/graphql/preview.graphql +++ b/datahub-web-react/src/graphql/preview.graphql @@ -346,4 +346,9 @@ fragment entityPreview on Entity { ... on Container { ...entityContainer } + ... on SupportsVersions { + versionProperties { + ...versionProperties + } + } } diff --git a/datahub-web-react/src/graphql/search.graphql b/datahub-web-react/src/graphql/search.graphql index 72e7d347187828..9edd6754022866 100644 --- a/datahub-web-react/src/graphql/search.graphql +++ b/datahub-web-react/src/graphql/search.graphql @@ -433,6 +433,8 @@ fragment searchResultsWithoutSchemaField on Entity { lastName fullName email + departmentName + title } info { active @@ -442,6 +444,8 @@ fragment searchResultsWithoutSchemaField on Entity { lastName fullName email + departmentName + title } editableProperties { displayName @@ -945,6 +949,11 @@ fragment searchResultsWithoutSchemaField on Entity { ... on StructuredPropertyEntity { ...structuredPropertyFields } + ... 
on SupportsVersions { + versionProperties { + ...versionProperties + } + } } fragment searchResultFields on Entity { diff --git a/datahub-web-react/src/graphql/versioning.graphql b/datahub-web-react/src/graphql/versioning.graphql new file mode 100644 index 00000000000000..e9b6b82494b6ed --- /dev/null +++ b/datahub-web-react/src/graphql/versioning.graphql @@ -0,0 +1,89 @@ +fragment versionProperties on VersionProperties { + versionSet { + urn + type + } + isLatest + version { + versionTag + } + aliases { + versionTag + } + comment + created { + time + actor { + urn + ...entityDisplayNameFields + editableProperties { + displayName + pictureLink + } + } + } + createdInSource { + time + actor { + urn + ...entityDisplayNameFields + editableProperties { + displayName + pictureLink + } + } + } +} + +fragment versionsSearchResults on SearchResults { + count + total + searchResults { + entity { + urn + type + ... on SupportsVersions { + versionProperties { + ...versionProperties + } + } + } + } +} + +fragment entityProfileVersionProperties on SupportsVersions { + versionProperties { + ...versionProperties + versionSet { + urn + type + versionsSearch(input: { query: "*", count: 5, searchFlags: { skipCache: true } }) { + ...versionsSearchResults + } + } + } +} + +query searchAcrossVersions($versionSetUrn: String!, $input: SearchAcrossEntitiesInput!) { + versionSet(urn: $versionSetUrn) { + versionsSearch(input: $input) { + ...versionsSearchResults + } + } +} + +mutation linkAssetVersion($input: LinkVersionInput!) { + linkAssetVersion(input: $input) { + versionsSearch(input: { query: "*", count: 5, searchFlags: { skipCache: true } }) { + ...versionsSearchResults + } + } +} + +mutation unlinkAssetVersion($input: UnlinkVersionInput!) { + unlinkAssetVersion(input: $input) { + versionsSearch(input: { query: "*", count: 5, searchFlags: { skipCache: true } }) { + ...versionsSearchResults + } + } +} diff --git a/datahub-web-react/yarn.lock b/datahub-web-react/yarn.lock index f16e8aa506e2cc..6e946e584f18a5 100644 @@ -12066,10 +12066,10 @@ vite-plugin-svgr@^4.1.0: "@svgr/core" "^8.1.0" "@svgr/plugin-jsx" "^8.1.0" -"vite@^3.0.0 || ^4.0.0 || ^5.0.0-0", "vite@^3.1.0 || ^4.0.0 || ^5.0.0-0", vite@^4.5.5: - version "4.5.5" - resolved "https://registry.yarnpkg.com/vite/-/vite-4.5.5.tgz#639b9feca5c0a3bfe3c60cb630ef28bf219d742e" - integrity sha512-ifW3Lb2sMdX+WU91s3R0FyQlAyLxOzCSCP37ujw0+r5POeHPwe6udWVIElKQq8gk3t7b8rkmvqC6IHBpCff4GQ== +"vite@^3.0.0 || ^4.0.0 || ^5.0.0-0", "vite@^3.1.0 || ^4.0.0 || ^5.0.0-0", vite@^4.5.6: + version "4.5.6" + resolved "https://registry.yarnpkg.com/vite/-/vite-4.5.6.tgz#48bbd97fe06e8241df2e625b31c581707e10b57d" + integrity sha512-ElBNuVvJKslxcfY2gMmae5IjaKGqCYGicCNZ+8R56sAznobeE3pI9ctzI17cBS/6OJh5YuQNMSN4BP4dRjugBg== dependencies: esbuild "^0.18.10" postcss "^8.4.27" diff --git a/docker/README.md b/docker/README.md index ad847dc70cf3c0..b756ae3eaed752 100644 --- a/docker/README.md +++ b/docker/README.md @@ -16,7 +16,7 @@ area. ## Quickstart The easiest way to bring up and test DataHub is using DataHub [Docker](https://www.docker.com) images -which are continuously deployed to [Docker Hub](https://hub.docker.com/u/linkedin) with every commit to repository. +which are continuously deployed to [Docker Hub](https://hub.docker.com/u/acryldata) with every commit to the repository. You can easily download and run all these images and their dependencies with our [quick start guide](../docs/quickstart.md).
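In practice, the quickstart flow referenced in the README hunk above reduces to two commands. A minimal sketch, assuming Docker is running and Python 3 is available; `acryl-datahub` and `datahub docker quickstart` are the standard CLI package and command:

```bash
# Install (or upgrade) the DataHub CLI
python3 -m pip install --upgrade acryl-datahub

# Pull the images from the acryldata Docker Hub org and bring up the stack locally
datahub docker quickstart
```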
diff --git a/docker/datahub-gms/Dockerfile b/docker/datahub-gms/Dockerfile index 52cc507f9268d1..5462d4f70002c1 100644 --- a/docker/datahub-gms/Dockerfile +++ b/docker/datahub-gms/Dockerfile @@ -26,7 +26,6 @@ RUN go install github.com/jwilder/dockerize@$DOCKERIZE_VERSION FROM alpine:3.21 AS base ENV JMX_VERSION=0.18.0 -ENV JETTY_VERSION=11.0.21 # Re-declaring args from above to make them available in this stage (will inherit default values) ARG ALPINE_REPO_URL @@ -42,9 +41,6 @@ RUN apk --no-cache --update-cache --available upgrade \ && apk --no-cache add curl bash coreutils gcompat sqlite libc6-compat snappy \ && apk --no-cache add openjdk17-jre-headless --repository=${ALPINE_REPO_URL}/edge/community \ && apk --no-cache add jattach --repository ${ALPINE_REPO_URL}/edge/community/ \ - && curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-runner/${JETTY_VERSION}/jetty-runner-${JETTY_VERSION}.jar --output jetty-runner.jar \ - && curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-jmx/${JETTY_VERSION}/jetty-jmx-${JETTY_VERSION}.jar --output jetty-jmx.jar \ - && curl -sS ${MAVEN_CENTRAL_REPO_URL}/org/eclipse/jetty/jetty-util/${JETTY_VERSION}/jetty-util-${JETTY_VERSION}.jar --output jetty-util.jar \ && wget --no-verbose ${GITHUB_REPO_URL}/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.24.0/opentelemetry-javaagent.jar \ && wget --no-verbose ${MAVEN_CENTRAL_REPO_URL}/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_VERSION}/jmx_prometheus_javaagent-${JMX_VERSION}.jar -O jmx_prometheus_javaagent.jar \ && cp /usr/lib/jvm/java-17-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks @@ -56,8 +52,6 @@ FROM base AS prod-install COPY war.war /datahub/datahub-gms/bin/war.war COPY metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-gms/resources/entity-registry.yml COPY docker/datahub-gms/start.sh /datahub/datahub-gms/scripts/start.sh -COPY docker/datahub-gms/jetty.xml /datahub/datahub-gms/scripts/jetty.xml -COPY docker/datahub-gms/jetty-jmx.xml /datahub/datahub-gms/scripts/jetty-jmx.xml COPY docker/monitoring/client-prometheus-config.yaml /datahub/datahub-gms/scripts/prometheus-config.yaml RUN chmod +x /datahub/datahub-gms/scripts/start.sh @@ -70,7 +64,7 @@ FROM ${APP_ENV}-install AS final RUN mkdir -p /etc/datahub/plugins/auth/resources RUN addgroup -S datahub && adduser -S datahub -G datahub -RUN chown -R datahub:datahub /etc/datahub +RUN chown -R datahub:datahub /etc/datahub /datahub USER datahub ENV JMX_OPTS="" diff --git a/docker/datahub-gms/jetty-jmx.xml b/docker/datahub-gms/jetty-jmx.xml deleted file mode 100644 index 5aadbb66a70ed1..00000000000000 --- a/docker/datahub-gms/jetty-jmx.xml +++ /dev/null @@ -1,31 +0,0 @@ diff --git a/docker/datahub-gms/jetty.xml b/docker/datahub-gms/jetty.xml deleted file mode 100644 index 3f04635d9498ca..00000000000000 --- a/docker/datahub-gms/jetty.xml +++ /dev/null @@ -1,57 +0,0 @@ diff --git a/docker/datahub-gms/start.sh b/docker/datahub-gms/start.sh index c91580eed83cb9..cac36920022749 100755 --- a/docker/datahub-gms/start.sh +++ b/docker/datahub-gms/start.sh @@ -62,13 +62,8 @@ COMMON=" java $JAVA_OPTS $JMX_OPTS \ $OTEL_AGENT \ $PROMETHEUS_AGENT \ - -jar /jetty-runner.jar \ - --stats unsecure \ - --jar jetty-util.jar \ - --jar jetty-jmx.jar \ - --config /datahub/datahub-gms/scripts/jetty.xml \ - --config
/datahub/datahub-gms/scripts/jetty-jmx.xml \ - /datahub/datahub-gms/bin/war.war" + -Dstats=unsecure \ + -jar /datahub/datahub-gms/bin/war.war" if [[ $SKIP_ELASTICSEARCH_CHECK != true ]]; then exec dockerize \ diff --git a/docker/datahub-ingestion-base/smoke.Dockerfile b/docker/datahub-ingestion-base/smoke.Dockerfile index 34654faaad729d..81a6bd0e20cacc 100644 --- a/docker/datahub-ingestion-base/smoke.Dockerfile +++ b/docker/datahub-ingestion-base/smoke.Dockerfile @@ -20,9 +20,9 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ COPY . /datahub-src ARG RELEASE_VERSION RUN cd /datahub-src && \ - sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" metadata-ingestion/src/datahub/__init__.py && \ - sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/__init__.py && \ - cat metadata-ingestion/src/datahub/__init__.py && \ + sed -i.bak "s/__version__ = .*$/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" metadata-ingestion/src/datahub/_version.py && \ + sed -i.bak "s/__version__ = .*$/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_version.py && \ + cat metadata-ingestion/src/datahub/_version.py && \ ./gradlew :metadata-ingestion:codegen && \ pip install file:metadata-ingestion-modules/airflow-plugin#egg=acryl-datahub-airflow-plugin file:metadata-ingestion#egg=acryl-datahub diff --git a/docker/datahub-ingestion/Dockerfile b/docker/datahub-ingestion/Dockerfile index ee0333e1cb1d1f..a9fd3a6662d1bb 100644 --- a/docker/datahub-ingestion/Dockerfile +++ b/docker/datahub-ingestion/Dockerfile @@ -26,10 +26,10 @@ COPY --chown=datahub ./metadata-ingestion-modules/airflow-plugin /metadata-inges ARG RELEASE_VERSION WORKDIR /metadata-ingestion -RUN sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/datahub/__init__.py && \ - sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" airflow-plugin/src/datahub_airflow_plugin/__init__.py && \ - cat src/datahub/__init__.py | grep __version__ && \ - cat airflow-plugin/src/datahub_airflow_plugin/__init__.py | grep __version__ +RUN sed -i.bak "s/__version__ = .*$/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/datahub/_version.py && \ + sed -i.bak "s/__version__ = .*$/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" airflow-plugin/src/datahub_airflow_plugin/_version.py && \ + cat src/datahub/_version.py | grep __version__ && \ + cat airflow-plugin/src/datahub_airflow_plugin/_version.py | grep __version__ FROM base AS slim-install diff --git a/docker/datahub-ingestion/Dockerfile-slim-only b/docker/datahub-ingestion/Dockerfile-slim-only index 6ade262f2feded..80abff204df9f2 100644 --- a/docker/datahub-ingestion/Dockerfile-slim-only +++ b/docker/datahub-ingestion/Dockerfile-slim-only @@ -15,8 +15,8 @@ COPY --chown=datahub ./metadata-ingestion /metadata-ingestion ARG RELEASE_VERSION WORKDIR /metadata-ingestion -RUN sed -i.bak "s/__version__ = \"1\!0.0.0.dev0\"/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/datahub/__init__.py && \ - cat src/datahub/__init__.py +RUN sed -i.bak "s/__version__ = .*$/__version__ = \"$(echo $RELEASE_VERSION|sed s/-/+/)\"/" src/datahub/_version.py && \ + cat src/datahub/_version.py FROM base as slim-install diff --git 
a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index c68a4c1f5a8fcf..0cbb2aee903c84 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -49,8 +49,6 @@ services: - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true} volumes: - ./datahub-gms/start.sh:/datahub/datahub-gms/scripts/start.sh - - ./datahub-gms/jetty.xml:/datahub/datahub-gms/scripts/jetty.xml - - ./datahub-gms/jetty-jmx.xml:/datahub/datahub-gms/scripts/jetty-jmx.xml - ./monitoring/client-prometheus-config.yaml:/datahub/datahub-gms/scripts/prometheus-config.yaml - ../metadata-models/src/main/resources/:/datahub/datahub-gms/resources - ../metadata-service/war/build/libs/:/datahub/datahub-gms/bin diff --git a/docker/profiles/docker-compose.frontend.yml b/docker/profiles/docker-compose.frontend.yml index c6b15a7016670d..b278cd41b0b231 100644 --- a/docker/profiles/docker-compose.frontend.yml +++ b/docker/profiles/docker-compose.frontend.yml @@ -26,6 +26,7 @@ x-datahub-frontend-service-dev: &datahub-frontend-service-dev DATAHUB_ANALYTICS_ENABLED: ${DATAHUB_ANALYTICS_ENABLED:-true} volumes: - ../../datahub-frontend/build/stage/main:/datahub-frontend + - ./monitoring/client-prometheus-config.yaml:/datahub-frontend/client-prometheus-config.yaml services: frontend-quickstart: diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index 2147d6b5a0247f..d4ea7dde9f8481 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -130,15 +130,13 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev <<: [*datahub-dev-telemetry-env, *datahub-gms-env] SKIP_ELASTICSEARCH_CHECK: false JAVA_TOOL_OPTIONS: '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5001' - BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE: false + BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE: ${BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE:-false} SEARCH_SERVICE_ENABLE_CACHE: false LINEAGE_SEARCH_CACHE_ENABLED: false SHOW_BROWSE_V2: true ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ./datahub-gms/start.sh:/datahub/datahub-gms/scripts/start.sh - - ./datahub-gms/jetty.xml:/datahub/datahub-gms/scripts/jetty.xml - - ./datahub-gms/jetty-jmx.xml:/datahub/datahub-gms/scripts/jetty-jmx.xml - ./monitoring/client-prometheus-config.yaml:/datahub/datahub-gms/scripts/prometheus-config.yaml - ../../metadata-models/src/main/resources/:/datahub/datahub-gms/resources - ../../metadata-service/war/build/libs/:/datahub/datahub-gms/bin diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js index fb2f418e37c044..686c28fa3d1b93 100644 --- a/docs-website/docusaurus.config.js +++ b/docs-website/docusaurus.config.js @@ -77,7 +77,7 @@ module.exports = { announcementBar: { id: "announcement-3", content: - '

Watch Metadata & AI Summit sessions on-demand. Watch Now', + 'Learn about DataHub 1.0 launching at our 5th birthday party! Register
', backgroundColor: "#111", textColor: "#ffffff", isCloseable: false, diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index bcb06affedcff0..fbd35b60aedba9 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -368,6 +368,7 @@ module.exports = { }, { "DataHub Cloud Release History": [ + "docs/managed-datahub/release-notes/v_0_3_8", "docs/managed-datahub/release-notes/v_0_3_7", "docs/managed-datahub/release-notes/v_0_3_6", "docs/managed-datahub/release-notes/v_0_3_5", diff --git a/docs-website/src/components/SolutionsDropdown/SolutionsDropdownContent/solutionsDropdownContent.js b/docs-website/src/components/SolutionsDropdown/SolutionsDropdownContent/solutionsDropdownContent.js index abede0f11735d4..ad7278a438cf81 100644 --- a/docs-website/src/components/SolutionsDropdown/SolutionsDropdownContent/solutionsDropdownContent.js +++ b/docs-website/src/components/SolutionsDropdown/SolutionsDropdownContent/solutionsDropdownContent.js @@ -24,7 +24,7 @@ const solutionsDropdownContent = { title: "DataHub Core", description: "Get started with the Open Source platform.", iconImage: "/img/solutions/icon-dropdown-core.png", - href: "/", + href: "/docs/quickstart", }, { title: "Cloud vs Core", diff --git a/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js b/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js index ed083e4e0c3987..c9e5098a016c2d 100644 --- a/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js +++ b/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js @@ -42,6 +42,22 @@ export default function DocsVersionDropdownNavbarItem({ type: 'html', value: '', }, + { + value: ` 0.14.0 `, + type: "html", }, + { + value: ` 0.13.1 `, + type: "html", }, { value: ` 0.13.0 diff --git a/docs-website/versions.json b/docs-website/versions.json index 5288c42437c779..ab242fa47dce5b 100644 --- a/docs-website/versions.json +++ b/docs-website/versions.json @@ -1,4 +1,4 @@ [ - "0.14.0", - "0.13.1" + "0.15.0", + "0.14.1" ] diff --git a/docs/actions/README.md b/docs/actions/README.md index 4fa44eec588bed..a906c3c78f119e 100644 --- a/docs/actions/README.md +++ b/docs/actions/README.md @@ -189,11 +189,10 @@ Two event types are currently supported. Read more about them below. ## Supported Event Sources -Currently, the only event source that is officially supported is `kafka`, which polls for events -via a Kafka Consumer. +Currently, the following event sources are supported: - [Kafka Event Source](sources/kafka-event-source.md) - + - [DataHub Cloud Event Source](sources/datahub-cloud-event-source.md) ## Supported Actions diff --git a/docs/actions/sources/datahub-cloud-event-source.md b/docs/actions/sources/datahub-cloud-event-source.md new file mode 100644 index 00000000000000..656fe4a3a6329f --- /dev/null +++ b/docs/actions/sources/datahub-cloud-event-source.md @@ -0,0 +1,113 @@ +# DataHub Cloud Event Source + +## Prerequisites + +### Compatibility + +The **DataHub Cloud Event Source** is only compatible with versions of DataHub Cloud above `v0.3.7`. + +### Privileges + +By default, users do not have access to the Events API of DataHub Cloud. In order to access the API, the user or service account associated with the access token used to configure this event source _must_ have the `Get Platform Events` platform privilege, which can be granted using an [Access Policy](https://datahubproject.io/docs/authorization/access-policies-guide/). 
+

## Overview

The DataHub Cloud Event Source allows you to use DataHub Actions with an instance of DataHub Cloud hosted by [Acryl](https://acryl.io).

Under the hood, the DataHub Cloud Event Source communicates with DataHub Cloud to extract change events in real time. The state of progress is automatically saved to DataHub Cloud after messages are processed, allowing you to seamlessly pause and restart the consumer, using the provided `name` to uniquely identify the consumer state.

On initial startup of a new consumer id, the DataHub event source will automatically begin consuming from the _latest_ events by default. Afterwards, the offsets of processed messages will be continually saved. However, the source can also optionally be configured to "look back" in time by a certain number of days on initial bootstrap using the `lookback_days` parameter. To reset all previously saved offsets for a consumer, you can set `reset_offsets` to `True`.

### Processing Guarantees

This event source implements an "ack" function which is invoked if and only if an event is successfully processed by the Actions framework, meaning that the event made it through the Transformers and into the Action without any errors. Under the hood, the "ack" method synchronously commits DataHub Cloud Consumer Offsets on behalf of the Action. This means that by default, the framework provides *at-least-once* processing semantics. That is, in the unusual case that a failure occurs when attempting to commit offsets back to DataHub Cloud, that event may be replayed on restart of the Action.

If you've configured your Action pipeline `failure_mode` to be `CONTINUE` (the default), then events which fail to be processed will simply be logged to a `failed_events.log` file for further investigation (a dead letter queue). The DataHub Cloud Event Source will continue to make progress against the underlying topics and continue to commit offsets even in the case of failed messages.

If you've configured your Action pipeline `failure_mode` to be `THROW`, then events which fail to be processed result in an Action Pipeline error. This in turn terminates the pipeline before committing offsets back to DataHub Cloud, so the message will not be marked as "processed" by the Action consumer.

## Supported Events

The DataHub Cloud Event Source produces:

- [Entity Change Event V1](../../managed-datahub/datahub-api/entity-events-api.md)

Note that the DataHub Cloud Event Source does _not_ yet support the full [Metadata Change Log V1](../events/metadata-change-log-event.md) event stream.

## Configure the Event Source

Use the following config(s) to get started with the DataHub Cloud Event Source.

### Quickstart

To start listening for new events from now, you can use the following recipe:

```yml
name: "unique-action-name"
datahub:
  server: "https://<your-instance>.acryl.io"
  token: "<your-access-token>"
source:
  type: "datahub-cloud"
action:
  # action configs
```

Note that the `datahub` configuration block is **required** to connect to your DataHub Cloud instance.
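As a quick way to exercise the recipe above, you can hand it to the Actions CLI. A minimal sketch, assuming the `acryl-datahub-actions` package is installed and the recipe is saved as `datahub_cloud_action.yaml` (a filename chosen here for illustration):

```shell
# Install the DataHub Actions framework if it isn't already available
pip install acryl-datahub-actions

# Start the pipeline; it runs until interrupted, saving offsets as events are acked
datahub actions -c datahub_cloud_action.yaml
```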
+

### Advanced Configurations

To reset the offsets for the action pipeline and start consuming events from 7 days ago, you can use the following recipe:

```yml
name: "unique-action-name"
datahub:
  server: "https://<your-instance>.acryl.io"
  token: "<your-access-token>"
source:
  type: "datahub-cloud"
  config:
    lookback_days: 7 # Look back 7 days for events
    reset_offsets: true # Ignore stored offsets and start fresh
    kill_after_idle_timeout: true # Enable shutdown after idle period
    idle_timeout_duration_seconds: 60 # Idle timeout set to 60 seconds
    event_processing_time_max_duration_seconds: 45 # Max processing time of 45 seconds per batch
action:
  # action configs
```

Note that the `datahub` configuration block is **required** to connect to your DataHub Cloud instance.
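The `failure_mode` described under Processing Guarantees above is configured at the pipeline level rather than inside the source `config` block. A hedged sketch, assuming the Actions framework's top-level `options` block (verify the exact key placement against your framework version):

```yml
name: "unique-action-name"
datahub:
  server: "https://<your-instance>.acryl.io"
  token: "<your-access-token>"
source:
  type: "datahub-cloud"
action:
  # action configs
options:
  # THROW terminates the pipeline before offsets are committed for a failed event;
  # CONTINUE (the default) logs the failed event to failed_events.log and keeps going.
  failure_mode: "THROW"
```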
+

<details>
<summary>View All Configuration Options</summary>

| Field | Required | Default | Description |
| ------------------------------------- | :------: | :---------------------------: | ----------------------------------------------------------------------------------------- |
| `topic` | ❌ | `PlatformEvent_v1` | The name of the topic from which events will be consumed. Do not change this unless you know what you're doing! |
| `lookback_days` | ❌ | None | Optional number of days to look back when polling for events. |
| `reset_offsets` | ❌ | `False` | When set to `True`, the consumer will ignore any stored offsets and start fresh. |
| `kill_after_idle_timeout` | ❌ | `False` | If `True`, stops the consumer after being idle for the specified timeout duration. |
| `idle_timeout_duration_seconds` | ❌ | `30` | Duration in seconds after which, if no events are received, the consumer is considered idle. |
| `event_processing_time_max_duration_seconds` | ❌ | `30` | Maximum allowed time in seconds for processing events before timing out. |
+ </details>

## FAQ

1. Is there a way to always start processing from the end of the topics on Actions start?

Yes, simply set `reset_offsets` to `True` for a single run of the action. Remember to disable this for subsequent runs if you don't want to miss any events!

2. What happens if I have multiple actions with the same pipeline `name` running? Can I scale out horizontally?

Today, the behavior is undefined when multiple actions run with the same pipeline `name` against the DataHub Cloud Event Source. All events must be processed by a single running action.

diff --git a/docs/api/datahub-apis.md b/docs/api/datahub-apis.md index c46aacde3a0cb5..62136406e6ff66 100644 --- a/docs/api/datahub-apis.md +++ b/docs/api/datahub-apis.md @@ -12,6 +12,10 @@ DataHub has several APIs to manipulate metadata on the platform. Here's the list In general, **Python and Java SDKs** are our most recommended tools for extending and customizing the behavior of your DataHub instance. We don't recommend using the **OpenAPI** directly, as it's more complex and less user-friendly than the other APIs. +:::warning +A note on async usage of APIs: DataHub's asynchronous APIs perform only basic schema validation when receiving MCP requests, similar to direct production to MCP Kafka topics. While requests must conform to the MCP schema to be accepted, actual processing happens later in the pipeline. Any processing failures that occur after the initial acceptance are captured in the Failed MCP topic, but these failures are not immediately surfaced to the API caller since they happen asynchronously. +::: + ## Python and Java SDK We offer SDKs for both Python and Java that provide full functionality when it comes to CRUD operations and any complex functionality you may want to build into DataHub. We recommend using the SDKs for most use cases. Here are examples of how to use the SDKs: diff --git a/docs/automations/bigquery-metadata-sync.md b/docs/automations/bigquery-metadata-sync.md index 3d896f57d341a4..78bdbdd453e9f7 100644 --- a/docs/automations/bigquery-metadata-sync.md +++ b/docs/automations/bigquery-metadata-sync.md @@ -67,7 +67,7 @@ Ensure your service account has the following permissions: | Propagation Type | DataHub Entity | BigQuery Entity | Note | | -------- | ------- | ------- | ------- | | Table Tags as Labels | [Table Tag](https://datahubproject.io/docs/tags/) | [BigQuery Label](https://cloud.google.com/bigquery/docs/labels-intro) | - |
-| Column Glossary Terms as Policy Tags | [Glossary Term on Table Column](https://datahubproject.io/docs/0.14.0/glossary/business-glossary/) | [Policy Tag](https://cloud.google.com/bigquery/docs/best-practices-policy-tags) | • Assigned Policy tags are created under DataHub taxonomy. • Only the latest assigned glossary term set as policy tag. BigQuery only supports one assigned policy tag. • Policy Tags are not synced to DataHub as glossary term from BigQuery. |
+| Column Glossary Terms as Policy Tags | [Glossary Term on Table Column](https://datahubproject.io/docs/glossary/business-glossary/) | [Policy Tag](https://cloud.google.com/bigquery/docs/best-practices-policy-tags) | • Assigned Policy tags are created under DataHub taxonomy. • Only the latest assigned glossary term set as policy tag. BigQuery only supports one assigned policy tag. • Policy Tags are not synced to DataHub as glossary term from BigQuery. |
| Table Descriptions | [Table Description](https://datahubproject.io/docs/api/tutorials/descriptions/) | Table Description | - |
| Column Descriptions | [Column Description](https://datahubproject.io/docs/api/tutorials/descriptions/) | Column Description | - |

diff --git a/docs/cli.md b/docs/cli.md index 1c38077d0d12ef..10f330a3ecfc6e 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -57,24 +57,30 @@ Options: --help Show this message and exit. Commands:
- actions
- check Helper commands for checking various aspects of DataHub.
- dataproduct A group of commands to interact with the DataProduct entity in DataHub.
- delete Delete metadata from datahub using a single urn or a combination of filters
- docker Helper commands for setting up and interacting with a local DataHub instance using Docker.
- exists A group of commands to check existence of entities in DataHub.
- get A group of commands to get metadata from DataHub.
- group A group of commands to interact with the Group entity in DataHub.
- ingest Ingest metadata into DataHub.
- init Configure which datahub instance to connect to
- lite A group of commands to work with a DataHub Lite instance
- migrate Helper commands for migrating metadata within DataHub.
- put A group of commands to put metadata in DataHub.
- state Managed state stored in DataHub by stateful ingestion.
- telemetry Toggle telemetry.
- timeline Get timeline for an entity based on certain categories
- user A group of commands to interact with the User entity in DataHub.
- version Print version number and exit.
+ actions
+ assertions A group of commands to interact with the Assertion entity in DataHub.
+ check Helper commands for checking various aspects of DataHub.
+ container A group of commands to interact with containers in DataHub.
+ datacontract A group of commands to interact with the DataContract entity in DataHub.
+ dataproduct A group of commands to interact with the DataProduct entity in DataHub.
+ dataset A group of commands to interact with the Dataset entity in DataHub.
+ delete Delete metadata from DataHub.
+ docker Helper commands for setting up and interacting with a local DataHub instance using Docker.
+ exists A group of commands to check existence of entities in DataHub.
+ forms A group of commands to interact with forms in DataHub.
+ get A group of commands to get metadata from DataHub.
+ group A group of commands to interact with the Group entity in DataHub.
+ ingest Ingest metadata into DataHub.
+ init Configure which datahub instance to connect to
+ lite A group of commands to work with a DataHub Lite instance
+ migrate Helper commands for migrating metadata within DataHub.
+ properties A group of commands to interact with structured properties in DataHub.
+ put A group of commands to put metadata in DataHub.
+ state Managed state stored in DataHub by stateful ingestion.
+ telemetry Toggle telemetry.
+ timeline Get timeline for an entity based on certain categories
+ user A group of commands to interact with the User entity in DataHub.
+ version Print version number and exit.
```

The top-level commands listed below are here mainly to give the reader a high-level picture of the kinds of things you can accomplish with the CLI.
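To get a feel for the grouped commands listed above, a couple of the read-only ones can be exercised directly. A small sketch using the standard sample dataset URN (substitute a URN that actually exists in your instance):

```shell
# Check whether an entity exists in DataHub
datahub exists --urn "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"

# Fetch the metadata aspects stored for the same entity as JSON
datahub get --urn "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"
```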
@@ -115,17 +121,17 @@ datahub ingest -c ./examples/recipes/example_to_datahub_rest.dhub.yaml --dry-run datahub ingest -c ./examples/recipes/example_to_datahub_rest.dhub.yaml -n ```

-#### ingest --list-source-runs
+#### ingest list-source-runs

-The `--list-source-runs` option of the `ingest` command lists the previous runs, displaying their run ID, source name,
+The `list-source-runs` subcommand of the `ingest` command lists the previous runs, displaying their run ID, source name,
start time, status, and source URN. This command allows you to filter results using the `--urn` option for URN-based filtering or the `--source` option to filter by source name (partial or complete matches are supported).

```shell
# List all ingestion runs
-datahub ingest --list-source-runs
+datahub ingest list-source-runs
# Filter runs by a source name containing "demo"
-datahub ingest --list-source-runs --source "demo"
+datahub ingest list-source-runs --source "demo"
```

#### ingest --preview

@@ -274,6 +280,18 @@ DATAHUB_TELEMETRY_TIMEOUT=10 DATAHUB_DEBUG=false ```

+### container
+
+A group of commands to interact with containers in DataHub.
+
+For example, you can use these to recursively apply a tag, domain, owner, or glossary term to all datasets in a container:
+```shell
+datahub container tag --container-urn "urn:li:container:0e9e46bd6d5cf645f33d5a8f0254bc2d" --tag-urn "urn:li:tag:tag1"
+datahub container domain --container-urn "urn:li:container:3f2effd1fbe154a4d60b597263a41e41" --domain-urn "urn:li:domain:ajsajo-b832-4ab3-8881-7ed5e991a44c"
+datahub container owner --container-urn "urn:li:container:3f2effd1fbe154a4d60b597263a41e41" --owner-urn "urn:li:corpGroup:eng@example.com"
+datahub container term --container-urn "urn:li:container:3f2effd1fbe154a4d60b597263a41e41" --term-urn "urn:li:term:PII"
+```
+
### check

The datahub package is composed of different plugins that allow you to connect to different metadata sources and ingest metadata from them.

diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index eb5a792216d981..945a00088f1f74 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -1,8 +1,3 @@ -# Known Issues - -- Async APIs - DataHub's asynchronous APIs perform only basic schema validation when receiving MCP requests, similar to direct production to MCP Kafka topics. While requests must conform to the MCP schema to be accepted, actual processing happens later in the pipeline. Any processing failures that occur after the initial acceptance are captured in the Failed MCP topic, but these failures are not immediately surfaced to the API caller since they happen asynchronously. - - # Updating DataHub