From 4448cf1f2d777c82d913e5ee0aeabd0e2785fad3 Mon Sep 17 00:00:00 2001 From: gaurav2733 <77378510+gaurav2733@users.noreply.github.com> Date: Tue, 26 Dec 2023 16:30:24 +0530 Subject: [PATCH 01/18] fix(ui/ingestion): add debounce on search on ingestion listing page (#9516) --- .../entity/shared/tabs/Dataset/Queries/utils/constants.ts | 1 + datahub-web-react/src/app/ingest/secret/SecretsList.tsx | 8 +++++++- .../src/app/ingest/source/IngestionSourceList.tsx | 8 +++++++- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Queries/utils/constants.ts b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Queries/utils/constants.ts index 5176c1207874c9..025705abc580ea 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Queries/utils/constants.ts +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Queries/utils/constants.ts @@ -16,5 +16,6 @@ export const DEFAULT_MAX_RECENT_QUERIES = 9; */ export const MAX_ROWS_BEFORE_DEBOUNCE = 50; export const HALF_SECOND_IN_MS = 500; +export const ONE_SECOND_IN_MS = 1000; export const ADD_UNAUTHORIZED_MESSAGE = 'You are not authorized to add Queries to this entity.'; diff --git a/datahub-web-react/src/app/ingest/secret/SecretsList.tsx b/datahub-web-react/src/app/ingest/secret/SecretsList.tsx index 2728fff0ccba34..1a960997e6beeb 100644 --- a/datahub-web-react/src/app/ingest/secret/SecretsList.tsx +++ b/datahub-web-react/src/app/ingest/secret/SecretsList.tsx @@ -1,5 +1,6 @@ import React, { useEffect, useState } from 'react'; import { Button, Empty, message, Modal, Pagination, Typography } from 'antd'; +import { debounce } from 'lodash'; import { DeleteOutlined, PlusOutlined } from '@ant-design/icons'; import * as QueryString from 'query-string'; import { useLocation } from 'react-router'; @@ -18,6 +19,7 @@ import { SearchBar } from '../../search/SearchBar'; import { useEntityRegistry } from '../../useEntityRegistry'; import { scrollToTop } from '../../shared/searchUtils'; import { addSecretToListSecretsCache, removeSecretFromListSecretsCache } from './cacheUtils'; +import { ONE_SECOND_IN_MS } from '../../entity/shared/tabs/Dataset/Queries/utils/constants'; const DeleteButtonContainer = styled.div` display: flex; @@ -84,6 +86,10 @@ export const SecretsList = () => { setPage(newPage); }; + const debouncedSetQuery = debounce((newQuery: string | undefined) => { + setQuery(newQuery); + }, ONE_SECOND_IN_MS); + const onSubmit = (state: SecretBuilderState, resetBuilderState: () => void) => { createSecretMutation({ variables: { @@ -199,7 +205,7 @@ export const SecretsList = () => { onSearch={() => null} onQueryChange={(q) => { setPage(1); - setQuery(q); + debouncedSetQuery(q); }} entityRegistry={entityRegistry} hideRecommendations diff --git a/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx b/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx index 6188845694f9eb..e6db6bfcc9a61b 100644 --- a/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx +++ b/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx @@ -1,5 +1,6 @@ import { PlusOutlined, RedoOutlined } from '@ant-design/icons'; import React, { useCallback, useEffect, useState } from 'react'; +import { debounce } from 'lodash'; import * as QueryString from 'query-string'; import { useLocation } from 'react-router'; import { Button, message, Modal, Pagination, Select } from 'antd'; @@ -30,6 +31,7 @@ import { INGESTION_CREATE_SOURCE_ID, INGESTION_REFRESH_SOURCES_ID, } from 
'../../onboarding/config/IngestionOnboardingConfig'; +import { ONE_SECOND_IN_MS } from '../../entity/shared/tabs/Dataset/Queries/utils/constants'; const PLACEHOLDER_URN = 'placeholder-urn'; @@ -133,6 +135,10 @@ export const IngestionSourceList = () => { setLastRefresh(new Date().getTime()); }, [refetch]); + const debouncedSetQuery = debounce((newQuery: string | undefined) => { + setQuery(newQuery); + }, ONE_SECOND_IN_MS); + function hasActiveExecution() { return !!filteredSources.find((source) => source.executions?.executionRequests.find((request) => isExecutionRequestActive(request)), @@ -401,7 +407,7 @@ export const IngestionSourceList = () => { onSearch={() => null} onQueryChange={(q) => { setPage(1); - setQuery(q); + debouncedSetQuery(q); }} entityRegistry={entityRegistry} hideRecommendations From d399a530576974da9beb1af24d7ea5f98922b6d3 Mon Sep 17 00:00:00 2001 From: kushagra-apptware <81357546+kushagra-apptware@users.noreply.github.com> Date: Tue, 26 Dec 2023 18:26:40 +0530 Subject: [PATCH 02/18] fix(ui): correct the color of edit links (#9517) --- .../entity/shared/tabs/Documentation/components/LinkList.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx index 1b5c3d54009da8..9f94a830ac1cfa 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/LinkList.tsx @@ -159,7 +159,7 @@ export const LinkList = ({ refetch }: LinkListProps) => { - + + } + trigger={['click']} + > + + ), }, From b7a0bbcb3d6000d3d9827ab19f13c3118d0bfc19 Mon Sep 17 00:00:00 2001 From: Fernando Marino` Date: Thu, 28 Dec 2023 01:24:25 +0100 Subject: [PATCH 08/18] feat(ingest/openapi): support proxies and alternate auth schemes (#9492) Co-authored-by: Fernando Marino Co-authored-by: Harshal Sheth --- .../src/datahub/ingestion/source/openapi.py | 41 +++++++++++++++---- .../ingestion/source/openapi_parser.py | 26 ++++++++---- 2 files changed, 51 insertions(+), 16 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/openapi.py b/metadata-ingestion/src/datahub/ingestion/source/openapi.py index 3925ba51c16dd9..ad62ef7362aebd 100755 --- a/metadata-ingestion/src/datahub/ingestion/source/openapi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/openapi.py @@ -52,6 +52,13 @@ class OpenApiConfig(ConfigModel): ignore_endpoints: list = Field(default=[], description="") username: str = Field(default="", description="") password: str = Field(default="", description="") + proxies: Optional[dict] = Field( + default=None, + description="Eg. " + "`{'http': 'http://10.10.1.10:3128', 'https': 'http://10.10.1.10:1080'}`." + "If authentication is required, add it to the proxy url directly e.g. 
" + "`http://user:pass@10.10.1.10:3128/`.", + ) forced_examples: dict = Field(default={}, description="") token: Optional[str] = Field(default=None, description="") get_token: dict = Field(default={}, description="") @@ -87,9 +94,13 @@ def get_swagger(self) -> Dict: password=self.password, tok_url=url4req, method=self.get_token["request_type"], + proxies=self.proxies, ) sw_dict = get_swag_json( - self.url, token=self.token, swagger_file=self.swagger_file + self.url, + token=self.token, + swagger_file=self.swagger_file, + proxies=self.proxies, ) # load the swagger file else: # using basic auth for accessing endpoints @@ -98,6 +109,7 @@ def get_swagger(self) -> Dict: username=self.username, password=self.password, swagger_file=self.swagger_file, + proxies=self.proxies, ) return sw_dict @@ -258,10 +270,15 @@ def get_workunits_internal(self) -> Iterable[ApiWorkUnit]: # noqa: C901 tot_url = clean_url(config.url + self.url_basepath + endpoint_k) if config.token: - response = request_call(tot_url, token=config.token) + response = request_call( + tot_url, token=config.token, proxies=config.proxies + ) else: response = request_call( - tot_url, username=config.username, password=config.password + tot_url, + username=config.username, + password=config.password, + proxies=config.proxies, ) if response.status_code == 200: fields2add, root_dataset_samples[dataset_name] = extract_fields( @@ -281,10 +298,15 @@ def get_workunits_internal(self) -> Iterable[ApiWorkUnit]: # noqa: C901 url_guess = try_guessing(endpoint_k, root_dataset_samples) tot_url = clean_url(config.url + self.url_basepath + url_guess) if config.token: - response = request_call(tot_url, token=config.token) + response = request_call( + tot_url, token=config.token, proxies=config.proxies + ) else: response = request_call( - tot_url, username=config.username, password=config.password + tot_url, + username=config.username, + password=config.password, + proxies=config.proxies, ) if response.status_code == 200: fields2add, _ = extract_fields(response, dataset_name) @@ -304,10 +326,15 @@ def get_workunits_internal(self) -> Iterable[ApiWorkUnit]: # noqa: C901 ) tot_url = clean_url(config.url + self.url_basepath + composed_url) if config.token: - response = request_call(tot_url, token=config.token) + response = request_call( + tot_url, token=config.token, proxies=config.proxies + ) else: response = request_call( - tot_url, username=config.username, password=config.password + tot_url, + username=config.username, + password=config.password, + proxies=config.proxies, ) if response.status_code == 200: fields2add, _ = extract_fields(response, dataset_name) diff --git a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py index 1ab40bc8be73d4..84bb3ad4526117 100755 --- a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py @@ -51,6 +51,7 @@ def request_call( token: Optional[str] = None, username: Optional[str] = None, password: Optional[str] = None, + proxies: Optional[dict] = None, ) -> requests.Response: headers = {"accept": "application/json"} @@ -60,8 +61,8 @@ def request_call( ) elif token is not None: - headers["Authorization"] = f"Bearer {token}" - return requests.get(url, headers=headers) + headers["Authorization"] = f"{token}" + return requests.get(url, proxies=proxies, headers=headers) else: return requests.get(url, headers=headers) @@ -72,12 +73,15 @@ def get_swag_json( username: 
Optional[str] = None, password: Optional[str] = None, swagger_file: str = "", + proxies: Optional[dict] = None, ) -> Dict: tot_url = url + swagger_file if token is not None: - response = request_call(url=tot_url, token=token) + response = request_call(url=tot_url, token=token, proxies=proxies) else: - response = request_call(url=tot_url, username=username, password=password) + response = request_call( + url=tot_url, username=username, password=password, proxies=proxies + ) if response.status_code != 200: raise Exception(f"Unable to retrieve {tot_url}, error {response.status_code}") @@ -251,7 +255,7 @@ def compose_url_attr(raw_url: str, attr_list: list) -> str: attr_list=["2",]) asd2 == "http://asd.com/2" """ - splitted = re.split(r"\{[^}]+\}", raw_url) + splitted = re.split(r"\{[^}]+}", raw_url) if splitted[-1] == "": # it can happen that the last element is empty splitted = splitted[:-1] composed_url = "" @@ -265,7 +269,7 @@ def compose_url_attr(raw_url: str, attr_list: list) -> str: def maybe_theres_simple_id(url: str) -> str: - dets = re.findall(r"(\{[^}]+\})", url) # searching the fields between parenthesis + dets = re.findall(r"(\{[^}]+})", url) # searching the fields between parenthesis if len(dets) == 0: return url dets_w_id = [det for det in dets if "id" in det] # the fields containing "id" @@ -349,6 +353,7 @@ def get_tok( password: str = "", tok_url: str = "", method: str = "post", + proxies: Optional[dict] = None, ) -> str: """ Trying to post username/password to get auth. @@ -357,12 +362,15 @@ def get_tok( url4req = url + tok_url if method == "post": # this will make a POST call with username and password - data = {"username": username, "password": password} + data = {"username": username, "password": password, "maxDuration": True} # url2post = url + "api/authenticate/" - response = requests.post(url4req, data=data) + response = requests.post(url4req, proxies=proxies, json=data) if response.status_code == 200: cont = json.loads(response.content) - token = cont["tokens"]["access"] + if "token" in cont: # other authentication scheme + token = cont["token"] + else: # works only for bearer authentication scheme + token = f"Bearer {cont['tokens']['access']}" elif method == "get": # this will make a GET call with username and password response = requests.get(url4req) From 754d8814477d050e907aeca6c561d98372b60dc5 Mon Sep 17 00:00:00 2001 From: cburroughs Date: Wed, 27 Dec 2023 19:33:41 -0500 Subject: [PATCH 09/18] build(ingest/feast): upgrade to latest feast version (#9439) --- metadata-ingestion/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 4632c20cd3b969..32d49ffc73fa34 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -316,7 +316,7 @@ # https://github.com/elastic/elasticsearch-py/issues/1639#issuecomment-883587433 "elasticsearch": {"elasticsearch==7.13.4"}, "feast": { - "feast~=0.31.1", + "feast~=0.34.1", "flask-openid>=1.3.0", # typeguard 3.x, released on 2023-03-14, seems to cause issues with Feast. 
"typeguard<3", From 9f79f44dd69a5a86864ccc31473305bdf1c2f4bb Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 27 Dec 2023 20:05:17 -0500 Subject: [PATCH 10/18] build: enable gradle caching (#9525) --- .github/workflows/airflow-plugin.yml | 1 + .github/workflows/build-and-test.yml | 1 + .github/workflows/check-datahub-jars.yml | 1 + .github/workflows/docker-unified.yml | 27 ++++++++++++--------- .github/workflows/documentation.yml | 1 + .github/workflows/metadata-ingestion.yml | 1 + .github/workflows/metadata-io.yml | 2 ++ .github/workflows/metadata-model.yml | 2 ++ .github/workflows/publish-datahub-jars.yml | 2 ++ .github/workflows/spark-smoke-test.yml | 2 ++ gradle.properties | 2 +- gradle/wrapper/gradle-wrapper.jar | Bin 61624 -> 61608 bytes gradlew | 4 +-- 13 files changed, 32 insertions(+), 14 deletions(-) diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml index 70816e5f093d13..97a0da8546ed17 100644 --- a/.github/workflows/airflow-plugin.yml +++ b/.github/workflows/airflow-plugin.yml @@ -55,6 +55,7 @@ jobs: with: distribution: "zulu" java-version: 17 + - uses: gradle/gradle-build-action@v2 - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index dab64cf2dca5e6..6daf1904ba3ae3 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -42,6 +42,7 @@ jobs: with: distribution: "zulu" java-version: 17 + - uses: gradle/gradle-build-action@v2 - uses: actions/setup-python@v4 with: python-version: "3.10" diff --git a/.github/workflows/check-datahub-jars.yml b/.github/workflows/check-datahub-jars.yml index 46d97ffec88618..556cd87f12df04 100644 --- a/.github/workflows/check-datahub-jars.yml +++ b/.github/workflows/check-datahub-jars.yml @@ -33,6 +33,7 @@ jobs: with: distribution: "zulu" java-version: 17 + - uses: gradle/gradle-build-action@v2 - uses: actions/setup-python@v4 with: python-version: "3.10" diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 7cef38b1cd47ce..454e7661402459 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -84,6 +84,7 @@ jobs: with: distribution: "zulu" java-version: 17 + - uses: gradle/gradle-build-action@v2 - name: Check out the repo uses: hsheth2/sane-checkout-action@v1 - name: Pre-build artifacts for docker image @@ -145,6 +146,7 @@ jobs: with: distribution: "zulu" java-version: 17 + - uses: gradle/gradle-build-action@v2 - name: Check out the repo uses: hsheth2/sane-checkout-action@v1 - name: Pre-build artifacts for docker image @@ -206,6 +208,7 @@ jobs: with: distribution: "zulu" java-version: 17 + - uses: gradle/gradle-build-action@v2 - name: Check out the repo uses: hsheth2/sane-checkout-action@v1 - name: Pre-build artifacts for docker image @@ -267,6 +270,7 @@ jobs: with: distribution: "zulu" java-version: 17 + - uses: gradle/gradle-build-action@v2 - name: Check out the repo uses: hsheth2/sane-checkout-action@v1 - name: Pre-build artifacts for docker image @@ -328,6 +332,7 @@ jobs: with: distribution: "zulu" java-version: 17 + - uses: gradle/gradle-build-action@v2 - name: Check out the repo uses: hsheth2/sane-checkout-action@v1 - name: Pre-build artifacts for docker image @@ -567,6 +572,7 @@ jobs: with: distribution: "zulu" java-version: 17 + - uses: gradle/gradle-build-action@v2 - name: Check out the repo uses: hsheth2/sane-checkout-action@v1 - uses: dorny/paths-filter@v2 @@ -653,6 +659,7 
@@ jobs: with: distribution: "zulu" java-version: 17 + - uses: gradle/gradle-build-action@v2 - name: Check out the repo uses: hsheth2/sane-checkout-action@v1 - uses: dorny/paths-filter@v2 @@ -731,12 +738,13 @@ jobs: strategy: fail-fast: false matrix: - test_strategy: [ - "no_cypress_suite0", - "no_cypress_suite1", - "cypress_suite1", - "cypress_rest" - ] + test_strategy: + [ + "no_cypress_suite0", + "no_cypress_suite1", + "cypress_suite1", + "cypress_rest", + ] needs: [ setup, @@ -760,6 +768,7 @@ jobs: with: distribution: "zulu" java-version: 17 + - uses: gradle/gradle-build-action@v2 - uses: actions/setup-python@v4 with: python-version: "3.10" @@ -904,11 +913,7 @@ jobs: deploy_datahub_head: name: Deploy to Datahub HEAD runs-on: ubuntu-latest - needs: - [ - setup, - smoke_test - ] + needs: [setup, smoke_test] steps: - uses: aws-actions/configure-aws-credentials@v1 if: ${{ needs.setup.outputs.publish != 'false' && github.repository_owner == 'datahub-project' && needs.setup.outputs.repository_name == 'datahub' }} diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 29953b8b70d911..e1671cc0219198 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -32,6 +32,7 @@ jobs: with: distribution: "zulu" java-version: 17 + - uses: gradle/gradle-build-action@v2 - uses: actions/setup-python@v4 with: python-version: "3.10" diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index 4e04fef3b3980b..af73db483f9aeb 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -49,6 +49,7 @@ jobs: with: distribution: "zulu" java-version: 17 + - uses: gradle/gradle-build-action@v2 - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: diff --git a/.github/workflows/metadata-io.yml b/.github/workflows/metadata-io.yml index 2188fcb07c77a6..96229642244b67 100644 --- a/.github/workflows/metadata-io.yml +++ b/.github/workflows/metadata-io.yml @@ -34,9 +34,11 @@ jobs: with: distribution: "zulu" java-version: 17 + - uses: gradle/gradle-build-action@v2 - uses: actions/setup-python@v4 with: python-version: "3.10" + cache: "pip" - name: Gradle build (and test) # there is some race condition in gradle build, which makes gradle never terminate in ~30% of the runs # running build first without datahub-web-react:yarnBuild and then with it is 100% stable diff --git a/.github/workflows/metadata-model.yml b/.github/workflows/metadata-model.yml index d0112f1b14e7af..265a66aa236ae9 100644 --- a/.github/workflows/metadata-model.yml +++ b/.github/workflows/metadata-model.yml @@ -34,10 +34,12 @@ jobs: with: distribution: "zulu" java-version: 17 + - uses: gradle/gradle-build-action@v2 - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: python-version: "3.10" + cache: "pip" - name: Install dependencies run: ./metadata-ingestion/scripts/install_deps.sh - name: Run model generation diff --git a/.github/workflows/publish-datahub-jars.yml b/.github/workflows/publish-datahub-jars.yml index 24d1c5436b3156..0a311be33cd30a 100644 --- a/.github/workflows/publish-datahub-jars.yml +++ b/.github/workflows/publish-datahub-jars.yml @@ -54,9 +54,11 @@ jobs: with: distribution: "zulu" java-version: 17 + - uses: gradle/gradle-build-action@v2 - uses: actions/setup-python@v4 with: python-version: "3.10" + cache: "pip" - name: checkout upstream repo run: | git remote add upstream https://github.com/datahub-project/datahub.git diff --git 
a/.github/workflows/spark-smoke-test.yml b/.github/workflows/spark-smoke-test.yml index 60e183cce5179c..94692bd3c2336a 100644 --- a/.github/workflows/spark-smoke-test.yml +++ b/.github/workflows/spark-smoke-test.yml @@ -35,9 +35,11 @@ jobs: with: distribution: "zulu" java-version: 17 + - uses: gradle/gradle-build-action@v2 - uses: actions/setup-python@v4 with: python-version: "3.10" + cache: "pip" - name: Install dependencies run: ./metadata-ingestion/scripts/install_deps.sh - name: Remove images diff --git a/gradle.properties b/gradle.properties index 1cd349344b432d..f410ff01bf397d 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,7 +1,7 @@ org.gradle.daemon=false org.gradle.configureondemand=true org.gradle.parallel=true -org.gradle.caching=false +org.gradle.caching=true # Increase gradle JVM memory to 3GB to allow tests to run locally org.gradle.jvmargs=-Xmx3000m diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index afba109285af78dbd2a1d187e33ac4f87c76e392..ccebba7710deaf9f98673a68957ea02138b60d0a 100644 GIT binary patch delta 11632
[binary delta data omitted]
delta 11565
[binary delta data omitted]
Date: Thu, 28 Dec 2023 04:06:41 -0500 Subject: [PATCH 11/18] chore(build): update base-requirements + add script for regeneration (#9524) --- .../base-requirements.txt | 317 +++++++++--------- .../regenerate-base-requirements.sh | 37 ++ 2 files changed, 195 insertions(+), 159 deletions(-) create mode 100755 docker/datahub-ingestion-base/regenerate-base-requirements.sh diff --git a/docker/datahub-ingestion-base/base-requirements.txt b/docker/datahub-ingestion-base/base-requirements.txt index 141382466ab9f6..90928759027942 100644 --- a/docker/datahub-ingestion-base/base-requirements.txt +++ b/docker/datahub-ingestion-base/base-requirements.txt @@ -1,149 +1,147 @@ -# Excluded for slim -# pyspark==3.0.3 -# pydeequ==1.0.1 - +# Generated requirements file. Run ./regenerate-base-requirements.sh to regenerate.
acryl-datahub-classify==0.0.8 -acryl-PyHive==0.6.14 -acryl-sqlglot==18.5.2.dev45 +acryl-PyHive==0.6.16 +acryl-sqlglot==20.4.1.dev14 aenum==3.1.15 -aiohttp==3.8.6 +aiohttp==3.9.1 aiosignal==1.3.1 -alembic==1.12.0 +alembic==1.13.1 altair==4.2.0 +annotated-types==0.6.0 anyio==3.7.1 -apache-airflow==2.7.2 -apache-airflow-providers-common-sql==1.7.2 -apache-airflow-providers-ftp==3.5.2 -apache-airflow-providers-http==4.5.2 -apache-airflow-providers-imap==3.3.2 -apache-airflow-providers-sqlite==3.4.3 -apispec==6.3.0 +apache-airflow==2.7.3 +apache-airflow-providers-common-sql==1.9.0 +apache-airflow-providers-ftp==3.7.0 +apache-airflow-providers-http==4.8.0 +apache-airflow-providers-imap==3.5.0 +apache-airflow-providers-sqlite==3.6.0 +apispec==6.3.1 appdirs==1.4.4 appnope==0.1.3 -argcomplete==3.1.2 +argcomplete==3.2.1 argon2-cffi==23.1.0 argon2-cffi-bindings==21.2.0 asgiref==3.7.2 asn1crypto==1.5.1 -asttokens==2.4.0 +asttokens==2.4.1 async-timeout==4.0.3 -asynch==0.2.2 +asynch==0.2.3 attrs==23.1.0 -avro==1.10.2 +avro==1.11.3 avro-gen3==0.7.11 -Babel==2.13.0 -backcall==0.2.0 +Babel==2.14.0 backoff==2.2.1 beautifulsoup4==4.12.2 bleach==6.1.0 -blinker==1.6.3 +blinker==1.7.0 blis==0.7.11 -boto3==1.28.62 -botocore==1.31.62 +boto3==1.34.8 +botocore==1.34.8 bowler==0.9.0 bracex==2.4 cached-property==1.5.2 cachelib==0.9.0 -cachetools==5.3.1 +cachetools==5.3.2 catalogue==2.0.10 -cattrs==23.1.2 -certifi==2023.7.22 +cattrs==23.2.3 +certifi==2023.11.17 cffi==1.16.0 chardet==5.2.0 -charset-normalizer==3.3.0 -ciso8601==2.3.0 +charset-normalizer==3.3.2 +ciso8601==2.3.1 click==8.1.7 click-default-group==1.2.4 click-spinner==0.1.10 clickclick==20.10.2 -clickhouse-cityhash==1.0.2.4 clickhouse-driver==0.2.6 clickhouse-sqlalchemy==0.2.4 -cloudpickle==2.2.1 +cloudpickle==3.0.0 colorama==0.4.6 colorlog==4.8.0 -comm==0.1.4 -confection==0.1.3 -ConfigUpdater==3.1.1 +comm==0.2.0 +confection==0.1.4 +ConfigUpdater==3.2 confluent-kafka==2.3.0 connexion==2.14.2 cron-descriptor==1.4.0 croniter==2.0.1 -cryptography==41.0.4 +cryptography==41.0.7 cx-Oracle==8.3.0 cymem==2.0.8 -dask==2023.9.3 +dask==2023.12.1 databricks-cli==0.18.0 databricks-dbapi==0.6.0 -databricks-sdk==0.10.0 +databricks-sdk==0.15.0 +databricks-sql-connector==2.9.3 debugpy==1.8.0 decorator==5.1.1 defusedxml==0.7.1 -deltalake==0.11.0 +deltalake==0.14.0 Deprecated==1.2.14 dill==0.3.7 dnspython==2.4.2 -docker==6.1.3 +docker==7.0.0 docutils==0.20.1 ecdsa==0.18.0 elasticsearch==7.13.4 email-validator==1.3.1 entrypoints==0.4 et-xmlfile==1.1.0 -exceptiongroup==1.1.3 -executing==2.0.0 -expandvars==0.11.0 -fastapi==0.103.2 -fastavro==1.8.4 -fastjsonschema==2.18.1 +exceptiongroup==1.2.0 +executing==2.0.1 +expandvars==0.12.0 +fastapi==0.108.0 +fastavro==1.9.2 +fastjsonschema==2.19.0 feast==0.31.1 -filelock==3.12.4 +filelock==3.13.1 fissix==21.11.13 Flask==2.2.5 flatdict==4.0.1 -frozenlist==1.4.0 -fsspec==2023.9.2 +frozenlist==1.4.1 +fsspec==2023.12.2 future==0.18.3 -GeoAlchemy2==0.14.1 -gitdb==4.0.10 -GitPython==3.1.37 -google-api-core==2.12.0 -google-auth==2.23.3 -google-cloud-appengine-logging==1.3.2 +GeoAlchemy2==0.14.3 +gitdb==4.0.11 +GitPython==3.1.40 +google-api-core==2.15.0 +google-auth==2.25.2 +google-cloud-appengine-logging==1.4.0 google-cloud-audit-log==0.2.5 -google-cloud-bigquery==3.12.0 -google-cloud-core==2.3.3 +google-cloud-bigquery==3.14.1 +google-cloud-core==2.4.1 google-cloud-datacatalog-lineage==0.2.2 google-cloud-logging==3.5.0 google-crc32c==1.5.0 google-re2==1.1 -google-resumable-media==2.6.0 -googleapis-common-protos==1.60.0 
+google-resumable-media==2.7.0 +googleapis-common-protos==1.62.0 gql==3.4.1 graphql-core==3.2.3 graphviz==0.20.1 great-expectations==0.15.50 -greenlet==3.0.0 -grpc-google-iam-v1==0.12.6 -grpcio==1.59.0 -grpcio-reflection==1.59.0 -grpcio-status==1.59.0 -grpcio-tools==1.59.0 +greenlet==3.0.3 +grpc-google-iam-v1==0.13.0 +grpcio==1.60.0 +grpcio-reflection==1.60.0 +grpcio-status==1.60.0 +grpcio-tools==1.60.0 gssapi==1.8.3 gunicorn==21.2.0 h11==0.14.0 -httpcore==0.18.0 -httptools==0.6.0 -httpx==0.25.0 +hdbcli==2.19.20 +httpcore==1.0.2 +httptools==0.6.1 +httpx==0.26.0 humanfriendly==10.0 -idna==3.4 +idna==3.6 ijson==3.2.3 -importlib-metadata==6.8.0 -importlib-resources==6.1.0 +importlib-metadata==6.11.0 +importlib-resources==6.1.1 inflection==0.5.1 ipaddress==1.0.23 ipykernel==6.17.1 -ipython==8.16.1 +ipython==8.19.0 ipython-genutils==0.2.0 ipywidgets==8.1.1 iso3166==2.1.1 @@ -152,34 +150,34 @@ itsdangerous==2.1.2 jedi==0.19.1 Jinja2==3.1.2 jmespath==1.0.1 -JPype1==1.4.1 +JPype1==1.5.0 jsonlines==4.0.0 jsonpatch==1.33 jsonpointer==2.4 jsonref==1.1.0 -jsonschema==4.19.1 -jsonschema-specifications==2023.7.1 +jsonschema==4.20.0 +jsonschema-specifications==2023.12.1 jupyter-server==1.24.0 jupyter_client==7.4.9 jupyter_core==4.12.0 -jupyterlab-pygments==0.2.2 jupyterlab-widgets==3.0.9 +jupyterlab_pygments==0.3.0 langcodes==3.3.0 lark==1.1.4 -lazy-object-proxy==1.9.0 +lazy-object-proxy==1.10.0 leb128==1.0.5 -limits==3.6.0 +limits==3.7.0 linear-tsv==1.1.0 linkify-it-py==2.0.2 -lkml==1.3.1 +lkml==1.3.3 locket==1.0.0 lockfile==0.12.2 looker-sdk==23.0.0 -lxml==4.9.3 +lxml==4.9.4 lz4==4.3.2 -makefun==1.15.1 -Mako==1.2.4 -Markdown==3.5 +makefun==1.15.2 +Mako==1.3.0 +Markdown==3.5.1 markdown-it-py==3.0.0 MarkupSafe==2.1.3 marshmallow==3.20.1 @@ -190,26 +188,26 @@ mdit-py-plugins==0.4.0 mdurl==0.1.2 mistune==3.0.2 mixpanel==4.10.0 -mlflow-skinny==2.7.1 +mlflow-skinny==2.9.2 mmh3==4.0.1 mmhash3==3.0.1 more-itertools==10.1.0 moreorless==0.4.0 -moto==4.2.5 +moto==4.2.12 msal==1.22.0 multidict==6.0.4 murmurhash==1.0.10 -mypy==1.6.0 +mypy==1.8.0 mypy-extensions==1.0.0 nbclassic==1.0.0 nbclient==0.6.3 -nbconvert==7.9.2 +nbconvert==7.13.1 nbformat==5.9.1 nest-asyncio==1.5.8 -networkx==3.1 +networkx==3.2.1 notebook==6.5.6 notebook_shim==0.2.3 -numpy==1.26.0 +numpy==1.26.2 oauthlib==3.2.2 okta==1.7.0 openlineage-airflow==1.2.0 @@ -217,110 +215,107 @@ openlineage-integration-common==1.2.0 openlineage-python==1.2.0 openlineage_sql==1.2.0 openpyxl==3.1.2 -opentelemetry-api==1.20.0 -opentelemetry-exporter-otlp==1.20.0 -opentelemetry-exporter-otlp-proto-common==1.20.0 -opentelemetry-exporter-otlp-proto-grpc==1.20.0 -opentelemetry-exporter-otlp-proto-http==1.20.0 -opentelemetry-proto==1.20.0 -opentelemetry-sdk==1.20.0 -opentelemetry-semantic-conventions==0.41b0 +opentelemetry-api==1.22.0 +opentelemetry-exporter-otlp==1.22.0 +opentelemetry-exporter-otlp-proto-common==1.22.0 +opentelemetry-exporter-otlp-proto-grpc==1.22.0 +opentelemetry-exporter-otlp-proto-http==1.22.0 +opentelemetry-proto==1.22.0 +opentelemetry-sdk==1.22.0 +opentelemetry-semantic-conventions==0.43b0 ordered-set==4.1.0 -oscrypto==1.3.0 packaging==23.2 pandas==1.5.3 pandavro==1.5.2 pandocfilters==1.5.0 -parse==1.19.1 +parse==1.20.0 parso==0.8.3 partd==1.4.1 -pathspec==0.11.2 -pathy==0.10.2 +pathspec==0.12.1 +pathy==0.10.3 pendulum==2.1.2 -pexpect==4.8.0 +pexpect==4.9.0 phonenumbers==8.13.0 -pickleshare==0.7.5 platformdirs==3.11.0 pluggy==1.3.0 preshed==3.0.9 prison==0.2.1 -progressbar2==4.2.0 -prometheus-client==0.17.1 -prompt-toolkit==3.0.39 
-proto-plus==1.22.3 -protobuf==4.24.4 -psutil==5.9.5 +progressbar2==4.3.2 +prometheus-client==0.19.0 +prompt-toolkit==3.0.43 +proto-plus==1.23.0 +protobuf==4.25.1 +psutil==5.9.7 psycopg2-binary==2.9.9 ptyprocess==0.7.0 pure-eval==0.2.2 pure-sasl==0.6.2 -py-partiql-parser==0.3.7 +py-partiql-parser==0.5.0 pyarrow==11.0.0 -pyasn1==0.5.0 +pyasn1==0.5.1 pyasn1-modules==0.3.0 -pyathena==2.4.1 -pycountry==22.3.5 +pyathena==2.25.2 +pycountry==23.12.11 pycparser==2.21 pycryptodome==3.19.0 -pycryptodomex==3.19.0 pydantic==1.10.13 +pydantic_core==2.14.6 pydash==7.0.6 -pydruid==0.6.5 -Pygments==2.16.1 +pydruid==0.6.6 +Pygments==2.17.2 pyiceberg==0.4.0 -pymongo==4.5.0 +pymongo==4.6.1 PyMySQL==1.1.0 -pyOpenSSL==23.2.0 +pyOpenSSL==23.3.0 pyparsing==3.0.9 pyspnego==0.10.2 python-daemon==3.0.1 python-dateutil==2.8.2 python-dotenv==1.0.0 python-jose==3.3.0 -python-ldap==3.4.3 +python-ldap==3.4.4 python-nvd3==0.15.0 python-slugify==8.0.1 python-stdnum==1.19 -python-tds==1.13.0 +python-tds==1.14.0 python-utils==3.8.1 python3-openid==3.2.0 pytz==2023.3.post1 pytzdata==2020.1 PyYAML==6.0.1 pyzmq==24.0.1 -ratelimiter==1.2.0.post0 redash-toolbelt==0.1.9 -redshift-connector==2.0.914 -referencing==0.30.2 -regex==2023.10.3 +redshift-connector==2.0.918 +referencing==0.32.0 +regex==2023.12.25 requests==2.31.0 requests-file==1.5.1 requests-gssapi==1.2.3 requests-ntlm==1.2.0 requests-toolbelt==0.10.1 -responses==0.23.3 +responses==0.24.1 rfc3339-validator==0.1.4 rfc3986==2.0.0 -rich==13.6.0 -rich-argparse==1.3.0 -rpds-py==0.10.6 +rich==13.7.0 +rich-argparse==1.4.0 +rpds-py==0.15.2 rsa==4.9 ruamel.yaml==0.17.17 ruamel.yaml.clib==0.2.8 -s3transfer==0.7.0 -schwifty==2023.9.0 -scipy==1.11.3 +s3transfer==0.10.0 +schwifty==2023.11.2 +scipy==1.11.4 scramp==1.4.4 Send2Trash==1.8.2 -sentry-sdk==1.32.0 +sentry-sdk==1.39.1 setproctitle==1.3.3 simple-salesforce==1.12.5 six==1.16.0 smart-open==6.4.0 smmap==5.0.1 sniffio==1.3.0 -snowflake-connector-python==3.2.1 -snowflake-sqlalchemy==1.5.0 +snowflake-connector-python==3.6.0 +snowflake-sqlalchemy==1.5.1 sortedcontainers==2.4.0 soupsieve==2.5 spacy==3.4.3 @@ -328,67 +323,71 @@ spacy-legacy==3.0.12 spacy-loggers==1.0.5 sql-metadata==2.2.2 SQLAlchemy==1.4.44 -sqlalchemy-bigquery==1.8.0 -SQLAlchemy-JSONField==1.0.1.post0 +sqlalchemy-bigquery==1.9.0 +sqlalchemy-hana==1.1.1 +SQLAlchemy-JSONField==1.0.2 sqlalchemy-pytds==0.3.5 sqlalchemy-redshift==0.8.14 SQLAlchemy-Utils==0.41.1 -sqlalchemy2-stubs==0.0.2a35 +sqlalchemy2-stubs==0.0.2a37 sqllineage==1.3.8 sqlparse==0.4.4 srsly==2.4.8 stack-data==0.6.3 -starlette==0.27.0 +starlette==0.32.0.post1 strictyaml==1.7.3 tableauserverclient==0.25 tableschema==1.20.2 tabulate==0.9.0 tabulator==1.53.5 tenacity==8.2.3 -termcolor==2.3.0 -terminado==0.17.1 +teradatasql==20.0.0.2 +teradatasqlalchemy==17.20.0.0 +termcolor==2.4.0 +terminado==0.18.0 text-unidecode==1.3 thinc==8.1.12 -thrift==0.13.0 +thrift==0.16.0 thrift-sasl==0.4.3 tinycss2==1.2.1 toml==0.10.2 tomli==2.0.1 -tomlkit==0.12.1 +tomlkit==0.12.3 toolz==0.12.0 -tornado==6.3.3 +tornado==6.4 tqdm==4.66.1 traitlets==5.2.1.post0 trino==0.327.0 typeguard==2.13.3 typer==0.7.0 -types-PyYAML==6.0.12.12 typing-inspect==0.9.0 -typing_extensions==4.8.0 -tzlocal==5.1 +typing_extensions==4.9.0 +tzlocal==5.2 uc-micro-py==1.0.2 -ujson==5.8.0 +ujson==5.9.0 unicodecsv==0.14.1 -urllib3==1.26.17 -uvicorn==0.23.2 -uvloop==0.17.0 -vertica-python==1.3.5 -vertica-sqlalchemy-dialect==0.0.8 +universal-pathlib==0.1.4 +urllib3==1.26.18 +uvicorn==0.25.0 +uvloop==0.19.0 +vertica-python==1.3.8 
+vertica-sqlalchemy-dialect==0.0.8.1 vininfo==1.7.0 volatile==2.1.0 wasabi==0.10.1 -watchfiles==0.20.0 +watchfiles==0.21.0 wcmatch==8.5 -wcwidth==0.2.8 +wcwidth==0.2.12 webencodings==0.5.1 -websocket-client==1.6.4 -websockets==11.0.3 +websocket-client==1.7.0 +websockets==12.0 Werkzeug==2.2.3 widgetsnbextension==4.0.9 -wrapt==1.15.0 -WTForms==3.1.0 +wrapt==1.16.0 +WTForms==3.0.1 xlrd==2.0.1 xmltodict==0.13.0 -yarl==1.9.2 +yarl==1.9.4 zeep==4.2.1 -zstd==1.5.5.1 \ No newline at end of file +zipp==3.17.0 +zstd==1.5.5.1 diff --git a/docker/datahub-ingestion-base/regenerate-base-requirements.sh b/docker/datahub-ingestion-base/regenerate-base-requirements.sh new file mode 100755 index 00000000000000..6fb331afa484a3 --- /dev/null +++ b/docker/datahub-ingestion-base/regenerate-base-requirements.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +# This script is used to regenerate the base-requirements.txt file + +set -euxo pipefail +cd "$( dirname "${BASH_SOURCE[0]}" )" + +SCRIPT_NAME=$(basename "$0") +DATAHUB_DIR=$(pwd)/../.. + +# Create a virtualenv. +VENV_DIR=$(mktemp -d) +python -c "import sys; assert sys.version_info >= (3, 9), 'Python 3.9 or higher is required.'" +python -m venv $VENV_DIR +source $VENV_DIR/bin/activate +pip install --upgrade pip setuptools wheel +echo "Using virtualenv at $VENV_DIR" + +# Install stuff. +pushd $DATAHUB_DIR/metadata-ingestion +pip install -e . +pip install -e '../metadata-ingestion-modules/airflow-plugin/[plugin-v2]' +pip install -e '.[all]' +popd + +# Generate the requirements file. +# Removing Flask deps due as per https://github.com/datahub-project/datahub/pull/6867/files +# Removing py4j and PyJWT due to https://github.com/datahub-project/datahub/pull/6868/files +# Removing pyspark and pydeequ because we don't want them in the slim image, so they can be added separately. +# TODO: It's unclear if these removals are still actually needed. +echo "# Generated requirements file. Run ./$SCRIPT_NAME to regenerate." > base-requirements.txt +pip freeze \ + | grep -v -E "^-e" \ + | grep -v "Flask-" \ + | grep -v -E "(py4j|PyJWT)==" \ + | grep -v -E "(pyspark|pydeequ)==" \ + >> base-requirements.txt From 4efa46f8c91dfdedc21b7081143d196c7a0be0da Mon Sep 17 00:00:00 2001 From: gaurav2733 <77378510+gaurav2733@users.noreply.github.com> Date: Thu, 28 Dec 2023 15:05:14 +0530 Subject: [PATCH 12/18] test(cypress/users): add automatic reset password test (#9515) --- .../src/app/identity/user/UserListItem.tsx | 17 ++- .../app/identity/user/ViewResetTokenModal.tsx | 7 +- .../cypress/e2e/mutations/add_users.js | 135 +++++++++++++----- 3 files changed, 114 insertions(+), 45 deletions(-) diff --git a/datahub-web-react/src/app/identity/user/UserListItem.tsx b/datahub-web-react/src/app/identity/user/UserListItem.tsx index 69b8a6c2d1355f..8ad3d7d93d6573 100644 --- a/datahub-web-react/src/app/identity/user/UserListItem.tsx +++ b/datahub-web-react/src/app/identity/user/UserListItem.tsx @@ -98,8 +98,8 @@ export default function UserListItem({ user, canManageUserCredentials, selectRol
{displayName}
- [markup stripped in source]
- [markup stripped in source] {user.username}
+ [markup stripped in source]
{userStatus && ( @@ -121,8 +121,12 @@ export default function UserListItem({ user, canManageUserCredentials, selectRol trigger={['click']} overlay={ - setIsViewingResetToken(true)}> -   Reset user password + setIsViewingResetToken(true)} + data-testid="reset-menu-item" + > +   Reset user password  Delete @@ -130,7 +134,10 @@ export default function UserListItem({ user, canManageUserCredentials, selectRol } > - + Generate a new reset link! Note, any old links will cease to be active. - + diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/add_users.js b/smoke-test/tests/cypress/cypress/e2e/mutations/add_users.js index e19c6065d42743..ba225ba37884ba 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/add_users.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/add_users.js @@ -1,47 +1,104 @@ const tryToSignUp = () => { - let number = Math.floor(Math.random() * 100000); - let name = `Example Name ${number}`; - cy.enterTextInTestId("email", `example${number}@example.com`); - cy.enterTextInTestId("name", name); - cy.enterTextInTestId("password", "Example password"); - cy.enterTextInTestId("confirmPassword", "Example password"); - - cy.mouseover("#title").click(); - cy.waitTextVisible("Other").click(); - - cy.get("[type=submit]").click(); - return name; + let number = Math.floor(Math.random() * 100000); + let name = `Example Name ${number}`; + let email = `example${number}@example.com`; + cy.enterTextInTestId("email", email); + cy.enterTextInTestId("name", name); + cy.enterTextInTestId("password", "Example password"); + cy.enterTextInTestId("confirmPassword", "Example password"); + + cy.mouseover("#title").click(); + cy.waitTextVisible("Other").click(); + + cy.get("[type=submit]").click(); + return { name, email }; }; describe("add_user", () => { - it("go to user link and invite a user", () => { - cy.login(); + let registeredEmail = ""; + it("go to user link and invite a user", () => { + cy.login(); + + cy.visit("/settings/identities/users"); + cy.waitTextVisible("Invite Users"); + cy.clickOptionWithText("Invite Users"); + + cy.waitTextVisible(/signup\?invite_token=\w{32}/) + .then(($elem) => { + const inviteLink = $elem.text(); + cy.log(inviteLink); cy.visit("/settings/identities/users"); - cy.waitTextVisible("Invite Users"); - - cy.clickOptionWithText("Invite Users"); - - cy.waitTextVisible(/signup\?invite_token=\w{32}/).then(($elem) => { - const inviteLink = $elem.text(); - cy.log(inviteLink); - cy.visit("/settings/identities/users"); - cy.logout(); - cy.visit(inviteLink); - let name = tryToSignUp(); - cy.waitTextVisible("Welcome to DataHub"); - cy.hideOnboardingTour(); - cy.waitTextVisible(name); - }).then(() => { - cy.logout(); - cy.visit("/signup?invite_token=bad_token"); - tryToSignUp(); - cy.waitTextVisible("Failed to log in! An unexpected error occurred."); - }); + cy.logout(); + cy.visit(inviteLink); + const { name, email } = tryToSignUp(); + registeredEmail = email; + cy.waitTextVisible("Welcome to DataHub"); + cy.hideOnboardingTour(); + cy.waitTextVisible(name); + }) + .then(() => { + cy.logout(); + cy.visit("/signup?invite_token=bad_token"); + tryToSignUp(); + cy.waitTextVisible("Failed to log in! 
An unexpected error occurred."); + }); + }); + + it("Verify you can’t generate a reset password link for a non-native user", () => { + cy.login(); + cy.visit("/settings/identities/users"); + cy.waitTextVisible("Invite Users"); + cy.get("[data-testid=userItem-non-native]").first().click(); + cy.get('[data-testid="reset-menu-item"]').should( + "have.attr", + "aria-disabled", + "true" + ); + }); + + it("Generate a reset password link for a native user", () => { + cy.login(); + cy.visit("/settings/identities/users"); + cy.waitTextVisible("Invite Users"); + cy.get(`[data-testid="email-native"]`) + .contains(registeredEmail) + .should("exist") + .parents(".ant-list-item") + .find('[data-testid="userItem-native"]') + .should("be.visible") + .click(); + + cy.get("[data-testid=resetButton]").first().click(); + cy.get("[data-testid=refreshButton]").click(); + cy.waitTextVisible("Generated new link to reset credentials"); + + cy.window().then((win) => { + cy.stub(win, "prompt"); }); -}); + cy.get(".ant-typography-copy").should("be.visible").click(); + cy.get(".ant-modal-close").should("be.visible").click(); -// Verify you can’t generate a reset password link for a non-native user (root, for example) -// Generate a reset password link for a native user -// Log out, then verify that using a bad reset token in the URL doesn’t allow you to reset password -// Use the correct reset link to reset native user credentials \ No newline at end of file + cy.waitTextVisible(/reset\?reset_token=\w{32}/) + .then(($elem) => { + const inviteLink = $elem.text(); + cy.logout(); + cy.visit(inviteLink); + cy.enterTextInTestId("email", registeredEmail); + cy.enterTextInTestId("password", "Example Reset Password"); + cy.enterTextInTestId("confirmPassword", "Example Reset Password"); + cy.get("[type=submit]").click(); + cy.waitTextVisible("Welcome back"); + cy.hideOnboardingTour(); + }) + .then(() => { + cy.logout(); + cy.visit("/reset?reset_token=bad_token"); + cy.enterTextInTestId("email", registeredEmail); + cy.enterTextInTestId("password", "Example Reset Password"); + cy.enterTextInTestId("confirmPassword", "Example Reset Password"); + cy.get("[type=submit]").click(); + cy.waitTextVisible("Failed to log in!"); + }); + }); +}); From 3635c1c2213cfb8421d89b7cc106ab236d72c7ec Mon Sep 17 00:00:00 2001 From: Shubham Jagtap <132359390+shubhamjagtap639@users.noreply.github.com> Date: Thu, 28 Dec 2023 15:24:26 +0530 Subject: [PATCH 13/18] feat(ingestion/bigquery): Use sqlglot_lineage for usage and add more perf timers (#9247) Co-authored-by: Andrew Sikowitz --- metadata-ingestion/setup.py | 2 - .../ingestion/source/bigquery_v2/bigquery.py | 22 +- .../source/bigquery_v2/bigquery_audit.py | 16 +- .../source/bigquery_v2/bigquery_config.py | 5 + .../source/bigquery_v2/bigquery_report.py | 12 +- .../ingestion/source/bigquery_v2/usage.py | 86 ++--- .../datahub/utilities/bigquery_sql_parser.py | 92 ----- .../src/datahub/utilities/sqlglot_lineage.py | 8 +- .../bigquery/test_bigquery_usage.py | 8 +- .../tests/unit/test_bigquery_sql_lineage.py | 66 +++- .../tests/unit/test_bigquery_sql_parser.py | 327 ------------------ .../tests/unit/test_bigquery_usage.py | 14 +- .../unit/test_bigqueryv2_usage_source.py | 6 +- 13 files changed, 159 insertions(+), 505 deletions(-) delete mode 100644 metadata-ingestion/src/datahub/utilities/bigquery_sql_parser.py delete mode 100644 metadata-ingestion/tests/unit/test_bigquery_sql_parser.py diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 32d49ffc73fa34..8e4791e253c7cf 100644 --- 
a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -295,8 +295,6 @@ "bigquery": sql_common | bigquery_common | { - # TODO: I doubt we need all three sql parsing libraries. - *sqllineage_lib, *sqlglot_lib, "sqlalchemy-bigquery>=1.4.1", "google-cloud-datacatalog-lineage==0.2.2", diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 9813945683289c..3704eae96aece0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -221,6 +221,7 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): self.bigquery_data_dictionary = BigQuerySchemaApi( self.report.schema_api_perf, self.config.get_bigquery_client() ) + self.sql_parser_schema_resolver = self._init_schema_resolver() redundant_lineage_run_skip_handler: Optional[ RedundantLineageRunSkipHandler @@ -253,6 +254,7 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): self.usage_extractor = BigQueryUsageExtractor( config, self.report, + schema_resolver=self.sql_parser_schema_resolver, dataset_urn_builder=self.gen_dataset_urn_from_ref, redundant_run_skip_handler=redundant_usage_run_skip_handler, ) @@ -283,8 +285,6 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): # Maps view ref -> actual sql self.view_definitions: FileBackedDict[str] = FileBackedDict() - self.sql_parser_schema_resolver = self._init_schema_resolver() - self.add_config_to_report() atexit.register(cleanup, config) @@ -371,7 +371,10 @@ def usage_capability_test( report: BigQueryV2Report, ) -> CapabilityReport: usage_extractor = BigQueryUsageExtractor( - connection_conf, report, lambda ref: "" + connection_conf, + report, + schema_resolver=SchemaResolver(platform="bigquery"), + dataset_urn_builder=lambda ref: "", ) for project_id in project_ids: try: @@ -447,7 +450,9 @@ def _init_schema_resolver(self) -> SchemaResolver: self.config.lineage_parse_view_ddl or self.config.lineage_use_sql_parser ) schema_ingestion_enabled = ( - self.config.include_views and self.config.include_tables + self.config.include_schema_metadata + and self.config.include_tables + and self.config.include_views ) if schema_resolution_required and not schema_ingestion_enabled: @@ -545,10 +550,11 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: if not projects: return - for project_id in projects: - self.report.set_ingestion_stage(project_id.id, METADATA_EXTRACTION) - logger.info(f"Processing project: {project_id.id}") - yield from self._process_project(project_id) + if self.config.include_schema_metadata: + for project_id in projects: + self.report.set_ingestion_stage(project_id.id, METADATA_EXTRACTION) + logger.info(f"Processing project: {project_id.id}") + yield from self._process_project(project_id) if self.config.include_usage_statistics: yield from self.usage_extractor.get_usage_workunits( diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py index 55366d6c57cf83..8cef10ca234481 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py @@ -12,6 +12,7 @@ get_first_missing_key, get_first_missing_key_any, ) +from datahub.utilities.urns.dataset_urn import DatasetUrn AuditLogEntry = Any @@ 
-178,6 +179,17 @@ def from_string_name(cls, ref: str) -> "BigQueryTableRef": raise ValueError(f"invalid BigQuery table reference: {ref}") return cls(BigqueryTableIdentifier(parts[1], parts[3], parts[5])) + @classmethod + def from_urn(cls, urn: str) -> "BigQueryTableRef": + """Raises: ValueError if urn is not a valid BigQuery table URN.""" + dataset_urn = DatasetUrn.create_from_string(urn) + split = dataset_urn.get_dataset_name().rsplit(".", 3) + if len(split) == 3: + project, dataset, table = split + else: + _, project, dataset, table = split + return cls(BigqueryTableIdentifier(project, dataset, table)) + def is_temporary_table(self, prefixes: List[str]) -> bool: for prefix in prefixes: if self.table_identifier.dataset.startswith(prefix): @@ -566,7 +578,7 @@ def from_query_event( query_event: QueryEvent, debug_include_full_payloads: bool = False, ) -> "ReadEvent": - readEvent = ReadEvent( + return ReadEvent( actor_email=query_event.actor_email, timestamp=query_event.timestamp, resource=read_resource, @@ -577,8 +589,6 @@ def from_query_event( from_query=True, ) - return readEvent - @classmethod def from_exported_bigquery_audit_metadata( cls, row: BigQueryAuditMetadata, debug_include_full_payloads: bool = False diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index c13b08a6d9656b..58f2a600c2ff7d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -94,6 +94,11 @@ class BigQueryV2Config( description="Regex patterns for project_id to filter in ingestion.", ) + include_schema_metadata: bool = Field( + default=True, + description="Whether to ingest the BigQuery schema, i.e. 
projects, schemas, tables, and views.", + ) + usage: BigQueryUsageConfig = Field( default=BigQueryUsageConfig(), description="Usage related configs" ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py index 9d92b011ee2856..69913b383af874 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py @@ -33,6 +33,13 @@ class BigQueryAuditLogApiPerfReport(Report): list_log_entries: PerfTimer = field(default_factory=PerfTimer) +@dataclass +class BigQueryProcessingPerfReport(Report): + sql_parsing_sec: PerfTimer = field(default_factory=PerfTimer) + store_usage_event_sec: PerfTimer = field(default_factory=PerfTimer) + usage_state_size: Optional[str] = None + + @dataclass class BigQueryV2Report(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowReport): num_total_lineage_entries: TopKDict[str, int] = field(default_factory=TopKDict) @@ -120,8 +127,6 @@ class BigQueryV2Report(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowR read_reasons_stat: Counter[str] = field(default_factory=collections.Counter) operation_types_stat: Counter[str] = field(default_factory=collections.Counter) - usage_state_size: Optional[str] = None - exclude_empty_projects: Optional[bool] = None schema_api_perf: BigQuerySchemaApiPerfReport = field( @@ -130,6 +135,9 @@ class BigQueryV2Report(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowR audit_log_api_perf: BigQueryAuditLogApiPerfReport = field( default_factory=BigQueryAuditLogApiPerfReport ) + processing_perf: BigQueryProcessingPerfReport = field( + default_factory=BigQueryProcessingPerfReport + ) lineage_start_time: Optional[datetime] = None lineage_end_time: Optional[datetime] = None diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py index 65b559550ffc59..ccc64184f3346f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/usage.py @@ -35,7 +35,6 @@ AuditEvent, AuditLogEntry, BigQueryAuditMetadata, - BigqueryTableIdentifier, BigQueryTableRef, QueryEvent, ReadEvent, @@ -60,9 +59,9 @@ USAGE_EXTRACTION_USAGE_AGGREGATION, ) from datahub.metadata.schema_classes import OperationClass, OperationTypeClass -from datahub.utilities.bigquery_sql_parser import BigQuerySQLParser from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedDict from datahub.utilities.perf_timer import PerfTimer +from datahub.utilities.sqlglot_lineage import SchemaResolver, sqlglot_lineage logger: logging.Logger = logging.getLogger(__name__) @@ -284,7 +283,7 @@ def delete_original_read_events_for_view_query_events(self) -> None: ) def report_disk_usage(self, report: BigQueryV2Report) -> None: - report.usage_state_size = str( + report.processing_perf.usage_state_size = str( { "main": humanfriendly.format_size(os.path.getsize(self.conn.filename)), "queries": humanfriendly.format_size( @@ -310,11 +309,14 @@ def __init__( self, config: BigQueryV2Config, report: BigQueryV2Report, + *, + schema_resolver: SchemaResolver, dataset_urn_builder: Callable[[BigQueryTableRef], str], redundant_run_skip_handler: Optional[RedundantUsageRunSkipHandler] = None, ): self.config: BigQueryV2Config = config self.report: BigQueryV2Report = report + 
self.schema_resolver = schema_resolver self.dataset_urn_builder = dataset_urn_builder # Replace hash of query with uuid if there are hash conflicts self.uuid_to_query: Dict[str, str] = {} @@ -415,10 +417,11 @@ def generate_read_events_from_query( ) -> Iterable[AuditEvent]: try: tables = self.get_tables_from_query( - query_event_on_view.project_id, query_event_on_view.query, + default_project=query_event_on_view.project_id, + default_dataset=query_event_on_view.default_dataset, ) - assert tables is not None and len(tables) != 0 + assert len(tables) != 0 for table in tables: yield AuditEvent.create( ReadEvent.from_query_event(table, query_event_on_view) @@ -462,12 +465,15 @@ def _ingest_events( self.report.num_view_query_events += 1 for new_event in self.generate_read_events_from_query(query_event): - num_generated += self._store_usage_event( - new_event, usage_state, table_refs - ) - num_aggregated += self._store_usage_event( - audit_event, usage_state, table_refs - ) + with self.report.processing_perf.store_usage_event_sec: + num_generated += self._store_usage_event( + new_event, usage_state, table_refs + ) + with self.report.processing_perf.store_usage_event_sec: + num_aggregated += self._store_usage_event( + audit_event, usage_state, table_refs + ) + except Exception as e: logger.warning( f"Unable to store usage event {audit_event}", exc_info=True @@ -905,54 +911,38 @@ def _generate_filter(self, corrected_start_time, corrected_end_time): ) def get_tables_from_query( - self, default_project: str, query: str - ) -> Optional[List[BigQueryTableRef]]: + self, query: str, default_project: str, default_dataset: Optional[str] = None + ) -> List[BigQueryTableRef]: """ This method attempts to parse bigquery objects read in the query """ if not query: - return None + return [] - parsed_tables = set() try: - parser = BigQuerySQLParser( - query, - self.config.sql_parser_use_external_process, - use_raw_names=self.config.lineage_sql_parser_use_raw_names, - ) - tables = parser.get_tables() - except Exception as ex: + with self.report.processing_perf.sql_parsing_sec: + result = sqlglot_lineage( + query, + self.schema_resolver, + default_db=default_project, + default_schema=default_dataset, + ) + except Exception: logger.debug( - f"Sql parsing failed on this query on view: {query}. " - f"Usage won't be added. The error was {ex}." + f"Sql parsing failed on this query on view: {query}. Usage won't be added." 
) - return None + return [] - for table in tables: - parts = table.split(".") - if len(parts) == 2: - parsed_tables.add( - BigQueryTableRef( - BigqueryTableIdentifier( - project_id=default_project, dataset=parts[0], table=parts[1] - ) - ).get_sanitized_table_ref() - ) - elif len(parts) == 3: - parsed_tables.add( - BigQueryTableRef( - BigqueryTableIdentifier( - project_id=parts[0], dataset=parts[1], table=parts[2] - ) - ).get_sanitized_table_ref() - ) - else: - logger.debug( - f"Invalid table identifier {table} when parsing query on view {query}" - ) + # Log parse diagnostics only after a successful parse; `result` is unbound if sqlglot_lineage raised + logger.debug(result.debug_info) + parsed_table_refs = [] + for urn in result.in_tables: + try: + parsed_table_refs.append(BigQueryTableRef.from_urn(urn)) + except ValueError: + logger.debug(f"Invalid urn {urn} when parsing query on view {query}") self.report.num_view_query_events_failed_table_identification += 1 - return list(parsed_tables) + return parsed_table_refs def _report_error( self, label: str, e: Exception, group: Optional[str] = None diff --git a/metadata-ingestion/src/datahub/utilities/bigquery_sql_parser.py b/metadata-ingestion/src/datahub/utilities/bigquery_sql_parser.py deleted file mode 100644 index 4ad41f1fe23c9a..00000000000000 --- a/metadata-ingestion/src/datahub/utilities/bigquery_sql_parser.py +++ /dev/null @@ -1,92 +0,0 @@ -import re -from typing import List - -import sqlparse - -from datahub.utilities.sql_parser import SqlLineageSQLParser, SQLParser - - -class BigQuerySQLParser(SQLParser): - parser: SQLParser - - def __init__( - self, - sql_query: str, - use_external_process: bool = False, - use_raw_names: bool = False, - ) -> None: - super().__init__(sql_query) - - self._parsed_sql_query = self.parse_sql_query(sql_query) - self.parser = SqlLineageSQLParser( - self._parsed_sql_query, use_external_process, use_raw_names - ) - - def parse_sql_query(self, sql_query: str) -> str: - sql_query = BigQuerySQLParser._parse_bigquery_comment_sign(sql_query) - sql_query = BigQuerySQLParser._escape_keyword_from_as_field_name(sql_query) - sql_query = BigQuerySQLParser._escape_cte_name_after_keyword_with(sql_query) - - sql_query = sqlparse.format( - sql_query.strip(), - reindent_aligned=True, - strip_comments=True, - ) - - sql_query = BigQuerySQLParser._escape_table_or_view_name_at_create_statement( - sql_query - ) - sql_query = BigQuerySQLParser._escape_object_name_after_keyword_from(sql_query) - sql_query = BigQuerySQLParser._remove_comma_before_from(sql_query) - - return sql_query - - @staticmethod - def _parse_bigquery_comment_sign(sql_query: str) -> str: - return re.sub(r"#(.*)", r"-- \1", sql_query, flags=re.IGNORECASE) - - @staticmethod - def _escape_keyword_from_as_field_name(sql_query: str) -> str: - return re.sub(r"(\w*\.from)", r"`\1`", sql_query, flags=re.IGNORECASE) - - @staticmethod - def _escape_cte_name_after_keyword_with(sql_query: str) -> str: - """ - Escape the first cte name in case it is one of reserved words - """ - return re.sub(r"(with\s)([^`\s()]+)", r"\1`\2`", sql_query, flags=re.IGNORECASE) - - @staticmethod - def _escape_table_or_view_name_at_create_statement(sql_query: str) -> str: - """ - Reason: in case table name contains hyphens which breaks sqllineage later on - """ - return re.sub( - r"(create.*\s)(table\s|view\s)([^`\s()]+)(?=\sas)", - r"\1\2`\3`", - sql_query, - flags=re.IGNORECASE, - ) - - @staticmethod - def _remove_comma_before_from(sql_query: str) -> str: - return re.sub(r",(\s*?)(?=from)", r" ", sql_query, flags=re.IGNORECASE) - - @staticmethod - def 
_escape_object_name_after_keyword_from(sql_query: str) -> str: - """ - Reason: in case table name contains hyphens which breaks sqllineage later on - Note: ignore cases of having keyword FROM as part of datetime function EXTRACT - """ - return re.sub( - r"(? List[str]: - return self.parser.get_tables() - - def get_columns(self) -> List[str]: - return self.parser.get_columns() diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index b43c8de4c8f3d8..0f84871d6c96ac 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -333,6 +333,9 @@ def _table_level_lineage( return tables, modified +TABLE_CASE_SENSITIVE_PLATFORMS = {"bigquery"} + + class SchemaResolver(Closeable): def __init__( self, @@ -402,7 +405,10 @@ def resolve_table(self, table: _TableName) -> Tuple[str, Optional[SchemaInfo]]: if schema_info: return urn_lower, schema_info - return urn_lower, None + if self.platform in TABLE_CASE_SENSITIVE_PLATFORMS: + return urn, None + else: + return urn_lower, None def _resolve_schema_info(self, urn: str) -> Optional[SchemaInfo]: if urn in self._schema_cache: diff --git a/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py b/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py index bbc3378450bffd..9bbe9c45887a8f 100644 --- a/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py +++ b/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py @@ -14,6 +14,7 @@ from datahub.ingestion.source.bigquery_v2.bigquery_report import BigQueryV2Report from datahub.ingestion.source.bigquery_v2.usage import BigQueryUsageExtractor from datahub.utilities.perf_timer import PerfTimer +from datahub.utilities.sqlglot_lineage import SchemaResolver from tests.performance.bigquery.bigquery_events import generate_events, ref_from_table from tests.performance.data_generation import ( NormalDistribution, @@ -47,7 +48,10 @@ def run_test(): usage_extractor = BigQueryUsageExtractor( config, report, - lambda ref: make_dataset_urn("bigquery", str(ref.table_identifier)), + schema_resolver=SchemaResolver(platform="bigquery"), + dataset_urn_builder=lambda ref: make_dataset_urn( + "bigquery", str(ref.table_identifier) + ), ) report.set_ingestion_stage("All", "Event Generation") @@ -83,7 +87,7 @@ def run_test(): print( f"Peak Memory Used: {humanfriendly.format_size(peak_memory_usage - pre_mem_usage)}" ) - print(f"Disk Used: {report.usage_state_size}") + print(f"Disk Used: {report.processing_perf.usage_state_size}") print(f"Hash collisions: {report.num_usage_query_hash_collisions}") diff --git a/metadata-ingestion/tests/unit/test_bigquery_sql_lineage.py b/metadata-ingestion/tests/unit/test_bigquery_sql_lineage.py index f807be747a193a..755e9081dda390 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_sql_lineage.py +++ b/metadata-ingestion/tests/unit/test_bigquery_sql_lineage.py @@ -1,4 +1,35 @@ -from datahub.utilities.bigquery_sql_parser import BigQuerySQLParser +from typing import List + +from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigQueryTableRef +from datahub.utilities.sqlglot_lineage import SchemaResolver, sqlglot_lineage + + +class BigQuerySQLParser: + def __init__(self, sql_query: str, schema_resolver: SchemaResolver) -> None: + self.result = sqlglot_lineage(sql_query, schema_resolver) + + def get_tables(self) -> List[str]: + ans = [] + for urn in self.result.in_tables: + 
table_ref = BigQueryTableRef.from_urn(urn) + ans.append(str(table_ref.table_identifier)) + return ans + + def get_columns(self) -> List[str]: + ans = [] + for col_info in self.result.column_lineage or []: + for col_ref in col_info.upstreams: + ans.append(col_ref.column) + return ans + + +def test_bigquery_sql_lineage_basic(): + parser = BigQuerySQLParser( + sql_query="""SELECT * FROM project_1.database_1.view_1""", + schema_resolver=SchemaResolver(platform="bigquery"), + ) + + assert parser.get_tables() == ["project_1.database_1.view_1"] def test_bigquery_sql_lineage_hash_as_comment_sign_is_accepted(): @@ -14,7 +45,8 @@ def test_bigquery_sql_lineage_hash_as_comment_sign_is_accepted(): -- this comment will not break sqllineage either # this comment will not break sqllineage either FROM `project.dataset.src_tbl` - """ + """, + schema_resolver=SchemaResolver(platform="bigquery"), ) assert parser.get_tables() == ["project.dataset.src_tbl"] @@ -39,7 +71,7 @@ def test_bigquery_sql_lineage_camel_case_table(): # this comment will not break sqllineage either FROM `project.dataset.CamelCaseTable` """, - use_raw_names=True, + schema_resolver=SchemaResolver(platform="bigquery"), ) assert parser.get_tables() == ["project.dataset.CamelCaseTable"] @@ -64,7 +96,7 @@ def test_bigquery_sql_lineage_camel_case_dataset(): # this comment will not break sqllineage either FROM `project.DataSet.table` """, - use_raw_names=True, + schema_resolver=SchemaResolver(platform="bigquery"), ) assert parser.get_tables() == ["project.DataSet.table"] @@ -89,7 +121,7 @@ def test_bigquery_sql_lineage_camel_case_table_and_dataset(): # this comment will not break sqllineage either FROM `project.DataSet.CamelTable` """, - use_raw_names=True, + schema_resolver=SchemaResolver(platform="bigquery"), ) assert parser.get_tables() == ["project.DataSet.CamelTable"] @@ -117,7 +149,7 @@ def test_bigquery_sql_lineage_camel_case_table_and_dataset_subquery(): SELECT * FROM `project.DataSet.CamelTable` ) """, - use_raw_names=True, + schema_resolver=SchemaResolver(platform="bigquery"), ) assert parser.get_tables() == ["project.DataSet.CamelTable"] @@ -146,7 +178,7 @@ def test_bigquery_sql_lineage_camel_case_table_and_dataset_joins(): LEFT JOIN `project.DataSet3.CamelTable3` on c.id = b.id """, - use_raw_names=True, + schema_resolver=SchemaResolver(platform="bigquery"), ) assert parser.get_tables() == [ @@ -179,7 +211,7 @@ def test_bigquery_sql_lineage_camel_case_table_and_dataset_joins_and_subquery(): LEFT JOIN (SELECT * FROM `project.DataSet3.CamelTable3`) c ON c.id = b.id """, - use_raw_names=True, + schema_resolver=SchemaResolver(platform="bigquery"), ) assert parser.get_tables() == [ @@ -199,7 +231,8 @@ def test_bigquery_sql_lineage_keyword_data_is_accepted(): FROM `project.example_dataset.example_table` ) SELECT * FROM data - """ + """, + schema_resolver=SchemaResolver(platform="bigquery"), ) assert parser.get_tables() == ["project.example_dataset.example_table"] @@ -213,7 +246,8 @@ def test_bigquery_sql_lineage_keyword_admin_is_accepted(): FROM `project.example_dataset.example_table` ) SELECT * FROM admin - """ + """, + schema_resolver=SchemaResolver(platform="bigquery"), ) assert parser.get_tables() == ["project.example_dataset.example_table"] @@ -238,7 +272,8 @@ def test_bigquery_sql_lineage_cte_alias_as_keyword_is_accepted(): ) SELECT * FROM map - """ + """, + schema_resolver=SchemaResolver(platform="bigquery"), ) assert parser.get_tables() == [ @@ -255,7 +290,8 @@ def 
test_bigquery_sql_lineage_create_or_replace_view_name_with_hyphens_is_accept FROM project.dataset.src_table_a UNION SELECT * FROM `project.dataset.src_table_b` - """ + """, + schema_resolver=SchemaResolver(platform="bigquery"), ) assert parser.get_tables() == [ @@ -270,7 +306,8 @@ def test_bigquery_sql_lineage_source_table_name_with_hyphens_is_accepted(): CREATE OR REPLACE VIEW `project.dataset.test_view` AS SELECT * FROM test-project.dataset.src_table - """ + """, + schema_resolver=SchemaResolver(platform="bigquery"), ) assert parser.get_tables() == ["test-project.dataset.src_table"] @@ -282,7 +319,8 @@ def test_bigquery_sql_lineage_from_as_column_name_is_accepted(): CREATE OR REPLACE VIEW `project.dataset.test_view` AS SELECT x.from AS col FROM project.dataset.src_table AS x - """ + """, + schema_resolver=SchemaResolver(platform="bigquery"), ) assert parser.get_tables() == ["project.dataset.src_table"] diff --git a/metadata-ingestion/tests/unit/test_bigquery_sql_parser.py b/metadata-ingestion/tests/unit/test_bigquery_sql_parser.py deleted file mode 100644 index 2a73bfc5e8b686..00000000000000 --- a/metadata-ingestion/tests/unit/test_bigquery_sql_parser.py +++ /dev/null @@ -1,327 +0,0 @@ -import pytest - -from datahub.utilities.bigquery_sql_parser import BigQuerySQLParser - - -def test_bigquery_sql_parser_comments_are_removed(): - parser = BigQuerySQLParser( - sql_query=""" -/* -HERE IS A STANDARD COMMENT BLOCK -THIS WILL NOT BREAK sqllineage -*/ -CREATE OR REPLACE TABLE `project.dataset.test_view` AS -#This, comment will not break sqllineage -SELECT foo --- this comment will not break sqllineage either -# this comment will not break sqllineage either - FROM `project.dataset.src_table` -""" - ) - - assert ( - parser._parsed_sql_query - == """CREATE OR REPLACE TABLE `project.dataset.test_view` AS SELECT foo - FROM `project.dataset.src_table`""" - ) - - assert parser.get_tables() == ["project.dataset.src_table"] - - -def test_bigquery_sql_parser_formats_input_sql(): - parser = BigQuerySQLParser( - sql_query=""" -CREATE OR REPLACE TABLE `project.dataset.test_view` AS -SELECT foo FROM `project.dataset.src_table_a` AS a -INNER JOIN `project.dataset.src_table_b` AS b ON a.key_field = b.key_field -""" - ) - - assert ( - parser._parsed_sql_query - == """CREATE OR REPLACE TABLE `project.dataset.test_view` AS SELECT foo - FROM `project.dataset.src_table_a` AS a - INNER JOIN `project.dataset.src_table_b` AS b - ON a.key_field = b.key_field""" - ) - - assert parser.get_tables() == [ - "project.dataset.src_table_a", - "project.dataset.src_table_b", - ] - - -def test_remove_comma_before_from(): - assert ( - BigQuerySQLParser._remove_comma_before_from( - """ -select a, b,from `project.dataset.table_name_1` -""" - ) - == """ -select a, b from `project.dataset.table_name_1` -""" - ) - - assert ( - BigQuerySQLParser._remove_comma_before_from( - """ -select a, b from `project.dataset.table_name_1` -""" - ) - == """ -select a, b from `project.dataset.table_name_1` -""" - ) - - assert ( - BigQuerySQLParser._remove_comma_before_from( - """ -select - a, - b, -from `project.dataset.table_name_1` -""" - ) - == """ -select - a, - b from `project.dataset.table_name_1` -""" - ) - - -def test_bigquery_sql_parser_subquery(): - parser = BigQuerySQLParser( - sql_query=""" - create or replace table smoke_test_db.table_from_view_and_table - as (select b.date_utc, v.revenue from smoke_test_db.base_table b, smoke_test_db.view_from_table v - """ - ) - assert parser.get_tables() == [ - "smoke_test_db.base_table", - 
"smoke_test_db.view_from_table", - ] - - -def test_bigquery_sql_parser_comment_sign_switched_correctly(): - sql_query = BigQuerySQLParser._parse_bigquery_comment_sign( - """ -#upper comment -SELECT * FROM hello -# lower comment -""" - ) - - assert ( - sql_query - == """ --- upper comment -SELECT * FROM hello --- lower comment -""" - ) - - -def test_bigquery_sql_parser_keyword_from_is_escaped_if_used_as_fieldname(): - sql_query = BigQuerySQLParser._escape_keyword_from_as_field_name( - """ -SELECT hello.from AS col FROM hello -""" - ) - - assert ( - sql_query - == """ -SELECT `hello.from` AS col FROM hello -""" - ) - - -def test_bigquery_sql_parser_first_cte_name_is_escaped(): - sql_query = BigQuerySQLParser._escape_cte_name_after_keyword_with( - """ -CREATE OR REPLACE VIEW `test_view` AS -WITH cte_1 AS ( - SELECT * FROM foo -), -cte_2 AS ( - SELECT * FROM bar -) -SELECT * FROM cte_1 UNION ALL -SELECT * FROM cte_2 -""" - ) - - assert ( - sql_query - == """ -CREATE OR REPLACE VIEW `test_view` AS -WITH `cte_1` AS ( - SELECT * FROM foo -), -cte_2 AS ( - SELECT * FROM bar -) -SELECT * FROM cte_1 UNION ALL -SELECT * FROM cte_2 -""" - ) - - -def test_bigquery_sql_parser_table_name_is_escaped_at_create_statement(): - sql_query_create = BigQuerySQLParser._escape_table_or_view_name_at_create_statement( - """ -CREATE TABLE project.dataset.test_table AS -col_1 STRING, -col_2 STRING -""" - ) - - sql_query_create_or_replace = BigQuerySQLParser._escape_table_or_view_name_at_create_statement( - """ -CREATE OR REPLACE TABLE project.dataset.test_table AS -col_1 STRING, -col_2 STRING -""" - ) - - assert ( - sql_query_create - == """ -CREATE TABLE `project.dataset.test_table` AS -col_1 STRING, -col_2 STRING -""" - ) - assert ( - sql_query_create_or_replace - == """ -CREATE OR REPLACE TABLE `project.dataset.test_table` AS -col_1 STRING, -col_2 STRING -""" - ) - - -def test_bigquery_sql_parser_view_name_is_escaped_at_create_statement(): - sql_query_create = BigQuerySQLParser._escape_table_or_view_name_at_create_statement( - """ -CREATE VIEW project.dataset.test_view AS -SELECT * FROM project.dataset.src_table -""" - ) - - sql_query_create_or_replace = BigQuerySQLParser._escape_table_or_view_name_at_create_statement( - """ -CREATE OR REPLACE VIEW project.dataset.test_view AS -SELECT * FROM project.dataset.src_table -""" - ) - - assert ( - sql_query_create - == """ -CREATE VIEW `project.dataset.test_view` AS -SELECT * FROM project.dataset.src_table -""" - ) - assert ( - sql_query_create_or_replace - == """ -CREATE OR REPLACE VIEW `project.dataset.test_view` AS -SELECT * FROM project.dataset.src_table -""" - ) - - -def test_bigquery_sql_parser_object_name_is_escaped_after_keyword_from(): - sql_query = BigQuerySQLParser._escape_object_name_after_keyword_from( - """ -CREATE OR REPLACE VIEW `project.dataset.test_view` AS -SELECT * FROM src-project.dataset.src_table_a UNION ALL -SELECT * FROM project.dataset.src_table_b -""" - ) - - assert ( - sql_query - == """ -CREATE OR REPLACE VIEW `project.dataset.test_view` AS -SELECT * FROM `src-project.dataset.src_table_a` UNION ALL -SELECT * FROM `project.dataset.src_table_b` -""" - ) - - -def test_bigquery_sql_parser_field_name_is_not_escaped_after_keyword_from_in_datetime_functions(): - sql_query = BigQuerySQLParser._escape_object_name_after_keyword_from( - """ -CREATE OR REPLACE VIEW `project.dataset.test_view` AS -SELECT -EXTRACT(MICROSECOND FROM time_field) AS col_1, -EXTRACT(MILLISECOND FROM time_field) AS col_2, -EXTRACT(SECOND FROM time_field) AS col_3, -EXTRACT(MINUTE 
FROM time_field) AS col_4, -EXTRACT(HOUR FROM time_field) AS col_5, -EXTRACT(DAYOFWEEK FROM time_field) AS col_6, -EXTRACT(DAY FROM time_field) AS col_7, -EXTRACT(DAYOFYEAR FROM time_field) AS col_8, -EXTRACT(WEEK FROM time_field) AS col_9, -EXTRACT(WEEK FROM time_field) AS col_10, -EXTRACT(ISOWEEK FROM time_field) AS col_11, -EXTRACT(MONTH FROM time_field) AS col_12, -EXTRACT(QUARTER FROM time_field) AS col_13, -EXTRACT(YEAR FROM time_field) AS col_14, -EXTRACT(ISOYEAR FROM time_field) AS col_15, -EXTRACT(DATE FROM time_field) AS col_16, -EXTRACT(TIME FROM time_field) AS col_17 -FROM src-project.dataset.src_table_a -""" - ) - - assert ( - sql_query - == """ -CREATE OR REPLACE VIEW `project.dataset.test_view` AS -SELECT -EXTRACT(MICROSECOND FROM time_field) AS col_1, -EXTRACT(MILLISECOND FROM time_field) AS col_2, -EXTRACT(SECOND FROM time_field) AS col_3, -EXTRACT(MINUTE FROM time_field) AS col_4, -EXTRACT(HOUR FROM time_field) AS col_5, -EXTRACT(DAYOFWEEK FROM time_field) AS col_6, -EXTRACT(DAY FROM time_field) AS col_7, -EXTRACT(DAYOFYEAR FROM time_field) AS col_8, -EXTRACT(WEEK FROM time_field) AS col_9, -EXTRACT(WEEK FROM time_field) AS col_10, -EXTRACT(ISOWEEK FROM time_field) AS col_11, -EXTRACT(MONTH FROM time_field) AS col_12, -EXTRACT(QUARTER FROM time_field) AS col_13, -EXTRACT(YEAR FROM time_field) AS col_14, -EXTRACT(ISOYEAR FROM time_field) AS col_15, -EXTRACT(DATE FROM time_field) AS col_16, -EXTRACT(TIME FROM time_field) AS col_17 -FROM `src-project.dataset.src_table_a` -""" - ) - - -def test_bigquery_sql_parser_with_semicolon_in_from(): - sql_query = """CREATE VIEW `acryl-staging.smoke_test_db.view_from_table`\nAS select * from smoke_test_db.base_table;""" - - table_list = BigQuerySQLParser(sql_query).get_tables() - table_list.sort() - assert table_list == ["smoke_test_db.base_table"] - - -@pytest.mark.xfail -def test_bigquery_sql_parser_with_parenthesis_in_from(): - sql_query = """ - CREATE VIEW `acryl-staging.smoke_test_db.view_from_table` AS - select * from smoke_test_db.base_table LEFT JOIN UNNEST(my_array) ON day1 = day2; - """ - - table_list = BigQuerySQLParser(sql_query).get_tables() - table_list.sort() - assert table_list == ["smoke_test_db.base_table"] diff --git a/metadata-ingestion/tests/unit/test_bigquery_usage.py b/metadata-ingestion/tests/unit/test_bigquery_usage.py index c0055763bc15b4..664d3112810ff0 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_usage.py +++ b/metadata-ingestion/tests/unit/test_bigquery_usage.py @@ -35,6 +35,7 @@ TimeWindowSizeClass, ) from datahub.testing.compare_metadata_json import diff_metadata_json +from datahub.utilities.sqlglot_lineage import SchemaResolver from tests.performance.bigquery.bigquery_events import generate_events, ref_from_table from tests.performance.data_generation import generate_data, generate_queries from tests.performance.data_model import Container, FieldAccess, Query, Table, View @@ -202,7 +203,10 @@ def usage_extractor(config: BigQueryV2Config) -> BigQueryUsageExtractor: return BigQueryUsageExtractor( config, report, - lambda ref: make_dataset_urn("bigquery", str(ref.table_identifier)), + schema_resolver=SchemaResolver(platform="bigquery"), + dataset_urn_builder=lambda ref: make_dataset_urn( + "bigquery", str(ref.table_identifier) + ), ) @@ -961,21 +965,21 @@ def test_operational_stats( def test_get_tables_from_query(usage_extractor): assert usage_extractor.get_tables_from_query( - PROJECT_1, "SELECT * FROM project-1.database_1.view_1" + "SELECT * FROM project-1.database_1.view_1", 
default_project=PROJECT_1 ) == [ BigQueryTableRef(BigqueryTableIdentifier("project-1", "database_1", "view_1")) ] assert usage_extractor.get_tables_from_query( - PROJECT_1, "SELECT * FROM database_1.view_1" + "SELECT * FROM database_1.view_1", default_project=PROJECT_1 ) == [ BigQueryTableRef(BigqueryTableIdentifier("project-1", "database_1", "view_1")) ] assert sorted( usage_extractor.get_tables_from_query( - PROJECT_1, "SELECT v.id, v.name, v.total, t.name as name1 FROM database_1.view_1 as v inner join database_1.table_1 as t on v.id=t.id", + default_project=PROJECT_1, ) ) == [ BigQueryTableRef(BigqueryTableIdentifier("project-1", "database_1", "table_1")), @@ -984,8 +988,8 @@ def test_get_tables_from_query(usage_extractor): assert sorted( usage_extractor.get_tables_from_query( - PROJECT_1, "CREATE TABLE database_1.new_table AS SELECT v.id, v.name, v.total, t.name as name1 FROM database_1.view_1 as v inner join database_1.table_1 as t on v.id=t.id", + default_project=PROJECT_1, ) ) == [ BigQueryTableRef(BigqueryTableIdentifier("project-1", "database_1", "table_1")), diff --git a/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py b/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py index 44fd840f28d594..25e849a5092938 100644 --- a/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py +++ b/metadata-ingestion/tests/unit/test_bigqueryv2_usage_source.py @@ -10,6 +10,7 @@ from datahub.ingestion.source.bigquery_v2.bigquery_config import BigQueryV2Config from datahub.ingestion.source.bigquery_v2.bigquery_report import BigQueryV2Report from datahub.ingestion.source.bigquery_v2.usage import BigQueryUsageExtractor +from datahub.utilities.sqlglot_lineage import SchemaResolver FROZEN_TIME = "2021-07-20 00:00:00" @@ -114,7 +115,10 @@ def test_bigqueryv2_filters(): corrected_start_time = config.start_time - config.max_query_duration corrected_end_time = config.end_time + config.max_query_duration filter: str = BigQueryUsageExtractor( - config, BigQueryV2Report(), lambda x: "" + config, + BigQueryV2Report(), + schema_resolver=SchemaResolver(platform="bigquery"), + dataset_urn_builder=lambda x: "", )._generate_filter(corrected_start_time, corrected_end_time) assert filter == expected_filter From 60347d6735ea2136d721bbf6644ae82df6519d9c Mon Sep 17 00:00:00 2001 From: Diego Reiriz Cores Date: Thu, 28 Dec 2023 12:09:10 +0100 Subject: [PATCH 14/18] fix(ingest/mongodb): support disabling schemaSamplingSize (#9295) Co-authored-by: Harshal Sheth --- .../src/datahub/ingestion/source/mongodb.py | 8 +++++--- .../tests/integration/mongodb/test_mongodb.py | 1 + 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py index 2aa8b1d37d4776..283ab652f23c62 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py @@ -102,7 +102,7 @@ class MongoDBConfig( ) schemaSamplingSize: Optional[PositiveInt] = Field( default=1000, - description="Number of documents to use when inferring schema size. If set to `0`, all documents will be scanned.", + description="Number of documents to use when inferring schema size. 
If set to `null`, all documents will be scanned.", ) useRandomSampling: bool = Field( default=True, @@ -225,13 +225,15 @@ def construct_schema_pymongo( ] if use_random_sampling: # get sample documents in collection - aggregations.append({"$sample": {"size": sample_size}}) + if sample_size: + aggregations.append({"$sample": {"size": sample_size}}) documents = collection.aggregate( aggregations, allowDiskUse=True, ) else: - aggregations.append({"$limit": sample_size}) + if sample_size: + aggregations.append({"$limit": sample_size}) documents = collection.aggregate(aggregations, allowDiskUse=True) return construct_schema(list(documents), delimiter) diff --git a/metadata-ingestion/tests/integration/mongodb/test_mongodb.py b/metadata-ingestion/tests/integration/mongodb/test_mongodb.py index 56fb471d4c9f1b..0a0ba55ff5b802 100644 --- a/metadata-ingestion/tests/integration/mongodb/test_mongodb.py +++ b/metadata-ingestion/tests/integration/mongodb/test_mongodb.py @@ -26,6 +26,7 @@ def test_mongodb_ingest(docker_compose_runner, pytestconfig, tmp_path, mock_time "password": "examplepass", "maxDocumentSize": 25000, "platform_instance": "instance", + "schemaSamplingSize": None, }, }, "sink": { From 2cd38a469d5ac607bd510a0ca045d151b4657afd Mon Sep 17 00:00:00 2001 From: Tony Ouyang Date: Thu, 28 Dec 2023 03:09:30 -0800 Subject: [PATCH 15/18] fix(ingest): Fix mongodb ingestion when platform_instance is missing from recipe (#9486) Co-authored-by: Harshal Sheth --- metadata-ingestion/src/datahub/ingestion/source/mongodb.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py index 283ab652f23c62..577da91ee82da9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py @@ -379,6 +379,8 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: platform_instance=self.config.platform_instance, ) + # Initialize data_platform_instance with a default value + data_platform_instance = None if self.config.platform_instance: data_platform_instance = DataPlatformInstanceClass( platform=make_data_platform_urn(platform), From e343b69ce4881ceefdf4af0cafea29188092de52 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Thu, 28 Dec 2023 16:50:13 +0530 Subject: [PATCH 16/18] fix(ingest/snowflake): explicitly set schema if public schema is absent (#9526) --- .../source/snowflake/snowflake_profiler.py | 14 ++++++++++++++ .../ingestion/source/snowflake/snowflake_query.py | 4 ++++ .../source/state/stateful_ingestion_base.py | 2 +- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py index 4bda7da422e9d6..9a37f779bbcd58 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py @@ -24,6 +24,8 @@ logger = logging.getLogger(__name__) +PUBLIC_SCHEMA = "PUBLIC" + class SnowflakeProfiler(GenericProfiler, SnowflakeCommonMixin): def __init__( @@ -36,6 +38,7 @@ def __init__( self.config: SnowflakeV2Config = config self.report: SnowflakeV2Report = report self.logger = logger + self.database_default_schema: Dict[str, str] = dict() def get_workunits( self, database: SnowflakeDatabase, db_tables: Dict[str, 
List[SnowflakeTable]] @@ -47,6 +50,10 @@ def get_workunits( "max_overflow", self.config.profiling.max_workers ) + if PUBLIC_SCHEMA not in db_tables: + # If the PUBLIC schema is absent, we use any one of the schemas as the default schema + self.database_default_schema[database.name] = list(db_tables.keys())[0] + profile_requests = [] for schema in database.schemas: for table in db_tables[schema.name]: @@ -136,9 +143,16 @@ def get_profiler_instance( ) def callable_for_db_connection(self, db_name: str) -> Callable: + schema_name = self.database_default_schema.get(db_name) + def get_db_connection(): conn = self.config.get_connection() conn.cursor().execute(SnowflakeQuery.use_database(db_name)) + + # As mentioned here - https://docs.snowflake.com/en/sql-reference/sql/use-database#usage-notes + # no schema is selected if the PUBLIC schema is absent. We need to explicitly call `USE SCHEMA <schema_name>` + if schema_name: + conn.cursor().execute(SnowflakeQuery.use_schema(schema_name)) return conn return get_db_connection diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py index 267f7cf0749099..724e4392f1d612 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py @@ -80,6 +80,10 @@ def show_tags() -> str: def use_database(db_name: str) -> str: return f'use database "{db_name}"' + @staticmethod + def use_schema(schema_name: str) -> str: + return f'use schema "{schema_name}"' + @staticmethod def get_databases(db_name: Optional[str]) -> str: db_clause = f'"{db_name}".' if db_name is not None else "" diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py b/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py index 8a448f40e95b4b..61d39b18f523d2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py @@ -98,7 +98,7 @@ class StatefulIngestionConfigBase(GenericModel, Generic[CustomConfig]): ) -class StatefulLineageConfigMixin: +class StatefulLineageConfigMixin(ConfigModel): enable_stateful_lineage_ingestion: bool = Field( default=True, description="Enable stateful lineage ingestion." From 4de2c24249697fa68831f880fda216ddb46fba3d Mon Sep 17 00:00:00 2001 From: Sumit Patil <91715217+sumitappt@users.noreply.github.com> Date: Thu, 28 Dec 2023 21:37:57 +0530 Subject: [PATCH 17/18] style(search): Border is too thick for sidebar (#9528) --- .../src/app/search/sidebar/BrowseSidebar.tsx | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/datahub-web-react/src/app/search/sidebar/BrowseSidebar.tsx b/datahub-web-react/src/app/search/sidebar/BrowseSidebar.tsx index c16bcdcaf6c727..1731727c14cfc1 100644 --- a/datahub-web-react/src/app/search/sidebar/BrowseSidebar.tsx +++ b/datahub-web-react/src/app/search/sidebar/BrowseSidebar.tsx @@ -9,7 +9,6 @@ import useSidebarEntities from './useSidebarEntities'; import { ANTD_GRAY_V2 } from '../../entity/shared/constants'; import { ProfileSidebarResizer } from '../../entity/shared/containers/profile/sidebar/ProfileSidebarResizer'; - export const MAX_BROWSER_WIDTH = 500; export const MIN_BROWSWER_WIDTH = 200; @@ -18,7 +17,6 @@ export const SidebarWrapper = styled.div<{ visible: boolean; width: number }>` width: ${(props) => (props.visible ? 
`${props.width}px` : '0')}; min-width: ${(props) => (props.visible ? `${props.width}px` : '0')}; transition: width 250ms ease-in-out; - border-right: 1px solid ${(props) => props.theme.styles['border-color-base']}; background-color: ${ANTD_GRAY_V2[1]}; background: white; `; @@ -53,7 +51,12 @@ const BrowseSidebar = ({ visible }: Props) => { return ( <> - + Navigate From 5321352852a511bf92685290fc8a4371faaed876 Mon Sep 17 00:00:00 2001 From: gaurav2733 <77378510+gaurav2733@users.noreply.github.com> Date: Fri, 29 Dec 2023 12:53:58 +0530 Subject: [PATCH 18/18] style(ui): humanise duration shown on ingestion page (#9530) --- .../executions/IngestionExecutionTable.tsx | 8 +++---- .../src/app/shared/formatDuration.ts | 21 +++++++++++++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 datahub-web-react/src/app/shared/formatDuration.ts diff --git a/datahub-web-react/src/app/ingest/source/executions/IngestionExecutionTable.tsx b/datahub-web-react/src/app/ingest/source/executions/IngestionExecutionTable.tsx index 8c81cc36ae3f99..a9d9283ef1377e 100644 --- a/datahub-web-react/src/app/ingest/source/executions/IngestionExecutionTable.tsx +++ b/datahub-web-react/src/app/ingest/source/executions/IngestionExecutionTable.tsx @@ -4,6 +4,7 @@ import { StyledTable } from '../../../entity/shared/components/styled/StyledTabl import { ExecutionRequest } from '../../../../types.generated'; import { ButtonsColumn, SourceColumn, StatusColumn, TimeColumn } from './IngestionExecutionTableColumns'; import { SUCCESS } from '../utils'; +import { formatDuration } from '../../../shared/formatDuration'; interface Props { executionRequests: ExecutionRequest[]; @@ -34,13 +35,10 @@ export default function IngestionExecutionTable({ render: TimeColumn, }, { - title: 'Duration (s)', + title: 'Duration', dataIndex: 'duration', key: 'duration', - render: (durationMs: number) => { - const seconds = (durationMs && `${durationMs / 1000}s`) || 'None'; - return seconds; - }, + render: (durationMs: number) => formatDuration(durationMs), }, { title: 'Status', diff --git a/datahub-web-react/src/app/shared/formatDuration.ts b/datahub-web-react/src/app/shared/formatDuration.ts new file mode 100644 index 00000000000000..1028b46f70b316 --- /dev/null +++ b/datahub-web-react/src/app/shared/formatDuration.ts @@ -0,0 +1,21 @@ +export const formatDuration = (durationMs: number): string => { + if (!durationMs) return 'None'; + + const seconds = durationMs / 1000; + + if (seconds < 60) { + return `${seconds.toFixed(1)} s`; + } + + const minutes = Math.floor(seconds / 60); + const remainingSeconds = Math.round(seconds % 60); + + if (minutes < 60) { + return `${minutes} min ${remainingSeconds} s`; + } + + const hours = Math.floor(minutes / 60); + const remainingMinutes = Math.round(minutes % 60); + + return `${hours} hr ${remainingMinutes} min`; +};
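
For reference, below is a minimal sketch of how the new formatDuration helper behaves at each branch. It is written as a hypothetical Jest spec; the test runner and the relative import path are assumptions, not part of the patch.

import { formatDuration } from './formatDuration';

describe('formatDuration', () => {
    it('falls back to None for missing or zero durations', () => {
        expect(formatDuration(0)).toEqual('None');
    });

    it('renders sub-minute durations in seconds with one decimal', () => {
        // 1500 ms -> 1.5 s (via toFixed(1))
        expect(formatDuration(1500)).toEqual('1.5 s');
    });

    it('renders sub-hour durations as minutes and seconds', () => {
        // 90000 ms -> 90 s -> 1 min 30 s
        expect(formatDuration(90000)).toEqual('1 min 30 s');
    });

    it('renders longer durations as hours and minutes', () => {
        // 5400000 ms -> 90 min -> 1 hr 30 min
        expect(formatDuration(5400000)).toEqual('1 hr 30 min');
    });
});

One behavior worth noting: the remainder is computed with Math.round, so a duration just under a minute boundary (for example 119700 ms) renders as "1 min 60 s" rather than rolling over to "2 min".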