From 91b2daf7547d5c114a6f8019b7e88861dba1fc8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thierry=20Sall=C3=A9?= Date: Wed, 29 Nov 2023 03:01:17 +0100 Subject: [PATCH] Bigquery sqlalchemy metastore (#1352) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [bigquery] bump bigquery engine dependencies * [bigquery] Allow use of Application Default Credentials if no credentials specified * [metastore] Split table name on '.' when using sqlalchemy-bigquery metastore --- querybook/server/clients/google_client.py | 8 ++++---- .../lib/metastore/loaders/sqlalchemy_metastore_loader.py | 5 ++++- querybook/server/lib/query_executor/clients/bigquery.py | 8 +++++--- .../lib/query_executor/executor_template/templates.py | 6 +++++- requirements/engine/bigquery.txt | 9 ++++----- 5 files changed, 22 insertions(+), 14 deletions(-) diff --git a/querybook/server/clients/google_client.py b/querybook/server/clients/google_client.py index d283285f5..adde3fc57 100644 --- a/querybook/server/clients/google_client.py +++ b/querybook/server/clients/google_client.py @@ -21,11 +21,11 @@ def get_google_credentials(creds_info=None): ) cred_to_use = creds_info or QuerybookSettings.GOOGLE_CREDS - assert cred_to_use is not None, "Invalid Google credentials" - credentials = service_account.Credentials.from_service_account_info(cred_to_use) - - return credentials + if cred_to_use is not None: + return service_account.Credentials.from_service_account_info(cred_to_use) + else: + return None GOOGLE_AUTH_CONFIG = "https://accounts.google.com/.well-known/openid-configuration" diff --git a/querybook/server/lib/metastore/loaders/sqlalchemy_metastore_loader.py b/querybook/server/lib/metastore/loaders/sqlalchemy_metastore_loader.py index 25a452d40..302998db0 100644 --- a/querybook/server/lib/metastore/loaders/sqlalchemy_metastore_loader.py +++ b/querybook/server/lib/metastore/loaders/sqlalchemy_metastore_loader.py @@ -24,7 +24,10 @@ def get_all_schema_names(self) -> 
List[str]: return self._inspect.get_schema_names() def get_all_table_names_in_schema(self, schema_name: str) -> List[str]: - return self._inspect.get_table_names(schema=schema_name) + if self._engine.dialect.name == "bigquery": + return [table.split(".")[1] for table in self._inspect.get_table_names(schema=schema_name)] + else: + return self._inspect.get_table_names(schema=schema_name) def get_table_and_columns( self, schema_name, table_name diff --git a/querybook/server/lib/query_executor/clients/bigquery.py b/querybook/server/lib/query_executor/clients/bigquery.py index 1ef9a36f7..9b3ae8f11 100644 --- a/querybook/server/lib/query_executor/clients/bigquery.py +++ b/querybook/server/lib/query_executor/clients/bigquery.py @@ -12,9 +12,11 @@ def __init__(self, google_credentials_json=None, *args, **kwargs): if google_credentials_json is not None else None ) - cred = get_google_credentials(parsed_google_json) - - client = Client(project=cred.project_id, credentials=cred) + if parsed_google_json is not None: + cred = get_google_credentials(parsed_google_json) + client = Client(project=cred.project_id, credentials=cred) + else: + client = Client() self._conn = dbapi.connect(client=client) super(BigQueryClient, self).__init__() diff --git a/querybook/server/lib/query_executor/executor_template/templates.py b/querybook/server/lib/query_executor/executor_template/templates.py index 1b57b5964..09bb8a7a7 100644 --- a/querybook/server/lib/query_executor/executor_template/templates.py +++ b/querybook/server/lib/query_executor/executor_template/templates.py @@ -120,7 +120,11 @@ ( "google_credentials_json", FormField( - helper="The JSON string used to log in as service account. If not provided then **GOOGLE_CREDS** from settings will be used.", + helper=""" +

The JSON string used to log in as service account.

+

If not provided then **GOOGLE_CREDS** from settings will be used

+

If both are empty, Application default credentials are used

+""", ), ) ) diff --git a/requirements/engine/bigquery.txt b/requirements/engine/bigquery.txt index e64dd114f..73ffdf90f 100644 --- a/requirements/engine/bigquery.txt +++ b/requirements/engine/bigquery.txt @@ -1,5 +1,4 @@ -google-cloud-bigquery==1.28.0 - -# Downgrade Protobuf to fix error with bigquery: -# TypeError: Descriptors cannot not be created directly. -protobuf==3.20.1 +google-cloud-bigquery==3.12.0 +google-cloud-bigquery-storage==2.22.0 +pyarrow==13.0.0 +sqlalchemy-bigquery==1.8.0 \ No newline at end of file