From f41b220a6b15f1e7993beddc512d8abd81d58edb Mon Sep 17 00:00:00 2001 From: "Julian.Frenzel" Date: Wed, 25 May 2022 22:21:03 +0200 Subject: [PATCH 1/2] support kms_key_name feature for seeds --- dbt/adapters/bigquery/impl.py | 6 +- .../bigquery/macros/materializations/seed.sql | 3 +- .../simple_seed_test/models-bq/schema.yml | 5 + .../seeds-config/seed_kms.csv | 21 +++ .../test_seed_kms_key_name.py | 160 ++++++++++++++++++ 5 files changed, 193 insertions(+), 2 deletions(-) create mode 100644 tests/integration/simple_seed_test/seeds-config/seed_kms.csv create mode 100644 tests/integration/simple_seed_test/test_seed_kms_key_name.py diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index 344ffb22e..69f32e9a1 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -22,6 +22,7 @@ import google.cloud.bigquery from google.cloud.bigquery import AccessEntry, SchemaField +from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration import time import agate @@ -610,16 +611,19 @@ def alter_table_add_columns(self, relation, columns): client.update_table(new_table, ["schema"]) @available.parse_none - def load_dataframe(self, database, schema, table_name, agate_table, column_override): + def load_dataframe(self, database, schema, table_name, agate_table, column_override, kms_key_name): bq_schema = self._agate_to_schema(agate_table, column_override) conn = self.connections.get_thread_connection() client = conn.handle table_ref = self.connections.table_ref(database, schema, table_name) + destination_encryption_configuration = EncryptionConfiguration(kms_key_name=kms_key_name) + load_config = google.cloud.bigquery.LoadJobConfig() load_config.skip_leading_rows = 1 load_config.schema = bq_schema + load_config.destination_encryption_configuration = destination_encryption_configuration with open(agate_table.original_abspath, "rb") as f: job = client.load_table_from_file(f, table_ref, rewind=True, job_config=load_config) diff --git a/dbt/include/bigquery/macros/materializations/seed.sql b/dbt/include/bigquery/macros/materializations/seed.sql index d95cc4e1b..950cc73d7 100644 --- a/dbt/include/bigquery/macros/materializations/seed.sql +++ b/dbt/include/bigquery/macros/materializations/seed.sql @@ -10,8 +10,9 @@ {% macro bigquery__load_csv_rows(model, agate_table) %} {%- set column_override = model['config'].get('column_types', {}) -%} + {%- set kms_key_name = model['config'].get('kms_key_name', {}) -%} {{ adapter.load_dataframe(model['database'], model['schema'], model['alias'], - agate_table, column_override) }} + agate_table, column_override, kms_key_name) }} {% if config.persist_relation_docs() and 'description' in model %} {{ adapter.update_table_description(model['database'], model['schema'], model['alias'], model['description']) }} diff --git a/tests/integration/simple_seed_test/models-bq/schema.yml b/tests/integration/simple_seed_test/models-bq/schema.yml index 019a9524f..7d9314385 100644 --- a/tests/integration/simple_seed_test/models-bq/schema.yml +++ b/tests/integration/simple_seed_test/models-bq/schema.yml @@ -45,3 +45,8 @@ seeds: tests: - column_type: type: STRING + +- name: seed_kms + columns: + - name: birthday + - name: id diff --git a/tests/integration/simple_seed_test/seeds-config/seed_kms.csv b/tests/integration/simple_seed_test/seeds-config/seed_kms.csv new file mode 100644 index 000000000..0227dd609 --- /dev/null +++ b/tests/integration/simple_seed_test/seeds-config/seed_kms.csv @@ -0,0 +1,21 @@ +id,first_name,email,ip_address,birthday +1,Larry,lking0@miitbeian.gov.cn,69.135.206.194,2008-09-12 19:08:31 +2,Larry,lperkins1@toplist.cz,64.210.133.162,1978-05-09 04:15:14 +3,Anna,amontgomery2@miitbeian.gov.cn,168.104.64.114,2011-10-16 04:07:57 +4,Sandra,sgeorge3@livejournal.com,229.235.252.98,1973-07-19 10:52:43 +5,Fred,fwoods4@google.cn,78.229.170.124,2012-09-30 16:38:29 +6,Stephen,shanson5@livejournal.com,182.227.157.105,1995-11-07 21:40:50 +7,William,wmartinez6@upenn.edu,135.139.249.50,1982-09-05 03:11:59 +8,Jessica,jlong7@hao123.com,203.62.178.210,1991-10-16 11:03:15 +9,Douglas,dwhite8@tamu.edu,178.187.247.1,1979-10-01 09:49:48 +10,Lisa,lcoleman9@nydailynews.com,168.234.128.249,2011-05-26 07:45:49 +11,Ralph,rfieldsa@home.pl,55.152.163.149,1972-11-18 19:06:11 +12,Louise,lnicholsb@samsung.com,141.116.153.154,2014-11-25 20:56:14 +13,Clarence,cduncanc@sfgate.com,81.171.31.133,2011-11-17 07:02:36 +14,Daniel,dfranklind@omniture.com,8.204.211.37,1980-09-13 00:09:04 +15,Katherine,klanee@auda.org.au,176.96.134.59,1997-08-22 19:36:56 +16,Billy,bwardf@wikia.com,214.108.78.85,2003-10-19 02:14:47 +17,Annie,agarzag@ocn.ne.jp,190.108.42.70,1988-10-28 15:12:35 +18,Shirley,scolemanh@fastcompany.com,109.251.164.84,1988-08-24 10:50:57 +19,Roger,rfrazieri@scribd.com,38.145.218.108,1985-12-31 15:17:15 +20,Lillian,lstanleyj@goodreads.com,47.57.236.17,1970-06-08 02:09:05 diff --git a/tests/integration/simple_seed_test/test_seed_kms_key_name.py b/tests/integration/simple_seed_test/test_seed_kms_key_name.py new file mode 100644 index 000000000..e11c566f1 --- /dev/null +++ b/tests/integration/simple_seed_test/test_seed_kms_key_name.py @@ -0,0 +1,160 @@ +import json +import os + +from google.oauth2 import service_account +from google.api_core.exceptions import AlreadyExists +from google.cloud import bigquery +from google.cloud import kms + +from tests.integration.base import DBTIntegrationTest, use_profile + + +class GcpKmsAdapter: + def __init__(self, service_account_info, location_id) -> None: + self.location_id = location_id + credentials = service_account.Credentials.from_service_account_info( + info=service_account_info + ) + self.project_id = credentials.project_id + self.kms_client = kms.KeyManagementServiceClient.from_service_account_info( + info=service_account_info + ) + + def get_or_create_key(self, key_ring_id, key_id): + self._create_key_ring(name=key_ring_id) + self._create_key_symmetric_encrypt_decrypt( + key_ring_id=key_ring_id, key_id=key_id + ) + return f"projects/{self.project_id}/locations/{self.location_id}/keyRings/{key_ring_id}/cryptoKeys/{key_id}" + + def _create_key_ring(self, name): + location_name = f"projects/{self.project_id}/locations/{self.location_id}" + key_ring = {} + try: + self.kms_client.create_key_ring( + request={ + "parent": location_name, + "key_ring_id": name, + "key_ring": key_ring, + } + ) + except AlreadyExists: + pass + + def _create_key_symmetric_encrypt_decrypt(self, key_ring_id, key_id): + # Build the parent key ring name. + key_ring_name = self.kms_client.key_ring_path( + self.project_id, self.location_id, key_ring_id + ) + + # Build the key. + purpose = kms.CryptoKey.CryptoKeyPurpose.ENCRYPT_DECRYPT + algorithm = ( + kms.CryptoKeyVersion.CryptoKeyVersionAlgorithm.GOOGLE_SYMMETRIC_ENCRYPTION + ) + key = { + "purpose": purpose, + "version_template": { + "algorithm": algorithm, + }, + } + + # Call the API. + try: + self.kms_client.create_crypto_key( + request={ + "parent": key_ring_name, + "crypto_key_id": key_id, + "crypto_key": key, + } + ) + except AlreadyExists: + pass + + +class TestSimpleSeedKmsKeyName(DBTIntegrationTest): + def setUp(self): + self.gcs_kms_adapter = GcpKmsAdapter( + service_account_info=self.gcs_service_account_info, location_id="us" + ) + self.kms_key_name = self.gcs_kms_adapter.get_or_create_key( + key_ring_id="dbt-integration-test", key_id="dbt-integration-test-key" + ) + return super().setUp() + + @property + def schema(self): + return "simple_seed" + + @property + def project_id(self): + project_id = ( + self.bigquery_profile() + .get("test", {}) + .get("outputs", {}) + .get("default2", {}) + .get("project") + ) + if project_id is None: + raise Exception("unable to get gcp project") + return project_id + + @property + def gcs_service_account_info(self): + credentials_json_str = os.getenv("BIGQUERY_TEST_SERVICE_ACCOUNT_JSON").replace( + "'", "" + ) + credentials_dict = json.loads(credentials_json_str) + return credentials_dict + + @property + def bigquery_client(self): + credentials = service_account.Credentials.from_service_account_info( + info=self.gcs_service_account_info + ) + client = bigquery.Client( + credentials=credentials, project=credentials.project_id + ) + return client + + @property + def project_config(self): + return { + "config-version": 2, + "seed-paths": ["seeds-config"], + "macro-paths": ["macros"], + "seeds": { + "+kms_key_name": self.kms_key_name, + "test": { + "enabled": False, + "quote_columns": True, + "seed_kms": { + "enabled": True, + }, + }, + }, + } + + +class TestSimpleSeedKmsKeyNameBq(TestSimpleSeedKmsKeyName): + @property + def models(self): + return "models-bq" + + @property + def bigquery_table_metadata(self): + table_id = f"{self.project_id}.{self.unique_schema()}.seed_kms" + table = self.bigquery_client.get_table(table_id) + return table + + @property + def profile_config(self): + return self.bigquery_profile() + + @use_profile("bigquery") + def test_bigquery_simple_seed_with_kms_key_name_bigquery(self): + results = self.run_dbt(["seed", "--show"]) + self.assertEqual(len(results), 1) + self.assertIsNotNone( + self.bigquery_table_metadata.encryption_configuration.kms_key_name + ) From 6c79aa54dd73d49b661a52cc3415150585976011 Mon Sep 17 00:00:00 2001 From: b0lle <26822719+b0lle@users.noreply.github.com> Date: Tue, 31 May 2022 17:25:11 +0200 Subject: [PATCH 2/2] pre commit hooks (black formatting) --- dbt/adapters/bigquery/impl.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index 69f32e9a1..c7957e341 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -611,7 +611,9 @@ def alter_table_add_columns(self, relation, columns): client.update_table(new_table, ["schema"]) @available.parse_none - def load_dataframe(self, database, schema, table_name, agate_table, column_override, kms_key_name): + def load_dataframe( + self, database, schema, table_name, agate_table, column_override, kms_key_name + ): bq_schema = self._agate_to_schema(agate_table, column_override) conn = self.connections.get_thread_connection() client = conn.handle