From 699c96b87502f5f9067effeee0705bdbae12d220 Mon Sep 17 00:00:00 2001 From: Michiel De Smet Date: Sat, 17 Sep 2022 10:28:58 +0200 Subject: [PATCH 1/4] Support Hive catalog in pytests --- docker-compose-starburst.yml | 2 +- docker-compose-trino.yml | 2 +- docker/starburst/catalog/hive.properties | 10 ++++++++++ docker/trino/catalog/hive.properties | 10 ++++++++++ pytest.ini | 1 + tests/conftest.py | 4 ++++ 6 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 docker/starburst/catalog/hive.properties create mode 100644 docker/trino/catalog/hive.properties diff --git a/docker-compose-starburst.yml b/docker-compose-starburst.yml index 88f93869..5878ef57 100755 --- a/docker-compose-starburst.yml +++ b/docker-compose-starburst.yml @@ -26,7 +26,7 @@ services: hive-metastore: hostname: hive-metastore - image: 'starburstdata/hive:3.1.2-e.15' + image: 'starburstdata/hive:3.1.2-e.18' ports: - '9083:9083' # Metastore Thrift environment: diff --git a/docker-compose-trino.yml b/docker-compose-trino.yml index a56214cd..6c8ab933 100644 --- a/docker-compose-trino.yml +++ b/docker-compose-trino.yml @@ -27,7 +27,7 @@ services: hive-metastore: hostname: hive-metastore - image: 'starburstdata/hive:3.1.2-e.15' + image: 'starburstdata/hive:3.1.2-e.18' ports: - '9083:9083' # Metastore Thrift environment: diff --git a/docker/starburst/catalog/hive.properties b/docker/starburst/catalog/hive.properties new file mode 100644 index 00000000..39fa5df4 --- /dev/null +++ b/docker/starburst/catalog/hive.properties @@ -0,0 +1,10 @@ +connector.name=hive +hive.metastore.uri=thrift://hive-metastore:9083 +hive.s3.endpoint=http://minio:9000 +hive.s3.path-style-access=true +hive.s3.aws-access-key=minio +hive.s3.aws-secret-key=minio123 +hive.metastore-cache-ttl=0s +hive.metastore-refresh-interval=5s +hive.metastore-timeout=10s +hive.security=sql-standard diff --git a/docker/trino/catalog/hive.properties b/docker/trino/catalog/hive.properties new file mode 100644 index 00000000..39fa5df4 --- /dev/null +++ b/docker/trino/catalog/hive.properties @@ -0,0 +1,10 @@ +connector.name=hive +hive.metastore.uri=thrift://hive-metastore:9083 +hive.s3.endpoint=http://minio:9000 +hive.s3.path-style-access=true +hive.s3.aws-access-key=minio +hive.s3.aws-secret-key=minio123 +hive.metastore-cache-ttl=0s +hive.metastore-refresh-interval=5s +hive.metastore-timeout=10s +hive.security=sql-standard diff --git a/pytest.ini b/pytest.ini index 3f0eb950..4ac10ca5 100644 --- a/pytest.ini +++ b/pytest.ini @@ -8,6 +8,7 @@ testpaths = markers = delta iceberg + hive postgresql prepared_statements_disabled skip_profile(profile) diff --git a/tests/conftest.py b/tests/conftest.py index 281953c2..58a00ccf 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -31,6 +31,7 @@ def dbt_profile_target(request): postgresql = request.node.get_closest_marker("postgresql") iceberg = request.node.get_closest_marker("iceberg") delta = request.node.get_closest_marker("delta") + hive = request.node.get_closest_marker("hive") if sum(bool(x) for x in (postgresql, iceberg, delta)) > 1: raise ValueError("Only one of postgresql, iceberg, delta can be specified as a marker") @@ -44,6 +45,9 @@ def dbt_profile_target(request): if iceberg: target.update({"catalog": "iceberg"}) + if hive: + target.update({"catalog": "hive"}) + return target From 9ea5be82a4e0ff1ce661e68963854dd03fe76ab5 Mon Sep 17 00:00:00 2001 From: Michiel De Smet Date: Sat, 17 Sep 2022 18:56:05 +0200 Subject: [PATCH 2/4] Enable roles support in hive --- docker-compose-starburst.yml | 1 + docker-compose-trino.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/docker-compose-starburst.yml b/docker-compose-starburst.yml index 5878ef57..4a2ffafd 100755 --- a/docker-compose-starburst.yml +++ b/docker-compose-starburst.yml @@ -53,6 +53,7 @@ services: AZURE_ABFS_OAUTH_SECRET: "" AZURE_ABFS_OAUTH_ENDPOINT: "" AZURE_WASB_ACCESS_KEY: "" + HIVE_METASTORE_USERS_IN_ADMIN_ROLE: "admin" depends_on: - metastore_db diff --git a/docker-compose-trino.yml b/docker-compose-trino.yml index 6c8ab933..3b4c95b9 100644 --- a/docker-compose-trino.yml +++ b/docker-compose-trino.yml @@ -54,6 +54,7 @@ services: AZURE_ABFS_OAUTH_SECRET: "" AZURE_ABFS_OAUTH_ENDPOINT: "" AZURE_WASB_ACCESS_KEY: "" + HIVE_METASTORE_USERS_IN_ADMIN_ROLE: "admin" depends_on: - metastore_db From d336e437b5f669c83d4a27b0d19c9540d1247ee0 Mon Sep 17 00:00:00 2001 From: Michiel De Smet Date: Sat, 17 Sep 2022 18:58:09 +0200 Subject: [PATCH 3/4] Support roles in dbt profile --- dbt/adapters/trino/connections.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dbt/adapters/trino/connections.py b/dbt/adapters/trino/connections.py index 01ced246..50048bea 100644 --- a/dbt/adapters/trino/connections.py +++ b/dbt/adapters/trino/connections.py @@ -94,6 +94,7 @@ class TrinoNoneCredentials(TrinoCredentials): host: str port: Port user: str + roles: Optional[Dict[str, str]] = None cert: Optional[str] = None http_scheme: HttpScheme = HttpScheme.HTTP http_headers: Optional[Dict[str, str]] = None @@ -116,6 +117,7 @@ class TrinoCertificateCredentials(TrinoCredentials): client_certificate: str client_private_key: str user: Optional[str] = None + roles: Optional[Dict[str, str]] = None cert: Optional[str] = None http_headers: Optional[Dict[str, str]] = None session_properties: Dict[str, Any] = field(default_factory=dict) @@ -143,6 +145,7 @@ class TrinoLdapCredentials(TrinoCredentials): port: Port user: str password: str + roles: Optional[Dict[str, str]] = None cert: Optional[str] = None http_headers: Optional[Dict[str, str]] = None session_properties: Dict[str, Any] = field(default_factory=dict) @@ -166,6 +169,7 @@ class TrinoKerberosCredentials(TrinoCredentials): host: str port: Port user: str + roles: Optional[Dict[str, str]] = None keytab: Optional[str] = None principal: Optional[str] = None krb5_config: Optional[str] = None @@ -210,6 +214,7 @@ class TrinoJwtCredentials(TrinoCredentials): port: Port jwt_token: str user: Optional[str] = None + roles: Optional[Dict[str, str]] = None cert: Optional[str] = None http_headers: Optional[Dict[str, str]] = None session_properties: Dict[str, Any] = field(default_factory=dict) @@ -233,6 +238,7 @@ class TrinoOauthCredentials(TrinoCredentials): host: str port: Port user: Optional[str] = None + roles: Optional[Dict[str, str]] = None cert: Optional[str] = None http_headers: Optional[Dict[str, str]] = None session_properties: Dict[str, Any] = field(default_factory=dict) @@ -404,6 +410,7 @@ def open(cls, connection): host=credentials.host, port=credentials.port, user=credentials.user, + roles=credentials.roles, catalog=credentials.database, schema=credentials.schema, http_scheme=credentials.http_scheme.value, From f45008b9a853ae45fe230a91bac53ae980ad3a7c Mon Sep 17 00:00:00 2001 From: Michiel De Smet Date: Sat, 17 Sep 2022 18:58:36 +0200 Subject: [PATCH 4/4] Support grants config --- .../unreleased/Features-20221005-134613.yaml | 7 ++ Makefile | 6 ++ README.md | 73 ++++++++++++------- dbt/include/trino/macros/apply_grants.sql | 45 ++++++++++++ .../macros/materializations/incremental.sql | 7 ++ .../trino/macros/materializations/table.sql | 10 ++- tests/conftest.py | 3 + tests/functional/adapter/test_model_grants.py | 32 ++++++++ tox.ini | 2 +- 9 files changed, 154 insertions(+), 31 deletions(-) create mode 100644 .changes/unreleased/Features-20221005-134613.yaml create mode 100644 dbt/include/trino/macros/apply_grants.sql create mode 100644 tests/functional/adapter/test_model_grants.py diff --git a/.changes/unreleased/Features-20221005-134613.yaml b/.changes/unreleased/Features-20221005-134613.yaml new file mode 100644 index 00000000..b8c842ba --- /dev/null +++ b/.changes/unreleased/Features-20221005-134613.yaml @@ -0,0 +1,7 @@ +kind: Features +body: Support security grants +time: 2022-10-05T13:46:13.789545+02:00 +custom: + Author: mdesmet + Issue: "" + PR: "130" diff --git a/Makefile b/Makefile index 06bb5138..a9defc83 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,9 @@ +.EXPORT_ALL_VARIABLES: + +DBT_TEST_USER_1=user1 +DBT_TEST_USER_2=user2 +DBT_TEST_USER_3=user3 + start-trino: docker network create dbt-net || true ./docker/dbt/build.sh diff --git a/README.md b/README.md index d72f9cd5..61f42524 100644 --- a/README.md +++ b/README.md @@ -39,34 +39,35 @@ $ pip install dbt-trino A dbt profile can be configured to run against Trino using the following configuration: -| Option | Description | Required? | Example | -|--------------------------------|--------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------|----------------------------------| -| method | The Trino authentication method to use | Optional (default is `none`, supported methods are `ldap`, `kerberos`, `jwt`, `oauth` or `certificate`) | `none` or `kerberos` | -| user | Username for authentication | Optional (required if `method` is `none`, `ldap` or `kerberos`) | `commander` | -| password | Password for authentication | Optional (required if `method` is `ldap`) | `none` or `abc123` | -| keytab | Path to keytab for kerberos authentication | Optional (may be required if `method` is `kerberos`) | `/tmp/trino.keytab` | -| krb5_config | Path to config for kerberos authentication | Optional (may be required if `method` is `kerberos`) | `/tmp/krb5.conf` | -| principal | Principal for kerberos authentication | Optional (may be required if `method` is `kerberos`) | `trino@EXAMPLE.COM` | -| service_name | Service name for kerberos authentication | Optional (default is `trino`) | `abc123` | -| mutual_authentication | Boolean flag for mutual authentication | Optional (may be required if `method` is `kerberos`) | `false` | -| force_preemptive | Boolean flag for preemptively initiate the Kerberos GSS exchange | Optional (may be required if `method` is `kerberos`) | `false` | -| hostname_override | Kerberos hostname for a host whose DNS name doesn't match | Optional (may be required if `method` is `kerberos`) | `EXAMPLE.COM` | -| sanitize_mutual_error_response | Boolean flag to strip content and headers from error responses | Optional (may be required if `method` is `kerberos`) | `true` | -| delegate | Boolean flag for credential delgation (GSS_C_DELEG_FLAG) | Optional (may be required if `method` is `kerberos`) | `false` | -| jwt_token | JWT token for authentication | Optional (required if `method` is `jwt`) | `none` or `abc123` | -| client_certificate | Path to client certificate to be used for certificate based authentication | Optional (required if `method` is `certificate`) | `/tmp/tls.crt` | -| client_private_key | Path to client private key to be used for certificate based authentication | Optional (required if `method` is `certificate`) | `/tmp/tls.key` | -| http_headers | HTTP Headers to send alongside requests to Trino, specified as a yaml dictionary of (header, value) pairs. | Optional | `X-Trino-Client-Info: dbt-trino` | -| http_scheme | The HTTP scheme to use for requests to Trino | Optional (default is `http`, or `https` for `method: kerberos`, `ldap` or `jwt`) | `https` or `http` | -| cert | The full path to a certificate file for authentication with trino | Optional | | -| session_properties | Sets Trino session properties used in the connection | Optional | `query_max_run_time: 5d` | -| database | Specify the database to build models into | Required | `analytics` | -| schema | Specify the schema to build models into. Note: it is not recommended to use upper or mixed case schema names | Required | `public` | -| host | The hostname to connect to | Required | `127.0.0.1` | -| port | The port to connect to the host on | Required | `8080` | -| threads | How many threads dbt should use | Optional (default is `1`) | `8` | -| prepared_statements_enabled | Enable usage of Trino prepared statements (used in `dbt seed` commands) | Optional (default is `true`) | `true` or `false` | -| retries | Configure how many times a database operation is retried when connection issues arise | Optional (default is `3`) | `10` | +| Option | Description | Required? | Example | +|--------------------------------|--------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------|----------------------------------| +| method | The Trino authentication method to use | Optional (default is `none`, supported methods are `ldap`, `kerberos`, `jwt`, `oauth` or `certificate`) | `none` or `kerberos` | +| user | Username for authentication | Optional (required if `method` is `none`, `ldap` or `kerberos`) | `commander` | +| password | Password for authentication | Optional (required if `method` is `ldap`) | `none` or `abc123` | +| roles | Catalog roles | Optional | `hive: analyst` | +| keytab | Path to keytab for kerberos authentication | Optional (may be required if `method` is `kerberos`) | `/tmp/trino.keytab` | +| krb5_config | Path to config for kerberos authentication | Optional (may be required if `method` is `kerberos`) | `/tmp/krb5.conf` | +| principal | Principal for kerberos authentication | Optional (may be required if `method` is `kerberos`) | `trino@EXAMPLE.COM` | +| service_name | Service name for kerberos authentication | Optional (default is `trino`) | `abc123` | +| mutual_authentication | Boolean flag for mutual authentication | Optional (may be required if `method` is `kerberos`) | `false` | +| force_preemptive | Boolean flag for preemptively initiate the Kerberos GSS exchange | Optional (may be required if `method` is `kerberos`) | `false` | +| hostname_override | Kerberos hostname for a host whose DNS name doesn't match | Optional (may be required if `method` is `kerberos`) | `EXAMPLE.COM` | +| sanitize_mutual_error_response | Boolean flag to strip content and headers from error responses | Optional (may be required if `method` is `kerberos`) | `true` | +| delegate | Boolean flag for credential delgation (GSS_C_DELEG_FLAG) | Optional (may be required if `method` is `kerberos`) | `false` | +| jwt_token | JWT token for authentication | Optional (required if `method` is `jwt`) | `none` or `abc123` | +| client_certificate | Path to client certificate to be used for certificate based authentication | Optional (required if `method` is `certificate`) | `/tmp/tls.crt` | +| client_private_key | Path to client private key to be used for certificate based authentication | Optional (required if `method` is `certificate`) | `/tmp/tls.key` | +| http_headers | HTTP Headers to send alongside requests to Trino, specified as a yaml dictionary of (header, value) pairs. | Optional | `X-Trino-Client-Info: dbt-trino` | +| http_scheme | The HTTP scheme to use for requests to Trino | Optional (default is `http`, or `https` for `method: kerberos`, `ldap` or `jwt`) | `https` or `http` | +| cert | The full path to a certificate file for authentication with trino | Optional | | +| session_properties | Sets Trino session properties used in the connection | Optional | `query_max_run_time: 5d` | +| database | Specify the database to build models into | Required | `analytics` | +| schema | Specify the schema to build models into. Note: it is not recommended to use upper or mixed case schema names | Required | `public` | +| host | The hostname to connect to | Required | `127.0.0.1` | +| port | The port to connect to the host on | Required | `8080` | +| threads | How many threads dbt should use | Optional (default is `1`) | `8` | +| prepared_statements_enabled | Enable usage of Trino prepared statements (used in `dbt seed` commands) | Optional (default is `true`) | `true` or `false` | +| retries | Configure how many times a database operation is retried when connection issues arise | Optional (default is `3`) | `10` | **Example profiles.yml entry:** @@ -425,6 +426,22 @@ When executing a prepared statement with a large number of parameters, you might The prepared statements can be disabled by setting `prepared_statements_enabled` to `true` in your dbt profile (reverting back to the legacy behavior using Python string interpolation). This flag may be removed in later releases. +#### Grants + +Please note that grants are only supported in [Starburst Enterprise](https://docs.starburst.io/latest/security/biac-overview.html) and [Starburst Galaxy](https://docs.starburst.io/starburst-galaxy/security/access-control.html) and Hive ([sql-standard](https://trino.io/docs/current/connector/hive-security.html)). + +You can manage access to the datasets you're producing with dbt by using grants. To implement these permissions, define grants as resource configs on each model, seed, or snapshot. Define the default grants that apply to the entire project in your dbt_project.yml, and define model-specific grants within each model's SQL or YAML file. + +```yaml +models: + - name: specific_model + config: + grants: + select: ['reporter', 'bi'] +``` + +Read everything about grants in the [dbt docs](https://docs.getdbt.com/reference/resource-configs/grants). + ## Contributing - Want to report a bug or request a feature? Let us know on [Slack](http://community.getdbt.com/) in the [#db-presto-trino](https://getdbt.slack.com/channels/db-presto-trino) channel, or open [an issue](https://github.com/starburstdata/dbt-trino/issues/new) diff --git a/dbt/include/trino/macros/apply_grants.sql b/dbt/include/trino/macros/apply_grants.sql new file mode 100644 index 00000000..a638da8d --- /dev/null +++ b/dbt/include/trino/macros/apply_grants.sql @@ -0,0 +1,45 @@ +{% macro trino__get_show_grant_sql(relation) -%} + select + grantee, + lower(privilege_type) as privilege_type + from information_schema.table_privileges + where table_catalog = '{{ relation.database }}' + and table_schema = '{{ relation.schema }}' + and table_name = '{{ relation.identifier }}' +{%- endmacro %} + +{% macro trino__copy_grants() %} + {# + -- This macro should return true or false depending on the answer to + -- following question: + -- when an object is fully replaced on your database, do grants copy over? + -- e.g. on Postgres this is never true, + -- on Spark this is different for views vs. non-Delta tables vs. Delta tables, + -- on Snowflake it depends on the user-supplied copy_grants configuration. + -- true by default, which means “play it safe”: grants MIGHT have copied over, + -- so dbt will run an extra query to check them + calculate diffs. + #} + {{ return(False) }} +{% endmacro %} + +{%- macro trino__get_grant_sql(relation, privilege, grantees) -%} + grant {{ privilege }} on {{ relation }} to {{ adapter.quote(grantees[0]) }} +{%- endmacro %} + +{%- macro trino__support_multiple_grantees_per_dcl_statement() -%} + {# + -- This macro should return true or false depending on the answer to + -- following question: + -- does this database support grant {privilege} to user_a, user_b, ...? + -- or do user_a + user_b need their own separate grant statements? + #} + {{ return(False) }} +{%- endmacro -%} + +{% macro trino__call_dcl_statements(dcl_statement_list) %} + {% for dcl_statement in dcl_statement_list %} + {% call statement('grant_or_revoke') %} + {{ dcl_statement }} + {% endcall %} + {% endfor %} +{% endmacro %} diff --git a/dbt/include/trino/macros/materializations/incremental.sql b/dbt/include/trino/macros/materializations/incremental.sql index 82c6ef82..672ee0fd 100644 --- a/dbt/include/trino/macros/materializations/incremental.sql +++ b/dbt/include/trino/macros/materializations/incremental.sql @@ -130,6 +130,9 @@ {{ run_hooks(pre_hooks) }} + -- grab current tables grants config for comparision later on + {% set grant_config = config.get('grants') %} + {% if existing_relation is none %} {% set build_sql = create_table_as(False, target_relation, sql) %} {% elif existing_relation.is_view or full_refresh_mode %} @@ -153,6 +156,10 @@ {{ run_hooks(post_hooks) }} + {% set should_revoke = + should_revoke(existing_relation.is_table, full_refresh_mode) %} + {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %} + {% do persist_docs(target_relation, model) %} {{ return({'relations': [target_relation]}) }} diff --git a/dbt/include/trino/macros/materializations/table.sql b/dbt/include/trino/macros/materializations/table.sql index c6cbc0af..a71c8db0 100644 --- a/dbt/include/trino/macros/materializations/table.sql +++ b/dbt/include/trino/macros/materializations/table.sql @@ -37,6 +37,9 @@ {{ run_hooks(pre_hooks) }} + -- grab current tables grants config for comparision later on + {% set grant_config = config.get('grants') %} + {% if on_table_exists == 'rename' %} {#-- build modeldock #} {% call statement('main') -%} @@ -65,9 +68,12 @@ {%- endcall %} {% endif %} - {{ run_hooks(post_hooks) }} - {% do persist_docs(target_relation, model) %} + {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %} + {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %} + + {{ run_hooks(post_hooks) }} + {{ return({'relations': [target_relation]}) }} {% endmaterialization %} diff --git a/tests/conftest.py b/tests/conftest.py index 58a00ccf..7c3966c3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -60,6 +60,9 @@ def get_trino_starburst_target(): "port": 8080, "user": "admin", "password": "", + "roles": { + "hive": "ROLE{admin}", + }, "catalog": "memory", "schema": "default", } diff --git a/tests/functional/adapter/test_model_grants.py b/tests/functional/adapter/test_model_grants.py new file mode 100644 index 00000000..26bd0d92 --- /dev/null +++ b/tests/functional/adapter/test_model_grants.py @@ -0,0 +1,32 @@ +import pytest +from dbt.context.base import BaseContext # diff_of_two_dicts only +from dbt.tests.adapter.grants.test_model_grants import BaseModelGrants + + +@pytest.mark.hive +# TODO: setup Galaxy and Starbust tests +# See https://github.com/starburstdata/dbt-trino/issues/147 +# and also https://github.com/starburstdata/dbt-trino/issues/146 +@pytest.mark.skip_profile("starburst_galaxy") +# To run this test locally add following env vars: +# DBT_TEST_USER_1=user1 +# DBT_TEST_USER_2=user2 +# DBT_TEST_USER_3=user3 +class TestModelGrants(BaseModelGrants): + def assert_expected_grants_match_actual(self, project, relation_name, expected_grants): + actual_grants = self.get_grants_on_relation(project, relation_name) + # Remove the creation user + try: + for privilege in ["delete", "update", "insert", "select"]: + if privilege in actual_grants: + actual_grants[privilege].remove("admin") + if len(actual_grants[privilege]) == 0: + del actual_grants[privilege] + except ValueError: + pass + + # need a case-insensitive comparison + # so just a simple "assert expected == actual_grants" won't work + diff_a = BaseContext.diff_of_two_dicts(actual_grants, expected_grants) + diff_b = BaseContext.diff_of_two_dicts(expected_grants, actual_grants) + assert diff_a == diff_b == {} diff --git a/tox.ini b/tox.ini index 8863cd56..27c9bb1e 100644 --- a/tox.ini +++ b/tox.ini @@ -13,7 +13,7 @@ deps = [testenv:integration] basepython = python3 commands = /bin/bash -c '{envpython} -m pytest tests/functional' -passenv = DBT_INVOCATION_ENV DBT_TEST_TRINO_HOST +passenv = DBT_INVOCATION_ENV DBT_TEST_TRINO_HOST DBT_TEST_USER_1 DBT_TEST_USER_2 DBT_TEST_USER_3 deps = -r{toxinidir}/dev_requirements.txt -e.