diff --git a/.changes/unreleased/Features-20231214-195655.yaml b/.changes/unreleased/Features-20231214-195655.yaml new file mode 100644 index 000000000..93f372c67 --- /dev/null +++ b/.changes/unreleased/Features-20231214-195655.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Support limiting get_catalog by object name +time: 2023-12-14T19:56:55.124051-05:00 +custom: + Author: mikealfare + Issue: "625" diff --git a/dbt/adapters/redshift/impl.py b/dbt/adapters/redshift/impl.py index f523acfb7..1a5a9c8f5 100644 --- a/dbt/adapters/redshift/impl.py +++ b/dbt/adapters/redshift/impl.py @@ -50,6 +50,7 @@ class RedshiftAdapter(SQLAdapter): _capabilities = CapabilityDict( { Capability.TableLastModifiedMetadata: CapabilitySupport(support=Support.Full), + Capability.SchemaMetadataByRelations: CapabilitySupport(support=Support.Full), } ) diff --git a/dbt/include/redshift/macros/catalog.sql b/dbt/include/redshift/macros/catalog.sql deleted file mode 100644 index 69dc71713..000000000 --- a/dbt/include/redshift/macros/catalog.sql +++ /dev/null @@ -1,258 +0,0 @@ - -{% macro redshift__get_base_catalog(information_schema, schemas) -%} - {%- call statement('base_catalog', fetch_result=True) -%} - {% set database = information_schema.database %} - {{ adapter.verify_database(database) }} - - with late_binding as ( - select - '{{ database }}'::varchar as table_database, - table_schema, - table_name, - 'LATE BINDING VIEW'::varchar as table_type, - null::text as table_comment, - - column_name, - column_index, - column_type, - null::text as column_comment - from pg_get_late_binding_view_cols() - cols(table_schema name, table_name name, column_name name, - column_type varchar, - column_index int) - order by "column_index" - ), - - materialized_views as ( - select - table_schema as nspname, - table_name as relname - from information_schema.views - where ( - {%- for schema in schemas -%} - upper(table_schema) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%} - {%- endfor -%} - ) - and 
table_catalog = '{{ database }}' - and view_definition ilike '%create materialized view%' - ), - - early_binding as ( - select - '{{ database }}'::varchar as table_database, - sch.nspname as table_schema, - tbl.relname as table_name, - case - when tbl.relkind = 'v' and materialized_views.relname is not null then 'MATERIALIZED VIEW' - when tbl.relkind = 'v' then 'VIEW' - else 'BASE TABLE' - end as table_type, - tbl_desc.description as table_comment, - col.attname as column_name, - col.attnum as column_index, - pg_catalog.format_type(col.atttypid, col.atttypmod) as column_type, - col_desc.description as column_comment - - from pg_catalog.pg_namespace sch - join pg_catalog.pg_class tbl on tbl.relnamespace = sch.oid - join pg_catalog.pg_attribute col on col.attrelid = tbl.oid - left outer join pg_catalog.pg_description tbl_desc on (tbl_desc.objoid = tbl.oid and tbl_desc.objsubid = 0) - left outer join pg_catalog.pg_description col_desc on (col_desc.objoid = tbl.oid and col_desc.objsubid = col.attnum) - left outer join materialized_views on (materialized_views.nspname = sch.nspname and materialized_views.relname = tbl.relname) - where ( - {%- for schema in schemas -%} - upper(sch.nspname) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%} - {%- endfor -%} - ) - and tbl.relkind in ('r', 'v', 'f', 'p') - and col.attnum > 0 - and not col.attisdropped - ), - - table_owners as ( - - select - '{{ database }}'::varchar as table_database, - schemaname as table_schema, - tablename as table_name, - tableowner as table_owner - - from pg_tables - - union all - - select - '{{ database }}'::varchar as table_database, - schemaname as table_schema, - viewname as table_name, - viewowner as table_owner - - from pg_views - - ), - - unioned as ( - - select * - from early_binding - - union all - - select * - from late_binding - - ) - - select *, - table_database || '.' || table_schema || '.' 
|| table_name as table_id - - from unioned - join table_owners using (table_database, table_schema, table_name) - - where ( - {%- for schema in schemas -%} - upper(table_schema) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%} - {%- endfor -%} - ) - - order by "column_index" - {%- endcall -%} - - {{ return(load_result('base_catalog').table) }} -{%- endmacro %} - -{% macro redshift__get_extended_catalog(schemas) %} - {%- call statement('extended_catalog', fetch_result=True) -%} - - select - "database" || '.' || "schema" || '.' || "table" as table_id, - - 'Encoded'::text as "stats:encoded:label", - encoded as "stats:encoded:value", - 'Indicates whether any column in the table has compression encoding defined.'::text as "stats:encoded:description", - true as "stats:encoded:include", - - 'Dist Style' as "stats:diststyle:label", - diststyle as "stats:diststyle:value", - 'Distribution style or distribution key column, if key distribution is defined.'::text as "stats:diststyle:description", - true as "stats:diststyle:include", - - 'Sort Key 1' as "stats:sortkey1:label", - -- handle 0xFF byte in response for interleaved sort styles - case - when sortkey1 like 'INTERLEAVED%' then 'INTERLEAVED'::text - else sortkey1 - end as "stats:sortkey1:value", - 'First column in the sort key.'::text as "stats:sortkey1:description", - (sortkey1 is not null) as "stats:sortkey1:include", - - 'Max Varchar' as "stats:max_varchar:label", - max_varchar as "stats:max_varchar:value", - 'Size of the largest column that uses a VARCHAR data type.'::text as "stats:max_varchar:description", - true as "stats:max_varchar:include", - - -- exclude this, as the data is strangely returned with null-byte characters - 'Sort Key 1 Encoding' as "stats:sortkey1_enc:label", - sortkey1_enc as "stats:sortkey1_enc:value", - 'Compression encoding of the first column in the sort key.' 
as "stats:sortkey1_enc:description", - false as "stats:sortkey1_enc:include", - - '# Sort Keys' as "stats:sortkey_num:label", - sortkey_num as "stats:sortkey_num:value", - 'Number of columns defined as sort keys.' as "stats:sortkey_num:description", - (sortkey_num > 0) as "stats:sortkey_num:include", - - 'Approximate Size' as "stats:size:label", - size * 1000000 as "stats:size:value", - 'Approximate size of the table, calculated from a count of 1MB blocks'::text as "stats:size:description", - true as "stats:size:include", - - 'Disk Utilization' as "stats:pct_used:label", - pct_used / 100.0 as "stats:pct_used:value", - 'Percent of available space that is used by the table.'::text as "stats:pct_used:description", - true as "stats:pct_used:include", - - 'Unsorted %' as "stats:unsorted:label", - unsorted / 100.0 as "stats:unsorted:value", - 'Percent of unsorted rows in the table.'::text as "stats:unsorted:description", - (unsorted is not null) as "stats:unsorted:include", - - 'Stats Off' as "stats:stats_off:label", - stats_off as "stats:stats_off:value", - 'Number that indicates how stale the table statistics are; 0 is current, 100 is out of date.'::text as "stats:stats_off:description", - true as "stats:stats_off:include", - - 'Approximate Row Count' as "stats:rows:label", - tbl_rows as "stats:rows:value", - 'Approximate number of rows in the table. 
This value includes rows marked for deletion, but not yet vacuumed.'::text as "stats:rows:description", - true as "stats:rows:include", - - 'Sort Key Skew' as "stats:skew_sortkey1:label", - skew_sortkey1 as "stats:skew_sortkey1:value", - 'Ratio of the size of the largest non-sort key column to the size of the first column of the sort key.'::text as "stats:skew_sortkey1:description", - (skew_sortkey1 is not null) as "stats:skew_sortkey1:include", - - 'Skew Rows' as "stats:skew_rows:label", - skew_rows as "stats:skew_rows:value", - 'Ratio of the number of rows in the slice with the most rows to the number of rows in the slice with the fewest rows.'::text as "stats:skew_rows:description", - (skew_rows is not null) as "stats:skew_rows:include" - - from svv_table_info - where ( - {%- for schema in schemas -%} - upper(schema) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%} - {%- endfor -%} - ) - - {%- endcall -%} - - {{ return(load_result('extended_catalog').table) }} - -{% endmacro %} - -{% macro redshift__can_select_from(table_name) %} - - {%- call statement('has_table_privilege', fetch_result=True) -%} - - select has_table_privilege(current_user, '{{ table_name }}', 'SELECT') as can_select - - {%- endcall -%} - - {% set can_select = load_result('has_table_privilege').table[0]['can_select'] %} - {{ return(can_select) }} - -{% endmacro %} - -{% macro redshift__no_svv_table_info_warning() %} - - {% set msg %} - - Warning: The database user "{{ target.user }}" has insufficient permissions to - query the "svv_table_info" table. Please grant SELECT permissions on this table - to the "{{ target.user }}" user to fetch extended table details from Redshift. - - {% endset %} - - {{ log(msg, info=True) }} - -{% endmacro %} - - -{% macro redshift__get_catalog(information_schema, schemas) %} - - {#-- Compute a left-outer join in memory. 
Some Redshift queries are
-      -- leader-only, and cannot be joined to other compute-based queries #}
-
-    {% set catalog = redshift__get_base_catalog(information_schema, schemas) %}
-
-    {% set select_extended = redshift__can_select_from('svv_table_info') %}
-    {% if select_extended %}
-        {% set extended_catalog = redshift__get_extended_catalog(schemas) %}
-        {% set catalog = catalog.join(extended_catalog, 'table_id') %}
-    {% else %}
-        {{ redshift__no_svv_table_info_warning() }}
-    {% endif %}
-
-    {{ return(catalog.exclude(['table_id'])) }}
-
-{% endmacro %}
diff --git a/dbt/include/redshift/macros/catalog/by_relation.sql b/dbt/include/redshift/macros/catalog/by_relation.sql
new file mode 100644
index 000000000..d0d79c65a
--- /dev/null
+++ b/dbt/include/redshift/macros/catalog/by_relation.sql
@@ -0,0 +1,82 @@
+{# Entry point: build the catalog for the given relations. Verifies the database, fetches the column metadata, and joins the svv_table_info stats on (table_schema, table_name) when the current user can select from that view; otherwise logs a warning and returns the base catalog only. #}{% macro redshift__get_catalog_relations(information_schema, relations) -%}
+
+    {% set database = information_schema.database %}
+    {{ adapter.verify_database(database) }}
+
+    {#-- Compute a left-outer join in memory. Some Redshift queries are
+      -- leader-only, and cannot be joined to other compute-based queries #}
+
+    {% set catalog = _redshift__get_base_catalog_by_relation(database, relations) %}
+
+    {% set select_extended = redshift__can_select_from('svv_table_info') %}
+    {% if select_extended %}
+        {% set extended_catalog = _redshift__get_extended_catalog_by_relation(relations) %}
+        {% set catalog = catalog.join(extended_catalog, ['table_schema', 'table_name']) %}
+    {% else %}
+        {{ redshift__no_svv_table_info_warning() }}
+    {% endif %}
+
+    {{ return(catalog) }}
+
+{% endmacro %}
+
+
+{# Run the 'base_catalog' statement: early- and late-binding column rows limited to `relations`, unioned, joined to the table owners, prefixed with the database name and ordered by column_index. Returns the statement's result table. #}{% macro _redshift__get_base_catalog_by_relation(database, relations) -%}
+    {%- call statement('base_catalog', fetch_result=True) -%}
+        with
+            late_binding as ({{ _redshift__get_late_binding_by_relation_sql(relations) }}),
+            early_binding as ({{ _redshift__get_early_binding_by_relation_sql(database, relations) }}),
+            unioned as (select * from early_binding union all select * from late_binding),
+            table_owners as ({{ redshift__get_table_owners_sql() }})
+        select '{{ database }}' as table_database, *
+        from unioned
+        join table_owners using (table_schema, table_name)
+        order by "column_index"
+    {%- endcall -%}
+    {{ return(load_result('base_catalog').table) }}
+{%- endmacro %}
+
+
+{# Late-binding select followed by a WHERE that ORs one case-insensitive (schema, identifier) match per relation. NOTE(review): an empty `relations` list renders "where ( )" - confirm callers never pass one. #}{% macro _redshift__get_late_binding_by_relation_sql(relations) %}
+    {{ redshift__get_late_binding_sql() }}
+    where (
+        {%- for relation in relations -%}
+            (
+                upper(table_schema) = upper('{{ relation.schema }}')
+                and upper(table_name) = upper('{{ relation.identifier }}')
+            )
+            {%- if not loop.last %} or {% endif -%}
+        {%- endfor -%}
+    )
+{% endmacro %}
+
+
+{# Early-binding select followed by the same per-relation filter; it is appended with AND because the SQL from redshift__get_early_binding_sql already ends in a WHERE clause. #}{% macro _redshift__get_early_binding_by_relation_sql(database, relations) %}
+    {{ redshift__get_early_binding_sql(database) }}
+    and (
+        {%- for relation in relations -%}
+            (
+                upper(sch.nspname) = upper('{{ relation.schema }}')
+                and upper(tbl.relname) = upper('{{ relation.identifier }}')
+            )
+            {%- if not loop.last %} or {% endif -%}
+        {%- endfor -%}
+    )
+{% endmacro %}
+
+
+{# Run the 'extended_catalog' statement: the svv_table_info stats columns limited to `relations` by a case-insensitive (schema, table) match. Returns the statement's result table. #}{% macro _redshift__get_extended_catalog_by_relation(relations) %}
+    {%- call statement('extended_catalog', fetch_result=True) -%}
+        {{ redshift__get_extended_catalog_sql() }}
+        where (
+            {%- for relation in relations -%}
+                (
+                    upper("schema") = upper('{{ relation.schema }}')
+                    and upper("table") = upper('{{ relation.identifier }}')
+                )
+                {%- if not loop.last %} or {% endif -%}
+            {%- endfor -%}
+        )
+    {%- endcall -%}
+    {{ return(load_result('extended_catalog').table) }}
+{% endmacro %}
diff --git a/dbt/include/redshift/macros/catalog/by_schema.sql b/dbt/include/redshift/macros/catalog/by_schema.sql
new file mode 100644
index 000000000..99325f765
--- /dev/null
+++ b/dbt/include/redshift/macros/catalog/by_schema.sql
@@ -0,0 +1,70 @@
+{# Entry point: build the catalog for whole schemas. Verifies the database, fetches the column metadata, and joins the svv_table_info stats on (table_schema, table_name) when the current user can select from that view; otherwise logs a warning and returns the base catalog only. #}{% macro redshift__get_catalog(information_schema, schemas) %}
+
+    {% set database = information_schema.database %}
+    {{ adapter.verify_database(database) }}
+
+    {#-- Compute a left-outer join in memory. Some Redshift queries are
+      -- leader-only, and cannot be joined to other compute-based queries #}
+
+    {% set catalog = _redshift__get_base_catalog_by_schema(database, schemas) %}
+
+    {% set select_extended = redshift__can_select_from('svv_table_info') %}
+    {% if select_extended %}
+        {% set extended_catalog = _redshift__get_extended_catalog_by_schema(schemas) %}
+        {% set catalog = catalog.join(extended_catalog, ['table_schema', 'table_name']) %}
+    {% else %}
+        {{ redshift__no_svv_table_info_warning() }}
+    {% endif %}
+
+    {{ return(catalog) }}
+
+{% endmacro %}
+
+
+{# Run the 'base_catalog' statement: early- and late-binding column rows limited to `schemas`, unioned, joined to the table owners, prefixed with the database name and ordered by column_index. Returns the statement's result table. #}{% macro _redshift__get_base_catalog_by_schema(database, schemas) -%}
+    {%- call statement('base_catalog', fetch_result=True) -%}
+        with
+            late_binding as ({{ _redshift__get_late_binding_by_schema_sql(schemas) }}),
+            early_binding as ({{ _redshift__get_early_binding_by_schema_sql(database, schemas) }}),
+            unioned as (select * from early_binding union all select * from late_binding),
+            table_owners as ({{ redshift__get_table_owners_sql() }})
+        select '{{ database }}' as table_database, *
+        from unioned
+        join table_owners using (table_schema, table_name)
+        order by "column_index"
+    {%- endcall -%}
+    {{ return(load_result('base_catalog').table) }}
+{%- endmacro %}
+
+
+{# Late-binding select followed by a WHERE that ORs one case-insensitive schema match per entry. NOTE(review): an empty `schemas` list renders "where ( )" - confirm callers never pass one. #}{% macro _redshift__get_late_binding_by_schema_sql(schemas) %}
+    {{ redshift__get_late_binding_sql() }}
+    where (
+        {%- for schema in schemas -%}
+            upper(table_schema) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%}
+        {%- endfor -%}
+    )
+{% endmacro %}
+
+
+{# Early-binding select followed by the same per-schema filter; it is appended with AND because the SQL from redshift__get_early_binding_sql already ends in a WHERE clause. #}{% macro _redshift__get_early_binding_by_schema_sql(database, schemas) %}
+    {{ redshift__get_early_binding_sql(database) }}
+    and (
+        {%- for schema in schemas -%}
+            upper(sch.nspname) = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%}
+        {%- endfor -%}
+    )
+{% endmacro %}
+
+
+{# Run the 'extended_catalog' statement: the svv_table_info stats columns limited to `schemas` by a case-insensitive schema match. Returns the statement's result table. #}{% macro _redshift__get_extended_catalog_by_schema(schemas) %}
+    {%- call statement('extended_catalog', fetch_result=True) -%}
+        {{ redshift__get_extended_catalog_sql() }}
+        where (
+            {%- for schema in schemas -%}
+                upper("schema") = upper('{{ schema }}'){%- if not loop.last %} or {% endif -%}
+            {%- endfor -%}
+        )
+    {%- endcall -%}
+    {{ return(load_result('extended_catalog').table) }}
+{% endmacro %}
diff --git a/dbt/include/redshift/macros/catalog/catalog.sql b/dbt/include/redshift/macros/catalog/catalog.sql
new file mode 100644
index 000000000..694a9441b
--- /dev/null
+++ b/dbt/include/redshift/macros/catalog/catalog.sql
@@ -0,0 +1,176 @@
+{# SQL fragment: one row per late-binding view column, read from pg_get_late_binding_view_cols(). It has no WHERE clause, so callers append their own filter. #}{% macro redshift__get_late_binding_sql() %}
+    select
+        table_schema,
+        table_name,
+        'LATE BINDING VIEW'::varchar as table_type,
+        null::text as table_comment,
+        column_name,
+        column_index,
+        column_type,
+        null::text as column_comment
+    from pg_get_late_binding_view_cols()
+        cols(
+            table_schema name,
+            table_name name,
+            column_name name,
+            column_type varchar,
+            column_index int
+        )
+{% endmacro %}
+
+
+{# SQL fragment: one row per column (attnum > 0, not dropped) of relations with relkind r/v/f/p. A view whose information_schema definition in `database` contains "create materialized view" is labelled MATERIALIZED VIEW. The fragment ends in a WHERE clause, so callers extend it with AND. #}{% macro redshift__get_early_binding_sql(database) %}
+    select
+        sch.nspname as table_schema,
+        tbl.relname as table_name,
+        case
+            when tbl.relkind = 'v' and mat_views.table_name is not null then 'MATERIALIZED VIEW'
+            when tbl.relkind = 'v' then 'VIEW'
+            else 'BASE TABLE'
+        end as table_type,
+        tbl_desc.description as table_comment,
+        col.attname as column_name,
+        col.attnum as column_index,
+        pg_catalog.format_type(col.atttypid, col.atttypmod) as column_type,
+        col_desc.description as column_comment
+    from pg_catalog.pg_namespace sch
+    join pg_catalog.pg_class tbl
+        on tbl.relnamespace = sch.oid
+    join pg_catalog.pg_attribute col
+        on col.attrelid = tbl.oid
+    left outer join pg_catalog.pg_description tbl_desc
+        on tbl_desc.objoid = tbl.oid
+        and tbl_desc.objsubid = 0
+    left outer join pg_catalog.pg_description col_desc
+        on col_desc.objoid = tbl.oid
+        and col_desc.objsubid = col.attnum
+    left outer join information_schema.views mat_views
+        on mat_views.table_schema = sch.nspname
+        and mat_views.table_name = tbl.relname
+        and mat_views.view_definition ilike '%create materialized view%'
+        and mat_views.table_catalog = '{{ database }}'
+    where tbl.relkind in ('r', 'v', 'f', 'p')
+        and col.attnum > 0
+        and not col.attisdropped
+{% endmacro %}
+
+
+{# SQL fragment: the owner of every table (pg_tables) and every view (pg_views), keyed by schema and relation name. #}{% macro redshift__get_table_owners_sql() %}
+    select
+        schemaname as table_schema,
+        tablename as table_name,
+        tableowner as table_owner
+    from pg_tables
+    union all
+    select
+        schemaname as table_schema,
+        viewname as table_name,
+        viewowner as table_owner
+    from pg_views
+{% endmacro %}
+
+
+{# SQL fragment: table_schema, table_name and a label/value/description/include column group per statistic, all read from svv_table_info. It has no WHERE clause, so callers append their own filter. #}{% macro redshift__get_extended_catalog_sql() %}
+    select
+        "schema" as table_schema,
+        "table" as table_name,
+
+        'Encoded'::text as "stats:encoded:label",
+        encoded as "stats:encoded:value",
+        'Indicates whether any column in the table has compression encoding defined.'::text as "stats:encoded:description",
+        true as "stats:encoded:include",
+
+        'Dist Style' as "stats:diststyle:label",
+        diststyle as "stats:diststyle:value",
+        'Distribution style or distribution key column, if key distribution is defined.'::text as "stats:diststyle:description",
+        true as "stats:diststyle:include",
+
+        'Sort Key 1' as "stats:sortkey1:label",
+        -- handle 0xFF byte in response for interleaved sort styles
+        case
+            when sortkey1 like 'INTERLEAVED%' then 'INTERLEAVED'::text
+            else sortkey1
+        end as "stats:sortkey1:value",
+        'First column in the sort key.'::text as "stats:sortkey1:description",
+        (sortkey1 is not null) as "stats:sortkey1:include",
+
+        'Max Varchar' as "stats:max_varchar:label",
+        max_varchar as "stats:max_varchar:value",
+        'Size of the largest column that uses a VARCHAR data type.'::text as "stats:max_varchar:description",
+        true as "stats:max_varchar:include",
+
+        -- exclude this, as the data is strangely returned with null-byte characters
+        'Sort Key 1 Encoding' as "stats:sortkey1_enc:label",
+        sortkey1_enc as "stats:sortkey1_enc:value",
+        'Compression encoding of the first column in the sort key.' as "stats:sortkey1_enc:description",
+        false as "stats:sortkey1_enc:include",
+
+        '# Sort Keys' as "stats:sortkey_num:label",
+        sortkey_num as "stats:sortkey_num:value",
+        'Number of columns defined as sort keys.' as "stats:sortkey_num:description",
+        (sortkey_num > 0) as "stats:sortkey_num:include",
+
+        'Approximate Size' as "stats:size:label",
+        size * 1000000 as "stats:size:value",
+        'Approximate size of the table, calculated from a count of 1MB blocks'::text as "stats:size:description",
+        true as "stats:size:include",
+
+        'Disk Utilization' as "stats:pct_used:label",
+        pct_used / 100.0 as "stats:pct_used:value",
+        'Percent of available space that is used by the table.'::text as "stats:pct_used:description",
+        true as "stats:pct_used:include",
+
+        'Unsorted %' as "stats:unsorted:label",
+        unsorted / 100.0 as "stats:unsorted:value",
+        'Percent of unsorted rows in the table.'::text as "stats:unsorted:description",
+        (unsorted is not null) as "stats:unsorted:include",
+
+        'Stats Off' as "stats:stats_off:label",
+        stats_off as "stats:stats_off:value",
+        'Number that indicates how stale the table statistics are; 0 is current, 100 is out of date.'::text as "stats:stats_off:description",
+        true as "stats:stats_off:include",
+
+        'Approximate Row Count' as "stats:rows:label",
+        tbl_rows as "stats:rows:value",
+        'Approximate number of rows in the table. This value includes rows marked for deletion, but not yet vacuumed.'::text as "stats:rows:description",
+        true as "stats:rows:include",
+
+        'Sort Key Skew' as "stats:skew_sortkey1:label",
+        skew_sortkey1 as "stats:skew_sortkey1:value",
+        'Ratio of the size of the largest non-sort key column to the size of the first column of the sort key.'::text as "stats:skew_sortkey1:description",
+        (skew_sortkey1 is not null) as "stats:skew_sortkey1:include",
+
+        'Skew Rows' as "stats:skew_rows:label",
+        skew_rows as "stats:skew_rows:value",
+        'Ratio of the number of rows in the slice with the most rows to the number of rows in the slice with the fewest rows.'::text as "stats:skew_rows:description",
+        (skew_rows is not null) as "stats:skew_rows:include"
+
+    from svv_table_info
+{% endmacro %}
+
+
+{# Return the database's answer to has_table_privilege(current_user, table_name, 'SELECT'), taken from the first row of the 'has_table_privilege' statement result. #}{% macro redshift__can_select_from(table_name) %}
+
+    {%- call statement('has_table_privilege', fetch_result=True) -%}
+        select has_table_privilege(current_user, '{{ table_name }}', 'SELECT') as can_select
+    {%- endcall -%}
+
+    {% set can_select = load_result('has_table_privilege').table[0]['can_select'] %}
+    {{ return(can_select) }}
+
+{% endmacro %}
+
+
+{# Log, at info level, that target.user lacks SELECT on svv_table_info and how to grant it. #}{% macro redshift__no_svv_table_info_warning() %}
+
+    {% set msg %}
+
+    Warning: The database user "{{ target.user }}" has insufficient permissions to
+    query the "svv_table_info" table. Please grant SELECT permissions on this table
+    to the "{{ target.user }}" user to fetch extended table details from Redshift.
+
+    {% endset %}
+
+    {{ log(msg, info=True) }}
+
+{% endmacro %}
diff --git a/tests/functional/adapter/catalog_tests/test_get_catalog.py b/tests/functional/adapter/catalog_tests/test_get_catalog.py
new file mode 100644
index 000000000..048539163
--- /dev/null
+++ b/tests/functional/adapter/catalog_tests/test_get_catalog.py
@@ -0,0 +1,68 @@
+from dbt.adapters.capability import Capability, CapabilitySupport, Support
+from dbt.tests.util import run_dbt
+import pytest
+
+from tests.functional.adapter.catalog_tests import files
+
+
+class BaseGetCatalog:
+    """Build one seed plus a table, a view and a materialized view for catalog tests.
+
+    Subclasses run ``dbt docs generate`` against this project while forcing a
+    specific ``SchemaMetadataByRelations`` capability on the adapter.
+    """
+
+    @pytest.fixture(scope="class", autouse=True)
+    def seeds(self):
+        return {"my_seed.csv": files.MY_SEED}
+
+    @pytest.fixture(scope="class", autouse=True)
+    def models(self):
+        return {
+            "my_table.sql": files.MY_TABLE,
+            "my_view.sql": files.MY_VIEW,
+            "my_materialized_view.sql": files.MY_MATERIALIZED_VIEW,
+        }
+
+    @pytest.fixture(scope="class", autouse=True)
+    def setup(self, project):
+        # Materialize the seed and the three models once per test class.
+        run_dbt(["seed"])
+        run_dbt(["run"])
+
+
+class TestGetCatalogByRelations(BaseGetCatalog):
+    """Catalog generation when the adapter reports relation-level metadata support."""
+
+    def test_get_one_catalog_by_relations(self, project, adapter, monkeypatch):
+        # `_capabilities` is declared on the adapter class, so a plain item
+        # assignment would leak into later tests; monkeypatch restores it.
+        monkeypatch.setitem(
+            project.adapter._capabilities,
+            Capability.SchemaMetadataByRelations,
+            CapabilitySupport(support=Support.Full),
+        )
+        results = run_dbt(["docs", "generate"])
+        # Expected nodes: the one seed plus the three models defined above.
+        assert len(results.nodes) == 4
+        assert project.adapter._capabilities[
+            Capability.SchemaMetadataByRelations
+        ] == CapabilitySupport(support=Support.Full)
+
+
+class TestGetCatalogBySchemas(BaseGetCatalog):
+    """Catalog generation when relation-level metadata support is switched off."""
+
+    def test_get_one_catalog_by_schemas(self, project, adapter, monkeypatch):
+        # Restored at teardown so the NotImplemented override does not
+        # persist on the shared capability dict.
+        monkeypatch.setitem(
+            project.adapter._capabilities,
+            Capability.SchemaMetadataByRelations,
+            CapabilitySupport(support=Support.NotImplemented),
+        )
+        results = run_dbt(["docs", "generate"])
+        assert len(results.nodes) == 4
+        assert project.adapter._capabilities[
+            Capability.SchemaMetadataByRelations
+        ] == CapabilitySupport(support=Support.NotImplemented)