Skip to content

Commit

Permalink
Merge branch 'main' into talkwithkeyboard/faster-caching
Browse files Browse the repository at this point in the history
# Conflicts:
#	dbt/adapters/spark/impl.py
  • Loading branch information
TalkWIthKeyboard committed May 21, 2022
2 parents d29bd47 + 8744cf1 commit 24e223c
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 11 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
## dbt-spark 1.1.0 (TBD)
## dbt-spark 1.2.0 (April 28, 2022)

### Fixes
- `adapter.get_columns_in_relation` (method) and `get_columns_in_relation` (macro) now return identical responses. The previous behavior of `get_columns_in_relation` (macro) is now represented by a new macro, `get_columns_in_relation_raw` ([#354](https://github.com/dbt-labs/dbt-spark/issues/354), [#355](https://github.com/dbt-labs/dbt-spark/pull/355))

## dbt-spark 1.1.0 (April 28, 2022)

### Features
- Add session connection method ([#272](https://github.com/dbt-labs/dbt-spark/issues/272), [#279](https://github.com/dbt-labs/dbt-spark/pull/279))
Expand Down
4 changes: 2 additions & 2 deletions dbt/adapters/spark/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
from typing import Any, Dict, Optional, TypeVar, Union

from dbt.adapters.base.column import Column
from dbt.dataclass_schema import dbtClassMixin
from dbt.contracts.relation import FakeAPIObject
from hologram import JsonDict

Self = TypeVar("Self", bound="SparkColumn")


@dataclass
class SparkColumn(dbtClassMixin, Column):
class SparkColumn(FakeAPIObject, Column):
table_database: Optional[str] = None
table_schema: Optional[str] = None
table_name: Optional[str] = None
Expand Down
10 changes: 6 additions & 4 deletions dbt/adapters/spark/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

logger = AdapterLogger("Spark")

GET_COLUMNS_IN_RELATION_MACRO_NAME = "get_columns_in_relation"
GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME = "spark__get_columns_in_relation_raw"
LIST_SCHEMAS_MACRO_NAME = "list_schemas"
LIST_TABLES_MACRO_NAME = "spark__list_tables_without_caching"
LIST_VIEWS_MACRO_NAME = "spark__list_views_without_caching"
Expand Down Expand Up @@ -279,14 +279,16 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]:
else:
updated_relation = self._set_relation_information(cached_relation)

return updated_relation.columns
return updated_relation.columns if updated_relation is not None else []

def _get_updated_relation(self, relation: BaseRelation) -> Optional[SparkRelation]:
metadata = None
columns = []

try:
rows: List[agate.Row] = super().get_columns_in_relation(relation)
rows: List[agate.Row] = self.execute_macro(
GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME, kwargs={"relation": relation}
)
metadata, columns = self.parse_describe_extended(relation, rows)
except dbt.exceptions.RuntimeException as e:
# spark would throw error when table doesn't exist, where other
Expand Down Expand Up @@ -334,7 +336,7 @@ def _get_columns_for_catalog(
) -> Iterable[Dict[str, Any]]:
updated_relation = self._set_relation_information(relation)

for column in updated_relation.columns:
for column in (updated_relation.columns if updated_relation is not None else []):
# convert SparkColumns into catalog dicts
as_dict = column.to_column_dict()
as_dict["column_name"] = as_dict.pop("column", None)
Expand Down
10 changes: 7 additions & 3 deletions dbt/include/spark/macros/adapters.sql
Original file line number Diff line number Diff line change
Expand Up @@ -168,11 +168,15 @@
{%- endcall -%}
{% endmacro %}

{% macro spark__get_columns_in_relation(relation) -%}
{% call statement('get_columns_in_relation', fetch_result=True) %}
{% macro spark__get_columns_in_relation_raw(relation) -%}
{% call statement('get_columns_in_relation_raw', fetch_result=True) %}
describe extended {{ relation.include(schema=(schema is not none)) }}
{% endcall %}
{% do return(load_result('get_columns_in_relation').table) %}
{% do return(load_result('get_columns_in_relation_raw').table) %}
{% endmacro %}

{% macro spark__get_columns_in_relation(relation) -%}
  {#-- Delegate to the adapter method so the macro and
       adapter.get_columns_in_relation (the Python method) return identical
       responses; raw `describe extended` rows are available separately via
       spark__get_columns_in_relation_raw. --#}
{{ return(adapter.get_columns_in_relation(relation)) }}
{% endmacro %}

{% macro spark__list_tables_without_caching(relation) %}
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def pytest_addoption(parser):
parser.addoption("--profile", action="store", default="apache_spark", type=str)


# Using @pytest.mark.skip_adapter('apache_spark') uses the 'skip_by_adapter_type'
# Using @pytest.mark.skip_profile('apache_spark') uses the 'skip_by_profile_type'
# autouse fixture below
def pytest_configure(config):
config.addinivalue_line(
Expand Down

0 comments on commit 24e223c

Please sign in to comment.