Skip to content

Commit

Permalink
Merge branch 'main' into paw/record
Browse files Browse the repository at this point in the history
  • Loading branch information
peterallenwebb authored Jul 16, 2024
2 parents 0de8a6e + 6857e6b commit 8a92a5d
Show file tree
Hide file tree
Showing 17 changed files with 388 additions and 49 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240709-194316.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Improve run times for large projects by reusing connections by default
time: 2024-07-09T19:43:16.489649-04:00
custom:
Author: mikealfare amardatar
Issue: "1082"
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240710-172345.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Improve run times when using key pair auth by caching the private key
time: 2024-07-10T17:23:45.046905-04:00
custom:
Author: mikealfare aranke
Issue: "1082"
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20240705-165932.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Use show ... starts with instead of show ... like in _show_object_metadata
time: 2024-07-05T16:59:32.087555+01:00
custom:
Author: aranke
Issue: "1102"
2 changes: 2 additions & 0 deletions .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@ jobs:
SNOWFLAKE_TEST_OAUTH_REFRESH_TOKEN: ${{ secrets.SNOWFLAKE_TEST_OAUTH_REFRESH_TOKEN }}
SNOWFLAKE_TEST_OAUTH_CLIENT_ID: ${{ secrets.SNOWFLAKE_TEST_OAUTH_CLIENT_ID }}
SNOWFLAKE_TEST_OAUTH_CLIENT_SECRET: ${{ secrets.SNOWFLAKE_TEST_OAUTH_CLIENT_SECRET }}
SNOWFLAKE_TEST_PRIVATE_KEY: ${{ secrets.SNOWFLAKE_TEST_PRIVATE_KEY }}
SNOWFLAKE_TEST_PRIVATE_KEY_PASSPHRASE: ${{ secrets.SNOWFLAKE_TEST_PRIVATE_KEY_PASSPHRASE }}
SNOWFLAKE_TEST_ALT_DATABASE: ${{ secrets.SNOWFLAKE_TEST_ALT_DATABASE }}
SNOWFLAKE_TEST_ALT_WAREHOUSE: ${{ secrets.SNOWFLAKE_TEST_ALT_WAREHOUSE }}
SNOWFLAKE_TEST_DATABASE: ${{ secrets.SNOWFLAKE_TEST_DATABASE }}
Expand Down
57 changes: 57 additions & 0 deletions dbt/adapters/snowflake/auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import base64
import sys
from typing import Optional

if sys.version_info < (3, 9):
from functools import lru_cache

cache = lru_cache(maxsize=None)
else:
from functools import cache

from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric.rsa import RSAPrivateKey


@cache
def private_key_from_string(
    private_key_string: str, passphrase: Optional[str] = None
) -> RSAPrivateKey:
    """Load a private key that was supplied inline as a string.

    The format is chosen from the first character: a value starting with
    ``-`` is loaded as PEM text, anything else is base64-decoded and loaded
    as DER.

    Args:
        private_key_string: PEM text, or base64-encoded DER bytes.
        passphrase: Passphrase for an encrypted key. Any falsy value
            (``None`` or ``""``) is handed to the loader as ``None``.

    Returns:
        The key object produced by the ``cryptography`` loader.

    The function is wrapped in ``cache``, so a repeated call with the same
    arguments returns the previously loaded key instead of parsing again.
    """
    password = passphrase.encode() if passphrase else None

    # Anything that does not look like a PEM header is treated as base64 DER.
    if not private_key_string.startswith("-"):
        der_bytes = base64.b64decode(private_key_string)
        return serialization.load_der_private_key(
            data=der_bytes,
            password=password,
            backend=default_backend(),
        )

    pem_bytes = private_key_string.encode("utf-8")
    return serialization.load_pem_private_key(
        data=pem_bytes,
        password=password,
        backend=default_backend(),
    )


@cache
def private_key_from_file(
    private_key_path: str, passphrase: Optional[str] = None
) -> RSAPrivateKey:
    """Load a PEM private key from a file on disk.

    Args:
        private_key_path: Path of the key file; it is opened in binary mode
            and its full contents are parsed as PEM.
        passphrase: Passphrase for an encrypted key. Any falsy value
            (``None`` or ``""``) is handed to the loader as ``None``.

    Returns:
        The key object produced by the ``cryptography`` loader.

    The function is wrapped in ``cache``, so the file is only read on the
    first call for a given pair of arguments; a repeated call with the same
    arguments returns the key loaded earlier without touching the file.
    """
    password = passphrase.encode() if passphrase else None

    with open(private_key_path, "rb") as key_file:
        pem_bytes = key_file.read()

    return serialization.load_pem_private_key(
        data=pem_bytes,
        password=password,
        backend=default_backend(),
    )
64 changes: 31 additions & 33 deletions dbt/adapters/snowflake/connections.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
import base64
import datetime
import os
import sys

if sys.version_info < (3, 9):
from functools import lru_cache

cache = lru_cache(maxsize=None)
else:
from functools import cache

import pytz
import re
Expand All @@ -11,8 +19,8 @@

from typing import Optional, Tuple, Union, Any, List, Iterable, TYPE_CHECKING

from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric.rsa import RSAPrivateKey
import requests
import snowflake.connector
import snowflake.connector.constants
Expand Down Expand Up @@ -46,6 +54,8 @@
from dbt_common.ui import line_wrap_message, warning_tag
from dbt.adapters.snowflake.record import SnowflakeRecordReplayHandle

from dbt.adapters.snowflake.auth import private_key_from_file, private_key_from_string

if TYPE_CHECKING:
import agate

Expand All @@ -65,6 +75,15 @@
}


@cache
def snowflake_private_key(private_key: RSAPrivateKey) -> bytes:
    """Serialize ``private_key`` as unencrypted PKCS8 bytes in DER encoding.

    The function is wrapped in ``cache``, so serializing the same key object
    again returns the bytes produced by the first call.
    """
    der_encoding = serialization.Encoding.DER
    pkcs8_format = serialization.PrivateFormat.PKCS8
    return private_key.private_bytes(
        encoding=der_encoding,
        format=pkcs8_format,
        encryption_algorithm=serialization.NoEncryption(),
    )


@dataclass
class SnowflakeAdapterResponse(AdapterResponse):
query_id: str = ""
Expand Down Expand Up @@ -96,6 +115,7 @@ class SnowflakeCredentials(Credentials):
retry_on_database_errors: bool = False
retry_all: bool = False
insecure_mode: Optional[bool] = False
# this needs to default to `None` so that we can tell if the user set it; see `__post_init__()`
reuse_connections: Optional[bool] = None

def __post_init__(self):
Expand Down Expand Up @@ -126,6 +146,11 @@ def __post_init__(self):

self.account = self.account.replace("_", "-")

# only default `reuse_connections` to `True` if the user has not turned on `client_session_keep_alive`
# having both of these set to `True` could lead to hanging open connections, so it should be opt-in behavior
if self.client_session_keep_alive is False and self.reuse_connections is None:
self.reuse_connections = True

@property
def type(self):
return "snowflake"
Expand Down Expand Up @@ -275,44 +300,17 @@ def _get_access_token(self) -> str:
)
return result_json["access_token"]

def _get_private_key(self):
def _get_private_key(self) -> Optional[bytes]:
"""Get Snowflake private key by path, from a Base64 encoded DER bytestring or None."""
if self.private_key and self.private_key_path:
raise DbtConfigError("Cannot specify both `private_key` and `private_key_path`")

if self.private_key_passphrase:
encoded_passphrase = self.private_key_passphrase.encode()
else:
encoded_passphrase = None

if self.private_key:
if self.private_key.startswith("-"):
p_key = serialization.load_pem_private_key(
data=bytes(self.private_key, "utf-8"),
password=encoded_passphrase,
backend=default_backend(),
)

else:
p_key = serialization.load_der_private_key(
data=base64.b64decode(self.private_key),
password=encoded_passphrase,
backend=default_backend(),
)

elif self.private_key:
private_key = private_key_from_string(self.private_key, self.private_key_passphrase)
elif self.private_key_path:
with open(self.private_key_path, "rb") as key:
p_key = serialization.load_pem_private_key(
key.read(), password=encoded_passphrase, backend=default_backend()
)
private_key = private_key_from_file(self.private_key_path, self.private_key_passphrase)
else:
return None

return p_key.private_bytes(
encoding=serialization.Encoding.DER,
format=serialization.PrivateFormat.PKCS8,
encryption_algorithm=serialization.NoEncryption(),
)
return snowflake_private_key(private_key)


class SnowflakeConnectionManager(SQLConnectionManager):
Expand Down
2 changes: 1 addition & 1 deletion dbt/adapters/snowflake/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def _show_object_metadata(self, relation: SnowflakeRelation) -> Optional[dict]:
def get_catalog_for_single_relation(
self, relation: SnowflakeRelation
) -> Optional[CatalogTable]:
object_metadata = self._show_object_metadata(relation)
object_metadata = self._show_object_metadata(relation.as_case_sensitive())

if not object_metadata:
return None
Expand Down
16 changes: 15 additions & 1 deletion dbt/adapters/snowflake/relation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
from typing import FrozenSet, Optional, Type

from dbt.adapters.base.relation import BaseRelation
from dbt.adapters.contracts.relation import ComponentName, RelationConfig
from dbt.adapters.relation_configs import (
RelationConfigBase,
RelationConfigChangeAction,
RelationResults,
)
from dbt.adapters.contracts.relation import RelationConfig
from dbt.adapters.utils import classproperty
from dbt_common.exceptions import DbtRuntimeError

Expand Down Expand Up @@ -106,3 +106,17 @@ def dynamic_table_config_changeset(
if config_change_collection.has_changes:
return config_change_collection
return None

def as_case_sensitive(self) -> "SnowflakeRelation":
    """Build a relation whose unquoted path parts are upper-cased.

    Each ``ComponentName`` is considered in turn. A component is carried
    over only when the include policy enables it and the path has a
    non-empty value for it. The value is kept as-is when the quote policy
    enables quoting for that component, and upper-cased otherwise
    (presumably mirroring how Snowflake resolves unquoted identifiers --
    confirm against the adapter docs).

    Returns:
        The result of ``self.replace_path`` called with the collected parts.
    """
    resolved_parts = {}

    for component in ComponentName:
        if not self.include_policy.get_part(component):
            continue

        value = self.path.get_part(component)
        if not value:
            continue

        is_quoted = self.quote_policy.get_part(component)
        resolved_parts[component] = value if is_quoted else value.upper()

    return self.replace_path(**resolved_parts)
2 changes: 1 addition & 1 deletion dbt/include/snowflake/macros/adapters.sql
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@

{% macro snowflake__show_object_metadata(relation) %}
{%- set sql -%}
show objects like '{{ relation.identifier }}' in {{ relation.include(identifier=False) }} limit 1
show objects in {{ relation.include(identifier=False) }} starts with '{{ relation.identifier }}' limit 1
{%- endset -%}

{%- set result = run_query(sql) -%}
Expand Down
File renamed without changes.
26 changes: 26 additions & 0 deletions tests/functional/auth_tests/test_key_pair.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import os

from dbt.tests.util import run_dbt
import pytest


class TestKeyPairAuth:
    """Functional check that a dbt run works with a key pair profile target."""

    @pytest.fixture(scope="class", autouse=True)
    def dbt_profile_target(self):
        """Return a Snowflake target whose credentials come from the environment.

        Every connection setting is read from a ``SNOWFLAKE_TEST_*`` variable;
        a variable that is not set yields ``None`` for that setting.
        """
        env_backed_settings = {
            "account": "SNOWFLAKE_TEST_ACCOUNT",
            "user": "SNOWFLAKE_TEST_USER",
            "private_key": "SNOWFLAKE_TEST_PRIVATE_KEY",
            "private_key_passphrase": "SNOWFLAKE_TEST_PRIVATE_KEY_PASSPHRASE",
            "database": "SNOWFLAKE_TEST_DATABASE",
            "warehouse": "SNOWFLAKE_TEST_WAREHOUSE",
        }

        target = {"type": "snowflake", "threads": 4}
        target.update(
            {setting: os.getenv(variable) for setting, variable in env_backed_settings.items()}
        )
        return target

    @pytest.fixture(scope="class")
    def models(self):
        """Provide a single trivial model for the project."""
        model_files = {"my_model.sql": "select 1 as id"}
        return model_files

    def test_connection(self, project):
        """Run dbt with the key pair target; no result is asserted here."""
        run_dbt()
File renamed without changes.
6 changes: 6 additions & 0 deletions tests/performance/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Performance testing

These tests are not meant to run on a regular basis; instead, they are tools for measuring performance impacts of changes as needed.
We often get requests to reduce processing times, to investigate why a particular component is taking longer to run than expected, etc.
In the past we have performed one-off analyses to address these requests and documented the results in the relevant PR (when a change is made).
It is more useful to document those analyses in the form of performance tests so that we can easily rerun the analysis at a later date.
Loading

0 comments on commit 8a92a5d

Please sign in to comment.