diff --git a/.changes/unreleased/Under the Hood-20240716-172442.yaml b/.changes/unreleased/Under the Hood-20240716-172442.yaml new file mode 100644 index 00000000..8777edbb --- /dev/null +++ b/.changes/unreleased/Under the Hood-20240716-172442.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Add support for experimental record/replay testing. +time: 2024-07-16T17:24:42.271859-04:00 +custom: + Author: peterallenwebb + Issue: "123" diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index d7367190..e3390fbd 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -91,7 +91,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Run integration tests - run: hatch run integration-tests:all + run: hatch run integration-tests env: POSTGRES_TEST_HOST: localhost POSTGRES_TEST_PORT: 5432 diff --git a/.github/workflows/release_prep_hatch.yml b/.github/workflows/release_prep_hatch.yml index 8e9ded04..ab343b45 100644 --- a/.github/workflows/release_prep_hatch.yml +++ b/.github/workflows/release_prep_hatch.yml @@ -31,7 +31,7 @@ # 1. Bump the version if it has not been bumped # 2. 
Generate the changelog (via changie) if there is no markdown file for this version name: "Release prep" -run-name: "Release prep: Generate changelog and bump ${{ inputs.package }} to ${{ inputs.version }} for release to ${{ inputs.deploy-to }}" +run-name: "Release prep: Generate changelog and bump to ${{ inputs.version }} for release to ${{ inputs.deploy-to }}" on: workflow_call: inputs: @@ -342,7 +342,7 @@ jobs: uses: dbt-labs/dbt-adapters/.github/actions/setup-hatch@main - name: "Run unit tests" - run: hatch run unit-tests:all + run: hatch run unit-tests integration-tests: runs-on: ubuntu-latest @@ -387,7 +387,7 @@ jobs: uses: dbt-labs/dbt-adapters/.github/actions/setup-hatch@main - name: "Run integration tests" - run: hatch run integration-tests:all + run: hatch run integration-tests env: POSTGRES_TEST_HOST: localhost POSTGRES_TEST_PORT: 5432 @@ -464,6 +464,6 @@ jobs: run: echo "sha=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT # if this is a real release and a release branch was created, delete it - - name: "Delete release branch: ${{ needs.branch.outputs.name }}" + - name: "Delete release branch: ${{ needs.release-branch.outputs.name }}" if: ${{ inputs.deploy-to == 'prod' && inputs.is-nightly-release == 'false' && needs.release-branch.outputs.name != '' }} - run: git push origin -d ${{ needs.branch.outputs.name }} + run: git push origin -d ${{ needs.release-branch.outputs.name }} diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 91d8c578..e171ff78 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -37,5 +37,5 @@ jobs: python-version: ${{ matrix.python-version }} - name: Run unit tests - run: hatch run unit-tests:all + run: hatch run unit-tests shell: bash diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 427d2de5..9d925156 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -66,26 +66,42 @@ Rather than forking `dbt-labs/dbt-postgres`, use `dbt-labs/dbt-postgres` directl ### Installation 
-1. Ensure the latest version of `pip` is installed: +1. Ensure the latest versions of `pip` and `hatch` are installed: ```shell - pip install --upgrade pip + pip install --user --upgrade pip hatch ``` -2. Configure and activate a virtual environment using `virtualenv` as described in -[Setting up an environment](https://github.com/dbt-labs/dbt-core/blob/HEAD/CONTRIBUTING.md#setting-up-an-environment) -3. Install `dbt-postgres` and development dependencies in the virtual environment +2. This step is optional, but it's recommended. Configure `hatch` to create its virtual environments in the project. Add this block to your `hatch` `config.toml` file: + ```toml + # MacOS: ~/Library/Application Support/hatch/config.toml + [dirs.env] + virtual = ".hatch" + ``` + This makes `hatch` create all virtual environments in the project root inside of the directory `/.hatch`, similar to `/.tox` for `tox`. + It also makes it easier to add this environment as a runner in common IDEs like VSCode and PyCharm. +3. Create a `hatch` environment with all of the development dependencies and activate it: + ```shell + hatch run setup + hatch shell + ``` +4. Run any commands within the virtual environment by prefixing the command with `hatch run`: ```shell - pip install -e .[dev] + hatch run <command> ``` When `dbt-postgres` is installed this way, any changes made to the `dbt-postgres` source code will be reflected in the virtual environment immediately. - ## Testing -`dbt-postgres` contains [unit](https://github.com/dbt-labs/dbt-postgres/tree/main/tests/unit) -and [functional](https://github.com/dbt-labs/dbt-postgres/tree/main/tests/functional) tests. +`dbt-postgres` contains [code quality checks](https://github.com/dbt-labs/dbt-postgres/tree/main/.pre-commit-config.yaml), [unit tests](https://github.com/dbt-labs/dbt-postgres/tree/main/tests/unit), +and [functional tests](https://github.com/dbt-labs/dbt-postgres/tree/main/tests/functional). 
+ +### Code quality +Code quality checks can be run with a single command: +```shell +hatch run code-quality +``` ### Unit tests @@ -94,10 +110,14 @@ Unit tests can be run locally without setting up a database connection: ```shell # Note: replace $strings with valid names +# run all unit tests +hatch run unit-tests + # run all unit tests in a module -python -m pytest tests/unit/$test_file_name.py +hatch run unit-tests tests/unit/$test_file_name.py + # run a specific unit test -python -m pytest tests/unit/$test_file_name.py::$test_class_name::$test_method_name +hatch run unit-tests tests/unit/$test_file_name.py::$test_class_name::$test_method_name ``` ### Functional tests @@ -120,16 +140,45 @@ Functional tests can be run locally with a valid database connection configured ```shell # Note: replace $strings with valid names +# run all functional tests +hatch run integration-tests + # run all functional tests in a directory -python -m pytest tests/functional/$test_directory +hatch run integration-tests tests/functional/$test_directory + # run all functional tests in a module -python -m pytest tests/functional/$test_dir_and_filename.py +hatch run integration-tests tests/functional/$test_directory/$test_filename.py + # run all functional tests in a class -python -m pytest tests/functional/$test_dir_and_filename.py::$test_class_name +hatch run integration-tests tests/functional/$test_directory/$test_filename.py::$test_class_name + # run a specific functional test -python -m pytest tests/functional/$test_dir_and_filename.py::$test_class_name::$test__method_name +hatch run integration-tests tests/functional/$test_directory/$test_filename.py::$test_class_name::$test__method_name +``` + +### Testing against a development branch + +Some changes require a change in `dbt-common` and/or `dbt-adapters`. +In that case, the dependency on `dbt-common` and/or `dbt-adapters` must be updated to point to the development branch. 
For example: + +```toml +[tool.hatch.envs.default] +dependencies = [ + "dbt-common @ git+https://github.com/dbt-labs/dbt-common.git@my-dev-branch", + "dbt-adapters @ git+https://github.com/dbt-labs/dbt-adapters.git@my-dev-branch", + "dbt-tests-adapter @ git+https://github.com/dbt-labs/dbt-adapters.git@my-dev-branch#subdirectory=dbt-tests-adapter", + ..., +] ``` +This will install `dbt-common`/`dbt-adapters`/`dbt-tests-adapter` as snapshots. In other words, if `my-dev-branch` is updated on GitHub, those updates will not be reflected locally. +In order to pick up those updates, the `hatch` environment(s) will need to be rebuilt: + +```shell +exit +hatch env prune +hatch shell +``` ## Documentation diff --git a/dbt/adapters/postgres/connections.py b/dbt/adapters/postgres/connections.py index 83f26957..e8f0abe5 100644 --- a/dbt/adapters/postgres/connections.py +++ b/dbt/adapters/postgres/connections.py @@ -5,10 +5,12 @@ from dbt.adapters.contracts.connection import AdapterResponse, Credentials from dbt.adapters.events.logging import AdapterLogger from dbt.adapters.events.types import TypeCodeNotFound +from dbt.adapters.postgres.record import PostgresRecordReplayHandle from dbt.adapters.sql import SQLConnectionManager from dbt_common.exceptions import DbtDatabaseError, DbtRuntimeError from dbt_common.events.functions import warn_or_error from dbt_common.helper_types import Port +from dbt_common.record import get_record_mode_from_env, RecorderMode from mashumaro.jsonschema.annotations import Maximum, Minimum import psycopg2 from typing_extensions import Annotated @@ -132,17 +134,31 @@ def open(cls, connection): kwargs["application_name"] = credentials.application_name def connect(): - handle = psycopg2.connect( - dbname=credentials.database, - user=credentials.user, - host=credentials.host, - password=credentials.password, - port=credentials.port, - connect_timeout=credentials.connect_timeout, - **kwargs, - ) + handle = None + + # In replay mode, we won't connect to a 
real database at all, while + # in record and diff modes we do, but insert an intermediate handle + # object which monitors native connection activity. + rec_mode = get_record_mode_from_env() + if rec_mode != RecorderMode.REPLAY: + handle = psycopg2.connect( + dbname=credentials.database, + user=credentials.user, + host=credentials.host, + password=credentials.password, + port=credentials.port, + connect_timeout=credentials.connect_timeout, + **kwargs, + ) + + if rec_mode is not None: + # If using the record/replay mechanism, regardless of mode, we + # use a wrapper. + handle = PostgresRecordReplayHandle(handle, connection) + if credentials.role: handle.cursor().execute("set role {}".format(credentials.role)) + return handle retryable_exceptions = [ diff --git a/dbt/adapters/postgres/record/__init__.py b/dbt/adapters/postgres/record/__init__.py new file mode 100644 index 00000000..9b8b9b3a --- /dev/null +++ b/dbt/adapters/postgres/record/__init__.py @@ -0,0 +1,2 @@ +from dbt.adapters.postgres.record.cursor.cursor import PostgresRecordReplayCursor +from dbt.adapters.postgres.record.handle import PostgresRecordReplayHandle diff --git a/dbt/adapters/postgres/record/cursor/cursor.py b/dbt/adapters/postgres/record/cursor/cursor.py new file mode 100644 index 00000000..a096c9f5 --- /dev/null +++ b/dbt/adapters/postgres/record/cursor/cursor.py @@ -0,0 +1,15 @@ +from dbt_common.record import record_function + +from dbt.adapters.record import RecordReplayCursor + +from dbt.adapters.postgres.record.cursor.status import CursorGetStatusMessageRecord + + +class PostgresRecordReplayCursor(RecordReplayCursor): + """A custom extension of RecordReplayCursor that adds the statusmessage + property which is specific to psycopg.""" + + @property + @record_function(CursorGetStatusMessageRecord, method=True, id_field_name="connection_name") + def statusmessage(self): + return self.native_cursor.statusmessage diff --git a/dbt/adapters/postgres/record/cursor/status.py 
b/dbt/adapters/postgres/record/cursor/status.py new file mode 100644 index 00000000..1e8d9620 --- /dev/null +++ b/dbt/adapters/postgres/record/cursor/status.py @@ -0,0 +1,21 @@ +import dataclasses +from typing import Optional + +from dbt_common.record import Record, Recorder + + +@dataclasses.dataclass +class CursorGetStatusMessageParams: + connection_name: str + + +@dataclasses.dataclass +class CursorGetStatusMessageResult: + msg: Optional[str] + + +@Recorder.register_record_type +class CursorGetStatusMessageRecord(Record): + params_cls = CursorGetStatusMessageParams + result_cls = CursorGetStatusMessageResult + group = "Database" diff --git a/dbt/adapters/postgres/record/handle.py b/dbt/adapters/postgres/record/handle.py new file mode 100644 index 00000000..119dc2f1 --- /dev/null +++ b/dbt/adapters/postgres/record/handle.py @@ -0,0 +1,12 @@ +from dbt.adapters.record import RecordReplayHandle + +from dbt.adapters.postgres.record.cursor.cursor import PostgresRecordReplayCursor + + +class PostgresRecordReplayHandle(RecordReplayHandle): + """A custom extension of RecordReplayHandle that returns + a psycopg-specific PostgresRecordReplayCursor object.""" + + def cursor(self): + cursor = None if self.native_handle is None else self.native_handle.cursor() + return PostgresRecordReplayCursor(cursor, self.connection) diff --git a/pyproject.toml b/pyproject.toml index c2305f16..fd3ddcc0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,6 @@ dependencies = [ "dbt-common>=0.1.0a1,<2.0", "agate>=1.0,<2.0", ] - [project.urls] Homepage = "https://github.com/dbt-labs/dbt-postgres" Documentation = "https://docs.getdbt.com" @@ -56,6 +55,8 @@ path = "dbt/adapters/postgres/__version__.py" dependencies = [ "dbt-adapters @ git+https://github.com/dbt-labs/dbt-adapters.git@mcknight/ct-2819", "dbt-common @ git+https://github.com/dbt-labs/dbt-common.git", + "dbt-tests-adapter @ git+https://github.com/dbt-labs/dbt-adapters.git#subdirectory=dbt-tests-adapter", + "dbt-core @ 
git+https://github.com/dbt-labs/dbt-core.git#subdirectory=core", 'pre-commit==3.7.0;python_version>="3.9"', 'pre-commit==3.5.0;python_version=="3.8"', ] @@ -89,11 +90,21 @@ extra-dependencies = [ "dbt-tests-adapter @ git+https://github.com/dbt-labs/dbt-adapters.git@mcknight/ct-2819#subdirectory=dbt-tests-adapter", ] [tool.hatch.envs.integration-tests.env-vars] +[tool.hatch.envs.default.env-vars] DBT_TEST_USER_1 = "dbt_test_user_1" DBT_TEST_USER_2 = "dbt_test_user_2" DBT_TEST_USER_3 = "dbt_test_user_3" -[tool.hatch.envs.integration-tests.scripts] -all = "python -m pytest {args:tests/functional}" +[tool.hatch.envs.default.scripts] +setup = "pre-commit install" +code-quality = "pre-commit run --all-files" +unit-tests = "python -m pytest {args:tests/unit}" +integration-tests = "python -m pytest {args:tests/functional}" +docker-dev = [ + "echo Does not support integration testing, only development and unit testing. See issue https://github.com/dbt-labs/dbt-postgres/issues/99", + "docker build -f docker/dev.Dockerfile -t dbt-postgres-dev .", + "docker run --rm -it --name dbt-postgres-dev -v $(pwd):/opt/code dbt-postgres-dev", +] +docker-prod = "docker build -f docker/Dockerfile -t dbt-postgres ." [tool.hatch.envs.build] detached = true