Skip to content

Commit

Permalink
Make CLI tests work with parallel test execution
Browse files Browse the repository at this point in the history
We use pytest-xdist for parallel test execution, which means multiple
worker threads fire up tests which are loosely grouped by class/module.

Unfortunately, there are no guarantees around any of this, so the CLI
tests, which are all in one module, may execute on different workers.
We had a bad fixture layout on the CLI tests which would touch a file
in the CLI directory and then unlink it after the test was done. If
every test ran on the same worker this was fine - only one test would
ever be accessing this path location at a time. But when the tests get
split across workers bad things happen with FileNotFound and things
of that nature.

This change moves us off of having multiple tests creating and
destroying the same file in the same file path location, instead
relying on a fixed file that already exists in a static location in
the repository.
  • Loading branch information
tlento committed Oct 31, 2023
1 parent e62ca4c commit f9b627f
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 15 deletions.
35 changes: 29 additions & 6 deletions metricflow/test/cli/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from dbt_semantic_interfaces.parsing.objects import YamlConfigFile
from dbt_semantic_interfaces.test_utils import base_semantic_manifest_file

from metricflow.cli.cli_context import CLIContext
from metricflow.cli.main import (
dimension_values,
dimensions,
Expand Down Expand Up @@ -76,7 +77,19 @@ def create_directory(directory_path: str) -> Iterator[None]:
shutil.rmtree(path)


def test_validate_configs(cli_runner: MetricFlowCliRunner) -> None: # noqa: D
def test_validate_configs(cli_context: CLIContext) -> None:
"""Tests config validation from a manifest stored on the filesystem.
This test is special, because the CLI bypasses the semantic manifest read into the CLIContext and
validates the config files on disk. It's not entirely clear why we do this, so we should probably
figure that out and, if possible, stop doing it so we can have this test depend on an injectable
in-memory semantic manifest instead of whatever is stored in the filesystem.
At any rate, due to the direct read from disk, we have to store a serialized semantic manifest
in a temporary location. In order to spin up the CLI this requires us to ALSO have a dbt_project.yml
on the filesystem in the project path. Since we don't want to clobber whatever semantic_manifest.json is
in the real filesystem location we do all of this stuff here.
"""
yaml_contents = textwrap.dedent(
"""\
semantic_model:
Expand All @@ -98,16 +111,26 @@ def test_validate_configs(cli_runner: MetricFlowCliRunner) -> None: # noqa: D
apply_transformations=False,
).semantic_manifest

target_directory = Path().absolute() / "target"
with create_directory(target_directory.as_posix()):
manifest_file = target_directory / "semantic_manifest.json"
manifest_file.write_text(manifest.json())
project_directory = Path().absolute()
# If the dbt_project.yml doesn't exist in this path location the CLI will throw an exception.
dummy_project = Path(project_directory, "dbt_project.yml")
dummy_project.touch()

resp = cli_runner.run(validate_configs)
try:
cli_runner = MetricFlowCliRunner(cli_context=cli_context, project_path=str(project_directory))
target_directory = Path(project_directory, "target")
with create_directory(target_directory.as_posix()):
manifest_file = Path(target_directory, "semantic_manifest.json")
manifest_file.write_text(manifest.json())

resp = cli_runner.run(validate_configs)

assert "ERROR" in resp.output
assert resp.exit_code == 0

finally:
dummy_project.unlink()


def test_health_checks(cli_runner: MetricFlowCliRunner) -> None: # noqa: D
resp = cli_runner.run(health_checks)
Expand Down
14 changes: 8 additions & 6 deletions metricflow/test/fixtures/cli_fixtures.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import os
import pathlib
import tempfile
from typing import Generator, Optional, Sequence
Expand All @@ -17,6 +18,7 @@
from metricflow.model.semantic_manifest_lookup import SemanticManifestLookup
from metricflow.plan_conversion.column_resolver import DunderColumnAssociationResolver
from metricflow.protocols.sql_client import SqlClient
from metricflow.test.fixtures.setup_fixtures import dbt_project_dir
from metricflow.test.time.configurable_time_source import ConfigurableTimeSource


Expand Down Expand Up @@ -99,19 +101,19 @@ def cli_context( # noqa: D
class MetricFlowCliRunner(CliRunner):
"""Custom CliRunner class to handle passing context."""

def __init__(self, cli_context: CLIContext) -> None: # noqa: D
def __init__(self, cli_context: CLIContext, project_path: str) -> None: # noqa: D
self.cli_context = cli_context
self.project_path = project_path
super().__init__()

def run(self, cli: click.BaseCommand, args: Optional[Sequence[str]] = None) -> Result: # noqa: D
# TODO: configure CLI to use a dbt_project fixture
dummy_dbt_project = pathlib.Path("dbt_project.yml")
dummy_dbt_project.touch()
current_dir = os.getcwd()
os.chdir(self.project_path)
result = super().invoke(cli, args, obj=self.cli_context)
dummy_dbt_project.unlink()
os.chdir(current_dir)
return result


@pytest.fixture
def cli_runner(cli_context: CLIContext) -> MetricFlowCliRunner: # noqa: D
return MetricFlowCliRunner(cli_context=cli_context)
return MetricFlowCliRunner(cli_context=cli_context, project_path=dbt_project_dir())
9 changes: 9 additions & 0 deletions metricflow/test/fixtures/setup_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os
import warnings
from dataclasses import dataclass
from pathlib import Path

import _pytest.config
import pytest
Expand Down Expand Up @@ -170,6 +171,14 @@ def dialect_from_url(url: str) -> SqlDialect:
return SqlDialect(dialect_protocol[0])


def dbt_project_dir() -> str:
"""Return the canonical path string for the dbt project dir in the test package.
This is necessary for configuring both the dbt adapter for integration tests and the project location for CLI tests.
"""
return os.path.join(os.path.dirname(__file__), Path("dbt_projects", "metricflow_testing"))


@pytest.fixture(scope="session", autouse=True)
def warn_user_about_slow_tests_without_parallelism( # noqa: D
request: FixtureRequest,
Expand Down
5 changes: 2 additions & 3 deletions metricflow/test/fixtures/sql_client_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from dbt.cli.main import dbtRunner

from metricflow.protocols.sql_client import SqlClient
from metricflow.test.fixtures.setup_fixtures import MetricFlowTestSessionState, dialect_from_url
from metricflow.test.fixtures.setup_fixtures import MetricFlowTestSessionState, dbt_project_dir, dialect_from_url
from metricflow.test.fixtures.sql_clients.adapter_backed_ddl_client import AdapterBackedDDLSqlClient
from metricflow.test.fixtures.sql_clients.common_client import SqlDialect
from metricflow.test.fixtures.sql_clients.ddl_sql_client import SqlClientWithDDLMethods
Expand Down Expand Up @@ -127,8 +127,7 @@ def __initialize_dbt() -> None:
We use the debug command to initialize the profile and make it accessible. This has the nice property of
triggering a failure with reasonable error messages in the event the dbt configs are not set up correctly.
"""
dbt_dir = os.path.join(os.path.dirname(__file__), "dbt_projects/metricflow_testing/")
dbtRunner().invoke(["debug"], project_dir=dbt_dir, PROFILES_DIR=dbt_dir)
dbtRunner().invoke(["debug"], project_dir=dbt_project_dir(), PROFILES_DIR=dbt_project_dir())


def make_test_sql_client(url: str, password: str, schema: str) -> SqlClientWithDDLMethods:
Expand Down

0 comments on commit f9b627f

Please sign in to comment.