Run tests in parallel with pytest-xdist (PP-1212) (#1817)

* Run tests using pytest-xdist to speed up our test runs.
ThePalaceProject · May 3, 2024 · d642e9f · d642e9f
1 parent f3ad4c7
commit d642e9f
Show file tree

Hide file tree

Showing 32 changed files with 844 additions and 606 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -195,7 +195,7 @@ jobs:
           bash -c "
             source env/bin/activate &&
             poetry install --without ci --no-root --sync &&
-            pytest --no-cov tests/manager
+            pytest --no-cov tests
           "
         env:
           BUILD_CACHE_FROM: type=gha,scope=buildkit-${{ github.run_id }}

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -52,49 +52,7 @@ jobs:
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
           files: ./coverage.xml
-          flags: manager
-          name: manager-${{ matrix.python-version }}
-          verbose: true
-
-  test-migrations:
-    name: Migration Tests
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-
-    steps:
-      - uses: actions/checkout@v4
-
-      # See comment here: https://github.com/actions/runner-images/issues/1187#issuecomment-686735760
-      - name: Disable network offload
-        run: sudo ethtool -K eth0 tx off rx off
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.10"
-
-      - name: Install Poetry
-        uses: ./.github/actions/poetry
-        with:
-          cache: true
-
-      - name: Install Tox
-        run: |
-          poetry install --only ci --no-root
-        env:
-          POETRY_VIRTUALENVS_CREATE: false
-
-      - name: Run Migration Tests
-        run: tox -e "migration-docker"
-
-      - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v4
-        with:
-          token: ${{ secrets.CODECOV_TOKEN }}
-          files: ./coverage.xml
-          flags: migration
-          name: "migration-3.10"
+          name: test-${{ matrix.python-version }}
           verbose: true
 
   docker-test-migrations:

diff --git a/README.md b/README.md
@@ -636,20 +636,27 @@ tox -e "py310-docker"
 If you already have elastic search or postgres running locally, you can run them instead by setting the
 following environment variables:
 
-- `SIMPLIFIED_TEST_DATABASE`
+- `PALACE_TEST_DATABASE_URL`
 - `PALACE_TEST_SEARCH_URL`
 
 Make sure the ports and usernames are updated to reflect the local configuration.
 
 ```sh
 # Set environment variables
-export SIMPLIFIED_TEST_DATABASE="postgresql://simplified_test:test@localhost:9005/simplified_circulation_test"
-export SIMPLIFIED_TEST_OPENSEARCH="http://localhost:9200"
+export PALACE_TEST_DATABASE_URL="postgresql://simplified_test:test@localhost:9005/simplified_circulation_test"
+export PALACE_TEST_SEARCH_URL="http://localhost:9200"
 
 # Run tox
 tox -e "py310"
 ```
 
+The tests assume that they have permission to create and drop databases. They connect to the
+provided database URL and create a new database for each test run. If the user does not have permission
+to create and drop databases, the tests will fail. You can disable this behavior by setting the
+`PALACE_TEST_DATABASE_CREATE_DATABASE` environment variable to `false`.
+
+```sh
+
 ### Override `pytest` arguments
 
 If you wish to pass additional arguments to `pytest` you can do so through `tox`. Every argument passed after a `--` to
@@ -752,16 +759,10 @@ This profiler uses [PyInstrument](https://pyinstrument.readthedocs.io/en/latest/
 PyInstrument can also be used to profile the test suite. This can be useful to identify slow tests, or to identify
 performance regressions.
 
-To profile the core test suite, run the following command:
-
-```sh
-pyinstrument -m pytest --no-cov tests/core/
-```
-
-To profile the API test suite, run the following command:
+To profile the test suite, run the following command:
 
 ```sh
-pyinstrument -m pytest --no-cov tests/api/
+pyinstrument -m pytest --no-cov -n 0 tests
 ```
 
 #### Environment Variables

diff --git a/docker-compose.yml b/docker-compose.yml
@@ -22,7 +22,7 @@ x-cm-variables: &cm
     PALACE_CELERY_CLOUDWATCH_STATISTICS_DRYRUN: "true"
 
     # Set up the environment variables used for testing as well
-    SIMPLIFIED_TEST_DATABASE: "postgresql://palace:test@pg:5432/circ"
+    PALACE_TEST_DATABASE_URL: "postgresql://palace:test@pg:5432/circ"
     PALACE_TEST_SEARCH_URL: "http://os:9200"
     PALACE_TEST_MINIO_ENDPOINT_URL: "http://minio:9000"
     PALACE_TEST_MINIO_USER: "palace"

diff --git a/docker/README.md b/docker/README.md
@@ -86,7 +86,7 @@ Environment variables can be set with the `-e VARIABLE_KEY='variable_value'` opt
 
 *Required.* The URL of the production PostgreSQL database for the application.
 
-### `SIMPLIFIED_TEST_DATABASE`
+### `PALACE_TEST_DATABASE_URL`
 
 *Optional.* The URL of a PostgreSQL database for tests. This optional variable allows unit tests to be run in the
 container.

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -9,11 +9,14 @@ exclude_also = [
     '^\s*pass\s*$',
     '^\s*raise NotImplementedError\s*$',
 ]
+include_namespace_packages = true
 
 [tool.coverage.run]
 branch = true
+concurrency = ["multiprocessing", "thread"]
+parallel = true
 relative_files = true
-source = ["palace.manager"]
+source = ["src"]
 
 [tool.isort]
 known_first_party = ["palace"]
@@ -286,6 +289,7 @@ pytest-alembic = "^0.11.0"
 pytest-celery = "^0.0.0"
 pytest-cov = "^5.0.0"
 pytest-timeout = "*"
+pytest-xdist = "^3.5.0"
 requests-mock = "1.12.1"
 types-aws-xray-sdk = "^2.11.0.13"
 types-Flask-Cors = "^4.0.0"
@@ -306,6 +310,8 @@ psycopg2 = "~2.9.5"
 addopts = [
     "--cov",
     "--cov-report=xml",
+    "--dist=worksteal",
+    "--numprocesses=auto",
     "--strict-markers",
 ]
 markers = [

diff --git a/src/palace/manager/core/config.py b/src/palace/manager/core/config.py
@@ -41,7 +41,6 @@ class Configuration(ConfigurationConstants):
     log = logging.getLogger("Configuration file loader")
 
     # Environment variables that contain URLs to the database
-    DATABASE_TEST_ENVIRONMENT_VARIABLE = "SIMPLIFIED_TEST_DATABASE"
     DATABASE_PRODUCTION_ENVIRONMENT_VARIABLE = "SIMPLIFIED_PRODUCTION_DATABASE"
 
     # Environment variable for Overdrive fulfillment keys
@@ -72,46 +71,25 @@ class Configuration(ConfigurationConstants):
 
     @classmethod
     def database_url(cls):
-        """Find the database URL configured for this site.
-
-        For compatibility with old configurations, we will look in the
-        site configuration first.
-
-        If it's not there, we will look in the appropriate environment
-        variable.
-        """
-
-        # To avoid expensive mistakes, test and production databases
-        # are always configured with separate keys. The TESTING variable
-        # controls which database is used, and it's set by the
-        # package_setup() function called in every component's
-        # tests/__init__.py.
-        test = os.environ.get("TESTING", False)
-        if test:
-            environment_variable = cls.DATABASE_TEST_ENVIRONMENT_VARIABLE
-        else:
-            environment_variable = cls.DATABASE_PRODUCTION_ENVIRONMENT_VARIABLE
-
-        url = os.environ.get(environment_variable)
+        """Find the database URL configured for this site."""
+        url = os.environ.get(cls.DATABASE_PRODUCTION_ENVIRONMENT_VARIABLE)
         if not url:
             raise CannotLoadConfiguration(
                 "Database URL was not defined in environment variable (%s)."
-                % environment_variable
+                % cls.DATABASE_PRODUCTION_ENVIRONMENT_VARIABLE
             )
 
-        url_obj = None
         try:
             url_obj = make_url(url)
         except ArgumentError as e:
-            # Improve the error message by giving a guide as to what's
-            # likely to work.
+            # Improve the error message by giving a guide as to what's likely to work.
             raise ArgumentError(
-                "Bad format for database URL (%s). Expected something like postgresql://[username]:[password]@[hostname]:[port]/[database name]"
-                % url
+                "Bad format for database URL. Expected something like postgresql://[username]:[password]@[hostname]:[port]/[database name]"
             )
 
-        # Calling __to_string__ will hide the password.
-        logging.info("Connecting to database: %s" % url_obj.__to_string__())
+        logging.info(
+            "Connecting to database: %s" % url_obj.render_as_string(hide_password=True)
+        )
         return url
 
     @classmethod

diff --git a/src/palace/manager/scripts.py b/src/palace/manager/scripts.py
@@ -4,15 +4,15 @@
 import os
 import sys
 import time
-from collections.abc import Sequence
+from collections.abc import Callable, Sequence
 from datetime import timedelta
 from pathlib import Path
 from typing import Any
 
 from alembic import command, config
 from alembic.util import CommandError
 from sqlalchemy import inspect, select
-from sqlalchemy.engine import Connection
+from sqlalchemy.engine import Connection, Engine
 from sqlalchemy.exc import NoResultFound
 from sqlalchemy.orm import Session
 
@@ -486,14 +486,20 @@ class InstanceInitializationScript:
     remain idempotent.
     """
 
-    def __init__(self, config_file: Path | None = None) -> None:
+    def __init__(
+        self,
+        config_file: Path | None = None,
+        engine_factory: Callable[[], Engine] = SessionManager.engine,
+    ) -> None:
         self._log: logging.Logger | None = None
         self._container = container_instance()
 
         # Call init_resources() to initialize the logging configuration.
         self._container.init_resources()
         self._config_file = config_file
 
+        self._engine_factory = engine_factory
+
     @property
     def log(self) -> logging.Logger:
         if self._log is None:
@@ -568,7 +574,7 @@ def run(self) -> None:
         instance of the script is running at a time. This prevents multiple
         instances from trying to initialize the database at the same time.
         """
-        engine = SessionManager.engine()
+        engine = self._engine_factory()
         with engine.begin() as connection:
             with pg_advisory_lock(connection, LOCK_ID_DB_INIT):
                 self.initialize(connection)

diff --git a/src/palace/manager/search/service.py b/src/palace/manager/search/service.py
@@ -110,10 +110,6 @@ def read_pointer_set_empty(self) -> None:
     def index_create(self, revision: SearchSchemaRevision) -> None:
         """Atomically create an index for the given base name and revision."""
 
-    @abstractmethod
-    def indexes_created(self) -> list[str]:
-        """A log of all the indexes that have been created by this client service."""
-
     @abstractmethod
     def index_is_populated(self, revision: SearchSchemaRevision) -> bool:
         """Return True if the index for the given base name and revision has been populated."""
@@ -172,7 +168,6 @@ def __init__(self, client: OpenSearch, base_revision_name: str):
         self._search = Search(using=self._client)
         self._base_revision_name = base_revision_name
         self._multi_search = MultiSearch(using=self._client)
-        self._indexes_created: list[str] = []
 
         # Documents are not allowed to automatically create indexes.
         # AWS OpenSearch only accepts the "flat" format
@@ -184,9 +179,6 @@ def __init__(self, client: OpenSearch, base_revision_name: str):
     def base_revision_name(self) -> str:
         return self._base_revision_name
 
-    def indexes_created(self) -> list[str]:
-        return self._indexes_created
-
     def write_pointer(self) -> SearchWritePointer | None:
         try:
             result: dict = self._client.indices.get_alias(
@@ -207,7 +199,6 @@ def create_empty_index(self) -> None:
             index_name = self._empty(self.base_revision_name)
             self._logger.debug(f"creating empty index {index_name}")
             self._client.indices.create(index=index_name)
-            self._indexes_created.append(index_name)
         except RequestError as e:
             if e.error == "resource_already_exists_exception":
                 return
@@ -261,7 +252,6 @@ def index_create(self, revision: SearchSchemaRevision) -> None:
                 index=index_name,
                 body=revision.mapping_document().serialize(),
             )
-            self._indexes_created.append(index_name)
         except RequestError as e:
             if e.error == "resource_already_exists_exception":
                 return