diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..53e1a23 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,50 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Continuous Integration + +on: + push: + branches: [ "master", "main" ] + pull_request: + branches: [ "master", "main" ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.12"] + env: + PYTHON_VERSION: ${{ matrix.python-version }} + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install mongodb + run: | + sudo apt install -y gnupg curl + curl -fsSL https://www.mongodb.org/static/pgp/server-7.0.asc | sudo gpg -o /usr/share/keyrings/mongodb-server-7.0.gpg --dearmor + echo "deb [ arch=amd64,arm64 signed-by=/usr/share/keyrings/mongodb-server-7.0.gpg ] https://repo.mongodb.org/apt/ubuntu jammy/mongodb-org/7.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-7.0.list + sudo apt-get update + sudo apt install -y mongodb-org + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install ruff pytest pytest-lazy-fixtures pytest-asyncio pytest-cov + python -m pip install -e . 
+ - name: Test with pytest + run: | + pytest --asyncio-mode=auto --cov=trolldb --cov-report=xml + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v4.0.1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + slug: pytroll/pytroll-db + file: ./coverage.xml + env_vars: PYTHON_VERSION diff --git a/.github/workflows/deploy-sdist.yaml b/.github/workflows/deploy-sdist.yaml new file mode 100644 index 0000000..1499397 --- /dev/null +++ b/.github/workflows/deploy-sdist.yaml @@ -0,0 +1,27 @@ +name: Deploy sdist + +on: + release: + types: + - published + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout source + uses: actions/checkout@v4 + + - name: Create sdist + shell: bash -l {0} + run: | + python -m pip install -q build + python -m build -s + + - name: Publish package to PyPI + if: github.event.action == 'published' + uses: pypa/gh-action-pypi-publish@v1.8.14 + with: + user: __token__ + password: ${{ secrets.pypi_password }} diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml deleted file mode 100644 index db12d51..0000000 --- a/.github/workflows/pylint.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: Pylint - -on: [push] - -jobs: - build: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.10", "3.11", "3.12"] - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pylint - - name: Analysing the code with pylint - run: | - pylint $(git ls-files '*.py') diff --git a/.gitignore b/.gitignore index 219af40..45291c7 100644 --- a/.gitignore +++ b/.gitignore @@ -48,6 +48,21 @@ coverage.xml # Django stuff: *.log *.pot +log # Sphinx documentation -docs/_build/ +docs/build/ +*.rst +!index.rst +*.doctree +*.pickle + +# the actual config file [HAS TO BE ALWAYS EXCLUDED!] 
+config.yaml +config.yml + +# temp log and storage for the test database +__temp* + +# version file +version.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..867cafe --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,15 @@ +exclude: '^$' +fail_fast: false + +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: 'v0.3.7' + hooks: + - id: ruff + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: no-commit-to-branch diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..0e20fea --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,23 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.12" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/source/conf.py + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - requirements: docs/requirements.txt diff --git a/LICENSE b/LICENSE index 70566f2..ef7e7ef 100644 --- a/LICENSE +++ b/LICENSE @@ -671,4 +671,4 @@ into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read -. \ No newline at end of file +. 
diff --git a/README.rst b/README.rst index 707024b..f489eda 100644 --- a/README.rst +++ b/README.rst @@ -1,4 +1,33 @@ -doobie -====== +The database interface of `Pytroll `_ -Database interface for pytroll + +.. image:: https://results.pre-commit.ci/badge/github/pytroll/pytroll-db/master.svg + :target: https://results.pre-commit.ci/latest/github/pytroll/pytroll-db/master + :alt: pre-commit.ci status + + +Copyright (C) + 2012, 2014, 2015, 2024 + + Martin Raspaud, Pouria Khalaj, Esben S. Nielsen, Adam Dybbroe, Kristian Rune Larsen + + +Authors + - Martin Raspaud + - Pouria Khalaj + - Esben S. Nielsen + - Adam Dybbroe + - Kristian Rune Larsen + + +License + Consult the `LICENSE` file which is included as a part of this package. + + +Disclaimer + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have + received a copy of the GNU General Public License + along with this program. If not, see . diff --git a/bin/pytroll-mongo.py b/bin/pytroll-mongo.py deleted file mode 100644 index 0e6a10d..0000000 --- a/bin/pytroll-mongo.py +++ /dev/null @@ -1,137 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright (c) 2019 Martin Raspaud - -# Author(s): - -# Martin Raspaud - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program. 
If not, see . - -from posttroll.subscriber import Subscribe -import logging -from threading import Thread -import yaml -import os - -from pymongo import MongoClient -logger = logging.getLogger(__name__) - - -class MongoRecorder: - """A recorder for posttroll file messages.""" - - def __init__(self, - mongo_uri="mongodb://localhost:27017", - db_name='sat_db'): - """Init the recorder.""" - self.db = MongoClient(mongo_uri)[db_name] - self.loop = True - self._recorder = Thread(target=self.record) - - def start(self): - """Start the recording.""" - self._recorder.start() - - def insert_files(self, msg): - """Insert files in the database.""" - self.db.files.insert_one(msg.data) - - def record(self): - """Log stuff.""" - try: - with Subscribe("", addr_listener=True) as sub: - for msg in sub.recv(timeout=1): - if msg: - logger.debug("got msg %s", str(msg)) - if msg.type in ['collection', 'file', 'dataset']: - self.insert_files(msg) - if not self.loop: - logger.info("Stop recording") - break - except Exception: - logger.exception("Something went wrong in record") - raise - - def stop(self): - """Stop the machine.""" - self.loop = False - - -log_levels = { - 0: logging.WARN, - 1: logging.INFO, - 2: logging.DEBUG, -} - - -def setup_logging(cmd_args): - """Set up logging.""" - if cmd_args.log_config is not None: - with open(cmd_args.log_config) as fd: - log_dict = yaml.safe_load(fd.read()) - logging.config.dictConfig(log_dict) - return - - root = logging.getLogger('') - root.setLevel(log_levels[cmd_args.verbosity]) - - if cmd_args.log: - fh_ = logging.handlers.TimedRotatingFileHandler( - os.path.join(cmd_args.log), - "midnight", - backupCount=7) - else: - fh_ = logging.StreamHandler() - - formatter = logging.Formatter(LOG_FORMAT) - fh_.setFormatter(formatter) - - root.addHandler(fh_) - - -LOG_FORMAT = "[%(asctime)s %(name)s %(levelname)s] %(message)s" - -if __name__ == '__main__': - import time - import argparse - - parser = argparse.ArgumentParser() - 
parser.add_argument("-d", "--database", - help="URI to the mongo database (default mongodb://localhost:27017 ).", - default="mongodb://localhost:27017") - parser.add_argument("-l", "--log", - help="The file to log to. stdout otherwise.") - parser.add_argument("-c", "--log-config", - help="Log config file to use instead of the standard logging.") - parser.add_argument("-v", "--verbose", dest="verbosity", action="count", default=0, - help="Verbosity (between 1 and 2 occurrences with more leading to more " - "verbose logging). WARN=0, INFO=1, " - "DEBUG=2. This is overridden by the log config file if specified.") - cmd_args = parser.parse_args() - - logger = logging.getLogger("mongo_recorder") - logger.setLevel(logging.DEBUG) - setup_logging(cmd_args) - logger.info("Starting up.") - - try: - recorder = MongoRecorder(cmd_args.database) - recorder.start() - while True: - time.sleep(1) - except KeyboardInterrupt: - recorder.stop() - print("Thanks for using pytroll/mongo_recorder. See you soon on www.pytroll.org!") diff --git a/changelog.rst b/changelog.rst index 7c230d0..5b87e54 100644 --- a/changelog.rst +++ b/changelog.rst @@ -215,5 +215,3 @@ Other postgreSQL/postGIS database. [Adam Dybbroe] - Initial commit. [Martin Raspaud] - - diff --git a/docs/Makefile b/docs/Makefile new file mode 100755 index 0000000..037c4de --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,11 @@ +SOURCE_DIR := source + +.DEFAULT_GOAL := html +.PHONY: clean html + +clean: + @cd $(SOURCE_DIR) && find . -type f ! -name 'index.rst' ! -name 'conf.py' -delete + @cd $(SOURCE_DIR) && find . -type d -delete + +html: + @cd $(SOURCE_DIR) && python -m sphinx -T -b html -d _build/doctrees -D language=en . 
build/html diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..cbf1e36 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,2 @@ +sphinx +sphinx-rtd-theme diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..84b614c --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,75 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. + +import os + +from sphinx.ext import apidoc + +autodoc_mock_imports = ["motor", "pydantic", "fastapi", "uvicorn", "loguru", "pyyaml"] + +# -- Project information ----------------------------------------------------- + +project = "Pytroll-db" +copyright = "2024, Pytroll" +author = "Pouria Khalaj" + +# The full version, including alpha/beta/rc tags +release = "0.1" + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named "sphinx.ext.*") or your custom +# ones. +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", + "sphinx.ext.duration", + "sphinx.ext.doctest", + "sphinx.ext.autosummary", +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. 
+exclude_patterns = ["*tests/*"] +include_patterns = ["**"] + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_rtd_theme" + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +# html_static_path = ["_static"] + +autodoc_default_options = { + "members": True, + "member-order": "bysource", + "private-members": True, + "special-members": True, + "undoc-members": True, +} + +root_doc = "index" + +output_dir = os.path.join(".") +module_dir = os.path.abspath("../../trolldb") +apidoc.main(["-e", "-M", "-q", "-f", "-o", output_dir, module_dir, *include_patterns]) diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..7091703 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,17 @@ +Welcome to Pytroll documentation! +=========================================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + modules + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..d4b4cde --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,61 @@ +[project] +name = "pytroll-db" +dynamic = ["version"] +description = "The database API of Pytroll." 
+authors = [ + { name = "Pouria Khalaj", email = "pouria.khalaj@smhi.se" } +] +dependencies = ["pymongo", "posttroll", "motor", "pydantic", "fastapi", "uvicorn", "loguru", "pyyaml", "urllib3"] +readme = "README.rst" +requires-python = ">= 3.12" +license = {file = "LICENSE"} +classifiers = [ + "Development Status :: 4 - Beta", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Programming Language :: Python", + "Operating System :: OS Independent", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering", + "Topic :: Database" +] + +[project.urls] +"Documentation" = "https://pytroll-db.readthedocs.io/en/latest/" + +[project.scripts] +pytroll-db-recorder = "trolldb.cli:run_sync" + +[build-system] +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" + +[tool.hatch.metadata] +allow-direct-references = true + +[tool.hatch.build.targets.wheel] +packages = ["trolldb"] + +[tool.hatch.version] +source = "vcs" + +[tool.hatch.build.hooks.vcs] +version-file = "trolldb/version.py" + + +[tool.ruff] +line-length = 120 + +[tool.ruff.lint] +# See https://docs.astral.sh/ruff/rules/ +select = ["A", "B", "D", "E", "W", "F", "I", "N", "PT", "S", "TID", "C90", "Q", "T10", "T20"] + +[tool.ruff.lint.per-file-ignores] +"*/tests/*" = ["S101"] # assert allowed in tests +"docs/source/conf.py" = ["D100", "A001"] # sphinx misbihaving +"src/trolldb/version.py" = ["D100", "Q000"] # automatically generated by hatch-vcs + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.lint.mccabe] +max-complexity = 10 diff --git a/setup.py b/setup.py deleted file mode 100644 index e50ad92..0000000 --- a/setup.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (c) 2011, 2012, 2014, 2015. - -# Author(s): - -# The pytroll team: -# Martin Raspaud - -# This file is part of pytroll. 
- -# This is free software: you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation, either version 3 of the License, or (at your option) any later -# version. - -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more -# details. - -# You should have received a copy of the GNU General Public License along with -# this program. If not, see . - -from setuptools import setup -import imp - -version = imp.load_source('trolldb.version', 'trolldb/version.py') - - -requirements = ['pymongo', 'posttroll'] - -setup(name="pytroll-db", - version=version.__version__, - description='Messaging system for pytroll', - author='The pytroll team', - author_email='martin.raspaud@smhi.se', - url="http://github.com/pytroll/pytroll-db", - packages=['trolldb'], - zip_safe=False, - license="GPLv3", - install_requires=requirements, - classifiers=[ - 'Development Status :: 4 - Beta', - 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', - 'Programming Language :: Python', - 'Operating System :: OS Independent', - 'Intended Audience :: Science/Research', - 'Topic :: Scientific/Engineering', - 'Topic :: Database' - ], - entry_points={ - 'console_scripts': [ - 'db_cleanup = trolldb.pytroll_cleanup:threaded_check_all', - ], - }, - scripts=['bin/pytroll-mongo.py'], - ) diff --git a/trolldb/__init__.py b/trolldb/__init__.py index e69de29..f054e81 100644 --- a/trolldb/__init__.py +++ b/trolldb/__init__.py @@ -0,0 +1 @@ +"""trolldb package.""" diff --git a/trolldb/api/__init__.py b/trolldb/api/__init__.py new file mode 100644 index 0000000..6590a31 --- /dev/null +++ b/trolldb/api/__init__.py @@ -0,0 +1,9 @@ +"""This package contains the API capabilities for the pytroll-db package. 
+ +It provides functionality for interacting with the database via the `FastAPI `_ +framework. + +For more information and documentation, please refer to the following sub-packages and modules: + - :obj:`trolldb.api.routes`: The package which defines the API routes. + - :obj:`trollddb.api.api`: The module which defines the API server and how it is run via the given configuration. +""" diff --git a/trolldb/api/api.py b/trolldb/api/api.py new file mode 100644 index 0000000..461ca9c --- /dev/null +++ b/trolldb/api/api.py @@ -0,0 +1,148 @@ +"""The module which includes the main functionalities of the API package. + +This is the main module which is supposed to be imported by the users of the package. + +Note: + The following applies to the :obj:`api` package and all its subpackages/modules. + + To avoid redundant documentation and inconsistencies, only non-FastAPI components are documented via the docstrings. + For the documentation related to the FastAPI components, check out the auto-generated documentation by FastAPI. + Assuming that the API server is running on ``_ (example) the auto-generated documentation can + be accessed via either ``_ or ``_. + + Read more at `FastAPI automatics docs `_. +""" + +import asyncio +import time +from contextlib import contextmanager +from multiprocessing import Process +from typing import Union + +import uvicorn +from fastapi import FastAPI, status +from fastapi.responses import PlainTextResponse +from loguru import logger +from pydantic import FilePath, validate_call + +from trolldb.api.routes import api_router +from trolldb.config.config import AppConfig, Timeout, parse_config_yaml_file +from trolldb.database.mongodb import mongodb_context +from trolldb.errors.errors import ResponseError + +API_INFO = dict( + title="pytroll-db", + version="0.1", + summary="The database API of Pytroll", + description= + "The API allows you to perform CRUD operations as well as querying the database" + "At the moment only MongoDB is supported. 
It is based on the following Python packages" + "\n * **PyMongo** (https://github.com/mongodb/mongo-python-driver)" + "\n * **motor** (https://github.com/mongodb/motor)", + license_info=dict( + name="The GNU General Public License v3.0", + url="https://www.gnu.org/licenses/gpl-3.0.en.html" + ) +) +"""These will appear int the auto-generated documentation and are passed to the ``FastAPI`` class as keyword args.""" + + +@validate_call +def run_server(config: Union[AppConfig, FilePath], **kwargs) -> None: + """Runs the API server with all the routes and connection to the database. + + It first creates a FastAPI application and runs it using `uvicorn `_ which is + ASGI (Asynchronous Server Gateway Interface) compliant. This function runs the event loop using + `asyncio `_ and does not yield! + + Args: + config: + The configuration of the application which includes both the server and database configurations. Its type + should be a :class:`FilePath`, which is a valid path to an existing config file which will parsed as a + ``.YAML`` file. + + **kwargs: + The keyword arguments are the same as those accepted by the + `FastAPI class `_ and are directly passed + to it. These keyword arguments will be first concatenated with the configurations of the API server which + are read from the ``config`` argument. The keyword arguments which are passed explicitly to the function + take precedence over ``config``. Finally, ``API_INFO``, which are hard-coded information for the API server, + will be concatenated and takes precedence over all. + + Raises: + ValidationError: + If the function is not called with arguments of valid type. + + Example: + .. 
code-block:: python + + from api.api import run_server + if __name__ == "__main__": + run_server("config.yaml") + """ + logger.info("Attempt to run the API server ...") + if not isinstance(config, AppConfig): + config = parse_config_yaml_file(config) + + # Concatenate the keyword arguments for the API server in the order of precedence (lower to higher). + app = FastAPI(**(config.api_server._asdict() | kwargs | API_INFO)) + + app.include_router(api_router) + + @app.exception_handler(ResponseError) + async def auto_exception_handler(_, exc: ResponseError): + """Catches all the exceptions raised as a ResponseError, e.g. accessing non-existing databases/collections.""" + status_code, message = exc.get_error_details() + info = dict( + status_code=status_code if status_code else status.HTTP_500_INTERNAL_SERVER_ERROR, + content=message if message else "Generic Error [This is not okay, check why we have the generic error!]", + ) + logger.error(f"Response error caught by the API auto exception handler: {info}") + return PlainTextResponse(**info) + + async def _serve(): + """An auxiliary coroutine to be used in the asynchronous execution of the FastAPI application.""" + async with mongodb_context(config.database): + logger.info("Attempt to start the uvicorn server ...") + await uvicorn.Server( + config=uvicorn.Config( + host=config.api_server.url.host, + port=config.api_server.url.port, + app=app + ) + ).serve() + + logger.info("Attempt to run the asyncio loop for the API server ...") + asyncio.run(_serve()) + + +@contextmanager +def api_server_process_context(config: Union[AppConfig, FilePath], startup_time: Timeout = 2): + """A synchronous context manager to run the API server in a separate process (non-blocking). + + It uses the `multiprocessing `_ package. The main use case + is envisaged to be in `TESTING` environments. + + Args: + config: + Same as ``config`` argument for :func:`run_server`. 
+ + startup_time: + The overall time in seconds that is expected for the server and the database connections to be established + before actual requests can be sent to the server. For testing purposes ensure that this is sufficiently + large so that the tests will not time out. + """ + logger.info("Attempt to run the API server process in a context manager ...") + if not isinstance(config, AppConfig): + config = parse_config_yaml_file(config) + + process = Process(target=run_server, args=(config,)) + try: + process.start() + time.sleep(startup_time) + yield process + finally: + logger.info("Attempt to terminate the API server process in the context manager ...") + process.terminate() + process.join() + logger.info("The API server process has terminated successfully.") diff --git a/trolldb/api/routes/__init__.py b/trolldb/api/routes/__init__.py new file mode 100644 index 0000000..4c69061 --- /dev/null +++ b/trolldb/api/routes/__init__.py @@ -0,0 +1,5 @@ +"""routes package.""" + +from .router import api_router + +__all__ = ("api_router",) diff --git a/trolldb/api/routes/common.py b/trolldb/api/routes/common.py new file mode 100644 index 0000000..b05c86b --- /dev/null +++ b/trolldb/api/routes/common.py @@ -0,0 +1,94 @@ +"""The module with common functions to be used in handling requests related to `databases` and `collections`.""" + +from typing import Annotated, Union + +from fastapi import Depends, Query, Response +from motor.motor_asyncio import AsyncIOMotorCollection, AsyncIOMotorDatabase + +from trolldb.database.mongodb import MongoDB + +exclude_defaults_query = Query( + True, + title="Query string", + description= + "A boolean to exclude default databases from a MongoDB instance. Refer to " + "`trolldb.database.mongodb.MongoDB.default_database_names` for more information." +) + + +async def check_database(database_name: str | None = None) -> AsyncIOMotorDatabase: + """A dependency for route handlers to check for the existence of a database given its name. 
+ + Args: + database_name (Optional, default ``None``): + The name of the database to check. In case of ``None``, the main database will be picked. + + Returns: + The database object if it exists. + + Raises: + :class:`~trolldb.errors.errors.ResponseError`: + Check :func:`~trolldb.database.mongodb.MongoDB.get_database` for more information. + """ + return await MongoDB.get_database(database_name) + + +async def check_collection( + database_name: str | None = None, + collection_name: str | None = None) -> AsyncIOMotorCollection: + """A dependency for route handlers to check for the existence of a collection. + + It performs the check given the collection name and the name of the database it resides in. It first checks for the + existence of the database. + + Args: + database_name (Optional, default ``None``): + The name of the database to check. In case of ``None``, the main database will be picked. + collection_name (Optional, default ``None``): + The name of the collection to check. In case of ``None``, the main collection will be picked. + + Warning: + Both of ``database_name`` and ``collection_name`` must be ``None`` so that the main database and collection + will be picked. In case only one of them is ``None``, this is treated as an unacceptable request. + + Returns: + - The collection object if it exists in the designated database. + + Raises: + :class:`~trolldb.errors.errors.ResponseError`: + Check :func:`~trolldb.database.mongodb.MongoDB.get_collection` for more information. + """ + return await MongoDB.get_collection(database_name, collection_name) + + +async def get_distinct_items_in_collection( + response_or_collection: Union[Response, AsyncIOMotorCollection], + field_name: str) -> Union[Response, list[str]]: + """An auxiliary function to either return the given response; or return a list of distinct (unique) values. + + Given the ``field_name`` it conducts a search in all documents of the given collection. 
The latter behaviour is + equivalent to the ``distinct`` function from MongoDB. The former is the behaviour of an identity function. + + Args: + response_or_collection: + Either a response object, or a collection in which documents will be queried for the ``field_name``. + + field_name: + The name of the target field in the documents + + Returns: + - In case of a response object as input, the same response will be returned as-is. + - In case of a collection as input, all the documents of the collection will be searched for ``field_name``, + and the corresponding values will be retrieved. Finally, a list of all the distinct values is returned. + """ + if isinstance(response_or_collection, Response): + return response_or_collection + + return await response_or_collection.distinct(field_name) + + +CheckCollectionDependency = Annotated[AsyncIOMotorCollection, Depends(check_collection)] +"""Type annotation for the FastAPI dependency injection of checking a collection (function).""" + +CheckDataBaseDependency = Annotated[AsyncIOMotorDatabase, Depends(check_database)] +"""Type annotation for the FastAPI dependency injection of checking a database (function).""" diff --git a/trolldb/api/routes/databases.py b/trolldb/api/routes/databases.py new file mode 100644 index 0000000..e114ef4 --- /dev/null +++ b/trolldb/api/routes/databases.py @@ -0,0 +1,63 @@ +"""The module which handles all requests related to getting the list of `databases` and `collections`. + +Note: + For more information on the API server, see the automatically generated documentation by FastAPI. 
+""" + +from fastapi import APIRouter +from pymongo.collection import _DocumentType + +from trolldb.api.routes.common import CheckCollectionDependency, CheckDataBaseDependency, exclude_defaults_query +from trolldb.config.config import MongoObjectId +from trolldb.database.errors import ( + Databases, + Documents, + database_collection_document_error_descriptor, + database_collection_error_descriptor, +) +from trolldb.database.mongodb import MongoDB, get_ids + +router = APIRouter() + + +@router.get("/", + response_model=list[str], + summary="Gets the list of all database names") +async def database_names(exclude_defaults: bool = exclude_defaults_query) -> list[str]: + """Please consult the auto-generated documentation by FastAPI.""" + db_names = await MongoDB.list_database_names() + + if not exclude_defaults: + return db_names + + return [db for db in db_names if db not in MongoDB.default_database_names] + + +@router.get("/{database_name}", + response_model=list[str], + responses=Databases.union().fastapi_descriptor, + summary="Gets the list of all collection names for the given database name") +async def collection_names(db: CheckDataBaseDependency) -> list[str]: + """Please consult the auto-generated documentation by FastAPI.""" + return await db.list_collection_names() + + +@router.get("/{database_name}/{collection_name}", + response_model=list[str], + responses=database_collection_error_descriptor, + summary="Gets the object ids of all documents for the given database and collection name") +async def documents(collection: CheckCollectionDependency) -> list[str]: + """Please consult the auto-generated documentation by FastAPI.""" + return await get_ids(collection.find({})) + + +@router.get("/{database_name}/{collection_name}/{_id}", + response_model=_DocumentType, + responses=database_collection_document_error_descriptor, + summary="Gets the document content in json format given its object id, database, and collection name") +async def document_by_id(collection: 
CheckCollectionDependency, _id: MongoObjectId) -> _DocumentType: + """Please consult the auto-generated documentation by FastAPI.""" + if document := await collection.find_one({"_id": _id}): + return dict(document) | {"_id": str(_id)} + + raise Documents.NotFound diff --git a/trolldb/api/routes/datetime_.py b/trolldb/api/routes/datetime_.py new file mode 100644 index 0000000..fbda3bf --- /dev/null +++ b/trolldb/api/routes/datetime_.py @@ -0,0 +1,83 @@ +"""The module which handles all requests related to `datetime`. + +Note: + For more information on the API server, see the automatically generated documentation by FastAPI. +""" + +from datetime import datetime +from typing import Any, Coroutine, TypedDict + +from fastapi import APIRouter +from pydantic import BaseModel + +from trolldb.api.routes.common import CheckCollectionDependency +from trolldb.database.errors import database_collection_error_descriptor +from trolldb.database.mongodb import get_id + + +class TimeModel(TypedDict): + """Please consult the auto-generated documentation by FastAPI.""" + _id: str + _time: datetime + + +class TimeEntry(TypedDict): + """Please consult the auto-generated documentation by FastAPI.""" + _min: TimeModel + _max: TimeModel + + +class ResponseModel(BaseModel): + """Please consult the auto-generated documentation by FastAPI.""" + start_time: TimeEntry + end_time: TimeEntry + + +router = APIRouter() + + +@router.get("", + response_model=ResponseModel, + responses=database_collection_error_descriptor, + summary="Gets the minimum and maximum values for the start and end times") +async def datetime(collection: CheckCollectionDependency) -> ResponseModel: + """Please consult the auto-generated documentation by FastAPI.""" + agg_result = await collection.aggregate([{ + "$group": { + "_id": None, + "min_start_time": {"$min": "$start_time"}, + "max_start_time": {"$max": "$start_time"}, + "min_end_time": {"$min": "$end_time"}, + "max_end_time": {"$max": "$end_time"} + }}]).next() + 
+ def _aux(query: dict) -> Coroutine[Any, Any, str]: + """An auxiliary function that retrieves a single object UUID from the database based on the given query. + + Args: + query: + The query used to search for the desired document in the database. + + Returns: + The UUID of the document found in the database. + """ + return get_id(collection.find_one(query)) + + return ResponseModel( + start_time=TimeEntry( + _min=TimeModel( + _id=await _aux({"start_time": agg_result["min_start_time"]}), + _time=agg_result["min_start_time"]), + _max=TimeModel( + _id=await _aux({"start_time": agg_result["max_start_time"]}), + _time=agg_result["max_start_time"]) + ), + end_time=TimeEntry( + _min=TimeModel( + _id=await _aux({"end_time": agg_result["min_end_time"]}), + _time=agg_result["min_end_time"]), + _max=TimeModel( + _id=await _aux({"end_time": agg_result["max_end_time"]}), + _time=agg_result["max_end_time"]) + ) + ) diff --git a/trolldb/api/routes/platforms.py b/trolldb/api/routes/platforms.py new file mode 100644 index 0000000..7d0c756 --- /dev/null +++ b/trolldb/api/routes/platforms.py @@ -0,0 +1,21 @@ +"""The module which handles all requests regarding `platforms`. + +Note: + For more information on the API server, see the automatically generated documentation by FastAPI. 
+""" + +from fastapi import APIRouter + +from trolldb.api.routes.common import CheckCollectionDependency, get_distinct_items_in_collection +from trolldb.database.errors import database_collection_error_descriptor + +router = APIRouter() + + +@router.get("", + response_model=list[str], + responses=database_collection_error_descriptor, + summary="Gets the list of all platform names") +async def platform_names(collection: CheckCollectionDependency) -> list[str]: + """Please consult the auto-generated documentation by FastAPI.""" + return await get_distinct_items_in_collection(collection, "platform_name") diff --git a/trolldb/api/routes/queries.py b/trolldb/api/routes/queries.py new file mode 100644 index 0000000..e585101 --- /dev/null +++ b/trolldb/api/routes/queries.py @@ -0,0 +1,51 @@ +"""The module which handles all requests to the queries route. + +Note: + For more information on the API server, see the automatically generated documentation by FastAPI. +""" + +import datetime + +from fastapi import APIRouter, Query + +from trolldb.api.routes.common import CheckCollectionDependency +from trolldb.database.errors import database_collection_error_descriptor +from trolldb.database.mongodb import get_ids +from trolldb.database.piplines import PipelineAttribute, Pipelines + +router = APIRouter() + + +@router.get("", + response_model=list[str], + responses=database_collection_error_descriptor, + summary="Gets the database UUIDs of the documents that match specifications determined by the query string") +async def queries( + collection: CheckCollectionDependency, + # We suppress ruff for the following four lines with `Query(default=None)`. + # Reason: This is the FastAPI way of defining optional queries and ruff is not happy about it! 
+ platform: list[str] = Query(default=None), # noqa: B008 + sensor: list[str] = Query(default=None), # noqa: B008 + time_min: datetime.datetime = Query(default=None), # noqa: B008 + time_max: datetime.datetime = Query(default=None)) -> list[str]: # noqa: B008 + """Please consult the auto-generated documentation by FastAPI.""" + # We + pipelines = Pipelines() + + if platform: + pipelines += PipelineAttribute("platform_name") == platform + + if sensor: + pipelines += PipelineAttribute("sensor") == sensor + + if [time_min, time_max] != [None, None]: + start_time = PipelineAttribute("start_time") + end_time = PipelineAttribute("end_time") + pipelines += ( + (start_time >= time_min) | + (start_time <= time_max) | + (end_time >= time_min) | + (end_time <= time_max) + ) + + return await get_ids(collection.aggregate(pipelines)) diff --git a/trolldb/api/routes/root.py b/trolldb/api/routes/root.py new file mode 100644 index 0000000..9a11da0 --- /dev/null +++ b/trolldb/api/routes/root.py @@ -0,0 +1,16 @@ +"""The module which handles all requests to the root route, i.e. "/". + +Note: + For more information on the API server, see the automatically generated documentation by FastAPI. +""" + +from fastapi import APIRouter, Response, status + +router = APIRouter() + + +@router.get("/", + summary="The root route which is mainly used to check the status of connection") +async def root() -> Response: + """Please consult the auto-generated documentation by FastAPI.""" + return Response(status_code=status.HTTP_200_OK) diff --git a/trolldb/api/routes/router.py b/trolldb/api/routes/router.py new file mode 100644 index 0000000..7f0125c --- /dev/null +++ b/trolldb/api/routes/router.py @@ -0,0 +1,17 @@ +"""The module which defines all the routes with their corresponding tags. + +Note: + For more information on the API server, see the automatically generated documentation by FastAPI. 
+""" + +from fastapi import APIRouter + +from trolldb.api.routes import databases, datetime_, platforms, queries, root, sensors + +api_router = APIRouter() +api_router.include_router(root.router, tags=["root"]) +api_router.include_router(databases.router, tags=["databases"], prefix="/databases") +api_router.include_router(datetime_.router, tags=["datetime"], prefix="/datetime") +api_router.include_router(platforms.router, tags=["platforms"], prefix="/platforms") +api_router.include_router(queries.router, tags=["queries"], prefix="/queries") +api_router.include_router(sensors.router, tags=["sensors"], prefix="/sensors") diff --git a/trolldb/api/routes/sensors.py b/trolldb/api/routes/sensors.py new file mode 100644 index 0000000..c3883a1 --- /dev/null +++ b/trolldb/api/routes/sensors.py @@ -0,0 +1,21 @@ +"""The module which handles all requests regarding `sensors`. + +Note: + For more information on the API server, see the automatically generated documentation by FastAPI. +""" + +from fastapi import APIRouter + +from trolldb.api.routes.common import CheckCollectionDependency, get_distinct_items_in_collection +from trolldb.database.errors import database_collection_error_descriptor + +router = APIRouter() + + +@router.get("", + response_model=list[str], + responses=database_collection_error_descriptor, + summary="Gets the list of all sensor names") +async def sensor_names(collection: CheckCollectionDependency) -> list[str]: + """Please consult the auto-generated documentation by FastAPI.""" + return await get_distinct_items_in_collection(collection, "sensor") diff --git a/trolldb/cli.py b/trolldb/cli.py new file mode 100644 index 0000000..74ae37b --- /dev/null +++ b/trolldb/cli.py @@ -0,0 +1,53 @@ +"""Main interface.""" + +import argparse +import asyncio + +from loguru import logger +from posttroll.message import Message +from posttroll.subscriber import create_subscriber_from_dict_config +from pydantic import FilePath + +from trolldb.config.config import AppConfig, 
parse_config_yaml_file +from trolldb.database.mongodb import MongoDB, mongodb_context + + +async def record_messages(config: AppConfig): + """Record the metadata of messages into the database.""" + async with mongodb_context(config.database): + collection = await MongoDB.get_collection( + config.database.main_database_name, config.database.main_collection_name + ) + for m in create_subscriber_from_dict_config(config.subscriber).recv(): + msg = Message.decode(str(m)) + match msg.type: + case "file": + await collection.insert_one(msg.data) + case "del": + deletion_result = await collection.delete_many({"uri": msg.data["uri"]}) + if deletion_result.deleted_count != 1: + logger.error("Recorder found multiple deletions!") # TODO: Log some data related to the msg + case _: + logger.debug(f"Don't know what to do with {msg.type} message.") + + +async def record_messages_from_config(config_file: FilePath): + """Record messages into the database, getting the configuration from a file.""" + config = parse_config_yaml_file(config_file) + await record_messages(config) + + +async def record_messages_from_command_line(args=None): + """Record messages into the database, command-line interface.""" + parser = argparse.ArgumentParser() + parser.add_argument( + "configuration_file", + help="Path to the configuration file") + cmd_args = parser.parse_args(None if args is None else [str(i) for i in args]) + + await record_messages_from_config(cmd_args.configuration_file) + + +def run_sync(): + """Runs the interface synchronously.""" + asyncio.run(record_messages_from_command_line()) diff --git a/trolldb/config/__init__.py b/trolldb/config/__init__.py new file mode 100644 index 0000000..ccb796b --- /dev/null +++ b/trolldb/config/__init__.py @@ -0,0 +1 @@ +"""This package contains utilities (e.g. 
def id_must_be_valid(id_like_string: str) -> ObjectId:
    """Converts the given string into a MongoDB ObjectId.

    Args:
        id_like_string:
            The string to be converted to an ObjectId.

    Returns:
        The corresponding :class:`~bson.objectid.ObjectId` on success.

    Raises:
        ValueError:
            If the string is not a valid ObjectId. Pydantic ultimately turns this into a
            validation error when the function is used as an ``AfterValidator``.
    """
    try:
        object_id = ObjectId(id_like_string)
    except InvalidId as e:
        raise ValueError from e
    return object_id
def parse_config_yaml_file(filename: FilePath) -> AppConfig:
    """Parses and validates the configurations from a YAML file.

    Args:
        filename:
            The filename of a valid YAML file which holds the configurations.

    Returns:
        An instance of :class:`AppConfig`.

    Raises:
        ParserError:
            If the file cannot be properly parsed.

        SystemExit(errno.EIO):
            If the successfully parsed file fails the validation, i.e. its schema or the content
            does not conform to :class:`AppConfig`. The underlying ``ValidationError`` is logged,
            not propagated — the previous docstring wrongly documented it as raised.
    """
    logger.info("Attempt to parse the YAML file ...")
    with open(filename, "r") as file:
        config = safe_load(file)
    logger.info("Parsing YAML file is successful.")
    try:
        logger.info("Attempt to validate the parsed YAML file ...")
        config = AppConfig(**config)
        logger.info("Validation of the parsed YAML file is successful.")
        return config
    except ValidationError as e:
        logger.error(e)
        sys.exit(errno.EIO)
class Collections(ResponsesErrorGroup):
    """Collections error responses, e.g. if a requested collection cannot be found."""
    # The collection name is absent from the parent database.
    NotFoundError = ResponseError({
        status.HTTP_404_NOT_FOUND:
            "Could not find the given collection name inside the specified database."
    })

    # Exactly one of (database name, collection name) is `None`; they must be both set
    # or both `None` (the latter selects the main database/collection).
    WrongTypeError = ResponseError({
        status.HTTP_422_UNPROCESSABLE_ENTITY:
            "Both the Database and collection name must be `None` if one of them is `None`."
    })


class Databases(ResponsesErrorGroup):
    """Databases error responses, e.g. if a requested database cannot be found."""
    # The database name is absent from the server's list of databases.
    NotFoundError = ResponseError({
        status.HTTP_404_NOT_FOUND:
            "Could not find the given database name."
    })

    WrongTypeError = ResponseError({
        status.HTTP_422_UNPROCESSABLE_ENTITY:
            "Database name must be either of type `str` or `None.`"
    })


class Documents(ResponsesErrorGroup):
    """Documents error responses, e.g. if a requested document cannot be found."""
    # NOTE(review): named `NotFound` while the sibling groups use `NotFoundError`. Renaming
    # would break existing references (e.g. `raise Documents.NotFound` in the documents route),
    # so consider adding `NotFoundError = NotFound` as a backward-compatible alias instead.
    NotFound = ResponseError({
        status.HTTP_404_NOT_FOUND:
            "Could not find any document with the given object id."
    })
+""" + +database_collection_document_error_descriptor = ( + Databases.union() | Collections.union() | Documents.union() +).fastapi_descriptor +"""Same as :obj:`database_collection_error_descriptor` but including documents as well.""" diff --git a/trolldb/database/mongodb.py b/trolldb/database/mongodb.py new file mode 100644 index 0000000..3ec5a79 --- /dev/null +++ b/trolldb/database/mongodb.py @@ -0,0 +1,314 @@ +"""The module which handles database CRUD operations for MongoDB. + +It is based on the following libraries: + - `PyMongo `_ + - `motor `_. +""" + +import errno +from contextlib import asynccontextmanager +from typing import Any, AsyncGenerator, Coroutine, Optional, TypeVar, Union + +from loguru import logger +from motor.motor_asyncio import ( + AsyncIOMotorClient, + AsyncIOMotorCollection, + AsyncIOMotorCommandCursor, + AsyncIOMotorCursor, + AsyncIOMotorDatabase, +) +from pydantic import BaseModel +from pymongo.collection import _DocumentType +from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError + +from trolldb.config.config import DatabaseConfig +from trolldb.database.errors import Client, Collections, Databases +from trolldb.errors.errors import ResponseError + +T = TypeVar("T") +CoroutineLike = Coroutine[Any, Any, T] +"""A simple type hint for a coroutine of any type.""" + +CoroutineDocument = CoroutineLike[_DocumentType | None] +"""Coroutine type hint for document like objects.""" + +CoroutineStrList = CoroutineLike[list[str]] +"""Coroutine type hint for a list of strings.""" + + +class DatabaseName(BaseModel): + """Pydantic model for a database name.""" + name: str | None + + +class CollectionName(BaseModel): + """Pydantic model for a collection name.""" + name: str | None + + +async def get_id(doc: CoroutineDocument) -> str: + """Retrieves the ID of a document as a simple flat string. + + Note: + The rationale behind this method is as follows. 
In MongoDB, each document has a unique ID which is of type + :class:`~bson.objectid.ObjectId`. This is not suitable for purposes when a simple string is needed, hence + the need for this method. + + Args: + doc: + A MongoDB document in the coroutine form. This could be e.g. the result of applying the standard + ``find_one`` method from MongoDB on a collection given a ``filter``. + + Returns: + The ID of a document as a simple string. For example, when applied on a document with + ``_id: ObjectId('000000000000000000000000')``, the method returns ``'000000000000000000000000'``. + """ + return str((await doc)["_id"]) + + +async def get_ids(docs: Union[AsyncIOMotorCommandCursor, AsyncIOMotorCursor]) -> list[str]: + """Similar to :func:`~MongoDB.get_id` but for a list of documents. + + Args: + docs: + A list of MongoDB documents as :obj:`~AsyncIOMotorCommandCursor` or :obj:`~AsyncIOMotorCursor`. + This could be e.g. the result of applying the standard ``aggregate`` method from MongoDB on a + collection given a ``pipeline``. + + Returns: + The list of all IDs, each as a simple string. + """ + return [str(doc["_id"]) async for doc in docs] + + +class MongoDB: + """A wrapper class around the `motor async driver `_ for Mongo DB. + + It includes convenience methods tailored to our specific needs. As such, the :func:`~MongoDB.initialize()`` method + returns a coroutine which needs to be awaited. + + Note: + This class is not meant to be instantiated! That's why all the methods in this class are decorated with + ``@classmethods``. This choice has been made to guarantee optimal performance, i.e. for each running process + there must be only a single motor client to handle all database operations. Having different clients which are + constantly opened/closed degrades the performance. The expected usage is that we open a client in the beginning + of the program and keep it open until the program finishes. 
    @classmethod
    async def initialize(cls, database_config: DatabaseConfig):
        """Initializes the motor client. Note that this method has to be awaited!

        Args:
            database_config:
                A named tuple which includes the database configurations.

        Returns:
            On success ``None``.

        Raises:
            SystemExit(errno.EIO):
                If connection is not established (``ConnectionFailure``)
            SystemExit(errno.EIO):
                If the attempt times out (``ServerSelectionTimeoutError``)
            SystemExit(errno.EIO):
                If one attempts reinitializing the class with new (different) database configurations without calling
                :func:`~close()` first.
            SystemExit(errno.EIO):
                If the state is not consistent, i.e. the client is closed or ``None`` but the internal database
                configurations still exist and are different from the new ones which have been just provided.

            SystemExit(errno.ENODATA):
                If either ``database_config.main_database`` or ``database_config.main_collection`` does not exist.
        """
        logger.info("Attempt to initialize the MongoDB client ...")
        logger.info("Checking the database configs ...")
        # Guard against inconsistent re-initialization:
        #   - same config & open client -> warn and keep the existing client (idempotent call);
        #   - same config & no client   -> inconsistent internal state -> exit;
        #   - different config          -> reinitializing without close() first -> exit.
        if cls.__database_config:
            if database_config == cls.__database_config:
                if cls.__client:
                    # NOTE(review): this returns whatever `log_as_warning()` returns; the
                    # docstring's "Returns: On success ``None``" presumes that is None — confirm.
                    return Client.AlreadyOpenError.log_as_warning()
                Client.InconsistencyError.sys_exit_log(errno.EIO)
            else:
                Client.ReinitializeConfigError.sys_exit_log(errno.EIO)
        logger.info("Database configs are OK.")

        # This only makes the reference and does not establish an actual connection until the first attempt is made
        # to access the database. The config timeout is in seconds; motor expects milliseconds.
        cls.__client = AsyncIOMotorClient(
            database_config.url.unicode_string(),
            serverSelectionTimeoutMS=database_config.timeout * 1000)

        # `__database_names` is a plain local; the double-underscore prefix only triggers
        # (harmless) name mangling because we are inside a class body.
        __database_names = []
        try:
            logger.info("Attempt to access list of databases ...")
            # First real round-trip to the server; connection failures surface here.
            __database_names = await cls.__client.list_database_names()
        except (ConnectionFailure, ServerSelectionTimeoutError):
            Client.ConnectionError.sys_exit_log(
                errno.EIO, {"url": database_config.url.unicode_string()}
            )
        logger.info("Accessing the list of databases is successful.")

        err_extra_information = {"database_name": database_config.main_database_name}

        logger.info("Checking if the main database name exists ...")
        # Fail early (ENODATA) if the configured main database is missing on the server.
        if database_config.main_database_name not in __database_names:
            Databases.NotFoundError.sys_exit_log(errno.ENODATA, err_extra_information)
        cls.__main_database = cls.__client.get_database(database_config.main_database_name)
        logger.info("The main database name exists.")

        err_extra_information |= {"collection_name": database_config.main_collection_name}

        logger.info("Checking if the main collection name exists ...")
        # Likewise fail early if the configured main collection is missing in the main database.
        if database_config.main_collection_name not in await cls.__main_database.list_collection_names():
            Collections.NotFoundError.sys_exit_log(errno.ENODATA, err_extra_information)
        logger.info("The main collection name exists.")

        cls.__main_collection = cls.__main_database.get_collection(database_config.main_collection_name)

        logger.info("MongoDB is successfully initialized.")
    @classmethod
    async def get_database(cls, database_name: str) -> Union[AsyncIOMotorDatabase, ResponseError]:
        """Gets the database object given its name.

        Args:
            database_name:
                The name of the database to retrieve, or ``None`` to get the main database.

        Returns:
            The database object (the main database when ``database_name`` is ``None``).

        Raises:
            Databases.NotFoundError:
                If the database name does not exist in the list of database names. (The previous
                docstring advertised ``KeyError``, which is not what this code raises.)

            ValidationError:
                If ``database_name`` fails the :class:`DatabaseName` pydantic validation.
        """
        database_name = DatabaseName(name=database_name).name

        match database_name:
            case None:
                return cls.main_database()
            case _ if database_name in await cls.list_database_names():
                return cls.__client[database_name]
            case _:
                raise Databases.NotFoundError
+ """ + logger.info("Attempt to open the MongoDB context manager ...") + try: + await MongoDB.initialize(database_config) + yield + finally: + MongoDB.close() + logger.info("The MongoDB context manager is successfully closed.") diff --git a/trolldb/database/piplines.py b/trolldb/database/piplines.py new file mode 100644 index 0000000..f85fa15 --- /dev/null +++ b/trolldb/database/piplines.py @@ -0,0 +1,151 @@ +"""The module which defines some convenience classes to facilitate the use of aggregation pipelines in MongoDB.""" + +from typing import Any, Self + + +class PipelineBooleanDict(dict): + """A subclass of dict which overrides the behavior of bitwise `or` ``|`` and bitwise `and` ``&``. + + This class makes it easier to chain and nest `"and/or"` operations. + + The operators are only defined for operands of type :class:`PipelineBooleanDict`. For each of the aforementioned + operators, the result will be a dictionary with a single key/value pair. The key is either ``$or`` or ``$and`` + depending on the operator being used. The corresponding value is a list with two elements only. The first element + of the list is the content of the left operand and the second element is the content of the right operand. + + Example: + .. code-block:: python + + pd1 = PipelineBooleanDict({"number": 2}) + pd2 = PipelineBooleanDict({"kind": 1}) + + pd_and = pd1 & pd2 + pd_and_literal = PipelineBooleanDict({"$and": [{"number": 2}, {"kind": 1}]}) + # The following evaluates to True + pd_and == pd_and_literal + + pd_or = pd1 | pd2 + pd_or_literal = PipelineBooleanDict({"$or": [{"number": 2}, {"kind": 1}]}) + # The following evaluates to True + pd_or == pd_or_literal + """ + + def __or__(self, other: Self): + """Implements the bitwise or operator, i.e. ``|``.""" + return PipelineBooleanDict({"$or": [self, other]}) + + def __and__(self, other: Self): + """Implements the bitwise and operator, i.e. 
``&``.""" + return PipelineBooleanDict({"$and": [self, other]}) + + +class PipelineAttribute: + """A class which defines a single pipeline attribute on which boolean operations will be performed. + + The boolean operations are in the form of boolean dicts of type :class:`PipelineBooleanDict`. + """ + + def __init__(self, key: str) -> None: + """The constructor which specifies the pipeline attribute to work with.""" + self.__key = key + + def __eq__(self, other: Any) -> PipelineBooleanDict: + """Implements the equality operator, i.e. ``==``. + + This makes a boolean filter in which the attribute can match any of the items in ``other`` if it is a list, or + the ``other`` itself, otherwise. + + Warning: + Note how ``==`` behaves differently for :class:`PipelineBooleanDict` and :class:`PipelineAttribute`. + In the former, it asserts equality as per the standard behaviour of the operator in Python. However, in the + latter it acts as a filter and not an assertion of equality. + + Example: + .. code-block:: python + + pa_list = PipelineAttribute("letter") == ["A", "B"] + pd_list = PipelineBooleanDict({"$or": [{"letter": "A"}, {"letter": "B"}] + # The following evaluates to True + pa_list == pd_list + + pa_single = PipelineAttribute("letter") == "A" + pd_single = PipelineBooleanDict({"letter": "A"}) + # The following evaluates to True + pa_single == pd_single + """ + if isinstance(other, list): + return PipelineBooleanDict(**{"$or": [{self.__key: v} for v in other]}) + return PipelineBooleanDict(**{self.__key: other}) + + def __aux_operators(self, other: Any, operator: str) -> PipelineBooleanDict: + """An auxiliary function to perform comparison operations. + + Note: + The operators herein have similar behaviour to ``==`` in the sense that they make comparison filters and are + not to be interpreted as comparison assertions. 
class Pipelines(list):
    """A list of MongoDB aggregation stages: one ``$match`` stage per appended condition.

    Every element is a dictionary of the form ``{"$match": <PipelineBooleanDict>}``. The
    ``"$match"`` key is what triggers the matching operation in the MongoDB aggregation
    pipeline; its value is a (possibly nested, hierarchical) boolean pipeline dictionary
    giving the condition to match against.

    Example:
        .. code-block:: python

            pipelines = Pipelines()
            pipelines += PipelineAttribute("platform_name") == "P"
            pipelines += PipelineAttribute("sensor") == ["SA", "SB"]

            # The following evaluates to True
            pipelines == [
                {"$match": {"platform_name": "P"}},
                {"$match": {"$or": [{"sensor": "SA"}, {"sensor": "SB"}]}},
            ]
    """

    def __iadd__(self, other: PipelineBooleanDict) -> Self:
        """The in-place addition operator ``+=``: appends one ``$match`` stage for ``other``."""
        self.append({"$match": other})
        return self

    def __add__(self, other: PipelineBooleanDict) -> Self:
        """The addition operator ``+``: appends one ``$match`` stage for ``other``.

        NOTE(review): contrary to the usual ``list + list`` semantics, this mutates ``self``
        and returns it (it behaves like :func:`append` and does not produce a new list).
        """
        self.append({"$match": other})
        return self
code-block:: python + + # The following evaluate to True + __listify("test") == ["test"] + __listify(["a", "b"]) = ["a", "b"] + __listify([]) == [] + """ + return item if isinstance(item, list) else [item] + + +def _stringify(item: str | list[str], delimiter: str) -> str: + """Makes a single string out of the item(s) by delimiting them with ``delimiter``. + + Args: + item: + A string or list of strings to be delimited. + delimiter: + A string as delimiter. + + Returns: + The same input string, or in case of a list of items, a single string delimited by ``delimiter``. + """ + return delimiter.join(_listify(item)) + + +class ResponseError(Exception): + """The base class for all error responses. + + This is a derivative of the ``Exception`` class and therefore can be used directly in ``raise`` statements. + + Attributes: + __dict (``OrderedDict[StatusCode, str]``): + An ordered dictionary in which the keys are (HTTP) status codes and the values are the corresponding + messages. + """ + + descriptor_delimiter: str = " |OR| " + """A delimiter to divide the message part of several error responses which have been combined into a single one. + + This will be shown in textual format for the response descriptors of the Fast API routes. + + Example: + .. code-block:: python + + error_a = ResponseError({400: "Bad Request"}) + error_b = ResponseError({404: "Not Found"}) + errors = error_a | error_b + + # When used in a FastAPI response descriptor, + # the following string will be generated for errors + "Bad Request |OR| Not Found" + """ + + DefaultResponseClass: Response = PlainTextResponse + """The default type of the response which will be returned when an error occurs. + + This must be a valid member (class) of ``fastapi.responses``. + """ + + def __init__(self, args_dict: OrderedDict[StatusCode, str | list[str]] | dict) -> None: + """Initializes the error object given a dictionary of error (HTTP) codes (keys) and messages (values). 
+ + Note: + The order of items will be preserved as we use an ordered dictionary to store the items internally. + + Example: + .. code-block:: python + + # The following are all valid error objects + error_a = ResponseError({400: "Bad Request"}) + error_b = ResponseError({404: "Not Found"}) + errors = error_a | error_b + errors_a_or_b = ResponseError({400: "Bad Request", 404: "Not Found"}) + errors_list = ResponseError({404: ["Not Found", "Still Not Found"]}) + """ + self.__dict: OrderedDict = OrderedDict(args_dict) + self.extra_information: dict | None = None + + def __or__(self, other: Self): + """Implements the bitwise `or` ``|`` which combines the error objects into a single error response. + + Args: + other: + Another error response of the same base type to combine with. + + Returns: + A new error response which includes the combined error response. In case of different (HTTP) status codes, + the returned response includes the ``{: }`` pairs for both ``self`` and the ``other``. + In case of the same status codes, the messages will be combined into a list. + + Example: + .. code-block:: python + + error_a = ResponseError({400: "Bad Request"}) + error_b = ResponseError({404: "Not Found"}) + error_c = ResponseError({400: "Still Bad Request"}) + + errors_combined = error_a | error_b | error_c + + # which is equivalent to the following + errors_combined_literal = ResponseError({ + 400: ["Bad Request", "Still Bad Request"], + 404: "Not Found" + } + """ + buff = OrderedDict(self.__dict) + for key, msg in other.__dict.items(): + self_msg = buff.get(key, None) + buff[key] = _listify(self_msg) if self_msg else [] + buff[key].extend(_listify(msg)) + return ResponseError(buff) + + def __retrieve_one_from_some( + self, + status_code: StatusCode | None = None) -> (StatusCode, str): + """Retrieves a tuple ``(, )`` from the internal dictionary :obj:`ResponseError.__dict`. 
+ + Args: + status_code (Optional, default ``None``): + The status code to retrieve from the internal dictionary. In case of ``None``, the internal dictionary + must include only a single entry which will be returned. + + Returns: + The tuple of ``(, )``. + + Raises: + ValueError: + In case of ambiguity, i.e. there are multiple items in the internal dictionary and the + ``status_code`` is ``None``. + + KeyError: + When the given ``status_code`` cannot be found. + """ + match status_code, len(self.__dict): + # Ambiguity, several items in the dictionary but the status code has not been given + case None, n if n > 1: + raise ValueError("In case of multiple response status codes, the status code must be specified.") + + # The status code has been specified + case StatusCode(), n if n >= 1: + if status_code in self.__dict.keys(): + return status_code, self.__dict[status_code] + raise KeyError(f"Status code {status_code} cannot be found.") + + # The status code has not been given and there is only a single item in the dictionary + case _, 1: + return next(iter(self.__dict.items())) + + # The internal dictionary is empty and the status code is None. + case _: + return 500, "Generic Response Error" + + def get_error_details( + self, + extra_information: dict | None = None, + status_code: int | None = None) -> (StatusCode, str): + """Gets the details of the error response. + + Args: + extra_information (Optional, default ``None``): + More information (if any) that wants to be added to the message string. + status_code (Optional, default ``None``): + The status code to retrieve. This is useful when there are several error items in the internal + dictionary. In case of ``None``, the internal dictionary must include a single entry, otherwise an error + is raised. + + Returns: + A tuple, in which the first element is the status code and the second element is a single string message. 
+ """ + status_code, msg = self.__retrieve_one_from_some(status_code) + return status_code, msg + (f" :=> {extra_information}" if extra_information else "") + + def log_as_warning( + self, + extra_information: dict | None = None, + status_code: int | None = None): + """Same as :func:`~ResponseError.get_error_details` but logs the error as a warning and returns ``None``.""" + msg, _ = self.get_error_details(extra_information, status_code) + logger.warning(msg) + + def sys_exit_log( + self, + exit_code: int = -1, + extra_information: dict | None = None, + status_code: int | None = None) -> None: + """Same as :func:`~ResponseError.get_error_details` but logs the error and calls the ``sys.exit``. + + The arguments are the same as :func:`~ResponseError.get_error_details` with the addition of ``exit_code`` + which is optional and is set to ``-1`` by default. + + Warning: + This is supposed to be done in case of non-recoverable errors, e.g. database issues. For other cases, we try + to see if we can recover and continue. + + Returns: + Does not return anything, but logs the error and exits the program. + """ + msg, _ = self.get_error_details(extra_information, status_code) + logger.error(msg) + exit(exit_code) + + @property + def fastapi_descriptor(self) -> dict[StatusCode, dict[str, str]]: + """Gets the FastAPI descriptor (dictionary) of the error items stored in :obj:`ResponseError.__dict`. + + Example: + .. code-block:: python + + error_a = ResponseError({400: "Bad Request"}) + error_b = ResponseError({404: "Not Found"}) + error_c = ResponseError({400: "Still Bad Request"}) + + errors_combined = error_a | error_b | error_c + errors_combined.fastapi_descriptor == { + 400: {"description": "Bad Request |OR| Still Bad Request"}, + 404: {"description": "Not Found"} + } + """ + return { + status: {"description": _stringify(msg, self.descriptor_delimiter)} + for status, msg in self.__dict.items() + } + + +class ResponsesErrorGroup: + """A class which groups related errors. 
+ + This provides a base class from which actual error groups are derived. The attributes of this class are all static. + + See :obj:`trolldb.database.errors` as an example on how to achieve this. + """ + + @classmethod + def members(cls) -> dict[str, ResponseError]: + """Retrieves a dictionary of all errors which are members of the class.""" + return {k: v for k, v in cls.__dict__.items() if isinstance(v, ResponseError)} + + @classmethod + def union(cls) -> ResponseError: + """Gets the union of all member errors in the group. + + This is useful when one wants to get the FastAPI response descriptor of all members. This function utilizes + the bitwise `or` ``|`` functionality of :obj:`ResponseError`. + """ + buff = None + for v in cls.members().values(): + if buff is None: + buff = v + else: + buff |= v + return buff diff --git a/trolldb/template_config.yaml b/trolldb/template_config.yaml new file mode 100644 index 0000000..71e2370 --- /dev/null +++ b/trolldb/template_config.yaml @@ -0,0 +1,28 @@ +# This is just a template. +# For use in production, rename this file to `config.yaml` so that the actual configurations can be correctly picked up. +# In addition, adapt the values accordingly. +# Finally, to improve readability please remove the comments! 
+ +# Required +database: + #Required + main_database_name: satellite_database + + #Required + main_collection_name: files + + #Required + url: mongodb://localhost:27017 + + #Required + timeout: 1 # seconds + + +# Required +api_server: + # Required + url: http://localhost:8000 + +# Required +subscriber: + # As per the configurations of the posttroll diff --git a/trolldb/test_utils/__init__.py b/trolldb/test_utils/__init__.py new file mode 100644 index 0000000..e1fa351 --- /dev/null +++ b/trolldb/test_utils/__init__.py @@ -0,0 +1 @@ +"""This package provide tools to test the database and api packages.""" diff --git a/trolldb/test_utils/common.py b/trolldb/test_utils/common.py new file mode 100644 index 0000000..6e3fb21 --- /dev/null +++ b/trolldb/test_utils/common.py @@ -0,0 +1,103 @@ +"""Common functionalities for testing, shared between tests and other test utility modules.""" + +from typing import Any, Optional +from urllib.parse import urljoin + +import yaml +from pydantic import AnyUrl, FilePath +from urllib3 import BaseHTTPResponse, request + +from trolldb.config.config import AppConfig + + +def make_test_app_config(subscriber_address: Optional[FilePath] = None) -> dict: + """Makes the app configuration when used in testing. + + Args: + subscriber_address: + The address of the subscriber if it is of type ``FilePath``. Otherwise, if it is ``None`` the ``subscriber`` + config will be an empty dictionary. + + Returns: + A dictionary which resembles an object of type :obj:`AppConfig`. 
+ """ + app_config = dict( + api_server=dict( + url="http://localhost:8080" + ), + database=dict( + main_database_name="mock_database", + main_collection_name="mock_collection", + url="mongodb://localhost:28017", + timeout=1 + ), + subscriber=dict( + nameserver=False, + addresses=[f"ipc://{subscriber_address}/in.ipc"] if subscriber_address is not None else [""], + port=3000 + ) + ) + + return app_config + + +test_app_config = AppConfig(**make_test_app_config()) +"""The app configs for testing purposes assuming an empty configuration for the subscriber.""" + + +def create_config_file(config_path: FilePath) -> FilePath: + """Creates a config file for tests.""" + config_file = config_path / "config.yaml" + with open(config_file, "w") as f: + yaml.safe_dump(make_test_app_config(config_path), f) + return config_file + + +def http_get(route: str = "", root: AnyUrl = test_app_config.api_server.url) -> BaseHTTPResponse: + """An auxiliary function to make a GET request using :func:`urllib.request`. + + Args: + route: + The desired route (excluding the root URL) which can include a query string as well. + root (Optional, default :obj:`test_app_config.api_server.url`): + The root to which the given route will be added to make the complete URL. + + Returns: + The response from the GET request. + """ + return request("GET", urljoin(root.unicode_string(), route)) + + +def compare_by_operator_name(operator: str, left: Any, right: Any) -> Any: + """Compares two operands given the binary operator name in a string format. + + Args: + operator: + Any of ``["$gte", "$gt", "$lte", "$lt", "$eq"]``. + These match the MongoDB comparison operators described + `here `_. + left: + The left operand + right: + The right operand + + Returns: + The result of the comparison operation, i.e. `` ``. + + Raises: + ValueError: + If the operator name is not valid. 
+ """ + match operator: + case "$gte": + return left >= right + case "$gt": + return left > right + case "$lte": + return left <= right + case "$lt": + return left < right + case "$eq": + return left == right + case _: + raise ValueError(f"Unknown operator: {operator}") diff --git a/trolldb/test_utils/mongodb_database.py b/trolldb/test_utils/mongodb_database.py new file mode 100644 index 0000000..d8060e9 --- /dev/null +++ b/trolldb/test_utils/mongodb_database.py @@ -0,0 +1,189 @@ +"""The module which provides testing utilities to make MongoDB databases/collections and fill them with test data.""" + +from contextlib import contextmanager +from datetime import datetime, timedelta +from random import choices, randint, shuffle +from typing import Iterator + +from pymongo import MongoClient + +from trolldb.config.config import DatabaseConfig +from trolldb.test_utils.common import test_app_config + + +@contextmanager +def mongodb_for_test_context(database_config: DatabaseConfig = test_app_config.database) -> Iterator[MongoClient]: + """A context manager for the MongoDB client given test configurations. + + Note: + This is based on `Pymongo` and not the `motor` async driver. For testing purposes this is sufficient, and we + do not need async capabilities. + + Args: + database_config (Optional, default :obj:`test_app_config.database`): + The configuration object for the database. 
+ + Yields: + MongoClient: + The MongoDB client object (from `Pymongo`) + """ + client = None + try: + client = MongoClient(database_config.url.unicode_string(), connectTimeoutMS=database_config.timeout * 1000) + yield client + finally: + if client is not None: + client.close() + + +class Time: + """A static class to enclose functionalities for generating random time stamps.""" + + min_start_time = datetime(2019, 1, 1, 0, 0, 0) + """The minimum timestamp which is allowed to appear in our data.""" + + max_end_time = datetime(2024, 1, 1, 0, 0, 0) + """The maximum timestamp which is allowed to appear in our data.""" + + delta_time = int((max_end_time - min_start_time).total_seconds()) + """The difference between the maximum and minimum timestamps in seconds.""" + + @staticmethod + def random_interval_secs(max_interval_secs: int) -> timedelta: + """Generates a random time interval between zero and the given max interval in seconds.""" + # We suppress ruff (S311) here as we are not generating anything cryptographic here! + return timedelta(seconds=randint(0, max_interval_secs)) # noqa: S311 + + @staticmethod + def random_start_time() -> datetime: + """Generates a random start time. + + The start time has a lower bound which is specified by :obj:`~Time.min_start_time` and an upper bound given by + :obj:`~Time.max_end_time`. + """ + return Time.min_start_time + Time.random_interval_secs(Time.delta_time) + + @staticmethod + def random_end_time(start_time: datetime, max_interval_secs: int = 300) -> datetime: + """Generates a random end time. + + The end time is within ``max_interval_secs`` seconds from the given ``start_time``. By default, the interval + is set to 300 seconds (5 minutes). 
+ """ + return start_time + Time.random_interval_secs(max_interval_secs) + + +class Document: + """A class which defines functionalities to generate database documents/data which are similar to real data.""" + + def __init__(self, platform_name: str, sensor: str) -> None: + """Initializes the document given its platform and sensor names.""" + self.platform_name = platform_name + self.sensor = sensor + self.start_time = Time.random_start_time() + self.end_time = Time.random_end_time(self.start_time) + + def generate_dataset(self, max_count: int) -> list[dict]: + """Generates the dataset for a given document. + + This corresponds to the list of files which are stored in each document. The number of datasets is randomly + chosen from 1 to ``max_count`` for each document. + """ + dataset = [] + # We suppress ruff (S311) here as we are not generating anything cryptographic here! + n = randint(1, max_count) # noqa: S311 + for i in range(n): + txt = f"{self.platform_name}_{self.sensor}_{self.start_time}_{self.end_time}_{i}" + dataset.append({ + "uri": f"/pytroll/{txt}", + "uid": f"{txt}.EXT1", + "path": f"{txt}.EXT1.EXT2" + }) + return dataset + + def like_mongodb_document(self) -> dict: + """Returns a dictionary which resembles the format we have for our real data when saving them to MongoDB.""" + return { + "platform_name": self.platform_name, + "sensor": self.sensor, + "start_time": self.start_time, + "end_time": self.end_time, + "dataset": self.generate_dataset(30) + } + + +class TestDatabase: + """A static class which encloses functionalities to prepare and fill the test database with mock data.""" + + # We suppress ruff (S311) here as we are not generating anything cryptographic here! + platform_names = choices(["PA", "PB", "PC"], k=10) # noqa: S311 + """Example platform names.""" + + # We suppress ruff (S311) here as we are not generating anything cryptographic here! 
+ sensors = choices(["SA", "SB", "SC"], k=10) # noqa: S311 + """Example sensor names.""" + + database_names = [test_app_config.database.main_database_name, "another_mock_database"] + """List of all database names. + + The first element is the main database that will be queried by the API and includes the mock data. The second + database is for testing scenarios when one attempts to access another existing database or collection. + """ + + collection_names = [test_app_config.database.main_collection_name, "another_mock_collection"] + """List of all collection names. + + The first element is the main collection that will be queried by the API and includes the mock data. The second + collection is for testing scenarios when one attempts to access another existing collection. + """ + + all_database_names = ["admin", "config", "local", *database_names] + """All database names including the default ones which are automatically created by MongoDB.""" + + documents: list[dict] = [] + """The list of documents which include mock data.""" + + @classmethod + def generate_documents(cls, random_shuffle: bool = True) -> None: + """Generates test documents which for practical purposes resemble real data. + + Warning: + This method is not pure! The side effect is that the :obj:`TestDatabase.documents` is filled. + """ + cls.documents = [ + Document(p, s).like_mongodb_document() for p, s in zip(cls.platform_names, cls.sensors, strict=False)] + if random_shuffle: + shuffle(cls.documents) + + @classmethod + def reset(cls): + """Resets all the databases/collections. + + This is done by deleting all documents in the collections and then inserting a single empty ``{}`` document + in them. 
+ """ + with mongodb_for_test_context() as client: + for db_name, coll_name in zip(cls.database_names, cls.collection_names, strict=False): + db = client[db_name] + collection = db[coll_name] + collection.delete_many({}) + collection.insert_one({}) + + @classmethod + def write_mock_date(cls): + """Fills databases/collections with mock data.""" + with mongodb_for_test_context() as client: + # The following function call has side effects! + cls.generate_documents() + collection = client[ + test_app_config.database.main_database_name + ][ + test_app_config.database.main_collection_name + ] + collection.insert_many(cls.documents) + + @classmethod + def prepare(cls): + """Prepares the MongoDB instance by first resetting the database and then filling it with mock data.""" + cls.reset() + cls.write_mock_date() diff --git a/trolldb/test_utils/mongodb_instance.py b/trolldb/test_utils/mongodb_instance.py new file mode 100644 index 0000000..1b16f04 --- /dev/null +++ b/trolldb/test_utils/mongodb_instance.py @@ -0,0 +1,138 @@ +"""The module which defines functionalities to run a MongoDB instance which is to be used in the testing environment.""" +import errno +import subprocess +import sys +import tempfile +import time +from contextlib import contextmanager +from os import mkdir, path +from shutil import rmtree + +from loguru import logger + +from trolldb.config.config import DatabaseConfig, Timeout +from trolldb.test_utils.common import test_app_config +from trolldb.test_utils.mongodb_database import TestDatabase + + +class TestMongoInstance: + """A static class to enclose functionalities for running a MongoDB instance.""" + + log_dir: str = tempfile.mkdtemp("__pytroll_db_temp_test_log") + """Temp directory for logging messages by the MongoDB instance. + + Warning: + The value of this attribute as shown above is just an example and will change in an unpredictable (secure) way! 
+ """ + + storage_dir: str = tempfile.mkdtemp("__pytroll_db_temp_test_storage") + """Temp directory for storing database files by the MongoDB instance. + + Warning: + The value of this attribute as shown above is just an example and will change in an unpredictable (secure) way! + """ + + port: int = 28017 + """The port on which the instance will run. + + Warning: + This must be always hard-coded. + """ + + process: subprocess.Popen | None = None + """The process which is used to run the MongoDB instance.""" + + @classmethod + def __prepare_dir(cls, directory: str): + """An auxiliary function to prepare a single directory. + + It creates a directory if it does not exist, or removes it first if it exists and then recreates it. + """ + cls.__remove_dir(directory) + mkdir(directory) + + @classmethod + def __remove_dir(cls, directory: str): + """An auxiliary function to remove a directory and all its content recursively.""" + if path.exists(directory) and path.isdir(directory): + rmtree(directory) + + @classmethod + def run_subprocess(cls, args: list[str], wait=True): + """Runs the subprocess in shell given its arguments.""" + # We suppress ruff (S603) here as we are not receiving any args from outside, e.g. port is hard-coded. + # Therefore, sanitization of arguments is not required. + cls.process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) # noqa: S603 + if wait: + outs, errs = cls.process.communicate() + return outs, errs + return None + + @classmethod + def mongodb_exists(cls) -> bool: + """Checks if ``mongod`` command exists.""" + outs, errs = cls.run_subprocess(["which", "mongod"]) + if outs and not errs: + return True + return False + + @classmethod + def prepare_dirs(cls) -> None: + """Prepares the temp directories.""" + for d in [cls.log_dir, cls.storage_dir]: + cls.__prepare_dir(d) + + @classmethod + def run_instance(cls): + """Runs the MongoDB instance and does not wait for it, i.e. 
the process runs in the background.""" + cls.run_subprocess( + ["mongod", "--dbpath", cls.storage_dir, "--logpath", f"{cls.log_dir}/mongod.log", "--port", f"{cls.port}"] + , wait=False) + + @classmethod + def shutdown_instance(cls): + """Shuts down the MongoDB instance by terminating its process.""" + cls.process.terminate() + cls.process.wait() + for d in [cls.log_dir, cls.storage_dir]: + cls.__remove_dir(d) + + +@contextmanager +def mongodb_instance_server_process_context( + database_config: DatabaseConfig = test_app_config.database, + startup_time: Timeout = 2): + """A synchronous context manager to run the MongoDB instance in a separate process (non-blocking). + + It uses the `subprocess `_ package. The main use case is + envisaged to be in testing environments. + + Args: + database_config: + The configuration of the database. + + startup_time: + The overall time in seconds that is expected for the MongoDB server instance to run before the database + content can be accessed. + """ + TestMongoInstance.port = database_config.url.hosts()[0]["port"] + TestMongoInstance.prepare_dirs() + + if not TestMongoInstance.mongodb_exists(): + logger.error("`mongod` is not available!") + sys.exit(errno.EIO) + + try: + TestMongoInstance.run_instance() + time.sleep(startup_time) + yield + finally: + TestMongoInstance.shutdown_instance() + + +@contextmanager +def running_prepared_database_context(): + """A synchronous context manager to start and prepare a database instance for tests.""" + with mongodb_instance_server_process_context(): + TestDatabase.prepare() + yield diff --git a/trolldb/tests/conftest.py b/trolldb/tests/conftest.py new file mode 100644 index 0000000..6a1e80c --- /dev/null +++ b/trolldb/tests/conftest.py @@ -0,0 +1,35 @@ +"""Pytest config for database tests. + +This module provides fixtures for running a Mongo DB instance in test mode and filling the database with test data. 
+""" + +import pytest +import pytest_asyncio + +from trolldb.api.api import api_server_process_context +from trolldb.database.mongodb import mongodb_context +from trolldb.test_utils.common import test_app_config +from trolldb.test_utils.mongodb_database import TestDatabase +from trolldb.test_utils.mongodb_instance import running_prepared_database_context + + +@pytest.fixture(scope="session") +def _run_mongodb_server_instance(): + """Encloses all tests (session scope) in a context manager of a running MongoDB instance (in a separate process).""" + with running_prepared_database_context(): + yield + + +@pytest.fixture(scope="session") +def _test_server_fixture(_run_mongodb_server_instance): + """Encloses all tests (session scope) in a context manager of a running API server (in a separate process).""" + with api_server_process_context(test_app_config, startup_time=2): + yield + + +@pytest_asyncio.fixture() +async def mongodb_fixture(_run_mongodb_server_instance): + """Fills the database with test data and then enclose each test in a mongodb context manager.""" + TestDatabase.prepare() + async with mongodb_context(test_app_config.database): + yield diff --git a/trolldb/tests/pytest.ini b/trolldb/tests/pytest.ini new file mode 100644 index 0000000..4088045 --- /dev/null +++ b/trolldb/tests/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +asyncio_mode=auto diff --git a/trolldb/tests/test_recorder.py b/trolldb/tests/test_recorder.py new file mode 100644 index 0000000..cf90da2 --- /dev/null +++ b/trolldb/tests/test_recorder.py @@ -0,0 +1,91 @@ +"""Tests for the message recording into database.""" + +import pytest +from posttroll.message import Message +from posttroll.testing import patched_subscriber_recv +from pytest_lazy_fixtures import lf + +from trolldb.cli import record_messages, record_messages_from_command_line, record_messages_from_config +from trolldb.database.mongodb import MongoDB, mongodb_context +from trolldb.test_utils.common import AppConfig, create_config_file, 
make_test_app_config, test_app_config +from trolldb.test_utils.mongodb_instance import running_prepared_database_context + + +@pytest.fixture() +def file_message(tmp_data_filename): + """Create a string for a file message.""" + return ('pytroll://segment/raster/L2/SAR file a001673@c20969.ad.smhi.se 2019-11-05T13:00:10.366023 v1.01 ' + 'application/json {"platform_name": "S1B", "scan_mode": "EW", "type": "GRDM", "data_source": "1SDH", ' + '"start_time": "2019-11-03T15:39:36.543000", "end_time": "2019-11-03T15:40:40.821000", "orbit_number": ' + '18765, "random_string1": "0235EA", "random_string2": "747D", "uri": ' + f'"{str(tmp_data_filename)}", "uid": "20191103_153936-s1b-ew-hh.tiff", ' + '"polarization": "hh", "sensor": "sar-c", "format": "GeoTIFF", "pass_direction": "ASCENDING"}') + + +@pytest.fixture() +def del_message(tmp_data_filename): + """Create a string for a delete message.""" + return ('pytroll://deletion del a001673@c20969.ad.smhi.se 2019-11-05T13:00:10.366023 v1.01 ' + 'application/json {"platform_name": "S1B", "scan_mode": "EW", "type": "GRDM", "data_source": "1SDH", ' + '"start_time": "2019-11-03T15:39:36.543000", "end_time": "2019-11-03T15:40:40.821000", "orbit_number": ' + '18765, "random_string1": "0235EA", "random_string2": "747D", "uri": ' + f'"{str(tmp_data_filename)}", "uid": "20191103_153936-s1b-ew-hh.tiff", ' + '"polarization": "hh", "sensor": "sar-c", "format": "GeoTIFF", "pass_direction": "ASCENDING"}') + + +@pytest.fixture() +def tmp_data_filename(tmp_path): + """Create a filename for the messages.""" + filename = "20191103_153936-s1b-ew-hh.tiff" + return tmp_path / filename + + +@pytest.fixture() +def config_file(tmp_path): + """A fixture to create a config file for the tests.""" + return create_config_file(tmp_path) + + +async def message_in_database_and_delete_count_is_one(msg) -> bool: + """Checks if there is exactly one item in the database which matches the data of the message.""" + async with 
mongodb_context(test_app_config.database): + collection = await MongoDB.get_collection("mock_database", "mock_collection") + result = await collection.find_one(dict(scan_mode="EW")) + result.pop("_id") + deletion_result = await collection.delete_many({"uri": msg.data["uri"]}) + return result == msg.data and deletion_result.deleted_count == 1 + + +@pytest.mark.parametrize(("function", "args"), [ + (record_messages_from_config, lf("config_file")), + (record_messages_from_command_line, [lf("config_file")]) +]) +async def test_record_from_cli_and_config(tmp_path, file_message, tmp_data_filename, function, args): + """Tests that message recording adds a message to the database either via configs from a file or the CLI.""" + msg = Message.decode(file_message) + with running_prepared_database_context(): + with patched_subscriber_recv([file_message]): + await function(args) + assert await message_in_database_and_delete_count_is_one(msg) + + +async def test_record_messages(config_file, tmp_path, file_message, tmp_data_filename): + """Tests that message recording adds a message to the database.""" + config = AppConfig(**make_test_app_config(tmp_path)) + msg = Message.decode(file_message) + with running_prepared_database_context(): + with patched_subscriber_recv([file_message]): + await record_messages(config) + assert await message_in_database_and_delete_count_is_one(msg) + + +async def test_record_deletes_message(tmp_path, file_message, del_message): + """Tests that message recording can delete a record in the database.""" + config = AppConfig(**make_test_app_config(tmp_path)) + with running_prepared_database_context(): + with patched_subscriber_recv([file_message, del_message]): + await record_messages(config) + async with mongodb_context(config.database): + collection = await MongoDB.get_collection("mock_database", "mock_collection") + result = await collection.find_one(dict(scan_mode="EW")) + assert result is None diff --git a/trolldb/tests/tests_api/test_api.py 
b/trolldb/tests/tests_api/test_api.py new file mode 100644 index 0000000..c721345 --- /dev/null +++ b/trolldb/tests/tests_api/test_api.py @@ -0,0 +1,81 @@ +"""Tests for the API server. + +Note: + The functionalities of the API server is not mocked! For the tests herein an actual API server will be running in a + separate process. Moreover, a MongoDB instance is run with databases which are pre-filled with random data having + similar characteristics to the real data. Actual requests will be sent to the API and the results will be asserted + against expectations. +""" + +from collections import Counter + +import pytest +from fastapi import status + +from trolldb.test_utils.common import http_get +from trolldb.test_utils.mongodb_database import TestDatabase, mongodb_for_test_context + + +def collections_exists(test_collection_names: list[str], expected_collection_name: list[str]) -> bool: + """Checks if the test and expected list of collection names match.""" + return Counter(test_collection_names) == Counter(expected_collection_name) + + +def document_ids_are_correct(test_ids: list[str], expected_ids: list[str]) -> bool: + """Checks if the test (retrieved from the API) and expected list of (document) ids match.""" + return Counter(test_ids) == Counter(expected_ids) + + +@pytest.mark.usefixtures("_test_server_fixture") +def test_root(): + """Checks that the server is up and running, i.e. 
the root routes responds with 200.""" + assert http_get().status == status.HTTP_200_OK + + +@pytest.mark.usefixtures("_test_server_fixture") +def test_platforms(): + """Checks that the retrieved platform names match the expected names.""" + assert set(http_get("platforms").json()) == set(TestDatabase.platform_names) + + +@pytest.mark.usefixtures("_test_server_fixture") +def test_sensors(): + """Checks that the retrieved sensor names match the expected names.""" + assert set(http_get("sensors").json()) == set(TestDatabase.sensors) + + +@pytest.mark.usefixtures("_test_server_fixture") +def test_database_names(): + """Checks that the retrieved database names match the expected names.""" + assert Counter(http_get("databases").json()) == Counter(TestDatabase.database_names) + assert Counter(http_get("databases?exclude_defaults=True").json()) == Counter(TestDatabase.database_names) + assert Counter(http_get("databases?exclude_defaults=False").json()) == Counter(TestDatabase.all_database_names) + + +@pytest.mark.usefixtures("_test_server_fixture") +def test_database_names_negative(): + """Checks that the non-existing databases cannot be found.""" + assert http_get("databases/non_existing_database").status == status.HTTP_404_NOT_FOUND + + +@pytest.mark.usefixtures("_test_server_fixture") +def test_collections(): + """Checks the presence of existing collections and that the ids of documents therein can be correctly retrieved.""" + with mongodb_for_test_context() as client: + for database_name, collection_name in zip(TestDatabase.database_names, TestDatabase.collection_names, + strict=False): + assert collections_exists( + http_get(f"databases/{database_name}").json(), + [collection_name] + ) + assert document_ids_are_correct( + http_get(f"databases/{database_name}/{collection_name}").json(), + [str(doc["_id"]) for doc in client[database_name][collection_name].find({})] + ) + + +@pytest.mark.usefixtures("_test_server_fixture") +def test_collections_negative(): + """Checks 
that the non-existing collections cannot be found.""" + for database_name in TestDatabase.database_names: + assert http_get(f"databases/{database_name}/non_existing_collection").status == status.HTTP_404_NOT_FOUND diff --git a/trolldb/tests/tests_database/test_mongodb.py b/trolldb/tests/tests_database/test_mongodb.py new file mode 100644 index 0000000..78958c1 --- /dev/null +++ b/trolldb/tests/tests_database/test_mongodb.py @@ -0,0 +1,91 @@ +"""Direct tests for :obj:`trolldb.database.mongodb` module without an API server connection. + +Note: + The functionalities of the MongoDB client is not mocked! For the tests herein an actual MongoDB instance will be + run. It includes databases which are pre-filled with random data having similar characteristics to the real data. + Actual calls will be made to the running MongoDB instance via the client. +""" + +import errno +import time + +import pytest +from pydantic import AnyUrl +from pymongo.errors import InvalidOperation + +from trolldb.database.mongodb import DatabaseConfig, MongoDB, mongodb_context +from trolldb.test_utils.common import test_app_config + + +async def test_connection_timeout_negative(): + """Expect to see the connection attempt times out since the MongoDB URL is invalid.""" + timeout = 3 + t1 = time.time() + with pytest.raises(SystemExit) as pytest_wrapped_e: + async with mongodb_context( + DatabaseConfig(url=AnyUrl("mongodb://invalid_url_that_does_not_exist:8000"), + timeout=timeout, main_database_name=" ", main_collection_name=" ")): + pass + t2 = time.time() + assert pytest_wrapped_e.value.code == errno.EIO + assert t2 - t1 >= timeout + + +@pytest.mark.usefixtures("_run_mongodb_server_instance") +async def test_main_database_negative(): + """Expect to fail when giving an invalid name for the main database, given a valid collection name.""" + with pytest.raises(SystemExit) as pytest_wrapped_e: + async with mongodb_context(DatabaseConfig( + timeout=1, + url=test_app_config.database.url, + 
main_database_name=" ", + main_collection_name=test_app_config.database.main_collection_name)): + pass + assert pytest_wrapped_e.value.code == errno.ENODATA + + +@pytest.mark.usefixtures("_run_mongodb_server_instance") +async def test_main_collection_negative(): + """Expect to fail when giving an invalid name for the main collection, given a valid database name.""" + with pytest.raises(SystemExit) as pytest_wrapped_e: + async with mongodb_context(DatabaseConfig( + timeout=1, + url=test_app_config.database.url, + main_database_name=test_app_config.database.main_database_name, + main_collection_name=" ")): + pass + assert pytest_wrapped_e.value.code == errno.ENODATA + + +async def test_get_client(mongodb_fixture): + """This is our way of testing that MongoDB.client() returns the valid client object. + + Expect: + - The `close` method can be called on the client and leads to the closure of the client + - Further attempts to access the database after closing the client fails. + """ + MongoDB.close() + with pytest.raises(InvalidOperation): + await MongoDB.list_database_names() + + +async def test_main_collection(mongodb_fixture): + """Tests the properties of the main collection. + + Expect: + - The retrieved main collection is not `None` + - It has the correct name + - It is the same object that can be accessed via the `client` object of the MongoDB. 
+ """ + assert MongoDB.main_collection() is not None + assert MongoDB.main_collection().name == test_app_config.database.main_collection_name + assert MongoDB.main_collection() == \ + (await MongoDB.get_database(test_app_config.database.main_database_name))[ + test_app_config.database.main_collection_name] + + +async def test_main_database(mongodb_fixture): + """Same as test_main_collection but for the main database.""" + assert MongoDB.main_database() is not None + assert MongoDB.main_database().name == test_app_config.database.main_database_name + assert MongoDB.main_database() == await MongoDB.get_database(test_app_config.database.main_database_name) diff --git a/trolldb/tests/tests_database/test_pipelines.py b/trolldb/tests/tests_database/test_pipelines.py new file mode 100644 index 0000000..b993aff --- /dev/null +++ b/trolldb/tests/tests_database/test_pipelines.py @@ -0,0 +1,52 @@ +"""Tests for the pipelines and applying comparison operations on them.""" +from trolldb.database.piplines import PipelineAttribute, PipelineBooleanDict, Pipelines +from trolldb.test_utils.common import compare_by_operator_name + + +def test_pipeline_boolean_dict(): + """Checks the pipeline boolean dict for bitwise `and/or` operators.""" + pd1 = PipelineBooleanDict({"number": 2}) + pd2 = PipelineBooleanDict({"kind": 1}) + + pd_and = pd1 & pd2 + pd_and_literal = PipelineBooleanDict({"$and": [{"number": 2}, {"kind": 1}]}) + assert pd_and == pd_and_literal + + pd_or = pd1 | pd2 + pd_or_literal = PipelineBooleanDict({"$or": [{"number": 2}, {"kind": 1}]}) + assert pd_or == pd_or_literal + + +def test_pipeline_attribute(): + """Tests different comparison operators for a pipeline attribute in a list and as a single item.""" + for op in ["$eq", "$gte", "$gt", "$lte", "$lt"]: + assert ( + compare_by_operator_name(op, PipelineAttribute("letter"), "A") == + PipelineBooleanDict({"letter": {op: "A"}} if op != "$eq" else {"letter": "A"}) + ) + assert ( + compare_by_operator_name(op, 
PipelineAttribute("letter"), ["A", "B"]) == + PipelineBooleanDict({"$or": [ + {"letter": {op: "A"} if op != "$eq" else "A"}, + {"letter": {op: "B"} if op != "$eq" else "B"} + ]}) + ) + + +def test_pipelines(): + """Tests the elements of Pipelines.""" + pipelines = Pipelines() + pipelines += PipelineAttribute("platform_name") == "P" + pipelines += PipelineAttribute("sensor") == ["SA", "SB"] + + pipelines_literal = [ + {"$match": + {"platform_name": "P"} + }, + {"$match": + {"$or": [{"sensor": "SA"}, {"sensor": "SB"}]} + } + ] + + for p1, p2 in zip(pipelines, pipelines_literal, strict=False): + assert p1 == p2 diff --git a/trolldb/version.py b/trolldb/version.py deleted file mode 100644 index 6ca7299..0000000 --- a/trolldb/version.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Copyright (c) 2012, 2014, 2015 Martin Raspaud - -# Author(s): - -# Martin Raspaud - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -"""Version file. -""" - -__version__ = "0.2.0"