From 5e1b45b8f6fbbf1a37448e7c2fef83597ce878e8 Mon Sep 17 00:00:00 2001 From: Dan Birman Date: Tue, 5 Nov 2024 13:33:23 -0800 Subject: [PATCH 1/2] feat: convenience function to return qualitycontrol object (#95) * feat: initial push of function and partially functional test * feat: adding a get_qc convenience function * chore: increase line length * tests: full coverage for helpers * chore: lint * chore: remove helpers, move data-schema to dependencies * feat: add utility functions * feat: cleaning up helpers, refactor to use utility functions * refactor: moving utils files to resolve circular dependencies * tests: full coverage for helpers.py * fix: revert get_record_from_docdb * chore: lint * chore: revert flake8 line length * refactor: re-organize to revert utils changes * chore: fully revert utils * refactor: re-organize * fix: add check for missing qc * chore: lint * chore: LINT line length... * refactor: move shared resources * refactor: rename util->helpers * fix: cover null qc field * tests: coverage for null * refactor: separate name/id functions * chore: lint, rename record_id * chore: remove copied version * chore: replace name * fix: record_id -> _id in docdb.py * fix: remove unused param * tests: record_id -> _id * refactor/tests/docs: various fixes Improved naming conventions Fixed some incorrect docstrings Added missing test * chore: lint * refactor: use pathlib in tests * chore: lint * refactor: rename test files, use relative paths * refactor: use logging module --- .flake8 | 2 +- pyproject.toml | 5 +- src/aind_data_access_api/helpers/__init__.py | 1 + .../helpers/data_schema.py | 76 ++++++++ src/aind_data_access_api/helpers/docdb.py | 114 ++++++++++++ tests/resources/helpers/quality_control.json | 176 ++++++++++++++++++ .../helpers/quality_control_invalid.json | 5 + tests/test_helpers_data_schema.py | 138 ++++++++++++++ tests/test_helpers_docdb.py | 55 ++++++ 9 files changed, 569 insertions(+), 3 deletions(-) create mode 100644 
src/aind_data_access_api/helpers/__init__.py create mode 100644 src/aind_data_access_api/helpers/data_schema.py create mode 100644 src/aind_data_access_api/helpers/docdb.py create mode 100644 tests/resources/helpers/quality_control.json create mode 100644 tests/resources/helpers/quality_control_invalid.json create mode 100644 tests/test_helpers_data_schema.py create mode 100644 tests/test_helpers_docdb.py diff --git a/.flake8 b/.flake8 index 6d5ce4f..a9194bd 100644 --- a/.flake8 +++ b/.flake8 @@ -3,4 +3,4 @@ exclude = .git, __pycache__, build -max-complexity = 10 +max-complexity = 10 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index e56acf6..108f2fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,8 @@ dependencies = [ "requests", "aind-codeocean-api>=0.4.0", "pydantic>=2.0", - "pydantic-settings>=2.0" + "pydantic-settings>=2.0", + "aind-data-schema", ] [project.optional-dependencies] @@ -32,7 +33,7 @@ dev = [ "isort", "Sphinx", "furo", - "aind-data-access-api[full]" + "aind-data-access-api[full]", ] secrets = [ "boto3", diff --git a/src/aind_data_access_api/helpers/__init__.py b/src/aind_data_access_api/helpers/__init__.py new file mode 100644 index 0000000..1ed314d --- /dev/null +++ b/src/aind_data_access_api/helpers/__init__.py @@ -0,0 +1 @@ +"""Init module""" diff --git a/src/aind_data_access_api/helpers/data_schema.py b/src/aind_data_access_api/helpers/data_schema.py new file mode 100644 index 0000000..875178c --- /dev/null +++ b/src/aind_data_access_api/helpers/data_schema.py @@ -0,0 +1,76 @@ +"""Module for convenience functions for the data access API.""" + +from aind_data_access_api.document_db import MetadataDbClient +from aind_data_access_api.helpers.docdb import ( + get_field_by_id, + get_id_from_name, +) +from aind_data_schema.core.quality_control import QualityControl +import json + + +def get_quality_control_by_id( + client: MetadataDbClient, + _id: str, + allow_invalid: bool = False, +): + """Using a 
connected DocumentDB client, retrieve the QualityControl object + for a given record. + + Parameters + ---------- + client : MetadataDbClient + A connected DocumentDB client. + _id : str + _id field of the record in DocDB + allow_invalid : bool, optional + return invalid QualityControl as dict if True, by default False + """ + record = get_field_by_id(client, _id=_id, field="quality_control") + if not record: + raise ValueError(f"No record found with id {_id}") + + if "quality_control" not in record or not record["quality_control"]: + raise ValueError( + f"No quality_control field found in record with id {_id}" + ) + + return validate_qc(record["quality_control"], allow_invalid=allow_invalid) + + +def get_quality_control_by_name( + client: MetadataDbClient, + name: str, + allow_invalid: bool = False, +): + """Using a connected DocumentDB client, retrieve the QualityControl object + for a given record. + + Parameters + ---------- + client : MetadataDbClient + A connected DocumentDB client. 
+ name : str + name field of the record in DocDB + allow_invalid : bool, optional + return invalid QualityControl as dict if True, by default False + """ + _id = get_id_from_name(client, name=name) + if not _id: + raise ValueError(f"No record found with name {name}") + + return get_quality_control_by_id( + client, _id=_id, allow_invalid=allow_invalid + ) + + +def validate_qc(qc_data: dict, allow_invalid: bool = False): + """Validate a qc dict; return it as-is on failure if allow_invalid.""" + + try: + return QualityControl.model_validate_json(json.dumps(qc_data)) + except Exception as e: + if allow_invalid: + return qc_data + else: + raise e diff --git a/src/aind_data_access_api/helpers/docdb.py b/src/aind_data_access_api/helpers/docdb.py new file mode 100644 index 0000000..29a6e41 --- /dev/null +++ b/src/aind_data_access_api/helpers/docdb.py @@ -0,0 +1,114 @@ +"""Utilities that go through the MetadataDbClient.""" + +from typing import Optional +from aind_data_access_api.document_db import MetadataDbClient +import logging + + +def get_record_by_id( + client: MetadataDbClient, + _id: str, +) -> Optional[dict]: + """Download a record from docdb using the record _id. + + Parameters + ---------- + client : MetadataDbClient + _id : str + + Returns + ------- + Optional[dict] + None if record does not exist. Otherwise, the record as a dict. + """ + records = client.retrieve_docdb_records(filter_query={"_id": _id}, limit=1) + if len(records) > 0: + return records[0] + else: + return None + + +def get_projection_by_id( + client: MetadataDbClient, + _id: str, + projection: dict, +) -> Optional[dict]: + """ + Download a record from docdb using the record _id and a projection. + + Projections return fields set to 1 {"field": 1} + + Parameters + ---------- + client : MetadataDbClient + _id : str + projection : dict + + Returns + ------- + Optional[dict] + None if record does not exist. Otherwise, it will return the projected + record as a dict. 
+ """ + records = client.retrieve_docdb_records( + filter_query={"_id": _id}, projection=projection, limit=1 + ) + if len(records) > 0: + return records[0] + else: + return None + + +def get_field_by_id( + client: MetadataDbClient, + _id: str, + field: str, +) -> Optional[dict]: + """Download a single field from docdb using the record _id + + Parameters + ---------- + client : MetadataDbClient + _id : str + field : str + + Returns + ------- + Optional[dict] + None if a record does not exist. Otherwise returns the field in a dict. + """ + return get_projection_by_id(client, _id=_id, projection={field: 1}) + + +def get_id_from_name( + client: MetadataDbClient, + name: str, +) -> Optional[str]: + """ + Get the _id of a record in DocDb from its name field. + + Parameters + ---------- + client : MetadataDbClient + name : str + + Returns + ------- + Optional[str] + None if record does not exist. Otherwise, it will return the _id of + the first matching record (a warning is logged if several match). + """ + records = client.retrieve_docdb_records( + filter_query={"name": name}, projection={"_id": 1}, limit=0 + ) + + if len(records) > 1: + logging.warning( + f"Multiple records share the name {name}, " + "only the first record will be returned.", + ) + + if len(records) > 0: + return records[0]["_id"] + else: + return None diff --git a/tests/resources/helpers/quality_control.json b/tests/resources/helpers/quality_control.json new file mode 100644 index 0000000..83bf6aa --- /dev/null +++ b/tests/resources/helpers/quality_control.json @@ -0,0 +1,176 @@ +{ + "describedBy": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/quality_control.py", + "schema_version": "1.1.1", + "evaluations": [ + { + "modality": { + "name": "Extracellular electrophysiology", + "abbreviation": "ecephys" + }, + "stage": "Raw data", + "name": "Drift map", + "description": "Qualitative check that drift map shows minimal movement", + "metrics": [ + { + "name": "Probe A drift", + "value": { + "value": "", 
+ "options": [ + "Low", + "Medium", + "High" + ], + "status": [ + "Pass", + "Fail", + "Fail" + ], + "type": "dropdown" + }, + "description": null, + "reference": "ecephys-drift-map", + "status_history": [ + { + "evaluator": "", + "status": "Pending", + "timestamp": "2022-11-22T00:00:00Z" + } + ] + }, + { + "name": "Probe B drift", + "value": { + "value": "", + "options": [ + "Drift visible in entire session", + "Drift visible in part of session", + "Sudden movement event" + ], + "status": [ + "Fail", + "Pass", + "Fail" + ], + "type": "checkbox" + }, + "description": null, + "reference": "ecephys-drift-map", + "status_history": [ + { + "evaluator": "", + "status": "Pending", + "timestamp": "2022-11-22T00:00:00Z" + } + ] + }, + { + "name": "Probe C drift", + "value": "Low", + "description": null, + "reference": "ecephys-drift-map", + "status_history": [ + { + "evaluator": "Automated", + "status": "Pass", + "timestamp": "2022-11-22T00:00:00Z" + } + ] + } + ], + "notes": "", + "allow_failed_metrics": false + }, + { + "modality": { + "name": "Behavior videos", + "abbreviation": "behavior-videos" + }, + "stage": "Raw data", + "name": "Video frame count check", + "description": null, + "metrics": [ + { + "name": "video_1_num_frames", + "value": 662, + "description": null, + "reference": null, + "status_history": [ + { + "evaluator": "Automated", + "status": "Pass", + "timestamp": "2022-11-22T00:00:00Z" + } + ] + }, + { + "name": "video_2_num_frames", + "value": 662, + "description": null, + "reference": null, + "status_history": [ + { + "evaluator": "Automated", + "status": "Pass", + "timestamp": "2022-11-22T00:00:00Z" + } + ] + } + ], + "notes": "Pass when video_1_num_frames==video_2_num_frames", + "allow_failed_metrics": false + }, + { + "modality": { + "name": "Extracellular electrophysiology", + "abbreviation": "ecephys" + }, + "stage": "Raw data", + "name": "Probes present", + "description": null, + "metrics": [ + { + "name": "ProbeA_success", + "value": true, + 
"description": null, + "reference": null, + "status_history": [ + { + "evaluator": "Automated", + "status": "Pass", + "timestamp": "2022-11-22T00:00:00Z" + } + ] + }, + { + "name": "ProbeB_success", + "value": true, + "description": null, + "reference": null, + "status_history": [ + { + "evaluator": "Automated", + "status": "Pass", + "timestamp": "2022-11-22T00:00:00Z" + } + ] + }, + { + "name": "ProbeC_success", + "value": true, + "description": null, + "reference": null, + "status_history": [ + { + "evaluator": "Automated", + "status": "Pass", + "timestamp": "2022-11-22T00:00:00Z" + } + ] + } + ], + "notes": null, + "allow_failed_metrics": false + } + ], + "notes": null + } \ No newline at end of file diff --git a/tests/resources/helpers/quality_control_invalid.json b/tests/resources/helpers/quality_control_invalid.json new file mode 100644 index 0000000..54dfb52 --- /dev/null +++ b/tests/resources/helpers/quality_control_invalid.json @@ -0,0 +1,5 @@ +{ + "describedBy": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/quality_control.py", + "schema_version": "1.1.1", + "notes": null + } \ No newline at end of file diff --git a/tests/test_helpers_data_schema.py b/tests/test_helpers_data_schema.py new file mode 100644 index 0000000..6d29f3f --- /dev/null +++ b/tests/test_helpers_data_schema.py @@ -0,0 +1,138 @@ +"""Test util.data_schema module.""" + +from pathlib import Path +import unittest +import json +from unittest.mock import MagicMock +from aind_data_access_api.helpers.data_schema import ( + get_quality_control_by_id, + get_quality_control_by_name, +) +from aind_data_schema.core.quality_control import QualityControl +import os + +TEST_DIR = Path(os.path.dirname(os.path.realpath(__file__))) +TEST_HELPERS_DIR = TEST_DIR / "resources" / "helpers" + + +class TestUtilDataSchema(unittest.TestCase): + """Test methods in data schema.""" + + @classmethod + def setUpClass(cls) -> None: + """Set up the class by 
extracting contents from example files.""" + + valid_path = TEST_HELPERS_DIR / "quality_control.json" + with valid_path.open("r") as f: + cls.example_quality_control = json.load(f) + + invalid_path = TEST_HELPERS_DIR / "quality_control_invalid.json" + with invalid_path.open("r") as f: + cls.example_quality_control_invalid = json.load(f) + + def test_get_qc_id(self): + """Test get_quality_control_by_id function.""" + # Mock a client returning a record with valid qc + client = MagicMock() + client.retrieve_docdb_records.return_value = [ + {"_id": "abcd", "quality_control": self.example_quality_control} + ] + + qc = get_quality_control_by_id(client, _id="123") + + self.assertEqual( + qc, + QualityControl.model_validate_json( + json.dumps(self.example_quality_control) + ), + ) + + def test_get_qc_name(self): + """Test get_quality_control_by_name function.""" + # Mock a client returning a record with valid qc + client = MagicMock() + client.retrieve_docdb_records.return_value = [ + {"_id": "abcd", "quality_control": self.example_quality_control} + ] + + qc = get_quality_control_by_name(client, name="123") + + self.assertEqual( + qc, + QualityControl.model_validate_json( + json.dumps(self.example_quality_control) + ), + ) + + def test_get_qc_no_record(self): + """Test that a value error is raised when no record exists.""" + # Mock a client returning no records + client = MagicMock() + client.retrieve_docdb_records.return_value = [] + + self.assertRaises( + ValueError, get_quality_control_by_id, client, _id="123" + ) + + def test_get_qc_invalid(self): + """Test that a value error is raised when qc is invalid.""" + # Mock a client returning a record with invalid qc + + client = MagicMock() + client.retrieve_docdb_records.return_value = [ + { + "_id": "abcd", + "quality_control": self.example_quality_control_invalid, + } + ] + + self.assertRaises( + ValueError, get_quality_control_by_id, client, _id="123" + ) + + def test_get_qc_invalid_allowed(self): + """Test that a dict is returned when we allow invalid.""" + # Mock a client returning a record with invalid qc + 
client = MagicMock() + client.retrieve_docdb_records.return_value = [ + { + "_id": "abcd", + "quality_control": self.example_quality_control_invalid, + } + ] + + qc = get_quality_control_by_id(client, _id="123", allow_invalid=True) + + self.assertEqual(qc, self.example_quality_control_invalid) + + def test_get_qc_no_name(self): + """Test that a value error is raised when no record exists.""" + # Mock a client returning no records + client = MagicMock() + client.retrieve_docdb_records.return_value = [] + + self.assertRaises( + ValueError, get_quality_control_by_name, client, name="123" + ) + + def test_get_qc_no_qc(self): + """Test that a value error is raised when no qc exists.""" + # Mock a record missing the quality_control field + client = MagicMock() + client.retrieve_docdb_records.return_value = [{"_id": "abcd"}] + + self.assertRaises( + ValueError, get_quality_control_by_id, client, _id="123" + ) + + client.retrieve_docdb_records.return_value = [ + {"_id": "abcd", "quality_control": None} + ] + + self.assertRaises( + ValueError, get_quality_control_by_id, client, _id="123" + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_helpers_docdb.py b/tests/test_helpers_docdb.py new file mode 100644 index 0000000..9d78209 --- /dev/null +++ b/tests/test_helpers_docdb.py @@ -0,0 +1,55 @@ +"""Tests methods in helpers.docdb module""" + +import unittest +from unittest.mock import MagicMock + +from aind_data_access_api.helpers.docdb import ( + get_record_by_id, + get_id_from_name, + get_projection_by_id, + get_field_by_id, +) + + +class TestUtilDocDB(unittest.TestCase): + """Class to test methods in helpers.docdb module.""" + + def test_get_id_from_name(self): + """Tests get_id_from_name""" + client = MagicMock() + client.retrieve_docdb_records.return_value = [ + {"_id": "abcd", "name": "123"} + ] + self.assertEqual("abcd", get_id_from_name(client, name="123")) + + def test_get_record_from_docdb(self): + """Tests get_record_by_id""" + client = MagicMock() + 
client.retrieve_docdb_records.return_value = [{"_id": "abcd"}] + record = get_record_by_id(client, _id="abcd") + self.assertEqual({"_id": "abcd"}, record) + + # test the empty case + client.retrieve_docdb_records.return_value = [] + record = get_record_by_id(client, _id="abcd") + self.assertIsNone(record) + + def test_get_projected_record_from_docdb(self): + """Tests get_projection_by_id""" + client = MagicMock() + client.retrieve_docdb_records.return_value = [ + {"quality_control": {"a": 1}} + ] + record = get_projection_by_id( + client, _id="abcd", projection={"quality_control": 1} + ) + self.assertEqual({"quality_control": {"a": 1}}, record) + + def test_get_field_from_docdb(self): + """Tests get_field_by_id""" + client = MagicMock() + client.retrieve_docdb_records.return_value = [ + {"quality_control": {"a": 1}} + ] + field = get_field_by_id(client, _id="abcd", field="quality_control") + self.assertEqual({"quality_control": {"a": 1}}, field) From ea5474d4f7dd412cb10b2192b84193121ff3cf73 Mon Sep 17 00:00:00 2001 From: Helen Lin Date: Mon, 18 Nov 2024 15:35:46 -0800 Subject: [PATCH 2/2] build: bump to version 0.16.0 --- src/aind_data_access_api/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aind_data_access_api/__init__.py b/src/aind_data_access_api/__init__.py index b335a61..89fa2b1 100644 --- a/src/aind_data_access_api/__init__.py +++ b/src/aind_data_access_api/__init__.py @@ -1,3 +1,3 @@ """Init package""" -__version__ = "0.15.0" +__version__ = "0.16.0"