From 5e1b45b8f6fbbf1a37448e7c2fef83597ce878e8 Mon Sep 17 00:00:00 2001
From: Dan Birman <danbirman@gmail.com>
Date: Tue, 5 Nov 2024 13:33:23 -0800
Subject: [PATCH 1/2] feat: convenience function to return qualitycontrol
 object (#95)

* feat: initial push of function and partially functional test

* feat: adding a get_qc convenience function

* chore: increase line length

* tests: full coverage for helpers

* chore: lint

* chore: remove helpers, move data-schema to dependencies

* feat: add utility functions

* feat: cleaning up helpers, refactor to use utility functions

* refactor: moving utils files to resolve circular dependencies

* tests: full coverage for helpers.py

* fix: revert get_record_from_docdb

* chore: lint

* chore: revert flake8 line length

* refactor: re-organize to revert utils changes

* chore: fully revert utils

* refactor: re-organize

* fix: add check for missing qc

* chore: lint

* chore: LINT

line length...

* refactor: move shared resources

* refactor: rename util->helpers

* fix: cover null qc field

* tests: coverage for null

* refactor: separate name/id functions

* chore: lint, rename record_id

* chore: remove copied version

* chore: replace name

* fix: record_id -> _id in docdb.py

* fix: remove unused param

* tests: record_id -> _id

* refactor/tests/docs: various fixes

Improved naming conventions
Fixed some incorrect docstrings
Added missing test

* chore: lint

* refactor: use pathlib in tests

* chore: lint

* refactor: rename test files, use relative paths

* refactor: use logging module
---
 .flake8                                       |   2 +-
 pyproject.toml                                |   5 +-
 src/aind_data_access_api/helpers/__init__.py  |   1 +
 .../helpers/data_schema.py                    |  76 ++++++++
 src/aind_data_access_api/helpers/docdb.py     | 114 ++++++++++++
 tests/resources/helpers/quality_control.json  | 176 ++++++++++++++++++
 .../helpers/quality_control_invalid.json      |   5 +
 tests/test_helpers_data_schema.py             | 138 ++++++++++++++
 tests/test_helpers_docdb.py                   |  55 ++++++
 9 files changed, 569 insertions(+), 3 deletions(-)
 create mode 100644 src/aind_data_access_api/helpers/__init__.py
 create mode 100644 src/aind_data_access_api/helpers/data_schema.py
 create mode 100644 src/aind_data_access_api/helpers/docdb.py
 create mode 100644 tests/resources/helpers/quality_control.json
 create mode 100644 tests/resources/helpers/quality_control_invalid.json
 create mode 100644 tests/test_helpers_data_schema.py
 create mode 100644 tests/test_helpers_docdb.py

diff --git a/.flake8 b/.flake8
index 6d5ce4f..a9194bd 100644
--- a/.flake8
+++ b/.flake8
@@ -3,4 +3,4 @@ exclude =
     .git,
     __pycache__,
     build
-max-complexity = 10
+max-complexity = 10
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index e56acf6..108f2fb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,8 @@ dependencies = [
     "requests",
     "aind-codeocean-api>=0.4.0",
     "pydantic>=2.0",
-    "pydantic-settings>=2.0"
+    "pydantic-settings>=2.0",
+    "aind-data-schema",
 ]
 
 [project.optional-dependencies]
@@ -32,7 +33,7 @@ dev = [
     "isort",
     "Sphinx",
     "furo",
-    "aind-data-access-api[full]"
+    "aind-data-access-api[full]",
 ]
 secrets = [
     "boto3",
diff --git a/src/aind_data_access_api/helpers/__init__.py b/src/aind_data_access_api/helpers/__init__.py
new file mode 100644
index 0000000..1ed314d
--- /dev/null
+++ b/src/aind_data_access_api/helpers/__init__.py
@@ -0,0 +1 @@
+"""Init module"""
diff --git a/src/aind_data_access_api/helpers/data_schema.py b/src/aind_data_access_api/helpers/data_schema.py
new file mode 100644
index 0000000..875178c
--- /dev/null
+++ b/src/aind_data_access_api/helpers/data_schema.py
@@ -0,0 +1,76 @@
+"""Module for convenience functions for the data access API."""
+
+from aind_data_access_api.document_db import MetadataDbClient
+from aind_data_access_api.helpers.docdb import (
+    get_field_by_id,
+    get_id_from_name,
+)
+from aind_data_schema.core.quality_control import QualityControl
+import json
+
+
+def get_quality_control_by_id(
+    client: MetadataDbClient,
+    _id: str,
+    allow_invalid: bool = False,
+):
+    """Return the QualityControl object of the record with the given _id.
+    Raises ValueError if the record or its quality_control is missing.
+
+    Parameters
+    ----------
+    client : MetadataDbClient
+        A connected DocumentDB client.
+    _id : str
+        _id field of the record in DocDB.
+    allow_invalid : bool, optional
+        return invalid QualityControl as dict if True, by default False
+    """
+    projected = get_field_by_id(client, _id=_id, field="quality_control")
+    if not projected:
+        raise ValueError(f"No record found with id {_id}")
+
+    qc_data = projected.get("quality_control")
+    if not qc_data:
+        raise ValueError(
+            f"No quality_control field found in record with id {_id}"
+        )
+    return validate_qc(qc_data, allow_invalid=allow_invalid)
+
+
+def get_quality_control_by_name(
+    client: MetadataDbClient,
+    name: str,
+    allow_invalid: bool = False,
+):
+    """Return the QualityControl object of the record with the given name.
+    Raises ValueError if no record with that name is found.
+
+    Parameters
+    ----------
+    client : MetadataDbClient
+        A connected DocumentDB client.
+    name : str
+        name field of the record in DocDB.
+    allow_invalid : bool, optional
+        return invalid QualityControl as dict if True, by default False
+    """
+    record_id = get_id_from_name(client, name=name)
+    if record_id:
+        return get_quality_control_by_id(
+            client, _id=record_id, allow_invalid=allow_invalid
+        )
+
+    raise ValueError(f"No record found with name {name}")
+
+
+def validate_qc(qc_data: dict, allow_invalid: bool = False):
+    """Return qc_data as a QualityControl; if validation raises, return the
+    raw dict when allow_invalid is True, otherwise re-raise the error."""
+
+    try:
+        return QualityControl.model_validate_json(json.dumps(qc_data))
+    except Exception:
+        if not allow_invalid:
+            raise
+        return qc_data
diff --git a/src/aind_data_access_api/helpers/docdb.py b/src/aind_data_access_api/helpers/docdb.py
new file mode 100644
index 0000000..29a6e41
--- /dev/null
+++ b/src/aind_data_access_api/helpers/docdb.py
@@ -0,0 +1,114 @@
+"""Utilities that go through the MetadataDbClient"""
+
+from typing import Optional
+from aind_data_access_api.document_db import MetadataDbClient
+import logging
+
+
+def get_record_by_id(
+    client: MetadataDbClient,
+    _id: str,
+) -> Optional[dict]:
+    """Fetch a single record from docdb, looked up by its _id.
+
+    Parameters
+    ----------
+    client : MetadataDbClient
+    _id : str
+
+    Returns
+    -------
+    Optional[dict]
+        None if record does not exist. Otherwise, the record as a dict.
+    """
+    matches = client.retrieve_docdb_records(filter_query={"_id": _id}, limit=1)
+    if len(matches) == 0:
+        return None
+
+    return matches[0]
+
+
+def get_projection_by_id(
+    client: MetadataDbClient,
+    _id: str,
+    projection: dict,
+) -> Optional[dict]:
+    """
+    Fetch the record with the given _id from docdb, applying a projection.
+
+    Fields set to 1 in the projection are returned, e.g. {"field": 1}.
+
+    Parameters
+    ----------
+    client : MetadataDbClient
+    _id : str
+    projection : dict
+
+    Returns
+    -------
+    Optional[dict]
+        None if record does not exist. Otherwise, the first matching record
+        with the projection applied, as a dict.
+    """
+    matches = client.retrieve_docdb_records(
+        filter_query={"_id": _id}, projection=projection, limit=1
+    )
+    if len(matches) == 0:
+        return None
+
+    return matches[0]
+
+
+def get_field_by_id(
+    client: MetadataDbClient,
+    _id: str,
+    field: str,
+) -> Optional[dict]:
+    """Download a single field from docdb using the record _id.
+
+    Parameters
+    ----------
+    client : MetadataDbClient
+    _id : str
+    field : str
+
+    Returns
+    -------
+    Optional[dict]
+        None if record does not exist. Otherwise, a dict projected to `field`.
+    """
+    return get_projection_by_id(client, _id=_id, projection={field: 1})
+
+
+def get_id_from_name(
+    client: MetadataDbClient,
+    name: str,
+) -> Optional[str]:
+    """
+    Get the _id of a record in DocDb from its name field.
+
+    Parameters
+    ----------
+    client : MetadataDbClient
+    name : str
+
+    Returns
+    -------
+    Optional[str]
+        None if record does not exist. Otherwise, it will return the _id of
+        the first matching record (a warning is logged if several match).
+    """
+    records = client.retrieve_docdb_records(
+        filter_query={"name": name}, projection={"_id": 1}, limit=0
+    )
+    # limit=0 is presumably "no limit", which is what exposes duplicates.
+    if len(records) > 1:
+        logging.warning(
+            f"Multiple records share the name {name}, "
+            "only the first record will be returned."
+        )
+
+    if len(records) > 0:
+        return records[0]["_id"]
+    else:
+        return None
diff --git a/tests/resources/helpers/quality_control.json b/tests/resources/helpers/quality_control.json
new file mode 100644
index 0000000..83bf6aa
--- /dev/null
+++ b/tests/resources/helpers/quality_control.json
@@ -0,0 +1,176 @@
+{
+    "describedBy": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/quality_control.py",
+    "schema_version": "1.1.1",
+    "evaluations": [
+       {
+          "modality": {
+             "name": "Extracellular electrophysiology",
+             "abbreviation": "ecephys"
+          },
+          "stage": "Raw data",
+          "name": "Drift map",
+          "description": "Qualitative check that drift map shows minimal movement",
+          "metrics": [
+             {
+                "name": "Probe A drift",
+                "value": {
+                   "value": "",
+                   "options": [
+                      "Low",
+                      "Medium",
+                      "High"
+                   ],
+                   "status": [
+                      "Pass",
+                      "Fail",
+                      "Fail"
+                   ],
+                   "type": "dropdown"
+                },
+                "description": null,
+                "reference": "ecephys-drift-map",
+                "status_history": [
+                   {
+                      "evaluator": "",
+                      "status": "Pending",
+                      "timestamp": "2022-11-22T00:00:00Z"
+                   }
+                ]
+             },
+             {
+                "name": "Probe B drift",
+                "value": {
+                   "value": "",
+                   "options": [
+                      "Drift visible in entire session",
+                      "Drift visible in part of session",
+                      "Sudden movement event"
+                   ],
+                   "status": [
+                      "Fail",
+                      "Pass",
+                      "Fail"
+                   ],
+                   "type": "checkbox"
+                },
+                "description": null,
+                "reference": "ecephys-drift-map",
+                "status_history": [
+                   {
+                      "evaluator": "",
+                      "status": "Pending",
+                      "timestamp": "2022-11-22T00:00:00Z"
+                   }
+                ]
+             },
+             {
+                "name": "Probe C drift",
+                "value": "Low",
+                "description": null,
+                "reference": "ecephys-drift-map",
+                "status_history": [
+                   {
+                      "evaluator": "Automated",
+                      "status": "Pass",
+                      "timestamp": "2022-11-22T00:00:00Z"
+                   }
+                ]
+             }
+          ],
+          "notes": "",
+          "allow_failed_metrics": false
+       },
+       {
+          "modality": {
+             "name": "Behavior videos",
+             "abbreviation": "behavior-videos"
+          },
+          "stage": "Raw data",
+          "name": "Video frame count check",
+          "description": null,
+          "metrics": [
+             {
+                "name": "video_1_num_frames",
+                "value": 662,
+                "description": null,
+                "reference": null,
+                "status_history": [
+                   {
+                      "evaluator": "Automated",
+                      "status": "Pass",
+                      "timestamp": "2022-11-22T00:00:00Z"
+                   }
+                ]
+             },
+             {
+                "name": "video_2_num_frames",
+                "value": 662,
+                "description": null,
+                "reference": null,
+                "status_history": [
+                   {
+                      "evaluator": "Automated",
+                      "status": "Pass",
+                      "timestamp": "2022-11-22T00:00:00Z"
+                   }
+                ]
+             }
+          ],
+          "notes": "Pass when video_1_num_frames==video_2_num_frames",
+          "allow_failed_metrics": false
+       },
+       {
+          "modality": {
+             "name": "Extracellular electrophysiology",
+             "abbreviation": "ecephys"
+          },
+          "stage": "Raw data",
+          "name": "Probes present",
+          "description": null,
+          "metrics": [
+             {
+                "name": "ProbeA_success",
+                "value": true,
+                "description": null,
+                "reference": null,
+                "status_history": [
+                   {
+                      "evaluator": "Automated",
+                      "status": "Pass",
+                      "timestamp": "2022-11-22T00:00:00Z"
+                   }
+                ]
+             },
+             {
+                "name": "ProbeB_success",
+                "value": true,
+                "description": null,
+                "reference": null,
+                "status_history": [
+                   {
+                      "evaluator": "Automated",
+                      "status": "Pass",
+                      "timestamp": "2022-11-22T00:00:00Z"
+                   }
+                ]
+             },
+             {
+                "name": "ProbeC_success",
+                "value": true,
+                "description": null,
+                "reference": null,
+                "status_history": [
+                   {
+                      "evaluator": "Automated",
+                      "status": "Pass",
+                      "timestamp": "2022-11-22T00:00:00Z"
+                   }
+                ]
+             }
+          ],
+          "notes": null,
+          "allow_failed_metrics": false
+       }
+    ],
+    "notes": null
+ }
\ No newline at end of file
diff --git a/tests/resources/helpers/quality_control_invalid.json b/tests/resources/helpers/quality_control_invalid.json
new file mode 100644
index 0000000..54dfb52
--- /dev/null
+++ b/tests/resources/helpers/quality_control_invalid.json
@@ -0,0 +1,5 @@
+{
+    "describedBy": "https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/quality_control.py",
+    "schema_version": "1.1.1",
+    "notes": null
+ }
\ No newline at end of file
diff --git a/tests/test_helpers_data_schema.py b/tests/test_helpers_data_schema.py
new file mode 100644
index 0000000..6d29f3f
--- /dev/null
+++ b/tests/test_helpers_data_schema.py
@@ -0,0 +1,138 @@
+"""Test helpers.data_schema module."""
+
+from pathlib import Path
+import unittest
+import json
+from unittest.mock import MagicMock
+from aind_data_access_api.helpers.data_schema import (
+    get_quality_control_by_id,
+    get_quality_control_by_name,
+)
+from aind_data_schema.core.quality_control import QualityControl
+import os
+
+TEST_DIR = Path(os.path.dirname(os.path.realpath(__file__)))
+TEST_HELPERS_DIR = TEST_DIR / "resources" / "helpers"
+
+
+class TestUtilDataSchema(unittest.TestCase):
+    """Test methods in helpers.data_schema."""
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        """Load the valid and invalid example qc dicts from the json files."""
+
+        valid_path = TEST_HELPERS_DIR / "quality_control.json"
+        with valid_path.open("r") as f:
+            cls.example_quality_control = json.load(f)
+
+        invalid_path = TEST_HELPERS_DIR / "quality_control_invalid.json"
+        with invalid_path.open("r") as f:
+            cls.example_quality_control_invalid = json.load(f)
+
+    def test_get_qc_id(self):
+        """Test get_quality_control_by_id function."""
+        # Client returns one record holding the valid example qc
+        client = MagicMock()
+        client.retrieve_docdb_records.return_value = [
+            {"_id": "abcd", "quality_control": self.example_quality_control}
+        ]
+
+        qc = get_quality_control_by_id(client, _id="123")
+
+        self.assertEqual(
+            qc,
+            QualityControl.model_validate_json(
+                json.dumps(self.example_quality_control)
+            ),
+        )
+
+    def test_get_qc_name(self):
+        """Test get_quality_control_by_name function."""
+        # The same mocked record answers both the name and the _id query
+        client = MagicMock()
+        client.retrieve_docdb_records.return_value = [
+            {"_id": "abcd", "quality_control": self.example_quality_control}
+        ]
+
+        qc = get_quality_control_by_name(client, name="123")
+
+        self.assertEqual(
+            qc,
+            QualityControl.model_validate_json(
+                json.dumps(self.example_quality_control)
+            ),
+        )
+
+    def test_get_qc_no_record(self):
+        """Test that a value error is raised when no record exists."""
+        # Client finds no records
+        client = MagicMock()
+        client.retrieve_docdb_records.return_value = []
+
+        self.assertRaises(
+            ValueError, get_quality_control_by_id, client, _id="123"
+        )
+
+    def test_get_qc_invalid(self):
+        """Test that a value error is raised when qc is invalid."""
+        # Client returns a record holding the invalid example qc
+
+        client = MagicMock()
+        client.retrieve_docdb_records.return_value = [
+            {
+                "_id": "abcd",
+                "quality_control": self.example_quality_control_invalid,
+            }
+        ]
+
+        self.assertRaises(
+            ValueError, get_quality_control_by_id, client, _id="123"
+        )
+
+    def test_get_qc_invalid_allowed(self):
+        """Test that a dict is returned when we allow invalid."""
+        # Client returns a record holding the invalid example qc
+        client = MagicMock()
+        client.retrieve_docdb_records.return_value = [
+            {
+                "_id": "abcd",
+                "quality_control": self.example_quality_control_invalid,
+            }
+        ]
+
+        qc = get_quality_control_by_id(client, _id="123", allow_invalid=True)
+
+        self.assertEqual(qc, self.example_quality_control_invalid)
+
+    def test_get_qc_no_name(self):
+        """Test that a value error is raised when no record has the name."""
+        # Client finds no records
+        client = MagicMock()
+        client.retrieve_docdb_records.return_value = []
+
+        self.assertRaises(
+            ValueError, get_quality_control_by_name, client, name="123"
+        )
+
+    def test_get_qc_no_qc(self):
+        """Test that a value error is raised when qc is missing or null."""
+        # First the key is absent; further down it is present but None
+        client = MagicMock()
+        client.retrieve_docdb_records.return_value = [{"_id": "abcd"}]
+
+        self.assertRaises(
+            ValueError, get_quality_control_by_id, client, _id="123"
+        )
+
+        client.retrieve_docdb_records.return_value = [
+            {"_id": "abcd", "quality_control": None}
+        ]
+
+        self.assertRaises(
+            ValueError, get_quality_control_by_id, client, _id="123"
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_helpers_docdb.py b/tests/test_helpers_docdb.py
new file mode 100644
index 0000000..9d78209
--- /dev/null
+++ b/tests/test_helpers_docdb.py
@@ -0,0 +1,55 @@
+"""Tests methods in helpers.docdb module"""
+
+import unittest
+from unittest.mock import MagicMock
+
+from aind_data_access_api.helpers.docdb import (
+    get_record_by_id,
+    get_id_from_name,
+    get_projection_by_id,
+    get_field_by_id,
+)
+
+
+class TestUtilDocDB(unittest.TestCase):
+    """Class to test methods in helpers.docdb module."""
+
+    def test_get_id_from_name(self):
+        """Tests get_id_from_name"""
+        client = MagicMock()
+        client.retrieve_docdb_records.return_value = [
+            {"_id": "abcd", "name": "123"}
+        ]
+        self.assertEqual("abcd", get_id_from_name(client, name="123"))
+
+    def test_get_record_from_docdb(self):
+        """Tests get_record_by_id"""
+        client = MagicMock()
+        client.retrieve_docdb_records.return_value = [{"_id": "abcd"}]
+        record = get_record_by_id(client, _id="abcd")
+        self.assertEqual({"_id": "abcd"}, record)
+
+        # test the empty case
+        client.retrieve_docdb_records.return_value = []
+        record = get_record_by_id(client, _id="abcd")
+        self.assertIsNone(record)
+
+    def test_get_projected_record_from_docdb(self):
+        """Tests get_projection_by_id"""
+        client = MagicMock()
+        client.retrieve_docdb_records.return_value = [
+            {"quality_control": {"a": 1}}
+        ]
+        record = get_projection_by_id(
+            client, _id="abcd", projection={"quality_control": 1}
+        )
+        self.assertEqual({"quality_control": {"a": 1}}, record)
+
+    def test_get_field_from_docdb(self):
+        """Tests get_field_by_id"""
+        client = MagicMock()
+        client.retrieve_docdb_records.return_value = [
+            {"quality_control": {"a": 1}}
+        ]
+        field = get_field_by_id(client, _id="abcd", field="quality_control")
+        self.assertEqual({"quality_control": {"a": 1}}, field)

From ea5474d4f7dd412cb10b2192b84193121ff3cf73 Mon Sep 17 00:00:00 2001
From: Helen Lin <helen.lin@alleninstitute.org>
Date: Mon, 18 Nov 2024 15:35:46 -0800
Subject: [PATCH 2/2] build: bump to version 0.16.0

---
 src/aind_data_access_api/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/aind_data_access_api/__init__.py b/src/aind_data_access_api/__init__.py
index b335a61..89fa2b1 100644
--- a/src/aind_data_access_api/__init__.py
+++ b/src/aind_data_access_api/__init__.py
@@ -1,3 +1,3 @@
 """Init package"""
 
-__version__ = "0.15.0"
+__version__ = "0.16.0"