Skip to content

Commit

Permalink
Merge pull request #89 from AllenNeuralDynamics/release-v0.15.0
Browse files Browse the repository at this point in the history
Release v0.15.0
  • Loading branch information
yosefmaru authored Sep 5, 2024
2 parents a7ed9b4 + b09daae commit 47c0fdc
Show file tree
Hide file tree
Showing 7 changed files with 228 additions and 4 deletions.
4 changes: 4 additions & 0 deletions docs/source/ExamplesDocDBDirectConnection.rst
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ Aggregation Example 1: Get all subjects per breeding group
"$group": {
"_id": "$subject.breeding_info.breeding_group",
"subject_ids": {"$addToSet": "$subject.subject_id"},
"count": {"$sum": 1},
}
}
]
Expand All @@ -117,6 +118,9 @@ Aggregation Example 1: Get all subjects per breeding group
print(f"First 3 breeding groups and corresponding subjects:")
print(json.dumps(result[:3], indent=3))
For more info about aggregations, please see MongoDB documentation:
https://www.mongodb.com/docs/manual/aggregation/


Updating Metadata
~~~~~~~~~~~~~~~~~~~~~~
Expand Down
123 changes: 123 additions & 0 deletions docs/source/ExamplesDocDBRestApi.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
Examples - DocDB REST API
==================================

This page provides examples for interact with the Document Database (DocDB)
REST API using the provided Python client.


Querying Metadata
~~~~~~~~~~~~~~~~~~~~~~

Count Example 1: Get # of records with a certain subject_id
-----------------------------------------------------------

.. code:: python
import json
from aind_data_access_api.document_db import MetadataDbClient
API_GATEWAY_HOST = "api.allenneuraldynamics.org"
DATABASE = "metadata_index"
COLLECTION = "data_assets"
docdb_api_client = MetadataDbClient(
host=API_GATEWAY_HOST,
database=DATABASE,
collection=COLLECTION,
)
filter = {"subject.subject_id": "689418"}
count = docdb_api_client._count_records(
filter_query=filter,
)
print(count)
Filter Example 1: Get records with a certain subject_id
-------------------------------------------------------

.. code:: python
filter = {"subject.subject_id": "689418"}
records = docdb_api_client.retrieve_docdb_records(
filter_query=filter,
)
print(json.dumps(records, indent=3))
With projection (recommended):

.. code:: python
filter = {"subject.subject_id": "689418"}
projection = {
"name": 1,
"created": 1,
"location": 1,
"subject.subject_id": 1,
"subject.date_of_birth": 1,
}
records = docdb_api_client.retrieve_docdb_records(
filter_query=filter,
projection=projection,
)
print(json.dumps(records, indent=3))
Filter Example 2: Get records with a certain breeding group
-----------------------------------------------------------

.. code:: python
filter = {
"subject.breeding_info.breeding_group": "Chat-IRES-Cre_Jax006410"
}
records = docdb_api_client.retrieve_docdb_records(
filter_query=filter
)
print(json.dumps(records, indent=3))
With projection (recommended):

.. code:: python
filter = {
"subject.breeding_info.breeding_group": "Chat-IRES-Cre_Jax006410"
}
projection = {
"name": 1,
"created": 1,
"location": 1,
"subject.subject_id": 1,
"subject.breeding_info.breeding_group": 1,
}
records = docdb_api_client.retrieve_docdb_records(
filter_query=filter,
projection=projection,
)
print(json.dumps(records, indent=3))
Aggregation Example 1: Get all subjects per breeding group
----------------------------------------------------------

.. code:: python
agg_pipeline = [
{
"$group": {
"_id": "$subject.breeding_info.breeding_group",
"subject_ids": {"$addToSet": "$subject.subject_id"},
"count": {"$sum": 1},
}
}
]
result = docdb_api_client.aggregate_docdb_records(
pipeline=agg_pipeline
)
print(f"Total breeding groups: {len(result)}")
print(f"First 3 breeding groups and corresponding subjects:")
print(json.dumps(result[:3], indent=3))
For more info about aggregations, please see MongoDB documentation:
https://www.mongodb.com/docs/manual/aggregation/
2 changes: 1 addition & 1 deletion docs/source/UserGuide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ REST API (Read-Only)
URL = "https://api.allenneuraldynamics.org/v1/metadata_index/data_assets"
filter = {"subject.subject_id": "123456"}
limit = 1000
limit = 500
response = requests.get(URL, params={"filter": json.dumps(filter), "limit": limit})
print(response.json())
Expand Down
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Welcome to this repository's documentation!
:caption: Contents:

UserGuide
ExamplesDocDBRestApi
ExamplesDocDBDirectConnection
Contributing
modules
Expand Down
2 changes: 1 addition & 1 deletion src/aind_data_access_api/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Init package"""

__version__ = "0.14.0"
__version__ = "0.15.0"
28 changes: 26 additions & 2 deletions src/aind_data_access_api/document_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ def _base_url(self):
f"{self.collection}"
)

@property
def _aggregate_url(self):
"""Url to aggregate records."""
return (
f"https://{self.host}/{self.version}/{self.database}/"
f"{self.collection}/aggregate"
)

@property
def _update_one_url(self):
"""Url to update one record"""
Expand Down Expand Up @@ -178,6 +186,18 @@ def _get_records(
response_body = response.json()
return response_body

def _aggregate_records(self, pipeline: List[dict]) -> List[dict]:
"""Aggregate records from collection using an aggregation pipeline."""
# Do not need to sign request since API supports readonly aggregations
response = requests.post(url=self._aggregate_url, json=pipeline)
if response.status_code != 200:
error_msg = response.text if response.text else "Unknown error"
raise ValueError(f"{response.status_code} Error: {error_msg}")
if not response.content:
raise ValueError("No payload in response")
response_body = response.json()
return response_body

def _upsert_one_record(
self, record_filter: dict, update: dict
) -> Response:
Expand Down Expand Up @@ -242,7 +262,7 @@ def retrieve_docdb_records(
sort: Optional[dict] = None,
limit: int = 0,
paginate: bool = True,
paginate_batch_size: int = 1000,
paginate_batch_size: int = 500,
paginate_max_iterations: int = 20000,
) -> List[dict]:
"""
Expand All @@ -263,7 +283,7 @@ def retrieve_docdb_records(
be faster to set to false if the number of records expected to be
returned is small.
paginate_batch_size : int
Number of records to return at a time. Default is 1000.
Number of records to return at a time. Default is 500.
paginate_max_iterations : int
Max number of iterations to run to prevent indefinite calls to the
API Gateway. Default is 20000.
Expand Down Expand Up @@ -324,6 +344,10 @@ def retrieve_docdb_records(
)
return records

def aggregate_docdb_records(self, pipeline: List[dict]) -> List[dict]:
"""Aggregate records using an aggregation pipeline."""
return self._aggregate_records(pipeline=pipeline)

# TODO: remove this method
def retrieve_data_asset_records(
self,
Expand Down
72 changes: 72 additions & 0 deletions tests/test_document_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,57 @@ def test_get_records_error(self, mock_get: MagicMock):
"ValueError('No payload in response')", repr(e.exception)
)

@patch("requests.post")
def test_aggregate_records(self, mock_post: MagicMock):
"""Tests _aggregate_records method"""
pipeline = [{"$match": {"_id": "abc123"}}]
client = Client(**self.example_client_args)
mock_response = Response()
mock_response.status_code = 200
mock_response._content = json.dumps(
[{"_id": "abc123", "message": "hi"}]
).encode("utf-8")

mock_post.return_value = mock_response
result = client._aggregate_records(pipeline=pipeline)
self.assertEqual(
[{"_id": "abc123", "message": "hi"}],
result,
)

@patch("requests.post")
def test_aggregate_records_error(self, mock_post: MagicMock):
"""Tests _aggregate_records method when there is an HTTP error or
no payload in response"""
invalid_pipeline = [{"$match_invalid": {"_id": "abc123"}}]
client = Client(**self.example_client_args)
mock_response1 = Response()
mock_response1.status_code = 400
mock_error = {
"error": {
"name": "MongoServerError",
"message": (
"Unrecognized pipeline stage name: '$match_invalid'"
),
}
}
mock_response1._content = json.dumps(mock_error).encode("utf-8")
mock_response2 = Response()
mock_response2.status_code = 200
mock_response2._content = None
mock_post.side_effect = [mock_response1, mock_response2]
with self.assertRaises(ValueError) as e:
client._aggregate_records(pipeline=invalid_pipeline)
self.assertEqual(
f"400 Error: {json.dumps(mock_error)}",
str(e.exception),
)
with self.assertRaises(ValueError) as e:
client._aggregate_records(pipeline=invalid_pipeline)
self.assertEqual(
"ValueError('No payload in response')", repr(e.exception)
)

@patch("boto3.session.Session")
@patch("botocore.auth.SigV4Auth.add_auth")
@patch("requests.post")
Expand Down Expand Up @@ -456,6 +507,27 @@ def test_retrieve_many_data_asset_records(
)
self.assertEqual(expected_response, list(records))

@patch("aind_data_access_api.document_db.Client._aggregate_records")
def test_aggregate_docdb_records(self, mock_aggregate: MagicMock):
"""Tests aggregating docdb records"""
expected_result = [
{
"_id": "abc-123",
"name": "modal_00000_2000-10-10_10-10-10",
"created": datetime(2000, 10, 10, 10, 10, 10),
"location": "some_url",
"subject": {"subject_id": "00000", "sex": "Female"},
}
]
client = MetadataDbClient(**self.example_client_args)
mock_aggregate.return_value = expected_result
pipeline = [{"$match": {"_id": "abc-123"}}]
result = client.aggregate_docdb_records(pipeline)
self.assertEqual(result, expected_result)
mock_aggregate.assert_called_once_with(
pipeline=pipeline,
)

@patch("aind_data_access_api.document_db.Client._upsert_one_record")
def test_upsert_one_docdb_record(self, mock_upsert: MagicMock):
"""Tests upserting one docdb record"""
Expand Down

0 comments on commit 47c0fdc

Please sign in to comment.