Skip to content

Commit

Permalink
DEV-2762: optional submitter id in partial documents validation (#1068)
Browse files Browse the repository at this point in the history
Update partial validators to require only one of submitter_id or id. This enables partial submissions to work by specifying either the existing node_id or the submitter_id.
  • Loading branch information
kulgan authored Jun 4, 2024
1 parent 90362b4 commit 08b5658
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 13 deletions.
1 change: 0 additions & 1 deletion src/gdcdictionary/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ def get_schema_directory(local_path: Optional[str] = None) -> pathlib.Path:

# use default embedded location
with files("gdcdictionary").joinpath("schemas") as path:
logger.info(path)
return path


Expand Down
35 changes: 29 additions & 6 deletions src/gdcdictionary/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,21 @@
Partial documents are json documents for a specific type but potentially
missing some required fields. All supplied values should be valid based on
the constraints defined in the target schema. The example instance above
is a partial case document and can be partially validated using
is a partial case document and can be partially validated using `partial=True`.
The minimum required fields for partial documents are:
* type
* one of:
** id
** submitter_id
>>> import uuid
>>> import gdcdictionary
>>> instance = {'type': 'case', 'submitter_id': 'UNSC-1', 'disease_type': 'Not Applicable'}
>>> gdcdictionary.validate(instance, partial=True)
>>> instances = [\
{'type': 'case', 'submitter_id': 'UNSC-1', 'disease_type': 'Not Applicable'},\
{'type': 'case', 'id': str(uuid.uuid4()), 'disease_type': 'Not Applicable'}\
]
>>> gdcdictionary.validate_instances(instances, partial=True)
[]
"""

Expand Down Expand Up @@ -64,7 +74,7 @@ def is_required_field_violation(self) -> bool:

@property
def is_ignored_for_partials(self) -> bool:
return self.is_required_field_violation and "submitter_id" not in self.keys
return self.is_required_field_violation

@classmethod
def from_values(cls, schema: str, message: str, keys: List[str]) -> SchemaValidationError:
Expand Down Expand Up @@ -156,12 +166,25 @@ def validate(instance: Dict[str, Any], partial: bool = False) -> List[SchemaVali
schema="", message="'type' is a required property", keys=["type"]
)
]
validator = _get_validator(instance["type"])
schema_type = instance["type"]
if (
schema_type not in ["program", "project"]
and "submitter_id" not in instance
and "id" not in instance
):
return [
SchemaValidationError(
schema=schema_type,
message="one of ['submitter_id', 'id'] is required.",
keys=["submitter_id", "id"],
)
]
validator = _get_validator(schema_type)
if not validator:
return [
SchemaValidationError(
schema="",
message=f"specified type: {instance['type']} is not in the current data model",
message=f"specified type: {schema_type} is not in the current data model",
keys=["type"],
)
]
Expand Down
18 changes: 12 additions & 6 deletions tests/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
"""

import uuid

try:
from importlib.resources import files
except ImportError:
Expand Down Expand Up @@ -66,7 +68,8 @@ def test_validate_instances__invalid_types(partial: bool) -> None:
"""
instances = [
{"type": "species"},
{"type": "case", "python_version": "38"},
{"type": "case", "submitter_id": "unsc-0", "python_version": "38"},
{"type": "aliquot", "python_version": "38"},
{"name": "species"},
]
violations = gdcdictionary.validate_instances(instances, partial)
Expand All @@ -75,12 +78,12 @@ def test_validate_instances__invalid_types(partial: bool) -> None:
assert len(violations) > 2

unknown_types = [v for v in violations if v.schema == ""]
assert len(unknown_types) == 2
assert len(unknown_types) == 1

case_required_field_violation = next(
v for v in violations if v.schema == "case" and v.keys == ["submitter_id"]
v for v in violations if v.schema == "aliquot" and v.keys == ["submitter_id", "id"]
)
assert case_required_field_violation.message == "'submitter_id' is a required property"
assert case_required_field_violation.message == "one of ['submitter_id', 'id'] is required."

case_extra_field_violation = next(
v for v in violations if v.schema == "case" and v.keys == ["python_version"]
Expand All @@ -93,6 +96,9 @@ def test_validate_instances__invalid_types(partial: bool) -> None:

def test_partials_validation():
# example missing required fields
instance = {"type": "case", "days_to_consent": 123, "submitter_id": "UNSC-2"}
violations = gdcdictionary.validate_instances(instances=[instance], partial=True)
instances = [
{"type": "case", "days_to_consent": 123, "submitter_id": "UNSC-2"},
{"type": "case", "days_to_consent": 123, "id": str(uuid.uuid4())},
]
violations = gdcdictionary.validate_instances(instances, partial=True)
assert len(violations) == 0

0 comments on commit 08b5658

Please sign in to comment.