
Commit

Merge pull request #63 from scaleapi/da/validation_serialization
Da/validation serialization
ardila authored Apr 26, 2021
2 parents 66d2b49 + c4815d3 commit 40f9531
Showing 4 changed files with 93 additions and 16 deletions.
37 changes: 24 additions & 13 deletions nucleus/annotation.py
@@ -1,25 +1,27 @@
+import json
 from dataclasses import dataclass
 from enum import Enum
-from typing import Dict, Optional, Any, Union, List
+from typing import Any, Dict, List, Optional, Union
 
 from .constants import (
     ANNOTATION_ID_KEY,
+    ANNOTATIONS_KEY,
+    BOX_TYPE,
     DATASET_ITEM_ID_KEY,
-    REFERENCE_ID_KEY,
-    METADATA_KEY,
-    X_KEY,
-    Y_KEY,
-    WIDTH_KEY,
-    HEIGHT_KEY,
     GEOMETRY_KEY,
-    BOX_TYPE,
-    POLYGON_TYPE,
+    HEIGHT_KEY,
+    INDEX_KEY,
+    ITEM_ID_KEY,
     LABEL_KEY,
+    MASK_URL_KEY,
+    METADATA_KEY,
+    POLYGON_TYPE,
+    REFERENCE_ID_KEY,
     TYPE_KEY,
     VERTICES_KEY,
-    ITEM_ID_KEY,
-    MASK_URL_KEY,
-    INDEX_KEY,
-    ANNOTATIONS_KEY,
+    WIDTH_KEY,
+    X_KEY,
+    Y_KEY,
 )


@@ -42,6 +44,15 @@ def from_json(cls, payload: dict):
         else:
             return SegmentationAnnotation.from_json(payload)
 
+    def to_payload(self):
+        raise NotImplementedError(
+            "For serialization, use a specific subclass (e.g. SegmentationAnnotation), "
+            "not the base annotation class."
+        )
+
+    def to_json(self) -> str:
+        return json.dumps(self.to_payload())
+
 
 @dataclass
 class Segment:
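Taken together, the two hunks above add serialization to the annotation classes: to_payload() must come from a concrete subclass, while to_json() simply JSON-encodes that payload. A minimal usage sketch (the BoxAnnotation constructor fields shown here are assumed from the constants imported above, not confirmed by this diff):

    from nucleus import BoxAnnotation

    # Concrete subclasses serialize through the inherited to_json();
    # the base Annotation class deliberately raises NotImplementedError.
    box = BoxAnnotation(
        label="car",           # assumed field, matching LABEL_KEY
        x=10,
        y=20,
        width=30,
        height=40,
        reference_id="img_1",  # assumed field, matching REFERENCE_ID_KEY
    )
    print(box.to_json())  # JSON string built from box.to_payload()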
11 changes: 8 additions & 3 deletions nucleus/dataset_item.py
@@ -1,12 +1,14 @@
-from dataclasses import dataclass
+import json
 import os.path
+from dataclasses import dataclass
 from typing import Optional
 
 from .constants import (
+    DATASET_ITEM_ID_KEY,
     IMAGE_URL_KEY,
     METADATA_KEY,
-    REFERENCE_ID_KEY,
     ORIGINAL_IMAGE_URL_KEY,
-    DATASET_ITEM_ID_KEY,
+    REFERENCE_ID_KEY,
 )


@@ -51,3 +53,6 @@ def to_payload(self) -> dict:
         if self.item_id:
             payload[DATASET_ITEM_ID_KEY] = self.item_id
         return payload
+
+    def to_json(self) -> str:
+        return json.dumps(self.to_payload())
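DatasetItem gets the same treatment: to_json() wraps the existing to_payload(). A small sketch, using the positional (image URL, reference id) style that the test file at the bottom of this diff also uses; the URL and ids are placeholders:

    from nucleus import DatasetItem

    item = DatasetItem(
        "https://example.com/image.jpg",  # image location (placeholder)
        "img_1",                          # reference id (placeholder)
        metadata={"split": "train"},
    )
    print(item.to_json())  # JSON string built from item.to_payload()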
22 changes: 22 additions & 0 deletions nucleus/utils.py
@@ -70,3 +70,25 @@ def format_dataset_item_response(response: dict) -> dict:
         ITEM_KEY: DatasetItem.from_json(item),
         ANNOTATIONS_KEY: annotation_response,
     }
+
+
+def serialize_and_write(
+    upload_unit: List[Union[DatasetItem, Annotation]], file_pointer
+):
+    for unit in upload_unit:
+        try:
+            file_pointer.write(unit.to_json())
+        except TypeError as e:
+            type_name = type(unit).__name__
+            message = (
+                f"The following {type_name} could not be serialized: {unit}\n"
+            )
+            message += (
+                "This is usually an issue with a custom python object being "
+                "present in the metadata. Please inspect this error and adjust the "
+                "metadata so that it only contains json-serializable python primitives: "
+                "strings, ints, floats, lists, and dicts. For example, you must "
+                "convert numpy arrays into lists or lists of lists.\n"
+            )
+            message += f"The specific error was {e}"
+            raise ValueError(message) from e
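serialize_and_write is what the to_json additions build toward: it streams each item's JSON to an already-open file-like object and converts the opaque TypeError from json.dumps into an actionable ValueError. A rough usage sketch mirroring the test below (the items and the StringIO buffer are illustrative; any writable file object should work):

    import io

    from nucleus import DatasetItem
    from nucleus import utils

    items = [
        DatasetItem("https://example.com/a.jpg", "ref_a", metadata={"split": "train"}),
        DatasetItem("https://example.com/b.jpg", "ref_b"),
    ]

    with io.StringIO() as buffer:
        # Raises ValueError if any item's metadata is not JSON-serializable.
        utils.serialize_and_write(items, buffer)
        payload = buffer.getvalue()  # concatenated JSON objects, no separator is added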
39 changes: 39 additions & 0 deletions tests/test_utils.py
@@ -0,0 +1,39 @@
+import pytest
+from nucleus import DatasetItem
+from nucleus import utils
+
+import io
+
+
+class TestNonSerializableObject:
+    def weird_function():
+        print("can't touch this. Dun dun dun dun.")
+
+
+def test_serialize():
+
+    test_items = [
+        DatasetItem("fake_url1", "fake_id1"),
+        DatasetItem(
+            "fake_url2",
+            "fake_id2",
+            metadata={
+                "ok": "field",
+                "bad": TestNonSerializableObject(),
+            },
+        ),
+    ]
+
+    with io.StringIO() as in_memory_filelike:
+        with pytest.raises(ValueError) as error:
+            utils.serialize_and_write(
+                test_items,
+                in_memory_filelike,
+            )
+        assert "DatasetItem" in str(error.value)
+        assert "fake_id2" in str(error.value)
+        assert "fake_id1" not in str(error.value)
+
+        test_items[1].metadata["bad"] = "fixed"
+
+        utils.serialize_and_write(test_items, in_memory_filelike)
