Skip to content

Commit

Permalink
paginate export methods (#341)
Browse files Browse the repository at this point in the history
* paginate

* docstring

* bump semver and changelog

* lint
  • Loading branch information
drakejwong authored Aug 6, 2022
1 parent e18f313 commit 0ddc3f5
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 14 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.14.12](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.12) - 2022-08-05

### Added
- Added auto-paginated `Slice.export_predictions_generator`
### Fixed
- Change `{Dataset,Slice}.items_and_annotation_generator` to work with improved paginate endpoint

## [0.14.11](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.11) - 2022-07-20

### Fixed
Expand Down
1 change: 1 addition & 0 deletions nucleus/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
AUTOTAG_SCORE_THRESHOLD = "score_threshold"
EXPORTED_ROWS = "exportedRows"
EXPORTED_SCALE_TASK_INFO_ROWS = "exportedScaleTaskInfoRows"
EXPORT_FOR_TRAINING_KEY = "data"
CAMERA_MODEL_KEY = "camera_model"
CAMERA_PARAMS_KEY = "camera_params"
CLASS_PDF_KEY = "class_pdf"
Expand Down
12 changes: 10 additions & 2 deletions nucleus/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
DEFAULT_ANNOTATION_UPDATE_MODE,
EMBEDDING_DIMENSION_KEY,
EMBEDDINGS_URL_KEY,
EXPORT_FOR_TRAINING_KEY,
EXPORTED_ROWS,
FRAME_RATE_KEY,
ITEMS_KEY,
Expand Down Expand Up @@ -1250,8 +1251,15 @@ def items_and_annotation_generator(
}
}]
"""
for item in self.items_generator():
yield self.refloc(reference_id=item.reference_id)
json_generator = paginate_generator(
client=self._client,
endpoint=f"dataset/{self.id}/exportForTrainingPage",
result_key=EXPORT_FOR_TRAINING_KEY,
page_size=100000,
)
for data in json_generator:
for ia in convert_export_payload([data], has_predictions=False):
yield ia

def export_embeddings(
self,
Expand Down
55 changes: 45 additions & 10 deletions nucleus/slice.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@
import requests

from nucleus.annotation import Annotation
from nucleus.constants import EXPORTED_ROWS, ITEMS_KEY
from nucleus.constants import EXPORT_FOR_TRAINING_KEY, EXPORTED_ROWS, ITEMS_KEY
from nucleus.dataset_item import DatasetItem
from nucleus.errors import NucleusAPIError
from nucleus.job import AsyncJob
from nucleus.utils import (
KeyErrorDict,
convert_export_payload,
format_dataset_item_response,
format_scale_task_info_response,
paginate_generator,
)
Expand Down Expand Up @@ -203,13 +202,15 @@ def items_and_annotation_generator(
}
}]
"""
for item in self.items_generator():
yield format_dataset_item_response(
self._client.dataitem_ref_id(
dataset_id=self.dataset_id,
reference_id=item.reference_id,
)
)
json_generator = paginate_generator(
client=self._client,
endpoint=f"slice/{self.id}/exportForTrainingPage",
result_key=EXPORT_FOR_TRAINING_KEY,
page_size=100000,
)
for data in json_generator:
for ia in convert_export_payload([data], has_predictions=False):
yield ia

def items_and_annotations(
self,
Expand Down Expand Up @@ -256,7 +257,7 @@ def export_predictions(
List[{
"item": DatasetItem,
"predicions": {
"predictions": {
"box": List[BoxAnnotation],
"polygon": List[PolygonAnnotation],
"cuboid": List[CuboidAnnotation],
Expand All @@ -272,6 +273,40 @@ def export_predictions(
)
return convert_export_payload(api_payload[EXPORTED_ROWS], True)

def export_predictions_generator(
    self, model
) -> Iterable[Dict[str, Union[DatasetItem, Dict[str, List[Annotation]]]]]:
    """Streams every DatasetItem in the Slice together with the given Model's predictions.

    Pages through the `exportForTrainingPage` endpoint so arbitrarily large
    slices can be exported without loading everything into memory at once.

    Parameters:
        model (Model): the nucleus model objects representing the model for which to export predictions.

    Returns:
        Iterable where each element is a dict containing the DatasetItem
        and all of its associated Predictions, grouped by type (e.g. box).
        ::

            List[{
                "item": DatasetItem,
                "predictions": {
                    "box": List[BoxAnnotation],
                    "polygon": List[PolygonAnnotation],
                    "cuboid": List[CuboidAnnotation],
                    "segmentation": List[SegmentationAnnotation],
                    "category": List[CategoryAnnotation],
                }
            }]
    """
    pages = paginate_generator(
        client=self._client,
        endpoint=f"slice/{self.id}/{model.id}/exportForTrainingPage",
        result_key=EXPORT_FOR_TRAINING_KEY,
        page_size=100000,
    )
    for page in pages:
        # Each page is converted independently so results are yielded as
        # soon as the server returns them, keeping memory bounded.
        yield from convert_export_payload([page], has_predictions=True)

def export_scale_task_info(self):
"""Fetches info for all linked Scale tasks of items/scenes in the slice.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ exclude = '''

[tool.poetry]
name = "scale-nucleus"
version = "0.14.11"
version = "0.14.12"
description = "The official Python client library for Nucleus, the Data Platform for AI"
license = "MIT"
authors = ["Scale AI Nucleus Team <[email protected]>"]
Expand Down
2 changes: 1 addition & 1 deletion tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ def sort_labelmap(segmentation_annotation):
assert row[ITEM_KEY] == ds_items[0]
assert row[ANNOTATIONS_KEY][BOX_TYPE][0] == box_annotation
assert sort_labelmap(
row[ANNOTATIONS_KEY][SEGMENTATION_TYPE][0]
row[ANNOTATIONS_KEY][SEGMENTATION_TYPE]
) == sort_labelmap(clear_fields(segmentation_annotation))
assert row[ANNOTATIONS_KEY][POLYGON_TYPE][0] == polygon_annotation
assert row[ANNOTATIONS_KEY][CATEGORY_TYPE][0] == category_annotation
Expand Down

0 comments on commit 0ddc3f5

Please sign in to comment.