fix job.status() in tests and CircleCI caching (#347)

* fix test and circleci caching * fix page size * fix AsyncJob test * lint * changelog * fix job test
scaleapi · Aug 16, 2022 · 95681af · 95681af
1 parent 275c6a5
commit 95681af
Show file tree

Hide file tree

Showing 7 changed files with 36 additions and 9 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -23,15 +23,16 @@ jobs:
             apt-get -y install curl libgeos-dev
             pip install --upgrade pip
             pip install poetry
-      - python/install-packages:
-          include-python-in-cache-key: false
-          pkg-manager: poetry
+      - run:
+          name: Install Python Dependencies
+          command: |
+              poetry install
       - run:
           name: Test Imports (extras need to be guarded!)
           command: | # Make sure that importing works without extras installed
             poetry run python -c 'import nucleus'
       - run:
-          name: Install Extra Dependencies
+          name: Install Extra Python Dependencies
           command: | # install dependencies
             poetry install -E metrics -E launch
       - run:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,7 +5,15 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [0.14.16](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.15) - 2022-08-12
+## [0.14.17](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.17) - 2022-08-15
+
+### Fixed
+- Fix `AsyncJob` status payload keys causing test failures
+- Fix `AsyncJob` export test
+- Fix `page_size` for `{Dataset,Slice}.items_and_annotation_generator()`
+- Change to simple dependency install step to fix CircleCI caching failures
+
+## [0.14.16](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.14.16) - 2022-08-12
 
 ### Added
 - Scene categorization support

diff --git a/nucleus/dataset.py b/nucleus/dataset.py
@@ -1256,7 +1256,7 @@ def items_and_annotation_generator(
             client=self._client,
             endpoint=f"dataset/{self.id}/exportForTrainingPage",
             result_key=EXPORT_FOR_TRAINING_KEY,
-            page_size=100000,
+            page_size=10000,  # max ES page size
         )
         for data in json_generator:
             for ia in convert_export_payload([data], has_predictions=False):

diff --git a/nucleus/slice.py b/nucleus/slice.py
@@ -246,7 +246,7 @@ def items_and_annotation_generator(
             client=self._client,
             endpoint=f"slice/{self.id}/exportForTrainingPage",
             result_key=EXPORT_FOR_TRAINING_KEY,
-            page_size=100000,
+            page_size=10000,  # max ES page size
         )
         for data in json_generator:
             for ia in convert_export_payload([data], has_predictions=False):

diff --git a/pyproject.toml b/pyproject.toml
@@ -21,7 +21,7 @@ exclude = '''
 
 [tool.poetry]
 name = "scale-nucleus"
-version = "0.14.16"
+version = "0.14.17"
 description = "The official Python client library for Nucleus, the Data Platform for AI"
 license =  "MIT"
 authors = ["Scale AI Nucleus Team <[email protected]>"]

diff --git a/tests/test_jobs.py b/tests/test_jobs.py
@@ -23,7 +23,7 @@ def test_repr(test_object: any):
     )
 
 
-def test_job_creation_and_listing_and_retrieval(CLIENT):
+def test_job_listing_and_retrieval(CLIENT):
     jobs = CLIENT.list_jobs()
 
     if not jobs:

diff --git a/tests/test_scene.py b/tests/test_scene.py
@@ -420,6 +420,7 @@ def test_scene_upload_async(dataset_scene):
     job.sleep_until_complete()
     status = job.status()
 
+    del status["job_creation_time"]  # HACK: too flaky to try syncing
     assert status == {
         "job_id": job.job_id,
         "status": "Completed",
@@ -436,6 +437,8 @@ def test_scene_upload_async(dataset_scene):
         "job_progress": "1.00",
         "completed_steps": 1,
         "total_steps": 1,
+        "job_last_known_status": "Completed",
+        "job_type": "uploadLidarScene",
     }
 
     uploaded_scenes = dataset_scene.scenes
@@ -462,6 +465,7 @@ def test_scene_upload_and_update(dataset_scene):
     job.sleep_until_complete()
     status = job.status()
 
+    del status["job_creation_time"]  # HACK: too flaky to try syncing
     assert status == {
         "job_id": job.job_id,
         "status": "Completed",
@@ -478,6 +482,8 @@ def test_scene_upload_and_update(dataset_scene):
         "job_progress": "1.00",
         "completed_steps": 1,
         "total_steps": 1,
+        "job_last_known_status": "Completed",
+        "job_type": "uploadLidarScene",
     }
 
     uploaded_scenes = dataset_scene.scenes
@@ -495,6 +501,7 @@ def test_scene_upload_and_update(dataset_scene):
     job2.sleep_until_complete()
     status2 = job2.status()
 
+    del status2["job_creation_time"]  # HACK: too flaky to try syncing
     assert status2 == {
         "job_id": job2.job_id,
         "status": "Completed",
@@ -511,6 +518,8 @@ def test_scene_upload_and_update(dataset_scene):
         "job_progress": "1.00",
         "completed_steps": 1,
         "total_steps": 1,
+        "job_last_known_status": "Completed",
+        "job_type": "uploadLidarScene",
     }
 
 
@@ -585,6 +594,7 @@ def test_video_scene_upload_async(dataset_scene):
     job.sleep_until_complete()
     status = job.status()
 
+    del status["job_creation_time"]  # HACK: too flaky to try syncing
     assert status == {
         "job_id": job.job_id,
         "status": "Completed",
@@ -601,6 +611,8 @@ def test_video_scene_upload_async(dataset_scene):
         "job_progress": "1.00",
         "completed_steps": len(scenes),
         "total_steps": len(scenes),
+        "job_last_known_status": "Completed",
+        "job_type": "uploadVideoScene",
     }
 
     uploaded_scenes = dataset_scene.scenes
@@ -681,6 +693,7 @@ def test_video_scene_upload_and_update(dataset_scene):
     job.sleep_until_complete()
     status = job.status()
 
+    del status["job_creation_time"]  # HACK: too flaky to try syncing
     assert status == {
         "job_id": job.job_id,
         "status": "Completed",
@@ -697,6 +710,8 @@ def test_video_scene_upload_and_update(dataset_scene):
         "job_progress": "1.00",
         "completed_steps": len(scenes),
         "total_steps": len(scenes),
+        "job_last_known_status": "Completed",
+        "job_type": "uploadVideoScene",
     }
 
     uploaded_scenes = dataset_scene.scenes
@@ -715,6 +730,7 @@ def test_video_scene_upload_and_update(dataset_scene):
     job2.sleep_until_complete()
     status2 = job2.status()
 
+    del status2["job_creation_time"]  # HACK: too flaky to try syncing
     assert status2 == {
         "job_id": job2.job_id,
         "status": "Completed",
@@ -731,6 +747,8 @@ def test_video_scene_upload_and_update(dataset_scene):
         "job_progress": "1.00",
         "completed_steps": len(scenes),
         "total_steps": len(scenes),
+        "job_last_known_status": "Completed",
+        "job_type": "uploadVideoScene",
     }