From 317d2bf408d38340036e1f3303c3437674f39d7e Mon Sep 17 00:00:00 2001 From: Jean Lucas Date: Wed, 1 Nov 2023 16:44:40 +0100 Subject: [PATCH] allow for private dataset creation (#403) --- CHANGELOG.md | 8 ++++++++ nucleus/__init__.py | 5 +++++ nucleus/constants.py | 1 + pyproject.toml | 2 +- 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fd0b5351..e58d4253 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.16.6](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.6) - 2023-11-01 + +### Added +- Allow datasets to be created in "privacy mode". For example, `client.create_dataset('name', use_privacy_mode=True)`. +- Privacy Mode lets customers use Nucleus without sensitive raw data ever leaving their servers. +- When set to `True`, you can submit URLs to Nucleus that link to raw data assets like images or point clouds, instead of transferring that data to Scale. Access control is then completely in the hands of users: URLs may optionally be protected behind your corporate VPN or an IP whitelist. When you load a Nucleus web page, your browser will directly fetch the raw data from your servers without it ever being accessible to Scale. 
+ + ## [0.16.5](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.5) - 2023-10-30 ### Added diff --git a/nucleus/__init__.py b/nucleus/__init__.py index 5f876dad..d0386567 100644 --- a/nucleus/__init__.py +++ b/nucleus/__init__.py @@ -79,6 +79,7 @@ AUTOTAGS_KEY, DATASET_ID_KEY, DATASET_IS_SCENE_KEY, + DATASET_PRIVACY_MODE_KEY, DEFAULT_NETWORK_TIMEOUT_SEC, EMBEDDING_DIMENSION_KEY, EMBEDDINGS_URL_KEY, @@ -429,6 +430,7 @@ def create_dataset( self, name: str, is_scene: Optional[bool] = None, + use_privacy_mode: bool = False, item_metadata_schema: Optional[Dict] = None, annotation_metadata_schema: Optional[Dict] = None, ) -> Dataset: @@ -443,6 +445,8 @@ def create_dataset( is_scene: Whether the dataset contains strictly :class:`scenes ` or :class:`items `. This value is immutable. Default is False (dataset of items). + use_privacy_mode: Whether to create the dataset in privacy mode, in which raw data (e.g. images) is not uploaded to Scale. If set to True, + customer will have to adjust their file access policy with Scale. Default is False. item_metadata_schema: Dict defining item-level metadata schema. See below. annotation_metadata_schema: Dict defining annotation-level metadata schema. 
@@ -473,6 +477,7 @@ def create_dataset( { NAME_KEY: name, DATASET_IS_SCENE_KEY: is_scene, + DATASET_PRIVACY_MODE_KEY: use_privacy_mode, ANNOTATION_METADATA_SCHEMA_KEY: annotation_metadata_schema, ITEM_METADATA_SCHEMA_KEY: item_metadata_schema, }, diff --git a/nucleus/constants.py b/nucleus/constants.py index 31d1e710..d6acc8f4 100644 --- a/nucleus/constants.py +++ b/nucleus/constants.py @@ -43,6 +43,7 @@ DATASET_LENGTH_KEY = "length" DATASET_MODEL_RUNS_KEY = "model_run_ids" DATASET_NAME_KEY = "name" +DATASET_PRIVACY_MODE_KEY = "use_privacy_mode" DATASET_SLICES_KEY = "slice_ids" DEFAULT_ANNOTATION_UPDATE_MODE = False DEFAULT_NETWORK_TIMEOUT_SEC = 120 diff --git a/pyproject.toml b/pyproject.toml index f183b9a8..09672787 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ ignore = ["E501", "E741", "E731", "F401"] # Easy ignore for getting it running [tool.poetry] name = "scale-nucleus" -version = "0.16.5" +version = "0.16.6" description = "The official Python client library for Nucleus, the Data Platform for AI" license = "MIT" authors = ["Scale AI Nucleus Team "]