From 01a4f959be15e7e11124b208069b38a0c57bccee Mon Sep 17 00:00:00 2001 From: Angela Date: Tue, 30 Jul 2024 16:54:57 -0700 Subject: [PATCH 1/6] cleanlab studio beta api --- cleanlab_studio/errors.py | 8 + cleanlab_studio/internal/api/api.py | 118 ++++++--------- cleanlab_studio/internal/api/api_helper.py | 52 ++++++- cleanlab_studio/internal/api/beta_api.py | 95 ++++++++++++ cleanlab_studio/internal/studio_base.py | 37 +++++ cleanlab_studio/internal/upload_helpers.py | 10 +- cleanlab_studio/studio/studio.py | 51 ++----- cleanlab_studio/studio_beta/__init__.py | 1 + cleanlab_studio/studio_beta/beta_dataset.py | 68 +++++++++ cleanlab_studio/studio_beta/beta_job.py | 157 ++++++++++++++++++++ cleanlab_studio/studio_beta/studio_beta.py | 64 ++++++++ 11 files changed, 545 insertions(+), 116 deletions(-) create mode 100644 cleanlab_studio/internal/api/beta_api.py create mode 100644 cleanlab_studio/internal/studio_base.py create mode 100644 cleanlab_studio/studio_beta/__init__.py create mode 100644 cleanlab_studio/studio_beta/beta_dataset.py create mode 100644 cleanlab_studio/studio_beta/beta_job.py create mode 100644 cleanlab_studio/studio_beta/studio_beta.py diff --git a/cleanlab_studio/errors.py b/cleanlab_studio/errors.py index c7829ecf..42d8aaeb 100644 --- a/cleanlab_studio/errors.py +++ b/cleanlab_studio/errors.py @@ -152,3 +152,11 @@ def __init__(self, filepath: Union[str, pathlib.Path] = "") -> None: if isinstance(filepath, pathlib.Path): filepath = str(filepath) super().__init__(f"File could not be found at {filepath}. Please check the file path.") + + +class BetaJobError(HandledError): + pass + + +class DownloadResultsError(HandledError): + pass diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py index 9e749205..24011763 100644 --- a/cleanlab_studio/internal/api/api.py +++ b/cleanlab_studio/internal/api/api.py @@ -40,52 +40,22 @@ pyspark_exists = False from cleanlab_studio.errors import NotInstalledError -from cleanlab_studio.internal.api.api_helper import check_uuid_well_formed +from cleanlab_studio.internal.api.api_helper import ( + check_uuid_well_formed, + construct_headers, + handle_api_error, +) from cleanlab_studio.internal.types import JSONDict, SchemaOverride from cleanlab_studio.version import __version__ -base_url = os.environ.get("CLEANLAB_API_BASE_URL", "https://api.cleanlab.ai/api") -cli_base_url = f"{base_url}/cli/v0" -upload_base_url = f"{base_url}/upload/v1" -dataset_base_url = f"{base_url}/datasets" -project_base_url = f"{base_url}/projects" -cleanset_base_url = f"{base_url}/cleansets" -model_base_url = f"{base_url}/v1/deployment" -tlm_base_url = f"{base_url}/v0/trustworthy_llm" - - -def _construct_headers( - api_key: Optional[str], content_type: Optional[str] = "application/json" -) -> JSONDict: - retval = dict() - if api_key: - retval["Authorization"] = f"bearer {api_key}" - if content_type: - retval["Content-Type"] = content_type - retval["Client-Type"] = "python-api" - return retval - - -def handle_api_error(res: requests.Response) -> None: - handle_api_error_from_json(res.json(), res.status_code) - - -def handle_api_error_from_json(res_json: JSONDict, status_code: Optional[int] = None) -> None: - if "code" in res_json and "description" in res_json: # AuthError or UserQuotaError format - if res_json["code"] == "user_soft_quota_exceeded": - pass # soft quota limit is going away soon, so ignore it - else: - raise APIError(res_json["description"]) - - if res_json.get("error", None) is not None: - error = res_json["error"] - if ( - 
status_code == 422 - and isinstance(error, dict) - and error.get("code", None) == "UNSUPPORTED_PROJECT_CONFIGURATION" - ): - raise InvalidProjectConfiguration(error["description"]) - raise APIError(res_json["error"]) +API_BASE_URL = os.environ.get("CLEANLAB_API_BASE_URL", "https://api.cleanlab.ai/api") +cli_base_url = f"{API_BASE_URL}/cli/v0" +upload_base_url = f"{API_BASE_URL}/upload/v1" +dataset_base_url = f"{API_BASE_URL}/datasets" +project_base_url = f"{API_BASE_URL}/projects" +cleanset_base_url = f"{API_BASE_URL}/cleansets" +model_base_url = f"{API_BASE_URL}/v1/deployment" +tlm_base_url = f"{API_BASE_URL}/v0/trustworthy_llm" def handle_rate_limit_error_from_resp(resp: aiohttp.ClientResponse) -> None: @@ -134,7 +104,7 @@ def validate_api_key(api_key: str) -> bool: res = requests.get( cli_base_url + "/validate", json=dict(api_key=api_key), - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) valid: bool = res.json()["valid"] @@ -154,7 +124,7 @@ def initialize_upload( res = requests.post( f"{upload_base_url}/file/initialize", json=dict(size_in_bytes=str(file_size), filename=filename, file_type=file_type), - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) upload_id: str = res.json()["upload_id"] @@ -169,7 +139,7 @@ def complete_file_upload(api_key: str, upload_id: str, upload_parts: List[JSONDi res = requests.post( f"{upload_base_url}/file/complete", json=request_json, - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) @@ -184,7 +154,7 @@ def confirm_upload( res = requests.post( f"{upload_base_url}/confirm", json=request_json, - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) @@ -199,7 +169,7 @@ def update_schema( res = requests.patch( f"{upload_base_url}/schema", json=request_json, - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) @@ -209,7 +179,7 @@ def get_ingestion_status(api_key: str, upload_id: str) -> JSONDict: res = requests.get( f"{upload_base_url}/total_progress", params=dict(upload_id=upload_id), - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) res_json: JSONDict = res.json() @@ -221,7 +191,7 @@ def get_dataset_id(api_key: str, upload_id: str) -> JSONDict: res = requests.get( f"{upload_base_url}/dataset_id", params=dict(upload_id=upload_id), - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) res_json: JSONDict = res.json() @@ -232,7 +202,7 @@ def get_project_of_cleanset(api_key: str, cleanset_id: str) -> str: check_uuid_well_formed(cleanset_id, "cleanset ID") res = requests.get( cli_base_url + f"/cleansets/{cleanset_id}/project", - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) project_id: str = res.json()["project_id"] @@ -243,7 +213,7 @@ def get_label_column_of_project(api_key: str, project_id: str) -> str: check_uuid_well_formed(project_id, "project ID") res = requests.get( cli_base_url + f"/projects/{project_id}/label_column", - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) label_column: str = res.json()["label_column"] @@ -274,7 +244,7 @@ def download_cleanlab_columns( include_cleanlab_columns=include_cleanlab_columns, include_project_details=include_project_details, ), - headers=_construct_headers(api_key), + 
headers=construct_headers(api_key), ) handle_api_error(res) id_col = get_id_column(api_key, cleanset_id) @@ -306,7 +276,7 @@ def download_array( check_uuid_well_formed(cleanset_id, "cleanset ID") res = requests.get( cli_base_url + f"/cleansets/{cleanset_id}/{name}", - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) res_json: JSONDict = res.json() @@ -323,7 +293,7 @@ def get_id_column(api_key: str, cleanset_id: str) -> str: check_uuid_well_formed(cleanset_id, "cleanset ID") res = requests.get( cli_base_url + f"/cleansets/{cleanset_id}/id_column", - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) id_column: str = res.json()["id_column"] @@ -334,7 +304,7 @@ def get_dataset_of_project(api_key: str, project_id: str) -> str: check_uuid_well_formed(project_id, "project ID") res = requests.get( cli_base_url + f"/projects/{project_id}/dataset", - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) dataset_id: str = res.json()["dataset_id"] @@ -345,7 +315,7 @@ def get_dataset_schema(api_key: str, dataset_id: str) -> JSONDict: check_uuid_well_formed(dataset_id, "dataset ID") res = requests.get( cli_base_url + f"/datasets/{dataset_id}/schema", - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) schema: JSONDict = res.json()["schema"] @@ -357,7 +327,7 @@ def get_dataset_details(api_key: str, dataset_id: str, task_type: Optional[str]) res = requests.get( project_base_url + f"/dataset_details/{dataset_id}", params=dict(tasktype=task_type), - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) dataset_details: JSONDict = res.json() @@ -368,7 +338,7 @@ def check_column_diversity(api_key: str, dataset_id: str, column_name: str) -> J check_uuid_well_formed(dataset_id, "dataset ID") res = requests.get( dataset_base_url + f"/diversity/{dataset_id}/{column_name}", - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) column_diversity: JSONDict = res.json() @@ -379,7 +349,7 @@ def is_valid_multilabel_column(api_key: str, dataset_id: str, column_name: str) check_uuid_well_formed(dataset_id, "dataset ID") res = requests.get( dataset_base_url + f"/check_valid_multilabel/{dataset_id}/{column_name}", - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) multilabel_column: JSONDict = res.json() @@ -410,7 +380,7 @@ def clean_dataset( ) res = requests.post( project_base_url + f"/clean", - headers=_construct_headers(api_key), + headers=construct_headers(api_key), json=request_json, ) handle_api_error(res) @@ -422,7 +392,7 @@ def get_latest_cleanset_id(api_key: str, project_id: str) -> str: check_uuid_well_formed(project_id, "project ID") res = requests.get( cleanset_base_url + f"/project/{project_id}/latest_cleanset_id", - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) cleanset_id = res.json()["cleanset_id"] @@ -448,7 +418,7 @@ def get_dataset_id_for_name( res = requests.get( dataset_base_url + f"/dataset_id_for_name", params=dict(dataset_name=dataset_name), - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) return cast(Optional[str], res.json().get("dataset_id", None)) @@ -458,7 +428,7 @@ def get_cleanset_status(api_key: str, cleanset_id: str) -> JSONDict: 
check_uuid_well_formed(cleanset_id, "cleanset ID") res = requests.get( cleanset_base_url + f"/{cleanset_id}/status", - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) status: JSONDict = res.json() @@ -467,13 +437,13 @@ def get_cleanset_status(api_key: str, cleanset_id: str) -> JSONDict: def delete_dataset(api_key: str, dataset_id: str) -> None: check_uuid_well_formed(dataset_id, "dataset ID") - res = requests.delete(dataset_base_url + f"/{dataset_id}", headers=_construct_headers(api_key)) + res = requests.delete(dataset_base_url + f"/{dataset_id}", headers=construct_headers(api_key)) handle_api_error(res) def delete_project(api_key: str, project_id: str) -> None: check_uuid_well_formed(project_id, "project ID") - res = requests.delete(project_base_url + f"/{project_id}", headers=_construct_headers(api_key)) + res = requests.delete(project_base_url + f"/{project_id}", headers=construct_headers(api_key)) handle_api_error(res) @@ -528,7 +498,7 @@ def deploy_model(api_key: str, cleanset_id: str, model_name: str) -> str: check_uuid_well_formed(cleanset_id, "cleanset ID") res = requests.post( model_base_url, - headers=_construct_headers(api_key), + headers=construct_headers(api_key), json=dict(cleanset_id=cleanset_id, deployment_name=model_name), ) @@ -542,7 +512,7 @@ def get_deployment_status(api_key: str, model_id: str) -> str: check_uuid_well_formed(model_id, "model ID") res = requests.get( f"{model_base_url}/{model_id}", - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) deployment: JSONDict = res.json() @@ -555,7 +525,7 @@ def upload_predict_batch(api_key: str, model_id: str, batch: io.StringIO) -> str url = f"{model_base_url}/{model_id}/upload" res = requests.post( url, - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) @@ -573,7 +543,7 @@ def start_prediction(api_key: str, model_id: str, query_id: str) -> None: check_uuid_well_formed(query_id, "query ID") res = requests.post( f"{model_base_url}/{model_id}/predict/{query_id}", - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) @@ -584,7 +554,7 @@ def get_prediction_status(api_key: str, query_id: str) -> Dict[str, str]: check_uuid_well_formed(query_id, "query ID") res = requests.get( f"{model_base_url}/predict/{query_id}", - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) @@ -596,7 +566,7 @@ def get_deployed_model_info(api_key: str, model_id: str) -> Dict[str, str]: check_uuid_well_formed(model_id, "model ID") res = requests.get( f"{model_base_url}/{model_id}", - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) handle_api_error(res) @@ -672,7 +642,7 @@ async def tlm_prompt( res = await client_session.post( f"{tlm_base_url}/prompt", json=dict(prompt=prompt, quality=quality_preset, options=options or {}), - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) res_json = await res.json() @@ -733,7 +703,7 @@ async def tlm_get_confidence_score( quality=quality_preset, options=options or {}, ), - headers=_construct_headers(api_key), + headers=construct_headers(api_key), ) res_json = await res.json() diff --git a/cleanlab_studio/internal/api/api_helper.py b/cleanlab_studio/internal/api/api_helper.py index a531e5e2..2864657f 100644 --- a/cleanlab_studio/internal/api/api_helper.py +++ b/cleanlab_studio/internal/api/api_helper.py @@ -1,6 +1,22 @@ 
import uuid +from typing import List, Optional, TypedDict -from cleanlab_studio.errors import InvalidUUIDError +import requests + +from cleanlab_studio.errors import ( + APIError, + InvalidProjectConfiguration, + InvalidUUIDError, +) +from cleanlab_studio.internal.types import JSONDict + + +class UploadPart(TypedDict): + ETag: str + PartNumber: int + + +UploadParts = List[UploadPart] def check_uuid_well_formed(uuid_string: str, id_name: str) -> None: @@ -10,3 +26,37 @@ def check_uuid_well_formed(uuid_string: str, id_name: str) -> None: raise InvalidUUIDError( f"{uuid_string} is not a well-formed {id_name}, please double check and try again." ) + + +def construct_headers( + api_key: Optional[str], content_type: Optional[str] = "application/json" +) -> JSONDict: + retval = dict() + if api_key: + retval["Authorization"] = f"bearer {api_key}" + if content_type: + retval["Content-Type"] = content_type + retval["Client-Type"] = "python-api" + return retval + + +def handle_api_error(res: requests.Response) -> None: + handle_api_error_from_json(res.json(), res.status_code) + + +def handle_api_error_from_json(res_json: JSONDict, status_code: Optional[int] = None) -> None: + if "code" in res_json and "description" in res_json: # AuthError or UserQuotaError format + if res_json["code"] == "user_soft_quota_exceeded": + pass # soft quota limit is going away soon, so ignore it + else: + raise APIError(res_json["description"]) + + if res_json.get("error", None) is not None: + error = res_json["error"] + if ( + status_code == 422 + and isinstance(error, dict) + and error.get("code", None) == "UNSUPPORTED_PROJECT_CONFIGURATION" + ): + raise InvalidProjectConfiguration(error["description"]) + raise APIError(res_json["error"]) diff --git a/cleanlab_studio/internal/api/beta_api.py b/cleanlab_studio/internal/api/beta_api.py new file mode 100644 index 00000000..311ac9be --- /dev/null +++ b/cleanlab_studio/internal/api/beta_api.py @@ -0,0 +1,95 @@ +from typing import Any, Dict, List + +import requests + +from .api import API_BASE_URL, construct_headers +from .api_helper import JSONDict, UploadParts, handle_api_error + +experimental_jobs_base_url = f"{API_BASE_URL}/v0/experimental_jobs" + + +def initialize_upload( + api_key: str, filename: str, file_type: str, file_size: int +) -> Dict[str, Any]: + url = f"{experimental_jobs_base_url}/upload/initialize" + headers = construct_headers(api_key) + data = { + "filename": filename, + "file_type": file_type, + "size_in_bytes": file_size, + } + resp = requests.post(url, headers=headers, json=data) + resp.raise_for_status() + return resp.json() + + +def complete_upload(api_key: str, dataset_id: str, upload_parts: UploadParts) -> JSONDict: + url = f"{experimental_jobs_base_url}/upload/complete" + headers = construct_headers(api_key) + data = { + "dataset_id": dataset_id, + "upload_parts": upload_parts, + } + resp = requests.post(url, headers=headers, json=data) + handle_api_error(resp) + return resp.json() + + +def get_dataset(api_key: str, dataset_id: str) -> JSONDict: + url = f"{experimental_jobs_base_url}/datasets/{dataset_id}" + headers = construct_headers(api_key) + resp = requests.get(url, headers=headers) + handle_api_error(resp) + return resp.json() + + +def run_job(api_key: str, dataset_id: str, job_definition_name: str) -> JSONDict: + url = f"{experimental_jobs_base_url}/run" + headers = construct_headers(api_key) + data = { + "dataset_id": dataset_id, + "job_definition_name": job_definition_name, + } + resp = requests.post(url, headers=headers, json=data) + 
handle_api_error(resp) + return resp.json() + + +def get_job(api_key: str, job_id: str) -> JSONDict: + url = f"{experimental_jobs_base_url}/{job_id}" + headers = construct_headers(api_key) + resp = requests.get(url, headers=headers) + handle_api_error(resp) + return resp.json() + + +def get_job_status(api_key: str, job_id: str) -> JSONDict: + url = f"{experimental_jobs_base_url}/{job_id}/status" + headers = construct_headers(api_key) + resp = requests.get(url, headers=headers) + resp.raise_for_status() + return resp.json() + + +def get_results(api_key: str, job_id: str) -> JSONDict: + url = f"{experimental_jobs_base_url}/{job_id}/results" + headers = construct_headers(api_key) + resp = requests.get(url, headers=headers) + resp.raise_for_status() + return resp.json() + + +def list_datasets(api_key: str) -> List[JSONDict]: + url = f"{experimental_jobs_base_url}/datasets" + headers = construct_headers(api_key) + resp = requests.get(url, headers=headers) + handle_api_error(resp) + return resp.json()["datasets"] + + +def list_jobs(api_key: str) -> List[JSONDict]: + url = f"{experimental_jobs_base_url}/jobs" + headers = construct_headers(api_key) + resp = requests.get(url, headers=headers) + handle_api_error(resp) + return resp.json()["jobs"] diff --git a/cleanlab_studio/internal/studio_base.py b/cleanlab_studio/internal/studio_base.py new file mode 100644 index 00000000..2d6c6d2b --- /dev/null +++ b/cleanlab_studio/internal/studio_base.py @@ -0,0 +1,37 @@ +from aiohttp_retry import Optional + +from cleanlab_studio.errors import MissingAPIKeyError, VersionError +from cleanlab_studio.internal.api import api +from cleanlab_studio.internal.settings import CleanlabSettings + + +class StudioBase: + _api_key: str + + def __init__(self, api_key: Optional[str]): + """ + Creates a Cleanlab Studio client. + + Args: + api_key: You can find your API key on your [account page](https://app.cleanlab.ai/account) in Cleanlab Studio. Instead of specifying the API key here, you can also log in with `cleanlab login` on the command-line. + + """ + if not api.is_valid_client_version(): + raise VersionError( + "CLI is out of date and must be updated. Run 'pip install --upgrade cleanlab-studio'." 
+ ) + if api_key is None: + try: + api_key = CleanlabSettings.load().api_key + if api_key is None: + raise ValueError + except (FileNotFoundError, KeyError, ValueError): + raise MissingAPIKeyError( + "No API key found; either specify API key or log in with 'cleanlab login' first" + ) + if not api.validate_api_key(api_key): + raise ValueError( + f"Invalid API key, please check if it is properly specified: {api_key}" + ) + + self._api_key = api_key diff --git a/cleanlab_studio/internal/upload_helpers.py b/cleanlab_studio/internal/upload_helpers.py index 2a70190d..5cade0c4 100644 --- a/cleanlab_studio/internal/upload_helpers.py +++ b/cleanlab_studio/internal/upload_helpers.py @@ -2,17 +2,19 @@ import functools import json from typing import Any, Dict, List, Optional -from tqdm import tqdm import aiohttp -from multidict import CIMultiDictProxy import requests +from multidict import CIMultiDictProxy from requests.adapters import HTTPAdapter, Retry +from tqdm import tqdm + +from cleanlab_studio.errors import InvalidSchemaTypeError from .api import api +from .api.api_helper import UploadParts from .dataset_source import DatasetSource from .types import JSONDict, SchemaOverride -from cleanlab_studio.errors import InvalidSchemaTypeError def upload_dataset( @@ -64,7 +66,7 @@ async def upload_file_parts_async( def upload_file_parts( dataset_source: DatasetSource, part_sizes: List[int], presigned_posts: List[str] -) -> List[JSONDict]: +) -> UploadParts: session = requests.Session() session.mount("https://", adapter=HTTPAdapter(max_retries=Retry(total=3, backoff_factor=1))) diff --git a/cleanlab_studio/studio/studio.py b/cleanlab_studio/studio/studio.py index 6f41dd7f..788bfeec 100644 --- a/cleanlab_studio/studio/studio.py +++ b/cleanlab_studio/studio/studio.py @@ -2,30 +2,29 @@ Python API for Cleanlab Studio. """ -from typing import Any, List, Literal, Optional, Union -from types import FunctionType import warnings +from types import FunctionType +from typing import Any, List, Literal, Optional, Union import numpy as np import numpy.typing as npt import pandas as pd -from . import inference -from . import trustworthy_language_model -from cleanlab_studio.utils import tlm_hybrid -from cleanlab_studio.errors import CleansetError +from cleanlab_studio.errors import CleansetError, InvalidDatasetError from cleanlab_studio.internal import clean_helpers, deploy_helpers, upload_helpers from cleanlab_studio.internal.api import api +from cleanlab_studio.internal.studio_base import StudioBase +from cleanlab_studio.internal.types import SchemaOverride, TLMQualityPreset from cleanlab_studio.internal.util import ( - init_dataset_source, - telemetry, + apply_corrections_pd_df, apply_corrections_snowpark_df, apply_corrections_spark_df, - apply_corrections_pd_df, + init_dataset_source, + telemetry, ) -from cleanlab_studio.internal.settings import CleanlabSettings -from cleanlab_studio.internal.types import SchemaOverride, TLMQualityPreset -from cleanlab_studio.errors import VersionError, MissingAPIKeyError, InvalidDatasetError +from cleanlab_studio.utils import tlm_hybrid + +from . import inference, trustworthy_language_model _snowflake_exists = api.snowflake_exists if _snowflake_exists: @@ -36,36 +35,14 @@ import pyspark.sql -class Studio: - _api_key: str - - def __init__(self, api_key: Optional[str]): +class Studio(StudioBase): + def __init__(self, api_key: str) -> None: """ Creates a Cleanlab Studio client. 
- Args: api_key: You can find your API key on your [account page](https://app.cleanlab.ai/account) in Cleanlab Studio. Instead of specifying the API key here, you can also log in with `cleanlab login` on the command-line. - """ - if not api.is_valid_client_version(): - raise VersionError( - "CLI is out of date and must be updated. Run 'pip install --upgrade cleanlab-studio'." - ) - if api_key is None: - try: - api_key = CleanlabSettings.load().api_key - if api_key is None: - raise ValueError - except (FileNotFoundError, KeyError, ValueError): - raise MissingAPIKeyError( - "No API key found; either specify API key or log in with 'cleanlab login' first" - ) - if not api.validate_api_key(api_key): - raise ValueError( - f"Invalid API key, please check if it is properly specified: {api_key}" - ) - - self._api_key = api_key + super().__init__(api_key) def upload_dataset( self, diff --git a/cleanlab_studio/studio_beta/__init__.py b/cleanlab_studio/studio_beta/__init__.py new file mode 100644 index 00000000..18721a11 --- /dev/null +++ b/cleanlab_studio/studio_beta/__init__.py @@ -0,0 +1 @@ +from cleanlab_studio.studio_beta.studio_beta import StudioBeta as StudioBeta diff --git a/cleanlab_studio/studio_beta/beta_dataset.py b/cleanlab_studio/studio_beta/beta_dataset.py new file mode 100644 index 00000000..34d3c393 --- /dev/null +++ b/cleanlab_studio/studio_beta/beta_dataset.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +import pathlib +from dataclasses import dataclass +from typing import List + +from cleanlab_studio.internal.api.beta_api import ( + complete_upload, + get_dataset, + initialize_upload, + list_datasets, +) +from cleanlab_studio.internal.dataset_source import FilepathDatasetSource +from cleanlab_studio.internal.upload_helpers import upload_file_parts + + +@dataclass +class BetaDataset: + id: str + filename: str + upload_complete: bool + upload_date: int + + @classmethod + def from_id(cls, api_key: str, dataset_id: str) -> "BetaDataset": + dataset = get_dataset(api_key, dataset_id) + return cls( + id=dataset_id, + filename=dataset["filename"], + upload_complete=dataset["complete"], + upload_date=dataset["upload_date"], + ) + + @classmethod + def from_filepath(cls, api_key: str, filepath: str) -> "BetaDataset": + dataset_source = FilepathDatasetSource(filepath=pathlib.Path(filepath)) + initialize_response = initialize_upload( + api_key, + dataset_source.get_filename(), + dataset_source.file_type, + dataset_source.file_size, + ) + dataset_id = initialize_response["id"] + part_sizes = initialize_response["part_sizes"] + presigned_posts = initialize_response["presigned_posts"] + + # TODO: upload file parts + upload_parts = upload_file_parts(dataset_source, part_sizes, presigned_posts) + dataset = complete_upload(api_key, dataset_id, upload_parts) + return cls( + id=dataset_id, + filename=dataset["filename"], + upload_complete=dataset["complete"], + upload_date=dataset["upload_date"], + ) + + @classmethod + def list(cls, api_key: str) -> List[BetaDataset]: + datasets = list_datasets(api_key) + return [ + cls( + id=dataset["id"], + filename=dataset["filename"], + upload_complete=dataset["complete"], + upload_date=dataset["upload_date"], + ) + for dataset in datasets + ] diff --git a/cleanlab_studio/studio_beta/beta_job.py b/cleanlab_studio/studio_beta/beta_job.py new file mode 100644 index 00000000..3a271dad --- /dev/null +++ b/cleanlab_studio/studio_beta/beta_job.py @@ -0,0 +1,157 @@ +from __future__ import annotations + +import enum +import itertools +import pathlib +import time 
+from dataclasses import dataclass +from typing import Optional + +import requests +from tqdm import tqdm + +from cleanlab_studio.errors import BetaJobError, DownloadResultsError +from cleanlab_studio.internal.api.beta_api import ( + get_job, + get_job_status, + get_results, + list_jobs, + run_job, +) + + +class JobStatus(enum.Enum): + CREATED = 0 + RUNNING = 1 + READY = 2 + FAILED = -1 + + @classmethod + def from_name(cls, name: str) -> "JobStatus": + return cls[name.upper()] + + +@dataclass +class BetaJob: + id: str + status: JobStatus + dataset_id: str + job_definition_name: str + created_at: int + _api_key: str + + @classmethod + def from_id(cls, api_key: str, job_id: str) -> "BetaJob": + """Loads an existing job by ID. + Args: + api_key: Your API key. + job_id: The ID of the job to load. + Returns: + The job object. + """ + job_resp = get_job(api_key, job_id) + job = cls( + _api_key=api_key, + id=job_resp["id"], + dataset_id=job_resp["dataset_id"], + job_definition_name=job_resp["job_definition_name"], + status=JobStatus.from_name(job_resp["status"]), + created_at=job_resp["created_at"], + ) + return job + + @classmethod + def run(cls, api_key: str, dataset_id: str, job_definition_name: str) -> "BetaJob": + """Creates and runs a new job with the given dataset and job definition. + + Args: + api_key: Your API key. + dataset_id: The ID of the dataset to run the job on. + job_definition_name: The name of the job definition to run. + """ + job_resp = run_job(api_key, dataset_id, job_definition_name) + job = cls( + _api_key=api_key, + id=job_resp["id"], + dataset_id=dataset_id, + job_definition_name=job_definition_name, + status=JobStatus.from_name(job_resp["status"]), + created_at=job_resp["created_at"], + ) + return job + + def wait_until_ready(self, timeout: Optional[int] = None) -> None: + """Blocks until a job is ready or the timeout is reached. + + Args: + timeout (Optional[float], optional): timeout for polling, in seconds. Defaults to None. 
+ + Raises: + TimeoutError: if job is not ready by end of timeout + BetaJobError: if job fails + """ + start_time = time.time() + res = get_job_status(self._api_key, self.id) + self.status = JobStatus.from_name(res["status"]) + spinner = itertools.cycle("|/-\\") + + with tqdm( + total=JobStatus.READY.value, + desc="Job Progress: \\", + bar_format="{desc} {postfix}", + ) as pbar: + while self.status != JobStatus.READY and self.status != JobStatus.FAILED: + pbar.set_postfix_str(self.status.name.capitalize()) + pbar.update(int(self.status.value) - pbar.n) + + if timeout is not None and time.time() - start_time > timeout: + raise TimeoutError("Result not ready before timeout") + + for _ in range(50): + time.sleep(0.1) + pbar.set_description_str(f"Job Progress: {next(spinner)}") + + res = get_job_status(self._api_key, self.id) + self.status = JobStatus.from_name(res["status"]) + + if self.status == JobStatus.READY: + pbar.update(pbar.total - pbar.n) + pbar.set_postfix_str(self.status.name.capitalize()) + return + + if self.status == JobStatus.FAILED: + pbar.set_postfix_str(self.status.name.capitalize()) + raise BetaJobError(f"Experimental job {self.id} failed to complete") + + def download_results(self, output_filepath: str) -> None: + output_path = pathlib.Path(output_filepath) + if self.status != JobStatus.READY: + raise BetaJobError("Job must be ready to download results") + + if self.status == JobStatus.FAILED: + raise BetaJobError("Job failed, cannot download results") + + results = get_results(self._api_key, self.id) + if output_path.suffix != results["result_file_type"]: + raise DownloadResultsError( + f"Output file extension does not match result file type {results['result_file_type']}" + ) + + resp = requests.get(results["result_url"]) + resp.raise_for_status() + output_path.write_bytes(resp.content) + + @classmethod + def list(cls, api_key: str) -> None: + jobs = list_jobs(api_key) + return [ + cls( + _api_key=api_key, + id=job["id"], + dataset_id=job["dataset_id"], + job_definition_name=job["job_definition_name"], + status=JobStatus.from_name(job["status"]), + created_at=job["created_at"], + ) + for job in jobs + ] diff --git a/cleanlab_studio/studio_beta/studio_beta.py b/cleanlab_studio/studio_beta/studio_beta.py new file mode 100644 index 00000000..85db729a --- /dev/null +++ b/cleanlab_studio/studio_beta/studio_beta.py @@ -0,0 +1,64 @@ +from typing import List + +from cleanlab_studio.internal.studio_base import StudioBase +from cleanlab_studio.studio_beta.beta_dataset import BetaDataset +from cleanlab_studio.studio_beta.beta_job import BetaJob + + +class StudioBeta(StudioBase): + def __init__(self, api_key: str): + """ + Creates a client to interact with the Cleanlab Studio Beta API. + Args: + api_key: You can find your API key on your [account page](https://app.cleanlab.ai/account) in Cleanlab Studio. Instead of specifying the API key here, you can also log in with `cleanlab login` on the command-line. + """ + super().__init__(api_key) + + def upload_dataset( + self, + filepath: str, + ) -> BetaDataset: + """Uploads a dataset from the given filepath for use in the Cleanlab Studio Beta API. + Args: + filepath: The path to the dataset file. + + Returns: + The uploaded dataset object. You can use this object to obtain the ID of the uploaded dataset. + Use this ID to run jobs on the dataset. 
+ """ + return BetaDataset.from_filepath(self._api_key, filepath) + + def run_job(self, dataset_id: str, job_definition_name: str) -> BetaJob: + """Runs a Cleanlab Studio Beta job with the given dataset and job definition. + Args: + dataset_id: The ID of the dataset to run the job on. + job_definition_name: The name of the job definition to run. + + Returns: + The object representing the job. You can use this object to check the status of the job and download the results. + """ + return BetaJob.run(self._api_key, dataset_id, job_definition_name) + + def download_results(self, job_id: str, output_filename: str) -> None: + """Downloads the results of an experimental job to the given output filename. + Args: + job_id: The ID of the job to download the results for. + output_filename: The path to save the downloaded results to. + """ + BetaJob.from_id(self._api_key, job_id).download_results(output_filename) + + def list_datasets(self) -> List[BetaDataset]: + """Lists all datasets you have uploaded through the Beta API. + + Returns: + A list of all the datasets you have uploaded through the Cleanlab Studio Beta API. + """ + return BetaDataset.list(self._api_key) + + def list_jobs(self) -> List[BetaJob]: + """Lists all jobs you have run through the Beta API. + + Returns: + A list of all the jobs you have run through the Cleanlab Studio Beta API. + """ + return BetaJob.list(self._api_key) From ac216fd0723b980b9e85d1c2fd88c256e390eefe Mon Sep 17 00:00:00 2001 From: Angela Date: Wed, 31 Jul 2024 23:23:25 -0700 Subject: [PATCH 2/6] mypy --- cleanlab_studio/internal/api/api.py | 3 ++- cleanlab_studio/studio_beta/beta_job.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cleanlab_studio/internal/api/api.py b/cleanlab_studio/internal/api/api.py index 24011763..37cf0983 100644 --- a/cleanlab_studio/internal/api/api.py +++ b/cleanlab_studio/internal/api/api.py @@ -41,6 +41,7 @@ from cleanlab_studio.errors import NotInstalledError from cleanlab_studio.internal.api.api_helper import ( + UploadParts, check_uuid_well_formed, construct_headers, handle_api_error, @@ -133,7 +134,7 @@ def initialize_upload( return upload_id, part_sizes, presigned_posts -def complete_file_upload(api_key: str, upload_id: str, upload_parts: List[JSONDict]) -> None: +def complete_file_upload(api_key: str, upload_id: str, upload_parts: UploadParts) -> None: check_uuid_well_formed(upload_id, "upload ID") request_json = dict(upload_id=upload_id, upload_parts=upload_parts) res = requests.post( diff --git a/cleanlab_studio/studio_beta/beta_job.py b/cleanlab_studio/studio_beta/beta_job.py index 3a271dad..b95195ba 100644 --- a/cleanlab_studio/studio_beta/beta_job.py +++ b/cleanlab_studio/studio_beta/beta_job.py @@ -5,7 +5,7 @@ import pathlib import time from dataclasses import dataclass -from typing import Optional +from typing import List, Optional import requests from tqdm import tqdm @@ -142,7 +142,7 @@ def download_results(self, output_filepath: str) -> None: output_path.write_bytes(resp.content) @classmethod - def list(cls, api_key: str) -> None: + def list(cls, api_key: str) -> List[BetaJob]: jobs = list_jobs(api_key) return [ cls( From 655870b2946353a034b0ed9ccf6c642823e1babc Mon Sep 17 00:00:00 2001 From: Angela Date: Thu, 1 Aug 2024 10:31:51 -0700 Subject: [PATCH 3/6] add types-requests dev requirement --- requirements_dev.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements_dev.txt b/requirements_dev.txt index 45e64956..d9a9c809 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ 
-2,3 +2,4 @@ black pre-commit twine mypy +types-requests From 9cd16471eb542e75fbbeac9b3e18eddac751c34f Mon Sep 17 00:00:00 2001 From: Angela Date: Thu, 1 Aug 2024 11:12:41 -0700 Subject: [PATCH 4/6] mypy --- requirements_dev.txt | 1 - setup.py | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index d9a9c809..45e64956 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -2,4 +2,3 @@ black pre-commit twine mypy -types-requests diff --git a/setup.py b/setup.py index 789bf2cc..9bac45b0 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from os import path -from setuptools import setup, find_packages +from setuptools import find_packages, setup here = path.abspath(path.dirname(__file__)) with open(path.join(here, "README.md"), encoding="utf-8") as f: @@ -46,6 +46,7 @@ python_requires=">=3.8", install_requires=[ "aiohttp>=3.8.1", + "aiohttp-retry>=2.4.0", "Click>=8.1.0,<=8.1.3", "colorama>=0.4.4", "nest_asyncio>=1.5.0", From 1570a619a1965dd705e008ef3d32c8a7b74e0a76 Mon Sep 17 00:00:00 2001 From: Angela Date: Thu, 1 Aug 2024 12:31:47 -0700 Subject: [PATCH 5/6] mypy --- cleanlab_studio/internal/api/beta_api.py | 30 ++++++++++++------------ cleanlab_studio/internal/studio_base.py | 2 +- cleanlab_studio/studio_beta/beta_job.py | 5 ++-- setup.py | 1 - 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/cleanlab_studio/internal/api/beta_api.py b/cleanlab_studio/internal/api/beta_api.py index 311ac9be..d88821e5 100644 --- a/cleanlab_studio/internal/api/beta_api.py +++ b/cleanlab_studio/internal/api/beta_api.py @@ -1,16 +1,16 @@ -from typing import Any, Dict, List +from typing import List, cast import requests -from .api import API_BASE_URL, construct_headers -from .api_helper import JSONDict, UploadParts, handle_api_error +from cleanlab_studio.internal.types import JSONDict + +from .api import API_BASE_URL +from .api_helper import UploadParts, construct_headers, handle_api_error experimental_jobs_base_url = f"{API_BASE_URL}/v0/experimental_jobs" -def initialize_upload( - api_key: str, filename: str, file_type: str, file_size: int -) -> Dict[str, Any]: +def initialize_upload(api_key: str, filename: str, file_type: str, file_size: int) -> JSONDict: url = f"{experimental_jobs_base_url}/upload/initialize" headers = construct_headers(api_key) data = { @@ -20,7 +20,7 @@ def initialize_upload( } resp = requests.post(url, headers=headers, json=data) resp.raise_for_status() - return resp.json() + return cast(JSONDict, resp.json()) def complete_upload(api_key: str, dataset_id: str, upload_parts: UploadParts) -> JSONDict: @@ -32,7 +32,7 @@ def complete_upload(api_key: str, dataset_id: str, upload_parts: UploadParts) -> } resp = requests.post(url, headers=headers, json=data) handle_api_error(resp) - return resp.json() + return cast(JSONDict, resp.json()) def get_dataset(api_key: str, dataset_id: str) -> JSONDict: @@ -40,7 +40,7 @@ def get_dataset(api_key: str, dataset_id: str) -> JSONDict: headers = construct_headers(api_key) resp = requests.get(url, headers=headers) handle_api_error(resp) - return resp.json() + return cast(JSONDict, resp.json()) def run_job(api_key: str, dataset_id: str, job_definition_name: str) -> JSONDict: @@ -52,7 +52,7 @@ def run_job(api_key: str, dataset_id: str, job_definition_name: str) -> JSONDict } resp = requests.post(url, headers=headers, json=data) handle_api_error(resp) - return resp.json() + return cast(JSONDict, resp.json()) def get_job(api_key: str, job_id: str) -> JSONDict: @@ -60,7 +60,7 @@ 
def get_job(api_key: str, job_id: str) -> JSONDict: headers = construct_headers(api_key) resp = requests.get(url, headers=headers) handle_api_error(resp) - return resp.json() + return cast(JSONDict, resp.json()) def get_job_status(api_key: str, job_id: str) -> JSONDict: @@ -68,7 +68,7 @@ def get_job_status(api_key: str, job_id: str) -> JSONDict: headers = construct_headers(api_key) resp = requests.get(url, headers=headers) resp.raise_for_status() - return resp.json() + return cast(JSONDict, resp.json()) def get_results(api_key: str, job_id: str) -> JSONDict: @@ -76,7 +76,7 @@ def get_results(api_key: str, job_id: str) -> JSONDict: headers = construct_headers(api_key) resp = requests.get(url, headers=headers) resp.raise_for_status() - return resp.json() + return cast(JSONDict, resp.json()) def list_datasets(api_key: str) -> List[JSONDict]: @@ -84,7 +84,7 @@ def list_datasets(api_key: str) -> List[JSONDict]: headers = construct_headers(api_key) resp = requests.get(url, headers=headers) handle_api_error(resp) - return resp.json()["datasets"] + return cast(List[JSONDict], resp.json()["datasets"]) def list_jobs(api_key: str) -> List[JSONDict]: @@ -92,4 +92,4 @@ def list_jobs(api_key: str) -> List[JSONDict]: headers = construct_headers(api_key) resp = requests.get(url, headers=headers) handle_api_error(resp) - return resp.json()["jobs"] + return cast(List[JSONDict], resp.json()["jobs"]) diff --git a/cleanlab_studio/internal/studio_base.py b/cleanlab_studio/internal/studio_base.py index 2d6c6d2b..adb5a5e1 100644 --- a/cleanlab_studio/internal/studio_base.py +++ b/cleanlab_studio/internal/studio_base.py @@ -1,4 +1,4 @@ -from aiohttp_retry import Optional +from typing import Optional from cleanlab_studio.errors import MissingAPIKeyError, VersionError from cleanlab_studio.internal.api import api diff --git a/cleanlab_studio/studio_beta/beta_job.py b/cleanlab_studio/studio_beta/beta_job.py index b95195ba..7931afe2 100644 --- a/cleanlab_studio/studio_beta/beta_job.py +++ b/cleanlab_studio/studio_beta/beta_job.py @@ -125,12 +125,13 @@ def wait_until_ready(self, timeout: Optional[int] = None) -> None: def download_results(self, output_filepath: str) -> None: output_path = pathlib.Path(output_filepath) - if self.status != JobStatus.READY: - raise BetaJobError("Job must be ready to download results") if self.status == JobStatus.FAILED: raise BetaJobError("Job failed, cannot download results") + if self.status != JobStatus.READY: + raise BetaJobError("Job must be ready to download results") + results = get_results(self._api_key, self.id) if output_path.suffix != results["result_file_type"]: raise DownloadResultsError( diff --git a/setup.py b/setup.py index 9bac45b0..794a3f9a 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,6 @@ python_requires=">=3.8", install_requires=[ "aiohttp>=3.8.1", - "aiohttp-retry>=2.4.0", "Click>=8.1.0,<=8.1.3", "colorama>=0.4.4", "nest_asyncio>=1.5.0", From aa0948b63a322bdd9c5779174035d885b8846a32 Mon Sep 17 00:00:00 2001 From: Angela Date: Mon, 12 Aug 2024 10:25:02 -0700 Subject: [PATCH 6/6] update docstrings --- cleanlab_studio/studio_beta/beta_dataset.py | 25 +++++++++++++++++++++ cleanlab_studio/studio_beta/beta_job.py | 25 ++++++++++++++++++--- cleanlab_studio/studio_beta/studio_beta.py | 6 ++++- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/cleanlab_studio/studio_beta/beta_dataset.py b/cleanlab_studio/studio_beta/beta_dataset.py index 34d3c393..1fe72e05 100644 --- a/cleanlab_studio/studio_beta/beta_dataset.py +++ 
b/cleanlab_studio/studio_beta/beta_dataset.py @@ -16,6 +16,8 @@ @dataclass class BetaDataset: + """Represents a dataset uploaded to the Cleanlab Studio Beta API.""" + id: str filename: str upload_complete: bool @@ -23,6 +25,14 @@ class BetaDataset: @classmethod def from_id(cls, api_key: str, dataset_id: str) -> "BetaDataset": + """Loads a dataset from the Cleanlab Studio Beta API by its ID. + + Args: + api_key: Your Cleanlab Studio API key. + dataset_id: The ID of the dataset to load. + Returns: + The dataset object if you've uploaded a dataset to the Cleanlab Studio Beta API with that ID. + """ dataset = get_dataset(api_key, dataset_id) return cls( id=dataset_id, @@ -33,6 +43,15 @@ def from_id(cls, api_key: str, dataset_id: str) -> "BetaDataset": @classmethod def from_filepath(cls, api_key: str, filepath: str) -> "BetaDataset": + """Uploads a dataset from the given filepath for use in the Cleanlab Studio Beta API. + + Args: + api_key: Your Cleanlab Studio API key. + filepath: The path to the dataset file. + Returns: + The uploaded dataset object. You can use this object to obtain the ID of the uploaded dataset. + Use this ID to run jobs on the dataset. + """ dataset_source = FilepathDatasetSource(filepath=pathlib.Path(filepath)) initialize_response = initialize_upload( api_key, @@ -56,6 +75,12 @@ def from_filepath(cls, api_key: str, filepath: str) -> "BetaDataset": @classmethod def list(cls, api_key: str) -> List[BetaDataset]: + """Lists all datasets you have uploaded through the Beta API. + + Args: + api_key: Your Cleanlab Studio API key. + Returns: + A list of all the datasets you have uploaded through the Cleanlab Studio Beta API.""" datasets = list_datasets(api_key) return [ cls( diff --git a/cleanlab_studio/studio_beta/beta_job.py b/cleanlab_studio/studio_beta/beta_job.py index 7931afe2..39ab975f 100644 --- a/cleanlab_studio/studio_beta/beta_job.py +++ b/cleanlab_studio/studio_beta/beta_job.py @@ -21,6 +21,8 @@ class JobStatus(enum.Enum): + """The status of a job in the Cleanlab Studio Beta API.""" + CREATED = 0 RUNNING = 1 READY = 2 @@ -33,6 +35,8 @@ def from_name(cls, name: str) -> "JobStatus": @dataclass class BetaJob: + """Represents a job in the Cleanlab Studio Beta API.""" + id: str status: JobStatus dataset_id: str @@ -62,7 +66,7 @@ def from_id(cls, api_key: str, job_id: str) -> "BetaJob": @classmethod def run(cls, api_key: str, dataset_id: str, job_definition_name: str) -> "BetaJob": - """Creates and runs a new job with the given dataset and job definition. + """Creates and runs a new job with the given dataset and job definition. Raises an error if the job definition name is invalid. Args: api_key: Your API key. @@ -87,8 +91,8 @@ def wait_until_ready(self, timeout: Optional[int] = None) -> None: timeout (Optional[float], optional): timeout for polling, in seconds. Defaults to None. Raises: - TimeoutError: if job is not ready by end of timeout - BetaJobError: if job fails + TimeoutError: if job is not ready by end of timeout. + BetaJobError: if job fails. """ start_time = time.time() res = get_job_status(self._api_key, self.id) @@ -124,6 +128,14 @@ def wait_until_ready(self, timeout: Optional[int] = None) -> None: raise BetaJobError(f"Experimental job {self.id} failed to complete") def download_results(self, output_filepath: str) -> None: + """Downloads the results of an experimental job to the given output filepath. + + Args: + output_filepath: The path to save the downloaded results to. + Raises: + BetaJobError: if job is not yet ready or has failed. 
+ DownloadResultsError: if output file extension does not match result file type. + """ output_path = pathlib.Path(output_filepath) if self.status == JobStatus.FAILED: @@ -144,6 +156,13 @@ def download_results(self, output_filepath: str) -> None: @classmethod def list(cls, api_key: str) -> List[BetaJob]: + """Lists all jobs you have run through the Beta API. + + Args: + api_key: Your API key. + Returns: + A list of all the jobs you have run through the Cleanlab Studio Beta API. + """ jobs = list_jobs(api_key) return [ cls( diff --git a/cleanlab_studio/studio_beta/studio_beta.py b/cleanlab_studio/studio_beta/studio_beta.py index 85db729a..9227101c 100644 --- a/cleanlab_studio/studio_beta/studio_beta.py +++ b/cleanlab_studio/studio_beta/studio_beta.py @@ -1,3 +1,7 @@ +""" +Python API for Cleanlab Studio Private Beta functionality. +""" + from typing import List from cleanlab_studio.internal.studio_base import StudioBase @@ -29,7 +33,7 @@ def upload_dataset( return BetaDataset.from_filepath(self._api_key, filepath) def run_job(self, dataset_id: str, job_definition_name: str) -> BetaJob: - """Runs a Cleanlab Studio Beta job with the given dataset and job definition. + """Runs a Cleanlab Studio Beta job with the given dataset and job definition. Raises an error if the job definition name is invalid. Args: dataset_id: The ID of the dataset to run the job on. job_definition_name: The name of the job definition to run.
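
Taken together, the series adds a small public surface: StudioBeta as the client, BetaDataset for uploads, and BetaJob for runs. Below is a minimal end-to-end sketch of how those pieces compose, based only on the signatures introduced above; the API key, file path, job definition name, and output extension are all placeholders.

    from cleanlab_studio.studio_beta import StudioBeta

    # API keys come from the Cleanlab Studio account page; this value is a placeholder.
    studio = StudioBeta("<YOUR_API_KEY>")

    # Upload a local file; BetaDataset.from_filepath performs the multipart upload
    # and returns once the server marks the upload complete.
    dataset = studio.upload_dataset("my_data.csv")  # placeholder path

    # Start a run against the uploaded dataset. The job definition name must be
    # one provisioned for your account; this one is a placeholder.
    job = studio.run_job(dataset.id, "my-job-definition")

    # Block until the job reaches READY. Raises TimeoutError if the deadline
    # passes, or BetaJobError if the job ends in FAILED.
    job.wait_until_ready(timeout=600)

    # The output extension must match the job's result_file_type; otherwise
    # download_results raises DownloadResultsError.
    job.download_results("results.csv")  # placeholder output path

Per the implementation of wait_until_ready, the client sleeps fifty times for 0.1 seconds between status calls, i.e. it polls roughly every five seconds, so a timeout of a few minutes is a reasonable starting point for small datasets.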
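
Because datasets and jobs are addressable by ID, a later process can also reattach without re-uploading. A sketch under the same assumptions (the job ID is a placeholder):

    from cleanlab_studio.studio_beta import StudioBeta

    studio = StudioBeta("<YOUR_API_KEY>")  # placeholder key

    # Enumerate prior uploads and runs.
    for dataset in studio.list_datasets():
        print(dataset.id, dataset.filename, dataset.upload_complete)
    for job in studio.list_jobs():
        print(job.id, job.status.name, job.job_definition_name)

    # Fetch results later by job ID alone; this raises BetaJobError unless
    # the job has already reached READY.
    studio.download_results("<JOB_ID>", "results.csv")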