AbanteAI · mentatai · Jul 14, 2024 · Jul 14, 2024 · Jul 14, 2024 · jakethekoenig
diff --git a/spice/spice.py b/spice/spice.py
@@ -230,6 +230,9 @@ def __init__(
         logging_dir: Optional[Path | str] = None,
         logging_callback: Optional[Callable[[SpiceResponse, str, str], None]] = None,
         default_temperature: Optional[float] = None,
+        max_retries: int = 0,  # Add this line
+        base_delay: float = 1.0,  # Add this line
+        max_delay: float = 32.0,  # Add this line
     ):
         """
         Creates a new Spice client.
@@ -268,6 +271,11 @@ def __init__(
         self._default_embeddings_model = embeddings_model
         self._default_temperature = default_temperature
 
+        # Initialize retry configuration parameters
+        self.max_retries = max_retries
+        self.base_delay = base_delay
+        self.max_delay = max_delay
+
         # TODO: Should we validate model aliases?
         self._model_aliases = model_aliases
 
@@ -451,7 +459,7 @@ async def get_response(
             elif i > 1 and call_args.temperature is not None:
                 call_args.temperature = max(0.5, call_args.temperature)
 
-            with client.catch_and_convert_errors():
+            with client.catch_and_convert_errors(max_retries=self.max_retries, base_delay=self.base_delay, max_delay=self.max_delay):
                 if streaming_callback is not None:
                     stream = await client.get_chat_completion_or_stream(call_args)
                     stream = cast(AsyncIterator, stream)

diff --git a/spice/wrapped_clients.py b/spice/wrapped_clients.py
@@ -3,6 +3,7 @@
 import base64
 import io
 import mimetypes
+import time
 from abc import ABC, abstractmethod
 from contextlib import contextmanager
 from pathlib import Path
@@ -119,16 +120,24 @@ def extract_text_and_tokens(self, chat_completion, call_args: SpiceCallArgs):
 
     @override
     @contextmanager
-    def catch_and_convert_errors(self):
-        # TODO: Do we catch all errors? I think we should catch APIStatusError
-        try:
-            yield
-        except openai.APIConnectionError as e:
-            raise APIConnectionError(f"OpenAI Connection Error: {e.message}") from e
-        except openai.AuthenticationError as e:
-            raise AuthenticationError(f"OpenAI Authentication Error: {e.message}") from e
-        except openai.APIStatusError as e:
-            raise APIError(f"OpenAI Status Error: {e.message}") from e
+    def catch_and_convert_errors(self, max_retries: int = 0, base_delay: float = 1.0, max_delay: float = 32.0):
+        retries = 0
+        delay = base_delay
+        while retries <= max_retries:
+            try:
+                yield
+                return
+            except openai.APIConnectionError as e:
+                if retries == max_retries:
+                    raise APIConnectionError(f"OpenAI Connection Error: {e.message}") from e
+            except openai.AuthenticationError as e:
+                raise AuthenticationError(f"OpenAI Authentication Error: {e.message}") from e
+            except openai.APIStatusError as e:
+                if retries == max_retries:
+                    raise APIError(f"OpenAI Status Error: {e.message}") from e
+            time.sleep(min(delay, max_delay))
+            delay *= 2
+            retries += 1
 
     def _get_encoding_for_model(self, model: Model | str) -> tiktoken.Encoding:
         from spice.models import Model
@@ -388,15 +397,24 @@ def extract_text_and_tokens(self, chat_completion, call_args: SpiceCallArgs):
 
     @override
     @contextmanager
-    def catch_and_convert_errors(self):
-        try:
-            yield
-        except anthropic.APIConnectionError as e:
-            raise APIConnectionError(f"Anthropic Connection Error: {e.message}") from e
-        except anthropic.AuthenticationError as e:
-            raise AuthenticationError(f"Anthropic Authentication Error: {e.message}") from e
-        except anthropic.APIStatusError as e:
-            raise APIError(f"Anthropic Status Error: {e.message}") from e
+    def catch_and_convert_errors(self, max_retries: int = 0, base_delay: float = 1.0, max_delay: float = 32.0):
+        retries = 0
+        delay = base_delay
+        while retries <= max_retries:
+            try:
+                yield
+                return
+            except anthropic.APIConnectionError as e:
+                if retries == max_retries:
+                    raise APIConnectionError(f"Anthropic Connection Error: {e.message}") from e
+            except anthropic.AuthenticationError as e:
+                raise AuthenticationError(f"Anthropic Authentication Error: {e.message}") from e
+            except anthropic.APIStatusError as e:
+                if retries == max_retries:
+                    raise APIError(f"Anthropic Status Error: {e.message}") from e
+            time.sleep(min(delay, max_delay))
+            delay *= 2
+            retries += 1
 
     # Anthropic doesn't give us a way to count tokens, so we just use OpenAI's token counting functions and multiply by a pre-determined multiplier
     class _FakeWrappedOpenAIClient(WrappedOpenAIClient):