refactor: improve Ollama provider implementation #593

Open

wants to merge 11 commits into main

Changes from 8 commits
279 changes: 201 additions & 78 deletions agentops/llms/providers/ollama.py
@@ -1,13 +1,30 @@
import inspect
import sys
from typing import Optional
import json
from typing import Any, AsyncGenerator, Dict, List, Optional, Union
from dataclasses import dataclass
import asyncio
import httpx

from agentops.event import LLMEvent
from agentops.event import LLMEvent, ErrorEvent
from agentops.session import Session
from agentops.helpers import get_ISO_time, check_call_stack_for_agent_id
from .instrumented_provider import InstrumentedProvider
from agentops.singleton import singleton


@dataclass
class Choice:
message: dict = None
delta: dict = None
finish_reason: str = None
index: int = 0


@dataclass
class ChatResponse:
model: str
choices: list[Choice]


original_func = {}


@@ -16,111 +33,217 @@ class OllamaProvider(InstrumentedProvider):
original_create = None
original_create_async = None

def handle_response(self, response, kwargs, init_timestamp, session: Optional[Session] = None) -> dict:
llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs)
if session is not None:
llm_event.session_id = session.session_id

def handle_stream_chunk(chunk: dict):
message = chunk.get("message", {"role": None, "content": ""})

if chunk.get("done"):
llm_event.end_timestamp = get_ISO_time()
llm_event.model = f'ollama/{chunk.get("model")}'
llm_event.returns = chunk
llm_event.returns["message"] = llm_event.completion
llm_event.prompt = kwargs["messages"]
llm_event.agent_id = check_call_stack_for_agent_id()
self._safe_record(session, llm_event)

if llm_event.completion is None:
llm_event.completion = {
"role": message.get("role"),
"content": message.get("content", ""),
"tool_calls": None,
"function_call": None,
}
else:
llm_event.completion["content"] += message.get("content", "")

if inspect.isgenerator(response):

def generator():
for chunk in response:
handle_stream_chunk(chunk)
yield chunk

return generator()

llm_event.end_timestamp = get_ISO_time()
llm_event.model = f'ollama/{response["model"]}'
llm_event.returns = response
llm_event.agent_id = check_call_stack_for_agent_id()
llm_event.prompt = kwargs["messages"]
llm_event.completion = {
"role": response["message"].get("role"),
"content": response["message"].get("content", ""),
"tool_calls": None,
"function_call": None,
def handle_response(self, response_data, request_data, init_timestamp, session=None):
"""Handle the response from the Ollama API."""
end_timestamp = get_ISO_time()
model = request_data.get("model", "unknown")

# Extract error if present
error = None
if isinstance(response_data, dict) and "error" in response_data:
error = response_data["error"]

# Create event data
event_data = {
"model": model, # Use the raw model name from request
"params": request_data,
"returns": response_data, # Include full response data
"init_timestamp": init_timestamp,
"end_timestamp": end_timestamp,
"prompt": request_data.get("messages", []),
"prompt_tokens": None, # Ollama doesn't provide token counts
"completion_tokens": None,
"cost": None, # Ollama is free/local
}
self._safe_record(session, llm_event)
return response

if error:
event_data["completion"] = error
else:
# Extract completion from response
if isinstance(response_data, dict):
message = response_data.get("message", {})
if isinstance(message, dict):
content = message.get("content", "")
event_data["completion"] = content

# Create and emit LLM event
if session:
event = LLMEvent(**event_data)
session.record(event)

return event_data

def override(self):
"""Override Ollama methods with instrumented versions."""
self._override_chat_client()
self._override_chat()
self._override_chat_async_client()

def undo_override(self):
if original_func is not None and original_func != {}:
import ollama

ollama.chat = original_func["ollama.chat"]
ollama.Client.chat = original_func["ollama.Client.chat"]
ollama.AsyncClient.chat = original_func["ollama.AsyncClient.chat"]
import ollama

def __init__(self, client):
super().__init__(client)
if hasattr(self, "_original_chat"):
ollama.chat = self._original_chat
if hasattr(self, "_original_client_chat"):
ollama.Client.chat = self._original_client_chat
if hasattr(self, "_original_async_chat"):
ollama.AsyncClient.chat = self._original_async_chat

def __init__(self, http_client=None, client=None, model=None):
"""Initialize the Ollama provider."""
super().__init__(client=client)
self.base_url = "http://localhost:11434" # Ollama runs locally by default
self.timeout = 60.0 # Default timeout in seconds
self.model = model # Store default model

# Initialize HTTP client if not provided
if http_client is None:
self.http_client = httpx.AsyncClient(timeout=self.timeout)
else:
self.http_client = http_client

# Store original methods for restoration
self._original_chat = None
self._original_chat_client = None
self._original_chat_async_client = None

def _override_chat(self):
import ollama

original_func["ollama.chat"] = ollama.chat
self._original_chat = ollama.chat

def patched_function(*args, **kwargs):
# Call the original function with its original arguments
init_timestamp = get_ISO_time()
result = original_func["ollama.chat"](*args, **kwargs)
return self.handle_response(result, kwargs, init_timestamp, session=kwargs.get("session", None))
session = kwargs.pop("session", None)
result = self._original_chat(*args, **kwargs)
return self.handle_response(result, kwargs, init_timestamp, session=session)

# Override the original method with the patched one
ollama.chat = patched_function

Comment on lines 109 to 121

🤖 Bug Fix:

Ensure Proper Session Management in Patched Function
The recent change in the _override_chat method involves popping the session from kwargs, which can lead to potential issues if the session is needed later in the function or elsewhere in the system. This could result in unexpected behavior or bugs, especially if other parts of the code rely on the presence of session in kwargs.

Recommendations:

  • Review the necessity of popping the session: Ensure that removing the session from kwargs does not affect other parts of the function or system.
  • Consider alternative approaches: If the session is required elsewhere, consider passing it explicitly or using a different mechanism to manage it.
  • Test thoroughly: Ensure that the changes do not break existing functionality by running comprehensive tests.

By addressing these points, you can maintain the integrity of the function and prevent potential issues related to session management.

🔧 Suggested Code Diff:

 def _override_chat(self):
     import ollama
 
     self._original_chat = ollama.chat
 
     def patched_function(*args, **kwargs):
         init_timestamp = get_ISO_time()
-        session = kwargs.pop("session", None)
+        session = kwargs.get("session", None)
         result = self._original_chat(*args, **kwargs)
         return self.handle_response(result, kwargs, init_timestamp, session=session)
 
     ollama.chat = patched_function
📝 Committable Code Suggestion

‼️ Ensure you review the code suggestion before committing it to the branch. Make sure it replaces the highlighted code, contains no missing lines, and has no issues with indentation.

Suggested change (replacing the highlighted hunk above):

def _override_chat(self):
    import ollama

    self._original_chat = ollama.chat

    def patched_function(*args, **kwargs):
        init_timestamp = get_ISO_time()
        session = kwargs.get("session", None)  # Use get instead of pop to avoid removing session
        result = self._original_chat(*args, **kwargs)
        return self.handle_response(result, kwargs, init_timestamp, session=session)

    ollama.chat = patched_function
📜 Guidelines

• Use meaningful variable and function names following specific naming conventions
• Use exceptions for error handling, but avoid assert statements for critical logic
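
For context, a small self-contained sketch of the trade-off discussed in this comment. fake_ollama_chat is a hypothetical stand-in, written on the assumption that the real ollama.chat rejects unknown keyword arguments such as session:

def fake_ollama_chat(model, messages, stream=False):
    # Stand-in for ollama.chat: accepts only its documented parameters.
    return {"model": model, "message": {"role": "assistant", "content": "ok"}}


def patched_with_pop(**kwargs):
    session = kwargs.pop("session", None)       # session is removed before forwarding
    return fake_ollama_chat(**kwargs), session


def patched_with_get(**kwargs):
    session = kwargs.get("session", None)       # session stays in kwargs
    return fake_ollama_chat(**kwargs), session  # fails if a session was supplied


patched_with_pop(model="llama3.2", messages=[], session="sess-1")    # works
# patched_with_get(model="llama3.2", messages=[], session="sess-1")  # TypeError: unexpected keyword argument 'session'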


Comment on lines 109 to 121

⚠️ Potential Issue:

Ensure Safe Overriding of Core Functionality
The current change introduces a patched version of the ollama.chat function, which can lead to logical errors if not handled correctly. Overriding core functions can have significant implications, especially if the new implementation does not fully replicate the original behavior or introduces side effects.

Recommendations:

  • Replicate Original Behavior: Ensure that the patched function covers all scenarios handled by the original ollama.chat function. This includes edge cases and error handling.
  • Comprehensive Testing: Develop a suite of tests to verify the behavior of the patched function across various scenarios. This will help catch any discrepancies early.
  • Consider Dependency Injection: Instead of directly overriding the function, consider using dependency injection. This approach can help isolate changes and reduce the risk of unintended side effects.

By following these steps, you can mitigate the risks associated with overriding core functionality and ensure the system remains stable and reliable. 🛠️

🔧 Suggested Code Diff:

+    def _override_chat(self):
+        import ollama
+
+        self._original_chat = ollama.chat
+
+        def patched_function(*args, **kwargs):
+            init_timestamp = get_ISO_time()
+            session = kwargs.pop("session", None)
+            result = self._original_chat(*args, **kwargs)
+            return self.handle_response(result, kwargs, init_timestamp, session=session)
+
+        ollama.chat = patched_function
📝 Committable Code Suggestion

‼️ Ensure you review the code suggestion before committing it to the branch. Make sure it replaces the highlighted code, contains no missing lines, and has no issues with indentation.

Suggested change (replacing the highlighted hunk above):

def _override_chat(self):
    import ollama

    self._original_chat = ollama.chat

    def patched_function(*args, **kwargs):
        init_timestamp = get_ISO_time()
        session = kwargs.pop("session", None)
        try:
            result = self._original_chat(*args, **kwargs)
        except Exception as e:
            # Handle exceptions that may occur during the chat operation
            self.log_error(f"Error in chat operation: {str(e)}")
            raise
        return self.handle_response(result, kwargs, init_timestamp, session=session)

    ollama.chat = patched_function

# Additional unit tests should be written to ensure the patched function behaves as expected
# across various scenarios, including edge cases and error conditions.
📜 Guidelines

• Write unit tests for your code
• Use meaningful variable and function names following specific naming conventions
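
As a sketch of the dependency-injection alternative mentioned in this comment (illustrative only, not part of this PR; the class name is made up), the instrumentation can live on an explicitly constructed wrapper instead of the global ollama.chat:

from agentops.helpers import get_ISO_time


class InstrumentedChat:
    """Wraps any chat callable and reports each call to an AgentOps provider."""

    def __init__(self, chat_fn, provider, session=None):
        self._chat_fn = chat_fn      # e.g. ollama.chat or ollama.Client(...).chat
        self._provider = provider    # the OllamaProvider that records events
        self._session = session

    def __call__(self, *args, **kwargs):
        init_timestamp = get_ISO_time()
        result = self._chat_fn(*args, **kwargs)
        self._provider.handle_response(result, kwargs, init_timestamp, session=self._session)
        return result

Callers would then build chat = InstrumentedChat(ollama.chat, provider, session=session) and call chat(model=..., messages=...), leaving the ollama module itself untouched.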


def _override_chat_client(self):
from ollama import Client

original_func["ollama.Client.chat"] = Client.chat
self._original_client_chat = Client.chat

def patched_function(*args, **kwargs):
# Call the original function with its original arguments
def patched_function(self_client, *args, **kwargs):
init_timestamp = get_ISO_time()
result = original_func["ollama.Client.chat"](*args, **kwargs)
return self.handle_response(result, kwargs, init_timestamp, session=kwargs.get("session", None))
session = kwargs.pop("session", None)
result = self._original_client_chat(self_client, *args, **kwargs)
return self.handle_response(result, kwargs, init_timestamp, session=session)

# Override the original method with the patched one
Client.chat = patched_function

Comment on lines 122 to 134

⚠️ Potential Issue:

Ensure Robustness in Overriding Core Methods
The current change involves overriding the Client.chat method with a patched function. This approach can introduce unexpected behavior if the patched function does not fully replicate the original method's behavior. This is particularly critical as it can affect all parts of the system relying on this method, potentially leading to incorrect session handling or response processing.

Recommendations:

  • Replicate Original Behavior: Ensure that the patched function replicates all necessary behavior of the original Client.chat method. This includes handling all parameters and return values correctly.
  • Comprehensive Testing: Add comprehensive tests to verify that the new implementation handles all expected scenarios correctly, including edge cases. This will help in identifying any discrepancies introduced by the override.
  • Consider Alternative Approaches: Consider using a more controlled approach to modify behavior, such as subclassing or using dependency injection. This can minimize the risk of unintended side effects and improve maintainability.
  • Documentation: Clearly document the changes and the rationale behind overriding the method to aid future developers in understanding the modifications.

By following these steps, you can ensure that the system remains robust and reliable. 🛠️

📜 Guidelines

• Write unit tests for your code
• Use meaningful variable and function names following specific naming conventions
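
As a sketch of the subclassing alternative mentioned in this comment (illustrative only, not part of this PR; the class name is made up), instrumentation can live in a dedicated client type rather than a rebinding of Client.chat:

from ollama import Client

from agentops.helpers import get_ISO_time


class InstrumentedOllamaClient(Client):
    """An ollama.Client whose chat() also reports to an AgentOps provider."""

    def __init__(self, provider, *args, session=None, **kwargs):
        super().__init__(*args, **kwargs)
        self._provider = provider
        self._session = session

    def chat(self, *args, **kwargs):
        init_timestamp = get_ISO_time()
        result = super().chat(*args, **kwargs)
        self._provider.handle_response(result, kwargs, init_timestamp, session=self._session)
        return result

Only code that opts in by constructing an InstrumentedOllamaClient is affected, which keeps the override local and easier to test.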


def _override_chat_async_client(self):
from ollama import AsyncClient

original_func = {}
original_func["ollama.AsyncClient.chat"] = AsyncClient.chat
self._original_async_chat = AsyncClient.chat

async def patched_function(*args, **kwargs):
# Call the original function with its original arguments
async def patched_function(self_client, *args, **kwargs):
init_timestamp = get_ISO_time()
result = await original_func["ollama.AsyncClient.chat"](*args, **kwargs)
return self.handle_response(result, kwargs, init_timestamp, session=kwargs.get("session", None))
session = kwargs.pop("session", None)
result = await self._original_async_chat(self_client, *args, **kwargs)
return self.handle_response(result, kwargs, init_timestamp, session=session)

# Override the original method with the patched one
AsyncClient.chat = patched_function

Comment on lines 135 to 147

🤖 Bug Fix:

Ensure Correct Overriding of Async Chat Function
The recent changes to the _override_chat_async_client method involve altering how the original async chat function is overridden. This change is critical as it can introduce logical errors if not handled correctly.

Key Points:

  • Session Handling: Ensure that the session parameter is consistently managed. The new implementation uses kwargs.pop("session", None), which is a good practice to avoid passing unexpected parameters to the original function.
  • Functionality Replication: Verify that the new method replicates the original functionality accurately. The patched function should behave identically to the original in terms of input and output, except for the additional instrumentation.

Recommendations:

  • Testing: Conduct thorough testing to ensure that the patched function behaves as expected in all scenarios, especially focusing on session management and response handling.
  • Documentation: Update any relevant documentation to reflect the changes in the method of overriding.

By following these steps, you can ensure that the changes do not introduce any regressions or unexpected behavior. 🛠️

🔧 Suggested Code Diff:

 def _override_chat_async_client(self):
     from ollama import AsyncClient

-    original_func = {}
-    original_func["ollama.AsyncClient.chat"] = AsyncClient.chat
+    self._original_async_chat = AsyncClient.chat

-    async def patched_function(*args, **kwargs):
-        # Call the original function with its original arguments
+    async def patched_function(self_client, *args, **kwargs):
         init_timestamp = get_ISO_time()
-        result = await original_func["ollama.AsyncClient.chat"](*args, **kwargs)
-        return self.handle_response(result, kwargs, init_timestamp, session=kwargs.get("session", None))
+        session = kwargs.pop("session", None)
+        result = await self._original_async_chat(self_client, *args, **kwargs)
+        return self.handle_response(result, kwargs, init_timestamp, session=session)

     # Override the original method with the patched one
     AsyncClient.chat = patched_function
📝 Committable Code Suggestion

‼️ Ensure you review the code suggestion before committing it to the branch. Make sure it replaces the highlighted code, contains no missing lines, and has no issues with indentation.

Suggested change (replacing the highlighted hunk above):

def _override_chat_async_client(self):
    from ollama import AsyncClient

    self._original_async_chat = AsyncClient.chat

    async def patched_function(self_client, *args, **kwargs):
        init_timestamp = get_ISO_time()
        session = kwargs.pop("session", None)
        result = await self._original_async_chat(self_client, *args, **kwargs)
        return self.handle_response(result, kwargs, init_timestamp, session=session)

    # Override the original method with the patched one
    AsyncClient.chat = patched_function
📜 Guidelines

• Use type annotations to improve code clarity and maintainability
• Follow PEP 8 style guide for consistent code formatting


async def chat_completion(
self,
messages: List[Dict[str, str]],
model: Optional[str] = None,
stream: bool = False,
session: Optional[Session] = None,
**kwargs,
) -> Union[ChatResponse, AsyncGenerator[ChatResponse, None]]:
"""Send a chat completion request to the Ollama API."""
model = model or self.model
init_timestamp = get_ISO_time()

request_data = {
"model": model,
"messages": messages,
"stream": stream,
**kwargs,
}

try:
response = await self.http_client.post(
f"{self.base_url}/api/chat",
json=request_data,
)
response_data = await response.json()

# Check for error response
if "error" in response_data:
error_message = response_data["error"]
# Format error message consistently for model not found errors
if "not found" in error_message.lower():
error_message = f'model "{model}" not found'

# Record error event
if session:
error_event = ErrorEvent(details=error_message, error_type="ModelError")
session.record(error_event)
raise Exception(error_message)

if stream:
return self.stream_generator(response, request_data, init_timestamp, session)

# Record event for non-streaming response
self.handle_response(response_data, request_data, init_timestamp, session)

return ChatResponse(model=model, choices=[Choice(message=response_data["message"], finish_reason="stop")])

except Exception as e:
error_msg = str(e)
# Format error message consistently for model not found errors
if "not found" in error_msg.lower() and 'model "' not in error_msg:
error_msg = f'model "{model}" not found'

# Create error event
error_event = ErrorEvent(details=error_msg, error_type="ModelError")
if session:
session.record(error_event)
raise Exception(error_msg)

async def stream_generator(
self,
response: Any,
request_data: dict,
init_timestamp: str,
session: Optional[Session] = None,
) -> AsyncGenerator[ChatResponse, None]:
"""Generate streaming responses from the Ollama API."""
try:
current_content = ""
async for line in response.aiter_lines():
if not line:
continue

chunk = json.loads(line)
content = chunk.get("message", {}).get("content", "")
current_content += content

if chunk.get("done", False):
# Record the final event with complete response
event_data = {
"model": request_data.get("model", "unknown"), # Use raw model name
"params": request_data,
"returns": chunk,
"prompt": request_data.get("messages", []),
"completion": current_content,
"prompt_tokens": None,
"completion_tokens": None,
"cost": None,
}
if session:
session.record(LLMEvent(**event_data))

yield ChatResponse(
model=request_data.get("model", "unknown"), # Add model parameter
choices=[Choice(message={"role": "assistant", "content": content}, finish_reason=None)],
)
except Exception as e:
# Create error event with correct model information
error_event = ErrorEvent(details=str(e), error_type="ModelError")
session.record(error_event)
raise
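
For orientation, a minimal usage sketch of the chat_completion interface added in this file. It assumes a local Ollama server on the default port, that the provider can be constructed without an AgentOps client, and the model name is only an example:

import asyncio

from agentops.llms.providers.ollama import OllamaProvider


async def main():
    provider = OllamaProvider(client=None, model="llama3.2")
    response = await provider.chat_completion(
        messages=[{"role": "user", "content": "Say hello in one sentence."}],
    )
    # ChatResponse.choices[0].message mirrors Ollama's {"role": ..., "content": ...} dict
    print(response.choices[0].message["content"])


asyncio.run(main())
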
1 change: 1 addition & 0 deletions tests/integration/__init__.py
@@ -0,0 +1 @@
"""Integration tests for AgentOps providers and frameworks."""
46 changes: 46 additions & 0 deletions tests/integration/test_base.py
@@ -0,0 +1,46 @@
import pytest
import asyncio
import json
from unittest.mock import AsyncMock, MagicMock
from uuid import uuid4

from agentops.session import Session
from agentops.client import Client
from agentops.event import LLMEvent


class BaseProviderTest:
"""Base class for provider tests."""

async def async_setup_method(self, method):
"""Set up test method."""
# Initialize mock client and session
self.mock_req = AsyncMock()
client = Client()
client.configure(api_key="test-key")
config = client._config
config.http_client = self.mock_req # Set mock on config instead of session
self.session = Session(session_id=uuid4(), config=config)

async def teardown_method(self, method):
"""Clean up after test."""
if hasattr(self, "provider"):
self.provider.undo_override()

async def async_verify_events(self, session, expected_count=1):
"""Verify events were recorded."""
await asyncio.sleep(0.1) # Allow time for async event processing
create_events_requests = [req for req in self.mock_req.request_history if req.url.endswith("/v2/create_events")]
assert len(create_events_requests) >= 1, "No events were recorded"
request_body = json.loads(create_events_requests[-1].body.decode("utf-8"))
assert "session_id" in request_body, "Session ID not found in request"

async def async_verify_llm_event(self, mock_req, model=None):
"""Verify LLM event was recorded."""
await asyncio.sleep(0.1) # Allow time for async event processing
create_events_requests = [req for req in mock_req.request_history if req.url.endswith("/v2/create_events")]
assert len(create_events_requests) >= 1, "No events were recorded"
request_body = json.loads(create_events_requests[-1].body.decode("utf-8"))
assert "event_type" in request_body and request_body["event_type"] == "llms", "LLM event not found"
if model:
assert "model" in request_body and request_body["model"] == model, f"Model {model} not found in event"