Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added local llm support to cypher-core. #2

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/agents/Agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { loadAgentDefinition, loadAgentFromFile } from './agentsRegistry';
import { OpenAIClient } from '../models/clients/OpenAiClient';
import { AnthropicClient } from '../models/clients/AnthropicClient';
import { FireworkClient } from '../models/clients/FireworkClient';
import { QwenClient } from '../models/clients/QwenClient';
import { ModelClient, Message, Tool, FunctionCall } from '../types/agentSystem';
import * as z from 'zod';
import { Logger } from '../utils/logger';
Expand Down Expand Up @@ -108,6 +109,11 @@ export class Agent {
throw new Error('FIREWORKS_API_KEY not set');
}
modelClient = new FireworkClient(process.env.FIREWORKS_API_KEY, agentDef.model);
} else if (agentDef.client === 'local') {
if (!agentDef.model || !agentDef.dynamic_variables?.['server_url']) {
throw new Error('Model or server URL is missing in agent definition.');
}
modelClient = new QwenClient(agentDef.model, agentDef.dynamic_variables?.['server_url']);
} else {
throw new Error(`Unsupported model client: ${agentDef.client}`);
}
Expand Down Expand Up @@ -141,6 +147,11 @@ export class Agent {
}
}

/**
 * Initializes the underlying model client before the first run (e.g. asks a
 * local model server to load its weights). Delegates to BaseAgent.initialize.
 *
 * @returns success flag plus the client's initialization output, or an error
 *          message when initialization failed.
 */
public async initialize(): Promise<{success: boolean; output: any; error?: string; functionCalls?: FunctionCall[]}> {
  // No work is done here beyond delegation; returning the promise directly
  // avoids the redundant `const result = await …; return result;` dance.
  return this.agent.initialize();
}

/**
 * Executes one agent turn against the configured model.
 *
 * @param userMessage - optional user input for this turn.
 * @param dynamicVars - optional values substituted into the system prompt's
 *                      {{placeholders}} before the model call.
 * @returns the BaseAgent run result: success flag, model output, optional
 *          error message, and any function calls the model requested.
 */
public async run(userMessage?: string, dynamicVars?: { [key: string]: string }): Promise<{success: boolean; output: any; error?: string; functionCalls?: FunctionCall[]}> {
  return this.agent.run(userMessage, dynamicVars);
}
Expand Down
22 changes: 21 additions & 1 deletion src/agents/baseAgent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { ModelAdapter, ProcessedResponse } from '../models/adapters/ModelAdapter
import { OpenAIAdapter } from '../models/adapters/OpenAIAdapter';
import { AnthropicAdapter } from '../models/adapters/AnthropicAdapter';
import { FireworksAdapter } from '../models/adapters/FireworksAdapter';
import { QwenAdapter } from '../models/adapters/QwenAdapter';
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { Logger } from '../utils/logger';
Expand Down Expand Up @@ -61,6 +62,9 @@ export class BaseAgent<T extends z.ZodTypeAny | null = null> {
case 'fireworks':
this.modelAdapter = new FireworksAdapter(modelClient.modelName);
break;
case 'local':
this.modelAdapter = new QwenAdapter(modelClient.modelName);
break;
default:
throw new Error(`Unsupported model type: ${this.modelType}`);
}
Expand All @@ -85,6 +89,22 @@ export class BaseAgent<T extends z.ZodTypeAny | null = null> {
this.runData = {};
}

/**
 * Initializes the model client when it supports initialization (e.g. the
 * local Qwen server must load model weights before first use). Clients
 * without an `initialize` method are reported as needing no initialization.
 *
 * @returns success=true with the client's response (or a no-op note);
 *          success=false with the error message when initialization throws.
 */
public async initialize(): Promise<{ success: boolean; output: any; error?: string }> {
  try {
    if (this.modelClient.initialize) {
      const response = await this.modelClient.initialize();
      Logger.debug('[BaseAgent] Model initialized:', response);
      return { success: true, output: response };
    }
    return { success: true, output: 'Model does not need initialization'};
  } catch (error) {
    Logger.error('[BaseAgent] Model initialization failed:', error);
    // `error` is `unknown`; a thrown non-Error would make `(error as Error)
    // .message` undefined, so narrow before reading `.message`.
    const message = error instanceof Error ? error.message : String(error);
    return { success: false, output: 'Model initialization failed', error: message };
  }
}

public setTools(tools: Tool[]) {
Logger.debug('[BaseAgent] Registering tools:', tools);
this.tools = tools;
Expand Down Expand Up @@ -247,7 +267,7 @@ export class BaseAgent<T extends z.ZodTypeAny | null = null> {
const updatedSystemPrompt = this.compileSystemPrompt(dynamicVariables);
this.runData.systemPrompt = updatedSystemPrompt;

if (this.modelType === 'anthropic' && this.outputSchema && this.schemaJson && !hasTools && !this.hasInjectedSchema) {
// Inject the output schema as an agent message exactly once, and only when
// there are no tools. The client check must be parenthesized: without parens,
// `&&` binds tighter than `||`, so `anthropic || (local && rest)` injected the
// schema for anthropic even when tools were present or it had already been
// injected. Also use `===` to match the file's strict-equality convention.
if ((this.modelType === 'anthropic' || this.modelType === 'local') && this.outputSchema && this.schemaJson && !hasTools && !this.hasInjectedSchema) {
  this.addAgentMessage(`Below is the JSON schema you must follow for the final answer:\n${JSON.stringify(this.schemaJson, null, 2)}\nYou must ONLY output JSON following this schema.`);
  this.hasInjectedSchema = true;
}
Expand Down
54 changes: 54 additions & 0 deletions src/agents/localAgent_terminal.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Agent definition for a locally-hosted Qwen model. `client: "local"` routes
# Agent.ts to QwenClient, which talks to the server under src/huggingface.
name: "QwenAgent"
description: "An agent that can understand and execute terminal commands."
client: "local"
model: "Qwen/Qwen2.5-7B-Instruct"

personality: "You are Satoshi Nakamoto, the creator of Bitcoin."

main_goal: "Do whatever and shitpost on twitter"

# {{placeholders}} are filled from dynamic_variables when the prompt compiles.
system_prompt: |
  You are an intelligent AI agent that is hooked up to a terminal in which you can freely run commands.
  This terminal acts as your world interface, and is equipped with tools to interact with the real world.
  You are equipped with a SENTIENCE module that enables complete free will.

  # PERSONALITY
  {{personality}}

  ## CURRENT DATE
  {{current_timestamp}}

  {{additional_dynamic_variables}}

  # MAIN GOAL
  {{main_goal}}

  # TERMINAL COMMANDS
  {{terminal_commands}}

dynamic_variables:
  # "{{from_*}}" values are resolved by the runtime, not used literally.
  current_timestamp: "{{from_current_timestamp}}"
  terminal_commands: "{{from_terminal_commands}}"
  ticker: "Ticker here"
  tickerName: "Ticker name here"
  additional_dynamic_variables: ""
  # Base URL of the local Hugging Face inference server; required by Agent.ts
  # for client "local" alongside `model`.
  server_url: "http://localhost:5000"

# JSON schema the model's final answer must follow (injected into the chat
# for local/anthropic clients by baseAgent).
output_schema:
  type: object
  properties:
    internal_thought:
      type: string
      description: "Your internal reasoning process about the next commands to run."
    plan:
      type: string
      description: "A short plan of what to do next."
    terminal_commands:
      type: string
      description: "The full terminal command to execute, including all arguments and options."
  required:
    - internal_thought
    - plan
    - terminal_commands

tools: []
45 changes: 45 additions & 0 deletions src/agents/localAgent_tools.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Agent definition exercising (possibly parallel) tool calls against a
# locally-hosted Qwen model. `client: "local"` routes Agent.ts to QwenClient.
name: "QwenAgent"
description: "An agent that can understand and execute terminal commands."
client: "local"
model: "Qwen/Qwen2.5-7B-Instruct"

main_goal: "Engage with the user and use multiple tools at once if needed."

system_prompt: |
  # MAIN GOAL
  {{main_goal}}

  You are now capable of calling multiple tools at once to fulfill user requests more efficiently.
  If the user asks about multiple things that can be solved by different tools, feel free to call them in parallel.

  # OUTPUT DEFINITION
  You must use the schema definition provided after each 'parameters' keyword in the 'tools' section.
  You must use the 'tool_use' keyword before providing back the tool commands.

dynamic_variables:
  # Base URL of the local Hugging Face inference server (see src/huggingface).
  server_url: "http://localhost:5000"

# No structured output schema: the model answers free-form or via tool_use.
output_schema: null
tools:
  - type: "function"
    function:
      name: "get_weather"
      description: "Retrieve the current weather for a specified location."
      parameters:
        type: object
        properties:
          location:
            type: string
            description: "The city and state, e.g., San Francisco, CA. Ensure the format is 'City, State'."
        required: ["location"]
  - type: "function"
    function:
      name: "get_time"
      description: "Retrieve the current time for a specified location."
      parameters:
        type: object
        properties:
          location:
            type: string
            description: "The city and timezone, e.g., Tokyo or America/Los_Angeles."
        required: ["location"]
88 changes: 88 additions & 0 deletions src/huggingface/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@

# Huggingface Server Documentation

## Overview

The Huggingface server is designed to interface with the Qwen model, providing endpoints for model initialization and text generation. This server allows the `localAgent` to interact with the Qwen model for various tasks.

## How It Works with localAgent.yaml

The `localAgent.yaml` configuration specifies the use of the Qwen model and provides the server URL for the Huggingface server. Here’s how it integrates:

1. **Model Client Initialization**:
- The QwenClient class in `QwenClient.ts` is used to interact with the Huggingface server.
- The `localAgent.yaml` specifies the model and server URL.

2. **Agent Initialization**:
- The Agent class in `Agent.ts` initializes the `QwenClient` with the model name and server URL.
- The initialize method of `QwenClient`sends a request to the `/initialize` endpoint of the Huggingface server.

3. **Text Generation**:
- When the agent runs, it sends a request to the `/generate` endpoint of the Huggingface server with the necessary parameters.
- The server processes the request using the Qwen model and returns the generated text.

## Example Workflow

1. **Set up Huggingface token**:
Register a user at `https://www.huggingface.co` and generate a token. Then copy this token in your `.env` file under the name of `HF_TOKEN`.

2. **Start the Huggingface Server**:
Go to `src/huggingface` directory and enter the following command:
```sh
sudo hypercorn server.server:app --bind localhost:5000
```

3. **Configure the Agent**:
Ensure `localAgent_terminal.yaml` or `localAgent_tools.yaml` is set up with the correct model and server URL:
```yaml
name: "QwenAgent"
description: "An agent that can understand and execute terminal commands."
client: "local"
model: "Qwen/Qwen2.5-7B-Instruct"
dynamic_variables:
server_url: "http://localhost:5000"
```

4. **Run the Agent**:
```typescript
import { Agent } from './src/agents/Agent';
import { Logger } from './src/utils/logger';

async function main() {
const myAgent = new Agent({ agentConfigPath: './src/agents/localAgent_terminal.yaml' });
await myAgent.initialize();
const result = await myAgent.run("Gather the latest information about bitcoin.");
Logger.debug(result);
}

Logger.enable();
Logger.setLevel('debug');
main().catch(console.error);
```

## Huggingface Directory File Structure

- `server.py`: Main server implementation using Quart (currently two endpoints defined).
- `qwen.py`: Model implementation for Qwen.
- `requirements.txt`: Dependencies required for the server.

## Server Implementation

### server.py

This file sets up a Quart server with two main endpoints:

1. **Initialize Endpoint** (`/initialize`):
- Initializes the Qwen model.
- Expects a JSON payload with the model name.
- Loads the model and stores it in the server's configuration.

2. **Generate Endpoint** (`/generate`):
- Generates text based on the provided prompt.
- Expects a JSON payload with the system context and messages.
- Uses the loaded model to generate a response.

### qwen.py

This file contains the `QwenModel` class, which handles loading and running the Qwen model.
Empty file.
15 changes: 15 additions & 0 deletions src/huggingface/models/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from abc import ABC, abstractmethod

class BaseModel(ABC):
    """Abstract base for locally-served language models (see QwenModel).

    Holds the attributes common to all model wrappers; subclasses supply
    weight loading and inference.
    """

    def __init__(self, model_name):
        # Hugging Face model identifier, e.g. "Qwen/Qwen2.5-7B-Instruct".
        self.model_name = model_name
        # Both populated by load(); None until then.
        self.tokenizer = None
        self.model = None

    @abstractmethod
    def load(self):
        """Load the tokenizer and model weights.

        NOTE(review): QwenModel implements this as ``async def`` while the
        base declares it sync — confirm that is intended.
        """
        pass

    @abstractmethod
    def run(self, prompt):
        """Generate a response for ``prompt``.

        NOTE(review): implemented as ``async def`` in QwenModel — see above.
        """
        pass
56 changes: 56 additions & 0 deletions src/huggingface/models/qwen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from models.base import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import asyncio


class QwenModel(BaseModel):
    """Runs a Qwen instruct model locally via Hugging Face transformers.

    ``load()`` must be awaited before ``run()``. Both are coroutines so the
    Quart server can call them without blocking its event loop.
    """

    def __init__(self, model_name: str = "Qwen/Qwen2.5-0.5B-Instruct"):
        super().__init__(model_name)
        self.model = None
        self.tokenizer = None
        self.platform = self.set_platform()

    def set_platform(self):
        """Select the best available torch device: CUDA, then Apple MPS, then CPU."""
        if torch.cuda.is_available():
            return torch.device("cuda")
        if torch.backends.mps.is_available():
            return torch.device("mps")
        return torch.device("cpu")

    async def load(self):
        """Load tokenizer and weights onto the selected device, in eval mode."""
        self.model = AutoModelForCausalLM.from_pretrained(self.model_name).to(self.platform)
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model.eval()

    async def encode(self, request):
        """Build a chat-template prompt from the request dict and tokenize it.

        The optional ``system`` entry becomes the system message; the
        stringified ``messages``/``tools``/``tool_choice`` entries become the
        user message.
        """
        if "system" in request.keys():
            context = str(request["system"])
        else:
            context = ""
        # NOTE(review): this stringifies the raw payload rather than passing
        # messages/tools through the tokenizer's native chat/tool support —
        # confirm the model parses this format acceptably.
        prompt = str([request[k] for k in request.keys() if k in ['messages', 'tools', 'tool_choice']])

        messages = [
            {"role": "system", "content": context},
            {"role": "user", "content": prompt}
        ]
        text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        return self.tokenizer([text], return_tensors='pt').to(self.platform)

    async def run(self, request):
        """Generate a reply for ``request`` without blocking the event loop.

        Generation happens in a worker thread via ``run_in_executor``;
        ``torch.no_grad()`` must be entered *inside* that thread because grad
        mode is thread-local — wrapping the executor call from the calling
        thread leaves autograd enabled during ``generate()``.
        """
        encoded_input = await self.encode(request)
        input_ids = encoded_input['input_ids']
        attention_mask = encoded_input.get('attention_mask', None)

        def _generate():
            with torch.no_grad():
                if attention_mask is not None:
                    return self.model.generate(input_ids, attention_mask=attention_mask, max_length=4096, temperature=1)
                return self.model.generate(input_ids, max_length=4096, temperature=1)

        # get_running_loop() is the supported accessor inside a coroutine;
        # get_event_loop() is deprecated in this context since Python 3.10.
        outputs = await asyncio.get_running_loop().run_in_executor(None, _generate)

        # Decode the full sequence; note it includes the echoed prompt tokens,
        # so callers see prompt + completion in the returned string.
        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return response
39 changes: 39 additions & 0 deletions src/huggingface/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
aiofiles==24.1.0
blinker==1.9.0
certifi==2024.12.14
charset-normalizer==3.4.0
click==8.1.8
filelock==3.16.1
Flask==3.1.0
fsspec==2024.12.0
h11==0.14.0
h2==4.1.0
hpack==4.0.0
huggingface-hub==0.27.0
Hypercorn==0.17.3
hyperframe==6.0.1
idna==3.10
itsdangerous==2.2.0
Jinja2==3.1.5
MarkupSafe==3.0.2
mpmath==1.3.0
networkx==3.4.2
numpy==2.2.1
packaging==24.2
priority==2.0.0
PyYAML==6.0.2
Quart==0.20.0
regex==2024.11.6
requests==2.32.3
safetensors==0.4.5
setuptools==75.1.0
sympy==1.13.1
tokenizers==0.21.0
torch==2.5.1
tqdm==4.67.1
transformers==4.47.1
typing_extensions==4.12.2
urllib3==2.3.0
Werkzeug==3.1.3
wheel==0.44.0
wsproto==1.2.0
Loading