# llm_deepseek.py
import llm
from llm.default_plugins.openai_models import Chat, Completion
from pathlib import Path
import json
import time
import httpx
from typing import Optional
from pydantic import Field

# Constants for cache timeout and API base URL
CACHE_TIMEOUT = 3600
DEEPSEEK_API_BASE = "https://api.deepseek.com/beta"


def get_deepseek_models():
    """Fetch and cache the list of available DeepSeek models."""
    return fetch_cached_json(
        url="https://api.deepseek.com/v1/models",
        path=llm.user_dir() / "deepseek_models.json",
        cache_timeout=CACHE_TIMEOUT,
    )["data"]


def get_model_ids_with_aliases(models):
    """Pair each model ID with an (initially empty) list of aliases."""
    return [(model["id"], []) for model in models]
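
# Example (illustrative; assumes the /v1/models payload shape used above,
# with hypothetical model IDs):
#
#     get_model_ids_with_aliases([{"id": "deepseek-chat"}, {"id": "deepseek-coder"}])
#     # -> [("deepseek-chat", []), ("deepseek-coder", [])]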

class DeepSeekChat(Chat):
    needs_key = "deepseek"
    key_env_var = "LLM_DEEPSEEK_KEY"

    def __init__(self, model_id, **kwargs):
        super().__init__(model_id, **kwargs)
        self.api_base = DEEPSEEK_API_BASE

    def __str__(self):
        return f"DeepSeek Chat: {self.model_id}"

    class Options(Chat.Options):
        prefill: Optional[str] = Field(
            description=(
                "Initial text for the model's response (beta feature). "
                "Uses DeepSeek's Chat Prefix Completion."
            ),
            default=None,
        )
        response_format: Optional[str] = Field(
            description="Format of the response (e.g. 'json_object').",
            default=None,
        )

    def execute(self, prompt, stream, response, conversation):
        messages = self._build_messages(conversation, prompt)
        response._prompt_json = {"messages": messages}
        kwargs = self.build_kwargs(prompt)
        max_tokens = kwargs.pop("max_tokens", 8192)
        if prompt.options.response_format:
            kwargs["response_format"] = {"type": prompt.options.response_format}
        # prefill is consumed by _build_messages; it is not an API parameter
        kwargs.pop("prefill", None)
        client = self.get_client()
        try:
            completion = client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                stream=stream,
                max_tokens=max_tokens,
                **kwargs,
            )
            if stream:
                for chunk in completion:
                    content = chunk.choices[0].delta.content
                    if content is not None:
                        yield content
                response.response_json = {"content": "".join(response._chunks)}
            else:
                # Non-streaming requests return a single completion object,
                # not an iterator of chunks
                content = completion.choices[0].message.content
                response.response_json = {"content": content}
                yield content
        except httpx.HTTPError as e:
            raise llm.ModelError(f"DeepSeek API error: {e}")

    def _build_messages(self, conversation, prompt):
        """Build the messages list for the API call."""
        messages = []
        if conversation:
            for prev_response in conversation.responses:
                messages.append(
                    {"role": "user", "content": prev_response.prompt.prompt}
                )
                messages.append(
                    {"role": "assistant", "content": prev_response.text()}
                )
        # Add the system message if provided
        if prompt.system:
            messages.append({"role": "system", "content": prompt.system})
        messages.append({"role": "user", "content": prompt.prompt})
        if prompt.options.prefill:
            # DeepSeek's beta Chat Prefix Completion: a trailing assistant
            # message with "prefix": True seeds the model's reply
            messages.append(
                {
                    "role": "assistant",
                    "content": prompt.options.prefill,
                    "prefix": True,
                }
            )
        return messages
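
# Illustrative sketch of the message list _build_messages produces for a
# follow-up prompt with a prefill (hypothetical contents; note the system
# message is appended after the history, matching the code above):
#
#     [
#         {"role": "user", "content": "Earlier question"},
#         {"role": "assistant", "content": "Earlier answer"},
#         {"role": "system", "content": "Reply tersely."},
#         {"role": "user", "content": "Current question"},
#         {"role": "assistant", "content": "Sure:", "prefix": True},
#     ]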

class DeepSeekCompletion(Completion):
    needs_key = "deepseek"
    key_env_var = "LLM_DEEPSEEK_KEY"

    def __init__(self, model_id, **kwargs):
        super().__init__(model_id, **kwargs)
        self.api_base = DEEPSEEK_API_BASE

    def __str__(self):
        return f"DeepSeek Completion: {self.model_id}"

    class Options(Completion.Options):
        prefill: Optional[str] = Field(
            description=(
                "Initial text for the model's response (beta feature). "
                "Uses DeepSeek's Completion Prefix."
            ),
            default=None,
        )
        echo: Optional[bool] = Field(
            description="Echo back the prompt in addition to the completion.",
            default=None,
        )

    def execute(self, prompt, stream, response, conversation):
        full_prompt = self._build_full_prompt(conversation, prompt)
        response._prompt_json = {"prompt": full_prompt}
        kwargs = self.build_kwargs(prompt)
        max_tokens = kwargs.pop("max_tokens", 4096)
        if prompt.options.echo:
            kwargs["echo"] = prompt.options.echo
        # prefill is consumed by _build_full_prompt; it is not an API parameter
        kwargs.pop("prefill", None)
        client = self.get_client()
        try:
            completion = client.completions.create(
                model=self.model_name,
                prompt=full_prompt,
                stream=stream,
                max_tokens=max_tokens,
                **kwargs,
            )
            if stream:
                for chunk in completion:
                    text = chunk.choices[0].text
                    if text:
                        yield text
                response.response_json = {"content": "".join(response._chunks)}
            else:
                # Non-streaming requests return a single completion object,
                # not an iterator of chunks
                text = completion.choices[0].text
                response.response_json = {"content": text}
                yield text
        except httpx.HTTPError as e:
            raise llm.ModelError(f"DeepSeek API error: {e}")

    def _build_full_prompt(self, conversation, prompt):
        """Build the full prompt string for the API call."""
        messages = []
        if conversation:
            for prev_response in conversation.responses:
                messages.append(prev_response.prompt.prompt)
                messages.append(prev_response.text())
        messages.append(prompt.prompt)
        # Include the system message first if provided
        if prompt.system:
            messages.insert(0, prompt.system)
        full_prompt = "\n".join(messages)
        if prompt.options.prefill:
            full_prompt += f"\n{prompt.options.prefill}"
        return full_prompt
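
# Illustrative sketch of the flattened prompt _build_full_prompt produces
# (hypothetical contents): system message first, then alternating prompts and
# responses, the current prompt, and finally any prefill:
#
#     "Reply tersely.\nEarlier question\nEarlier answer\nCurrent question\nSure:"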

class DownloadError(Exception):
    pass


def fetch_cached_json(url, path, cache_timeout, headers=None):
    """Fetch JSON data from a URL, caching it at path for cache_timeout seconds."""
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    # Serve from the cache while it is still fresh
    if path.is_file() and time.time() - path.stat().st_mtime < cache_timeout:
        with open(path, "r") as file:
            return json.load(file)
    try:
        response = httpx.get(url, headers=headers, follow_redirects=True)
        response.raise_for_status()
        data = response.json()
        with open(path, "w") as file:
            json.dump(data, file)
        return data
    except httpx.HTTPError:
        # Fall back to a stale cache rather than failing outright
        if path.is_file():
            with open(path, "r") as file:
                return json.load(file)
        raise DownloadError(f"Failed to download data and no cache is available at {path}")
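
# Usage sketch (mirrors the call in get_deepseek_models above): serves the
# cached copy if it is younger than cache_timeout seconds, re-fetches
# otherwise, and falls back to a stale cache when the request fails.
#
#     data = fetch_cached_json(
#         url="https://api.deepseek.com/v1/models",
#         path=llm.user_dir() / "deepseek_models.json",
#         cache_timeout=CACHE_TIMEOUT,
#     )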

@llm.hookimpl
def register_models(register):
    key = llm.get_key("", "deepseek", "LLM_DEEPSEEK_KEY")
    if not key:
        return
    try:
        models = get_deepseek_models()
        models_with_aliases = get_model_ids_with_aliases(models)
        for model_id, aliases in models_with_aliases:
            register(
                DeepSeekChat(
                    model_id=f"deepseekchat/{model_id}",
                    model_name=model_id,
                ),
                aliases=[model_id],
            )
            register(
                DeepSeekCompletion(
                    model_id=f"deepseekcompletion/{model_id}",
                    model_name=model_id,
                ),
                aliases=[f"{model_id}-completion"],
            )
    except DownloadError as e:
        print(f"Error fetching DeepSeek models: {e}")

@llm.hookimpl
def register_commands(cli):
    @cli.command()
    def deepseek_models():
        """List available DeepSeek models."""
        key = llm.get_key("", "deepseek", "LLM_DEEPSEEK_KEY")
        if not key:
            print("DeepSeek API key not set. Use 'llm keys set deepseek' to set it.")
            return
        try:
            models = get_deepseek_models()
            models_with_aliases = get_model_ids_with_aliases(models)
            for model_id, aliases in models_with_aliases:
                print(f"DeepSeek Chat: deepseekchat/{model_id}")
                print(f"  Aliases: {model_id}")
                print(f"DeepSeek Completion: deepseekcompletion/{model_id}")
                print(f"  Aliases: {model_id}-completion")
                print()
        except DownloadError as e:
            print(f"Error fetching DeepSeek models: {e}")