## llm.py
## Forked from carlini/yet-another-applied-llm-benchmark.
## Copyright (C) 2024, Nicholas Carlini <[email protected]>.
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
from io import BytesIO
import os
import base64
import requests
import json
import pickle
import time
from llms.openai_model import OpenAIModel
from llms.anthropic_model import AnthropicModel
from llms.mistral_model import MistralModel
from llms.vertexai_model import VertexAIModel
from llms.cohere_model import CohereModel
from llms.moonshot_model import MoonshotAIModel
from llms.groq_model import GroqModel
class LLM:
    """Callable front end over one backend model wrapper, with an optional disk cache.

    The backend wrapper is selected by substring match on the model name.
    Calling the instance sends a conversation to the backend through its
    ``make_request`` method, retrying on failure, and (when caching is on)
    stores the response in a per-model pickle file under ``tmp/``.
    """

    # Returned (and never cached) when every request attempt raised.
    _FAILED_RESPONSE = "Model API request failed"
    # Total number of request attempts before giving up.
    _MAX_ATTEMPTS = 3
    # Pause between consecutive failed attempts.
    _RETRY_DELAY_SECONDS = 10

    def __init__(self, name="gpt-3.5-turbo", use_cache=True, override_hparams=None):
        """Select the backend for *name* and load its response cache.

        Args:
            name: Model identifier; the backend is picked by the first
                matching substring in the chain below.
            use_cache: When true, responses are read from and written to
                ``tmp/cache-<last path component of name>.p``.
            override_hparams: Optional mapping merged into the backend's
                ``hparams`` dict. ``None`` means no overrides.

        Raises:
            RuntimeError: If *name* matches none of the known backends.
        """
        self.name = name

        # Order matters: the first substring that matches wins.
        # (Dedicated llama and Gemini backends are currently disabled.)
        if 'gpt' in name:
            self.model = OpenAIModel(name)
        elif 'mistral' in name:
            self.model = MistralModel(name)
        elif 'bison' in name or 'gemini' in name:
            self.model = VertexAIModel(name)
        elif 'claude' in name:
            self.model = AnthropicModel(name)
        elif 'moonshot' in name:
            self.model = MoonshotAIModel(name)
        elif 'command' in name:
            self.model = CohereModel(name)
        elif 'llama3' in name or 'mixtral' in name or 'gemma' in name:
            self.model = GroqModel(name)
        else:
            # A bare `raise` here used to surface as an opaque RuntimeError
            # ("No active exception to re-raise"); keep the exception type
            # so existing handlers still work, but say what went wrong.
            raise RuntimeError(f"Unknown model name: {name!r}")

        # `None` default avoids sharing one mutable dict across all instances.
        self.model.hparams.update(override_hparams or {})

        self.use_cache = use_cache
        self.cache = self._load_cache() if use_cache else {}

    def _cache_path(self):
        """Return the pickle path holding this model's cached responses."""
        return f"tmp/cache-{self.name.split('/')[-1]}.p"

    def _load_cache(self):
        """Best-effort load of the on-disk cache; returns ``{}`` on any failure."""
        try:
            os.makedirs("tmp", exist_ok=True)
            # NOTE: pickle executes arbitrary code on load. This file is
            # written by this class only; never point it at untrusted data.
            with open(self._cache_path(), "rb") as fh:
                return pickle.load(fh)
        except FileNotFoundError:
            # No cache yet for this model: the normal first-run case.
            return {}
        except Exception as err:
            # Corrupt or unreadable cache: start fresh instead of crashing,
            # but do not swallow KeyboardInterrupt/SystemExit.
            print(self.name, "CACHE LOAD FAILED", repr(err))
            return {}

    def _save_cache(self):
        """Write the whole in-memory cache back to disk."""
        os.makedirs("tmp", exist_ok=True)
        with open(self._cache_path(), "wb") as fh:
            pickle.dump(self.cache, fh)

    def __call__(self, conversation, add_image=None, max_tokens=None, skip_cache=False, json=False):
        """Return the backend's response for *conversation*, using the cache if allowed.

        Args:
            conversation: A single string or a sequence of messages. A bare
                string is wrapped into a one-element list.
            add_image: Optional image object; its ``tobytes()`` output is
                appended to the cache key and the object is forwarded to
                the backend.
            max_tokens: Forwarded to the backend unchanged.
            skip_cache: When true, ignore any cached response (the fresh
                response is still written to the cache).
            json: When truthy, forwarded to the backend as ``json=...``;
                otherwise the keyword is omitted entirely.

        Returns:
            The backend response, or the string ``"Model API request failed"``
            if every attempt raised.

        Note:
            The cache key covers only the conversation and image, not
            ``max_tokens`` or ``json``.
        """
        if isinstance(conversation, str):
            conversation = [conversation]

        # Build the key from tuples so any sequence type works, not just list.
        cache_key = tuple(conversation)
        if add_image is not None:
            cache_key += (add_image.tobytes(),)

        if self.use_cache and not skip_cache and cache_key in self.cache:
            cached = self.cache[cache_key]
            print(self.name, "GETCACHE", repr(cached))
            if len(cached) > 0:
                return cached
            # An empty cached response is treated as a miss and re-requested.
            print("Empty cache hit")

        print(self.name, "CACHE MISS", repr(conversation))

        # Only pass `json` through when requested, so backends whose
        # make_request does not accept that keyword keep working.
        extra = {'json': json} if json else {}

        response = self._FAILED_RESPONSE
        for attempt in range(1, self._MAX_ATTEMPTS + 1):
            try:
                response = self.model.make_request(
                    conversation, add_image=add_image, max_tokens=max_tokens, **extra
                )
                break
            except Exception as err:
                import traceback
                print("RUN FAILED", err)
                traceback.print_exc()
                # No point waiting after the final attempt.
                if attempt < self._MAX_ATTEMPTS:
                    time.sleep(self._RETRY_DELAY_SECONDS)

        # Never persist the failure sentinel, so a later run retries.
        if self.use_cache and response != self._FAILED_RESPONSE:
            self.cache[cache_key] = response
            self._save_cache()

        return response
# Shared module-level model instances, created once at import time.
# To switch the default model, replace the active `llm` assignment with one of
# the commented alternatives below.
llm = LLM(name="gpt-3.5-turbo")
# llm = LLM("command")
# llm = LLM("gpt-4-turbo-2024-04-09")
# llm = LLM("gemini-1.5-pro-preview-0409")
# llm = LLM("gpt-4o", override_hparams={'temperature': 0.1})
# llm = LLM("claude-3-opus-20240229")
# llm = LLM("claude-3-sonnet-20240229")
# llm = LLM("mistral-tiny")
# llm = LLM("gemini-pro", override_hparams={'temperature': 0.3}, use_cache=False)

# Evaluator instances, both pinned to a low temperature.
# NOTE(review): the names suggest these grade text and image outputs
# respectively; confirm against the callers.
eval_llm = LLM(name="gpt-4o", override_hparams=dict(temperature=0.1))
# eval_llm = LLM("gpt-4-1106-preview")
# eval_llm = LLM("gpt-3.5-turbo", override_hparams={'temperature': 0.1})

vision_eval_llm = LLM(name="gpt-4o", override_hparams=dict(temperature=0.1))