From 129fc907919755f9fc83f85eeb51fef79b5acad5 Mon Sep 17 00:00:00 2001
From: Donald Lam
Date: Thu, 5 Sep 2024 17:45:03 +0800
Subject: [PATCH] update testing api

---
 global_functions.py              | 12 +++-
 gpt_setting.py                   | 94 ++++++--------------------------
 llm_settings/deepinfra_models.py | 35 ++++++++++++
 llm_settings/gemini_models.py    | 35 ++++++++++++
 llm_settings/openai_models.py    | 34 ++++++++++++
 server.py                        | 28 ++++++++--
 6 files changed, 155 insertions(+), 83 deletions(-)
 create mode 100644 llm_settings/deepinfra_models.py
 create mode 100644 llm_settings/gemini_models.py
 create mode 100644 llm_settings/openai_models.py

diff --git a/global_functions.py b/global_functions.py
index cc3f319..83a7ad2 100644
--- a/global_functions.py
+++ b/global_functions.py
@@ -1,3 +1,4 @@
+import re
 import json
 
 # Convert number to specified label
@@ -115,4 +116,13 @@ def add_statement(qname, language, statements):
             data[qname]["questions"][language][new_version]["statements"][str(i+1)] = s
 
     with open('dataset/questionnaires.json', 'w', encoding='utf-8') as f:
-        json.dump(data, f, ensure_ascii=False, indent=4)
\ No newline at end of file
+        json.dump(data, f, ensure_ascii=False, indent=4)
+
+
+def extract_json_str(input_string):
+    json_match = re.search(r'\{.*\}', input_string, flags=re.DOTALL)
+    if json_match:
+        json_content = json_match.group(0)
+        return json_content
+    else:
+        return input_string
\ No newline at end of file
diff --git a/gpt_setting.py b/gpt_setting.py
index b98f374..0ecc466 100644
--- a/gpt_setting.py
+++ b/gpt_setting.py
@@ -1,69 +1,12 @@
-from tenacity import (
-    retry,
-    stop_after_attempt,
-    wait_random_exponential,
-)
-import openai
-import time
 import os
 import re
-import random
 
 from utils import *
+from llm_settings.openai_models import *
+from llm_settings.gemini_models import *
+from llm_settings.deepinfra_models import *
 
-openai.api_key = api_key
 
-@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
-def chat(
-    model,                    # gpt-4, gpt-4-0314, gpt-4-32k, gpt-4-32k-0314, gpt-3.5-turbo, gpt-3.5-turbo-0301
-    messages,                 # [{"role": "system"/"user"/"assistant", "content": "Hello!", "name": "example"}]
-    temperature=temperature,  # [0, 2]: Lower values -> more focused and deterministic; Higher values -> more random.
-    n=1,                      # Chat completion choices to generate for each input message.
-    max_tokens=1024,          # The maximum number of tokens to generate in the chat completion.
-    delay=delay_time          # Seconds to sleep after each request.
-):
-    time.sleep(delay)
-
-    response = openai.ChatCompletion.create(
-        model=model,
-        messages=messages,
-        temperature=temperature,
-        n=n,
-        max_tokens=max_tokens
-    )
-
-    if n == 1:
-        return response['choices'][0]['message']['content']
-    else:
-        return [i['message']['content'] for i in response['choices']]
-
-
-@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
-def completion(
-    model,                    # text-davinci-003, text-davinci-002, text-curie-001, text-babbage-001, text-ada-001
-    prompt,                   # The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays.
-    temperature=temperature,  # [0, 2]: Lower values -> more focused and deterministic; Higher values -> more random.
-    n=1,                      # Completions to generate for each prompt.
-    max_tokens=1024,          # The maximum number of tokens to generate in the chat completion.
-    delay=delay_time          # Seconds to sleep after each request.
-):
-    time.sleep(delay)
-
-    response = openai.Completion.create(
-        model=model,
-        prompt=prompt,
-        temperature=temperature,
-        n=n,
-        max_tokens=max_tokens
-    )
-
-    if n == 1:
-        return response['choices'][0]['text']
-    else:
-        response = response['choices']
-        response.sort(key=lambda x: x['index'])
-        return [i['text'] for i in response['choices']]
-
 def print_prompt(inputs, response):
     os.makedirs("records", exist_ok=True)
     with open(f"records/records.txt", 'a') as f:
@@ -71,21 +14,20 @@ def print_prompt(inputs, response):
         f.write(f"{response}\n====\n")
     return
 
-def gpt_request(model, inputs):
-    json_format = r'({.*})'
+
+def llm_request(model, inputs):
+    if model.startswith("gpt"):
+        response = gpt_chat(model, inputs).strip()
+
+    elif model.startswith("gemini"):
+        response = gemini_chat(model, inputs).strip()
+
+    elif model.startswith("meta-llama"):
+        response = deepinfra_chat(model, inputs).strip()
 
-    if model == 'text-davinci-003':
-        response = completion(model, inputs).strip()
-        print_prompt(inputs, response)
-        match = re.search(json_format, response, re.DOTALL)
-        return str(match)
-    elif model in ['gpt-3.5-turbo', 'gpt-4']:
-        response = chat(model, inputs).strip()
-        print_prompt(inputs, response)
-        match = re.search(json_format, response, re.DOTALL)
-        if match:
-            return match.group(1).strip()
-        else:
-            ""
-
+    else:
         raise ValueError("The model is not supported or does not exist.")
+
+    print_prompt(inputs, response)
+
+    return response
\ No newline at end of file
diff --git a/llm_settings/deepinfra_models.py b/llm_settings/deepinfra_models.py
new file mode 100644
index 0000000..46709d8
--- /dev/null
+++ b/llm_settings/deepinfra_models.py
@@ -0,0 +1,35 @@
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_random_exponential,
+)
+import time
+from openai import OpenAI
+
+from utils import *
+from global_functions import *
+
+openai = OpenAI(
+    api_key=infradeep_api_key,
+    base_url="https://api.deepinfra.com/v1/openai",
+)
+
+@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
+def deepinfra_chat(
+    model,                    # meta-llama/Meta-Llama-3.1-70B-Instruct, mistralai/Mixtral-8x7B-Instruct-v0.1, Qwen/Qwen2-72B-Instruct
+    prompt,                   # The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays.
+    temperature=temperature,  # [0, 2]: Lower values -> more focused and deterministic; Higher values -> more random.
+    n=1,                      # Completions to generate for each prompt.
+    max_tokens=1024,          # The maximum number of tokens to generate in the chat completion.
+    delay=delay_time          # Seconds to sleep after each request.
+):
+    time.sleep(delay)
+
+    response = openai.chat.completions.create(
+        model=model,
+        messages=prompt,
+        temperature=temperature,
+        stream=False,
+    )
+
+    return extract_json_str(response.choices[0].message.content)
diff --git a/llm_settings/gemini_models.py b/llm_settings/gemini_models.py
new file mode 100644
index 0000000..ae90a0b
--- /dev/null
+++ b/llm_settings/gemini_models.py
@@ -0,0 +1,35 @@
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_random_exponential,
+)
+import time
+import google.generativeai as genai
+
+from utils import *
+from global_functions import *
+
+genai.configure(api_key=google_api_key)
+
+@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
+def gemini_chat(
+    model,                    # gemini-1.0-pro, gemini-1.0-pro-001, gemini-1.0-pro-latest, gemini-1.0-pro-vision-latest, gemini-pro, gemini-pro-vision
+    messages,                 # [{'role': 'user', 'parts': "In one sentence, explain how a computer works to a young child."}, {'role': 'model', 'parts': "A computer is like a very smart machine that can understand and follow our instructions, help us with our work, and even play games with us!"}]
+    temperature=temperature,  # [0, 2]: Lower values -> more focused and deterministic; Higher values -> more random.
+    n=1,                      # Chat response choices to generate for each input message.
+    max_tokens=1024,          # The maximum number of tokens to generate in the chat completion.
+    delay=delay_time          # Seconds to sleep after each request.
+):
+    time.sleep(delay)
+    model = genai.GenerativeModel(model)
+    response = model.generate_content(
+        messages,
+        generation_config=genai.types.GenerationConfig(
+            # Only one candidate for now.
+            candidate_count=n,
+            # stop_sequences=['x'],
+            max_output_tokens=max_tokens,
+            temperature=temperature)
+    )
+
+    return extract_json_str(response.text)
diff --git a/llm_settings/openai_models.py b/llm_settings/openai_models.py
new file mode 100644
index 0000000..99afc97
--- /dev/null
+++ b/llm_settings/openai_models.py
@@ -0,0 +1,34 @@
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_random_exponential,
+)
+import time
+from openai import OpenAI
+
+from utils import *
+from global_functions import *
+
+openai = OpenAI(api_key=openai_api_key)
+
+@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
+def gpt_chat(
+    model,                    # gpt-4, gpt-4-0314, gpt-4-32k, gpt-4-32k-0314, gpt-3.5-turbo, gpt-3.5-turbo-0301
+    messages,                 # [{"role": "system"/"user"/"assistant", "content": "Hello!", "name": "example"}]
+    temperature=temperature,  # [0, 2]: Lower values -> more focused and deterministic; Higher values -> more random.
+    n=1,                      # Chat completion choices to generate for each input message.
+    max_tokens=1024,          # The maximum number of tokens to generate in the chat completion.
+    delay=delay_time          # Seconds to sleep after each request.
+):
+    time.sleep(delay)
+
+    response = openai.chat.completions.create(
+        model=model,
+        messages=messages,
+        temperature=temperature,
+        n=n,
+        max_tokens=max_tokens
+    )
+
+    return extract_json_str(response.choices[0].message.content)
+
diff --git a/server.py b/server.py
index 21c8cfd..4a3b276 100644
--- a/server.py
+++ b/server.py
@@ -60,7 +60,7 @@ def rephrase(questionnaire_name, language, savename=None):
             {"role": "user", "content": prompt}
         ]
         try:
-            response = chat('gpt-4', inputs).strip()
+            response = gpt_chat('gpt-4', inputs).strip()
             parsered_responses = json.loads(response)
             parsered_responses = parsered_responses["sentence"]
             break
@@ -149,22 +149,38 @@ def get_statements(self, questions, version="v1"):
     def start_request(self, scale_details, level_description, statement_description, questions, language, template, label, order, version):
         responses = list()
         _, scale_max, symbol_min, symbol_max = scale_details
-        inputs = [{"role": "system", "content": questions["system_prompt"]}]
+
+        if model.startswith("gemini"):
+            inputs = [{"role": "user", "parts": [questions["system_prompt"]]}]
+        else:
+            inputs = [{"role": "system", "content": questions["system_prompt"]}]
+
         for statement_str in statement_description:
             # Construct the prompt from prompt_template
             prompt = get_prompt(f'prompt_template/{language}/{self.questionnaire_name}_{language}_{template}.txt', [symbol_min, symbol_max, level_description, statement_str])
-            inputs.append({"role": "user", "content": prompt})
+
+            if model.startswith("gemini"):
+                inputs.append({"role": "user", "parts": [prompt]})
+            else:
+                inputs.append({"role": "user", "content": prompt})
+
             try:
-                gpt_responses = gpt_request(self.model, inputs)
+                gpt_responses = llm_request(self.model, inputs)
                 parsed_responses = json.loads(gpt_responses)
                 parsed_responses = [convert_symbol(label, value) for value in parsed_responses.values()]
                 if order == 'r':
                     parsed_responses = [scale_max-score+1 for score in parsed_responses]
-            except ValueError:
+
+            except:
                 return None
            responses += parsed_responses
-            inputs.append({"role": "assistant", "content": gpt_responses})
+
+            if model.startswith("gemini"):
+                inputs.append({"role": "model", "parts": [gpt_responses]})
+            else:
+                inputs.append({"role": "assistant", "content": gpt_responses})
+
         return responses
 
 """
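
Usage sketch (not part of the patch): the new llm_request() in gpt_setting.py dispatches on the model-name prefix ("gpt", "gemini", "meta-llama") and each backend reduces the reply to its JSON object via extract_json_str(). The calls below assume utils.py exports temperature, delay_time, openai_api_key, google_api_key and infradeep_api_key, which the llm_settings modules read at import time; the prompt strings here are illustrative only.

    from gpt_setting import llm_request

    # OpenAI and DeepInfra (meta-llama/...) backends take chat-completions style messages.
    messages = [
        {"role": "system", "content": "You are completing a questionnaire."},
        {"role": "user", "content": "Rate each statement and reply as a JSON object."},
    ]
    print(llm_request("gpt-4", messages))
    print(llm_request("meta-llama/Meta-Llama-3.1-70B-Instruct", messages))

    # Gemini models use the google.generativeai history format: 'parts' instead of 'content'.
    gemini_messages = [
        {"role": "user", "parts": ["Rate each statement and reply as a JSON object."]},
    ]
    print(llm_request("gemini-1.0-pro", gemini_messages))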