From 1a6827eae436e8cdfb6bc80e2d9ebc1f8528c221 Mon Sep 17 00:00:00 2001 From: Archeb Date: Sun, 16 Jun 2024 12:28:46 +0800 Subject: [PATCH 1/2] upgrade dependencies; fix openai and sakura translator; --- manga_translator/server/web_main.py | 9 ++-- manga_translator/translators/chatgpt.py | 61 +++++++++++++++---------- manga_translator/translators/google.py | 2 +- manga_translator/translators/sakura.py | 14 +++--- requirements.txt | 2 +- 5 files changed, 51 insertions(+), 37 deletions(-) diff --git a/manga_translator/server/web_main.py b/manga_translator/server/web_main.py index 5130af30e..6439863f5 100644 --- a/manga_translator/server/web_main.py +++ b/manga_translator/server/web_main.py @@ -51,6 +51,7 @@ 'papago', 'caiyun', 'gpt3.5', + 'gpt4', 'nllb', 'nllb_big', 'sugoi', @@ -220,7 +221,7 @@ async def run_async(request): # return web.json_response({'state': 'error'}) else: os.makedirs(f'result/{task_id}/', exist_ok=True) - img.save(f'result/{task_id}/input.png') + img.save(f'result/{task_id}/input.jpg') QUEUE.append(task_id) now = time.time() TASK_DATA[task_id] = { @@ -437,7 +438,8 @@ async def submit_async(request): } elif task_id not in TASK_DATA or task_id not in TASK_STATES: os.makedirs(f'result/{task_id}/', exist_ok=True) - img.save(f'result/{task_id}/input.png') + img = img.convert('RGB') + img.save(f'result/{task_id}/input.jpg') QUEUE.append(task_id) TASK_STATES[task_id] = { 'info': 'pending', @@ -464,7 +466,8 @@ async def manual_translate_async(request): task_id = secrets.token_hex(16) print(f'New `manual-translate` task {task_id}') os.makedirs(f'result/{task_id}/', exist_ok=True) - img.save(f'result/{task_id}/input.png') + img = img.convert('RGB') + img.save(f'result/{task_id}/input.jpg') now = time.time() QUEUE.append(task_id) # TODO: Add form fields to manual translate website diff --git a/manga_translator/translators/chatgpt.py b/manga_translator/translators/chatgpt.py index 07ca0f8fe..5edf214b3 100644 --- a/manga_translator/translators/chatgpt.py +++ b/manga_translator/translators/chatgpt.py @@ -1,7 +1,6 @@ import re try: import openai - import openai.error except ImportError: openai = None import asyncio @@ -51,20 +50,21 @@ class GPT3Translator(CommonTranslator): _MAX_TOKENS = 4096 _RETURN_PROMPT = True _INCLUDE_TEMPLATE = True - _PROMPT_TEMPLATE = 'Please help me to translate the following text from a manga to {to_lang} (if it\'s already in {to_lang} or looks like gibberish you have to output it as it is instead):\n' + _PROMPT_TEMPLATE = 'Please help me to translate the following text from a manga to {to_lang}. If it\'s already in {to_lang} or looks like gibberish you have to output it as it is instead). Keep prefix format.\n' def __init__(self, check_openai_key = True): super().__init__() - openai.api_key = openai.api_key or OPENAI_API_KEY - openai.api_base = OPENAI_API_BASE - if not openai.api_key and check_openai_key: + self.client = openai.AsyncOpenAI() + self.client.api_key = openai.api_key or OPENAI_API_KEY + self.client.base_url = OPENAI_API_BASE + if not self.client.api_key and check_openai_key: raise MissingAPIKeyException('Please set the OPENAI_API_KEY environment variable before using the chatgpt translator.') if OPENAI_HTTP_PROXY: proxies = { 'http': 'http://%s' % OPENAI_HTTP_PROXY, 'https': 'http://%s' % OPENAI_HTTP_PROXY } - openai.proxy = proxies + self.client._proxies = proxies self.token_count = 0 self.token_count_last = 0 self.config = None @@ -149,13 +149,13 @@ async def _translate(self, from_lang: str, to_lang: str, queries: List[str]) -> try: response = await request_task break - except openai.error.RateLimitError: # Server returned ratelimit response + except openai.RateLimitError: # Server returned ratelimit response ratelimit_attempt += 1 if ratelimit_attempt >= self._RATELIMIT_RETRY_ATTEMPTS: raise self.logger.warn(f'Restarting request due to ratelimiting by openai servers. Attempt: {ratelimit_attempt}') await asyncio.sleep(2) - except openai.error.APIError: # Server returned 500 error (probably server load) + except openai.APIError: # Server returned 500 error (probably server load) server_error_attempt += 1 if server_error_attempt >= self._RETRY_ATTEMPTS: self.logger.error('OpenAI encountered a server error, possibly due to high server load. Use a different translator or try again later.') @@ -188,15 +188,15 @@ async def _translate(self, from_lang: str, to_lang: str, queries: List[str]) -> return translations async def _request_translation(self, to_lang: str, prompt: str) -> str: - response = await openai.Completion.acreate( + response = await self.client.completions.create( model='text-davinci-003', prompt=prompt, max_tokens=self._MAX_TOKENS // 2, # Assuming that half of the tokens are used for the query temperature=self.temperature, top_p=self.top_p, ) - self.token_count += response.usage['total_tokens'] - self.token_count_last = response.usage['total_tokens'] + self.token_count += response.usage.total_tokens + self.token_count_last = response.usage.total_tokens return response.choices[0].text class GPT35TurboTranslator(GPT3Translator): @@ -214,6 +214,7 @@ class GPT35TurboTranslator(GPT3Translator): 'never interpret it. ' 'If there is any issue in the text, ' 'output it as is.\n' + 'Translate the following text into {to_lang} and keep the prefix format.\n' """Please say "I'm sorry, but I can't assist with that request." followed by translation.""" ) _CHAT_SAMPLE = [ @@ -271,7 +272,7 @@ async def _request_translation(self, to_lang: str, prompt: str) -> str: def strip_first_line(txt: str) : return '\n'.join(txt.splitlines()[1:]) - response = await openai.ChatCompletion.acreate( + response = await self.client.chat.completions.create( model='gpt-3.5-turbo-0125', messages=messages, max_tokens=self._MAX_TOKENS // 2, @@ -279,8 +280,8 @@ def strip_first_line(txt: str) : top_p=self.top_p, ) - self.token_count += response.usage['total_tokens'] - self.token_count_last = response.usage['total_tokens'] + self.token_count += response.usage.total_tokens + self.token_count_last = response.usage.total_tokens for choice in response.choices: if 'text' in choice: return strip_first_line(choice.text) @@ -294,29 +295,39 @@ class GPT4Translator(GPT35TurboTranslator): _RETRY_ATTEMPTS = 5 _MAX_TOKENS = 8192 + @property + def chat_system_template(self) -> str: + return self._config_get('chat_system_template', self._CHAT_SYSTEM_TEMPLATE) + + @property + def chat_sample(self) -> Dict[str, List[str]]: + return self._config_get('chat_sample', self._CHAT_SAMPLE) + async def _request_translation(self, to_lang: str, prompt: str) -> str: + prompt_with_lang = f"""Translate into {to_lang}. Please say "I'm sorry, but I can't assist with that request." followed by translation.\n""" + prompt messages = [ - {'role': 'system', 'content': self.chat_system_template.format(to_lang=to_lang)}, - {'role': 'user', 'content': prompt}, + {'role': 'system', 'content': self.chat_system_template}, + {'role': 'user', 'content': self.chat_sample[0]}, + {'role': 'assistant', 'content': self.chat_sample[1]}, + {'role': 'user', 'content': prompt_with_lang}, ] - if to_lang in self._CHAT_SAMPLE: - messages.insert(1, {'role': 'user', 'content': self._CHAT_SAMPLE[to_lang][0]}) - messages.insert(2, {'role': 'assistant', 'content': self._CHAT_SAMPLE[to_lang][1]}) + def strip_first_line(txt: str) : + return '\n'.join(txt.splitlines()[1:]) - response = await openai.ChatCompletion.acreate( - model='gpt-4o-2024-05-13', + response = await self.client.chat.completions.create( + model='gpt-4o', messages=messages, max_tokens=self._MAX_TOKENS // 2, temperature=self.temperature, top_p=self.top_p, ) - self.token_count += response.usage['total_tokens'] - self.token_count_last = response.usage['total_tokens'] + self.token_count += response.usage.total_tokens + self.token_count_last = response.usage.total_tokens for choice in response.choices: if 'text' in choice: - return choice.text + return strip_first_line(choice.text) # If no response with text is found, return the first response's content (which may be empty) - return response.choices[0].message.content + return strip_first_line(response.choices[0].message.content) diff --git a/manga_translator/translators/google.py b/manga_translator/translators/google.py index 0c5e08797..ef55894a3 100644 --- a/manga_translator/translators/google.py +++ b/manga_translator/translators/google.py @@ -14,6 +14,7 @@ import httpcore import httpx +setattr(httpcore, 'SyncHTTPTransport', any) from httpx import Timeout from googletrans import urls, utils @@ -107,7 +108,6 @@ class GoogleTranslator(CommonTranslator): def __init__(self, service_urls=DEFAULT_CLIENT_SERVICE_URLS, user_agent=DEFAULT_USER_AGENT, raise_exception=DEFAULT_RAISE_EXCEPTION, - proxies: typing.Dict[str, httpcore.AsyncHTTPTransport] = None, timeout: Timeout = None, http2=True, use_fallback=False): diff --git a/manga_translator/translators/sakura.py b/manga_translator/translators/sakura.py index 1a9acc2ab..cf0c601ea 100644 --- a/manga_translator/translators/sakura.py +++ b/manga_translator/translators/sakura.py @@ -4,7 +4,6 @@ try: import openai - import openai.error except ImportError: openai = None import asyncio @@ -223,11 +222,12 @@ class SakuraTranslator(CommonTranslator): def __init__(self): super().__init__() + self.client = openai.AsyncOpenAI() if "/v1" not in SAKURA_API_BASE: - openai.api_base = SAKURA_API_BASE + "/v1" + self.client.base_url = SAKURA_API_BASE + "/v1" else: - openai.api_base = SAKURA_API_BASE - openai.api_key = "sk-114514" + self.client.base_url = SAKURA_API_BASE + self.client.api_key = "sk-114514" self.temperature = 0.3 self.top_p = 0.3 self.frequency_penalty = 0.1 @@ -486,13 +486,13 @@ async def _handle_translation_request(self, prompt: str) -> str: if timeout_attempt >= self._TIMEOUT_RETRY_ATTEMPTS: raise Exception('Sakura超时。') self.logger.warning(f'Sakura因超时而进行重试。尝试次数: {timeout_attempt}') - except openai.error.RateLimitError: + except openai.RateLimitError: ratelimit_attempt += 1 if ratelimit_attempt >= self._RATELIMIT_RETRY_ATTEMPTS: raise self.logger.warning(f'Sakura因被限速而进行重试。尝试次数: {ratelimit_attempt}') await asyncio.sleep(2) - except (openai.error.APIError, openai.error.APIConnectionError) as e: + except (openai.APIError, openai.APIConnectionError) as e: server_error_attempt += 1 if server_error_attempt >= self._RETRY_ATTEMPTS: self.logger.error(f'Sakura API请求失败。错误信息: {e}') @@ -541,7 +541,7 @@ async def _request_translation(self, input_text_list) -> str: "content": f"根据以下术语表:\n{gpt_dict_raw_text}\n将下面的日文文本根据上述术语表的对应关系和注释翻译成中文:{raw_text}" } ] - response = await openai.ChatCompletion.acreate( + response = await self.client.chat.completions.create( model="sukinishiro", messages=messages, temperature=self.temperature, diff --git a/requirements.txt b/requirements.txt index b9e261123..8adb6918d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,7 +28,7 @@ websockets protobuf ctranslate2 colorama -openai==0.28 +openai open_clip_torch safetensors pandas From 1ed92527bd67f09c267d82aa1947a3448df5532f Mon Sep 17 00:00:00 2001 From: Archeb Date: Mon, 17 Jun 2024 10:50:18 +0800 Subject: [PATCH 2/2] revert extension change --- manga_translator/server/web_main.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/manga_translator/server/web_main.py b/manga_translator/server/web_main.py index 6439863f5..b7be9a3a0 100644 --- a/manga_translator/server/web_main.py +++ b/manga_translator/server/web_main.py @@ -221,7 +221,7 @@ async def run_async(request): # return web.json_response({'state': 'error'}) else: os.makedirs(f'result/{task_id}/', exist_ok=True) - img.save(f'result/{task_id}/input.jpg') + img.save(f'result/{task_id}/input.png') QUEUE.append(task_id) now = time.time() TASK_DATA[task_id] = { @@ -438,8 +438,7 @@ async def submit_async(request): } elif task_id not in TASK_DATA or task_id not in TASK_STATES: os.makedirs(f'result/{task_id}/', exist_ok=True) - img = img.convert('RGB') - img.save(f'result/{task_id}/input.jpg') + img.save(f'result/{task_id}/input.png') QUEUE.append(task_id) TASK_STATES[task_id] = { 'info': 'pending', @@ -466,8 +465,7 @@ async def manual_translate_async(request): task_id = secrets.token_hex(16) print(f'New `manual-translate` task {task_id}') os.makedirs(f'result/{task_id}/', exist_ok=True) - img = img.convert('RGB') - img.save(f'result/{task_id}/input.jpg') + img.save(f'result/{task_id}/input.png') now = time.time() QUEUE.append(task_id) # TODO: Add form fields to manual translate website