diff --git a/README.md b/README.md index c284be325..5485589b4 100644 --- a/README.md +++ b/README.md @@ -13,33 +13,41 @@ Some manga/images will never be translated, therefore this project is born. -- [Preview](#samples) -- [Demo](#online-demo) -- [Disclaimer](#disclaimer) -- [Getting Started](#installation) - - [Installation](#installation) - - [Venv](#pipvenv) - - [Poetry](#poetry) - - [Extra Windows Info](#additional-instructions-for-windows) - - [Docker](#docker) - - [Usage](#usage) - - [Batch mode](#batch-mode-default) - - [Demo mode](#demo-mode) - - [Web mode](#web-mode) - - [Api mode](#api-mode) - - [Related Projects](#related-projects) -- [Docs](#docs) +- [Image/Manga Translator](#imagemanga-translator) + - [Samples](#samples) + - [Online Demo](#online-demo) + - [Disclaimer](#disclaimer) + - [Installation](#installation) + - [Pip/venv](#pipvenv) + - [Poetry](#poetry) + - [Additional instructions for **Windows**](#additional-instructions-for-windows) + - [Docker](#docker) + - [Hosting the web server](#hosting-the-web-server) + - [Using as CLI](#using-as-cli) + - [Setting Translation Secrets](#setting-translation-secrets) + - [Using with Nvidia GPU](#using-with-nvidia-gpu) + - [Building locally](#building-locally) + - [Usage](#usage) + - [Batch mode (default)](#batch-mode-default) + - [Demo mode](#demo-mode) + - [Web Mode](#web-mode) + - [Api Mode](#api-mode) + - [Related Projects](#related-projects) + - [Docs](#docs) - [Recommended Modules](#recommended-modules) - - [Args](#options) - - [Languages](#language-code-reference) - - [Translators](#translators-reference) - - [GPT config](#gpt-config-reference) - - [Gimp](#using-gimp-for-rendering) - - [Api Docs](#api-documentation) - - [v1](#api-documentation) - - [v2](#api-documentation) -- [Roadmap](#next-steps) -- [Support Us](#support-us) + - [Tips to improve translation quality](#tips-to-improve-translation-quality) + - [Options](#options) + - [Language Code Reference](#language-code-reference) + - 
[Translators Reference](#translators-reference) + - [GPT Config Reference](#gpt-config-reference) + - [Using Gimp for rendering](#using-gimp-for-rendering) + - [Api Documentation](#api-documentation) + - [Synchronous mode](#synchronous-mode) + - [Asynchronous mode](#asynchronous-mode) + - [Manual translation](#manual-translation) + - [Next steps](#next-steps) + - [Support Us](#support-us) + - [Thanks To All Our Contributors :](#thanks-to-all-our-contributors-) ## Samples @@ -521,6 +529,7 @@ IND: Indonesian | gpt3.5 | ✔️ | | Implements gpt-3.5-turbo. Requires `OPENAI_API_KEY` | | gpt4 | ✔️ | | Implements gpt-4. Requires `OPENAI_API_KEY` | | papago | | | | +| sakura | | | Requires `SAKURA_API_BASE` | | offline | | ✔️ | Chooses most suitable offline translator for language | | sugoi | | ✔️ | Sugoi V4.0 Models | | m2m100 | | ✔️ | Supports every language | diff --git a/README_CN.md b/README_CN.md index 825a25871..cba06493b 100644 --- a/README_CN.md +++ b/README_CN.md @@ -66,6 +66,7 @@ $ pip install git+https://github.com/kodalli/pydensecrf.git | gpt3.5 | ✔️ | | Implements gpt-3.5-turbo. Requires `OPENAI_API_KEY` | | gpt4 | ✔️ | | Implements gpt-4. 
Requires `OPENAI_API_KEY` | | papago | | | | +| sakura | | |需要`SAKURA_API_BASE`| | offline | | ✔️ | 自动选择可用的离线模型,只是选择器 | | sugoi | | ✔️ | 只能翻译英文 | | m2m100 | | ✔️ | 可以翻译所有语言 | diff --git a/manga_translator/translators/__init__.py b/manga_translator/translators/__init__.py index 3dea727ae..5e722b4bf 100644 --- a/manga_translator/translators/__init__.py +++ b/manga_translator/translators/__init__.py @@ -7,7 +7,7 @@ from .deepl import DeeplTranslator from .papago import PapagoTranslator from .caiyun import CaiyunTranslator -from .chatgpt import GPT3Translator, GPT35TurboTranslator, GPT4Translator, SakuraTranslator +from .chatgpt import GPT3Translator, GPT35TurboTranslator, GPT4Translator from .nllb import NLLBTranslator, NLLBBigTranslator from .sugoi import JparacrawlTranslator, JparacrawlBigTranslator, SugoiTranslator from .m2m100 import M2M100Translator, M2M100BigTranslator @@ -25,7 +25,6 @@ 'jparacrawl_big': JparacrawlBigTranslator, 'm2m100': M2M100Translator, 'm2m100_big': M2M100BigTranslator, - 'sakura': SakuraTranslator, } TRANSLATORS = { diff --git a/manga_translator/translators/chatgpt.py b/manga_translator/translators/chatgpt.py index 0e8117816..bf380e951 100644 --- a/manga_translator/translators/chatgpt.py +++ b/manga_translator/translators/chatgpt.py @@ -9,8 +9,7 @@ from typing import List, Dict from .common import CommonTranslator, MissingAPIKeyException -from .keys import OPENAI_API_KEY, OPENAI_HTTP_PROXY, OPENAI_API_BASE, SAKURA_API_BASE, SAKURA_API_KEY - +from .keys import OPENAI_API_KEY, OPENAI_HTTP_PROXY, OPENAI_API_BASE CONFIG = None class GPT3Translator(CommonTranslator): @@ -320,77 +319,3 @@ async def _request_translation(self, to_lang: str, prompt: str) -> str: # If no response with text is found, return the first response's content (which may be empty) return response.choices[0].message.content - - -class SakuraTranslator(GPT3Translator): - _CONFIG_KEY = 'sakura' - _MAX_REQUESTS_PER_MINUTE = 200 - _RETRY_ATTEMPTS = 5 - _MAX_TOKENS = 8192 - 
_CHAT_SYSTEM_TEMPLATE = ( - '你是一个轻小说翻译模型,可以流畅通顺地以日本轻小说的风格将日文翻译成简体中文,并联系上下文正确使用人称代词,不擅自添加原文中没有的代词。' - ) - def __init__(self): - super().__init__(check_openai_key=False) - - async def _request_translation(self, to_lang: str, prompt: str) -> str: - messages = [ - {'role': 'system', 'content': self._CHAT_SYSTEM_TEMPLATE}, - {'role': 'user', 'content': '将下面的日文文本翻译成中文:'+prompt}, - ] - - response = await openai.ChatCompletion.acreate( - model='gpt-4-0613', - messages=messages, - max_tokens=self._MAX_TOKENS // 2, - temperature=self.temperature, - top_p=self.top_p, - api_key=SAKURA_API_KEY, - api_base=SAKURA_API_BASE, - ) - - self.token_count += response.usage['total_tokens'] - self.token_count_last = response.usage['total_tokens'] - for choice in response.choices: - if 'text' in choice: - return choice.text - - # If no response with text is found, return the first response's content (which may be empty) - return response.choices[0].message.content - async def _translate(self, from_lang: str, to_lang: str, queries: List[str]) -> List[str]: - translations = [] - self.logger.debug(f'Temperature: {self.temperature}, TopP: {self.top_p}') - - for query in queries: - - ratelimit_attempt = 0 - server_error_attempt = 0 - timeout_attempt = 0 - while True: - request_task = asyncio.create_task(self._request_translation(to_lang, query)) - try: - response = await request_task - break - except openai.error.RateLimitError: # Server returned ratelimit response - ratelimit_attempt += 1 - if ratelimit_attempt >= self._RATELIMIT_RETRY_ATTEMPTS: - raise - self.logger.warn(f'Restarting request due to ratelimiting by openai servers. Attempt: {ratelimit_attempt}') - await asyncio.sleep(2) - except openai.error.APIError: # Server returned 500 error (probably server load) - server_error_attempt += 1 - if server_error_attempt >= self._RETRY_ATTEMPTS: - self.logger.error('Sakura encountered a server error, possibly due to high server load. 
Use a different translator or try again later.') - raise - self.logger.warn(f'Restarting request due to a server error. Attempt: {server_error_attempt}') - await asyncio.sleep(1) - - self.logger.debug('-- Sakura Response --\n' + response) - - translations.extend([response]) - - self.logger.debug(translations) - if self.token_count_last: - self.logger.info(f'Used {self.token_count_last} tokens (Total: {self.token_count})') - - return translations \ No newline at end of file diff --git a/manga_translator/translators/sakura.py b/manga_translator/translators/sakura.py index dc653513f..f4c10b22f 100644 --- a/manga_translator/translators/sakura.py +++ b/manga_translator/translators/sakura.py @@ -17,7 +17,7 @@ class Sakura13BTranslator(CommonTranslator): _TIMEOUT = 999 # Seconds to wait for a response from the server before retrying - _RETRY_ATTEMPTS = 3 # Number of times to retry an errored request before giving up + _RETRY_ATTEMPTS = 1 # Number of times to retry an errored request before giving up _TIMEOUT_RETRY_ATTEMPTS = 3 # Number of times to retry a timed out request before giving up _RATELIMIT_RETRY_ATTEMPTS = 3 # Number of times to retry a ratelimited request before giving up @@ -128,6 +128,10 @@ async def _translate(self, from_lang: str, to_lang: str, queries: List[str]) -> self.logger.debug(f'Queries: {queries}') text_prompt = '\n'.join(queries) self.logger.debug('-- Sakura Prompt --\n' + self._format_prompt_log(text_prompt) + '\n\n') + # 去除emoji + queries = [re.sub(r'[\U00010000-\U0010ffff]', '', query) for query in queries] + # 替换❤ + queries = [re.sub(r'❤', '♥', query) for query in queries] # 给queries的每行加上「」 queries = [f'「{query}」' for query in queries] response = await self._handle_translation_request(queries) @@ -139,6 +143,9 @@ async def _translate(self, from_lang: str, to_lang: str, queries: List[str]) -> rep_flag = self.detect_and_remove_extra_repeats(response)[0] if rep_flag: for i in range(self._RETRY_ATTEMPTS): + if 
self.detect_and_remove_extra_repeats(queries)[0]: + self.logger.warning('Queries have repeats.') + break self.logger.warning(f'Re-translated because of model degradation, {i} times.') self._set_gpt_style("precise") self.logger.debug(f'Temperature: {self.temperature}, TopP: {self.top_p}') @@ -254,7 +261,7 @@ async def _request_translation(self, input_text_list) -> str: ], temperature=self.temperature, top_p=self.top_p, - max_tokens=512, + max_tokens=1024, frequency_penalty=self.frequency_penalty, seed=-1, extra_query=extra_query,