Skip to content

Commit

Permalink
Merge pull request #584 from PiDanShouRouZhouXD/sakura_llm
Browse files Browse the repository at this point in the history
Fix: Remove conflicting SakuraTranslator from translators
  • Loading branch information
zyddnys authored Feb 27, 2024
2 parents f5dab5b + 3caf51f commit 230ba8b
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 106 deletions.
61 changes: 35 additions & 26 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,33 +13,41 @@
Some manga/images will never be translated, therefore this project is born.

- [Preview](#samples)
- [Demo](#online-demo)
- [Disclaimer](#disclaimer)
- [Getting Started](#installation)
- [Installation](#installation)
- [Venv](#pipvenv)
- [Poetry](#poetry)
- [Extra Windows Info](#additional-instructions-for-windows)
- [Docker](#docker)
- [Usage](#usage)
- [Batch mode](#batch-mode-default)
- [Demo mode](#demo-mode)
- [Web mode](#web-mode)
- [Api mode](#api-mode)
- [Related Projects](#related-projects)
- [Docs](#docs)
- [Image/Manga Translator](#imagemanga-translator)
- [Samples](#samples)
- [Online Demo](#online-demo)
- [Disclaimer](#disclaimer)
- [Installation](#installation)
- [Pip/venv](#pipvenv)
- [Poetry](#poetry)
- [Additional instructions for **Windows**](#additional-instructions-for-windows)
- [Docker](#docker)
- [Hosting the web server](#hosting-the-web-server)
- [Using as CLI](#using-as-cli)
- [Setting Translation Secrets](#setting-translation-secrets)
- [Using with Nvidia GPU](#using-with-nvidia-gpu)
- [Building locally](#building-locally)
- [Usage](#usage)
- [Batch mode (default)](#batch-mode-default)
- [Demo mode](#demo-mode)
- [Web Mode](#web-mode)
- [Api Mode](#api-mode)
- [Related Projects](#related-projects)
- [Docs](#docs)
- [Recommended Modules](#recommended-modules)
- [Args](#options)
- [Languages](#language-code-reference)
- [Translators](#translators-reference)
- [GPT config](#gpt-config-reference)
- [Gimp](#using-gimp-for-rendering)
- [Api Docs](#api-documentation)
- [v1](#api-documentation)
- [v2](#api-documentation)
- [Roadmap](#next-steps)
- [Support Us](#support-us)
- [Tips to improve translation quality](#tips-to-improve-translation-quality)
- [Options](#options)
- [Language Code Reference](#language-code-reference)
- [Translators Reference](#translators-reference)
- [GPT Config Reference](#gpt-config-reference)
- [Using Gimp for rendering](#using-gimp-for-rendering)
- [Api Documentation](#api-documentation)
- [Synchronous mode](#synchronous-mode)
- [Asynchronous mode](#asynchronous-mode)
- [Manual translation](#manual-translation)
- [Next steps](#next-steps)
- [Support Us](#support-us)
- [Thanks To All Our Contributors :](#thanks-to-all-our-contributors-)

## Samples

Expand Down Expand Up @@ -521,6 +529,7 @@ IND: Indonesian
| gpt3.5 | ✔️ | | Implements gpt-3.5-turbo. Requires `OPENAI_API_KEY` |
| gpt4 | ✔️ | | Implements gpt-4. Requires `OPENAI_API_KEY` |
| papago | | | |
| sakura | | | Requires `SAKURA_API_BASE` |
| offline | | ✔️ | Chooses most suitable offline translator for language |
| sugoi | | ✔️ | Sugoi V4.0 Models |
| m2m100 | | ✔️ | Supports every language |
Expand Down
1 change: 1 addition & 0 deletions README_CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ $ pip install git+https://github.com/kodalli/pydensecrf.git
| gpt3.5 | ✔️ | | Implements gpt-3.5-turbo. Requires `OPENAI_API_KEY` |
| gpt4 | ✔️ | | Implements gpt-4. Requires `OPENAI_API_KEY` |
| papago | | | |
| sakura | | | 需要 `SAKURA_API_BASE` |
| offline | | ✔️ | 自动选择可用的离线模型,只是选择器 |
| sugoi | | ✔️ | 只能翻译英文 |
| m2m100 | | ✔️ | 可以翻译所有语言 |
Expand Down
3 changes: 1 addition & 2 deletions manga_translator/translators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from .deepl import DeeplTranslator
from .papago import PapagoTranslator
from .caiyun import CaiyunTranslator
from .chatgpt import GPT3Translator, GPT35TurboTranslator, GPT4Translator, SakuraTranslator
from .chatgpt import GPT3Translator, GPT35TurboTranslator, GPT4Translator
from .nllb import NLLBTranslator, NLLBBigTranslator
from .sugoi import JparacrawlTranslator, JparacrawlBigTranslator, SugoiTranslator
from .m2m100 import M2M100Translator, M2M100BigTranslator
Expand All @@ -25,7 +25,6 @@
'jparacrawl_big': JparacrawlBigTranslator,
'm2m100': M2M100Translator,
'm2m100_big': M2M100BigTranslator,
'sakura': SakuraTranslator,
}

TRANSLATORS = {
Expand Down
77 changes: 1 addition & 76 deletions manga_translator/translators/chatgpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
from typing import List, Dict

from .common import CommonTranslator, MissingAPIKeyException
from .keys import OPENAI_API_KEY, OPENAI_HTTP_PROXY, OPENAI_API_BASE, SAKURA_API_BASE, SAKURA_API_KEY

from .keys import OPENAI_API_KEY, OPENAI_HTTP_PROXY, OPENAI_API_BASE
CONFIG = None

class GPT3Translator(CommonTranslator):
Expand Down Expand Up @@ -320,77 +319,3 @@ async def _request_translation(self, to_lang: str, prompt: str) -> str:

# If no response with text is found, return the first response's content (which may be empty)
return response.choices[0].message.content


class SakuraTranslator(GPT3Translator):
    """Translator that sends chat-completion requests to a Sakura endpoint.

    Requests go through the OpenAI chat-completion client but are pointed at
    ``SAKURA_API_BASE`` and authenticated with ``SAKURA_API_KEY``. Each query
    is translated with its own request, using a fixed Japanese -> Chinese
    prompt.
    """

    _CONFIG_KEY = 'sakura'
    _MAX_REQUESTS_PER_MINUTE = 200
    _RETRY_ATTEMPTS = 5
    _MAX_TOKENS = 8192
    # System prompt (Chinese): tells the model it is a light-novel translation
    # model that renders Japanese into fluent Simplified Chinese in light-novel
    # style, uses personal pronouns consistently with the context, and does not
    # add pronouns that are absent from the source text.
    _CHAT_SYSTEM_TEMPLATE = (
        '你是一个轻小说翻译模型,可以流畅通顺地以日本轻小说的风格将日文翻译成简体中文,并联系上下文正确使用人称代词,不擅自添加原文中没有的代词。'
    )

    def __init__(self):
        # NOTE(review): presumably `check_openai_key=False` disables the parent's
        # OPENAI_API_KEY validation, since requests here use SAKURA_API_KEY - confirm
        # against GPT3Translator.__init__.
        super().__init__(check_openai_key=False)

    async def _request_translation(self, to_lang: str, prompt: str) -> str:
        """Send one chat-completion request and return the translated text.

        Args:
            to_lang: Target language. Not used by this implementation: the
                user prompt always asks for a translation into Chinese.
            prompt: The Japanese source text to translate.

        Returns:
            The ``text`` of the first choice that carries one, otherwise the
            message content of the first choice. An empty string is returned
            when that content is missing, so the result is always a ``str``.
        """
        messages = [
            {'role': 'system', 'content': self._CHAT_SYSTEM_TEMPLATE},
            # User prompt (Chinese): "Translate the following Japanese text into Chinese:"
            {'role': 'user', 'content': '将下面的日文文本翻译成中文:' + prompt},
        ]

        # NOTE(review): the model name is hard-coded to an OpenAI identifier even
        # though the request is sent to SAKURA_API_BASE; presumably the Sakura
        # server ignores or remaps it - confirm.
        response = await openai.ChatCompletion.acreate(
            model='gpt-4-0613',
            messages=messages,
            max_tokens=self._MAX_TOKENS // 2,
            temperature=self.temperature,
            top_p=self.top_p,
            api_key=SAKURA_API_KEY,
            api_base=SAKURA_API_BASE,
        )

        total_tokens = response.usage['total_tokens']
        self.token_count += total_tokens
        self.token_count_last = total_tokens

        for choice in response.choices:
            if 'text' in choice:
                return choice.text

        # If no response with text is found, return the first response's content.
        # A missing (None) content is normalised to '' to honour the `-> str` contract.
        return response.choices[0].message.content or ''

    async def _translate(self, from_lang: str, to_lang: str, queries: List[str]) -> List[str]:
        """Translate every query with its own request, retrying on server errors.

        Args:
            from_lang: Source language. Not used by this implementation.
            to_lang: Target language, forwarded to ``_request_translation``.
            queries: Texts to translate.

        Returns:
            One translation per query, in the same order as ``queries``.

        Raises:
            openai.error.RateLimitError: After ``_RATELIMIT_RETRY_ATTEMPTS``
                rate-limited attempts for a single query.
            openai.error.APIError: After ``_RETRY_ATTEMPTS`` server errors for
                a single query.
        """
        translations = []
        self.logger.debug(f'Temperature: {self.temperature}, TopP: {self.top_p}')

        for query in queries:
            # Retry counters are reset for every query.
            ratelimit_attempt = 0
            server_error_attempt = 0
            while True:
                try:
                    response = await self._request_translation(to_lang, query)
                    break
                except openai.error.RateLimitError:  # Server returned ratelimit response
                    ratelimit_attempt += 1
                    # _RATELIMIT_RETRY_ATTEMPTS is not defined in this class; it is
                    # expected to be inherited from the parent translator.
                    if ratelimit_attempt >= self._RATELIMIT_RETRY_ATTEMPTS:
                        raise
                    self.logger.warning(f'Restarting request due to ratelimiting by openai servers. Attempt: {ratelimit_attempt}')
                    await asyncio.sleep(2)
                except openai.error.APIError:  # Server returned 500 error (probably server load)
                    server_error_attempt += 1
                    if server_error_attempt >= self._RETRY_ATTEMPTS:
                        self.logger.error('Sakura encountered a server error, possibly due to high server load. Use a different translator or try again later.')
                        raise
                    self.logger.warning(f'Restarting request due to a server error. Attempt: {server_error_attempt}')
                    await asyncio.sleep(1)

            # Formatting (rather than `+`) cannot raise if the response is not a str.
            self.logger.debug(f'-- Sakura Response --\n{response}')

            translations.append(response)

        self.logger.debug(translations)
        if self.token_count_last:
            self.logger.info(f'Used {self.token_count_last} tokens (Total: {self.token_count})')

        return translations
11 changes: 9 additions & 2 deletions manga_translator/translators/sakura.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
class Sakura13BTranslator(CommonTranslator):

_TIMEOUT = 999 # Seconds to wait for a response from the server before retrying
_RETRY_ATTEMPTS = 3 # Number of times to retry an errored request before giving up
_RETRY_ATTEMPTS = 1 # Number of times to retry an errored request before giving up
_TIMEOUT_RETRY_ATTEMPTS = 3 # Number of times to retry a timed out request before giving up
_RATELIMIT_RETRY_ATTEMPTS = 3 # Number of times to retry a ratelimited request before giving up

Expand Down Expand Up @@ -128,6 +128,10 @@ async def _translate(self, from_lang: str, to_lang: str, queries: List[str]) ->
self.logger.debug(f'Queries: {queries}')
text_prompt = '\n'.join(queries)
self.logger.debug('-- Sakura Prompt --\n' + self._format_prompt_log(text_prompt) + '\n\n')
# 去除emoji
queries = [re.sub(r'[\U00010000-\U0010ffff]', '', query) for query in queries]
# 替换❤
queries = [re.sub(r'❤', '♥', query) for query in queries]
# 给queries的每行加上「」
queries = [f'「{query}」' for query in queries]
response = await self._handle_translation_request(queries)
Expand All @@ -139,6 +143,9 @@ async def _translate(self, from_lang: str, to_lang: str, queries: List[str]) ->
rep_flag = self.detect_and_remove_extra_repeats(response)[0]
if rep_flag:
for i in range(self._RETRY_ATTEMPTS):
if self.detect_and_remove_extra_repeats(queries)[0]:
self.logger.warning('Queries have repeats.')
break
self.logger.warning(f'Re-translated because of model degradation, {i} times.')
self._set_gpt_style("precise")
self.logger.debug(f'Temperature: {self.temperature}, TopP: {self.top_p}')
Expand Down Expand Up @@ -254,7 +261,7 @@ async def _request_translation(self, input_text_list) -> str:
],
temperature=self.temperature,
top_p=self.top_p,
max_tokens=512,
max_tokens=1024,
frequency_penalty=self.frequency_penalty,
seed=-1,
extra_query=extra_query,
Expand Down

0 comments on commit 230ba8b

Please sign in to comment.