Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into moeflow-companion-main
Browse files Browse the repository at this point in the history
  • Loading branch information
jokester committed Nov 24, 2024
2 parents b0d6380 + 661fd5e commit 361e0ba
Show file tree
Hide file tree
Showing 8 changed files with 231 additions and 20 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ Colorizer: **mc2**
--save-text-file SAVE_TEXT_FILE Like --save-text but with a specified file path.
--filter-text FILTER_TEXT Filter regions by their text with a regex. Example
usage: --filter-text ".*badtext.*"
--pre-dict FILe_PATH Path to the pre-translation dictionary file. One entry per line,
--pre-dict FILE_PATH Path to the pre-translation dictionary file. One entry per line,
Comments can be added with `#` and `//`.
usage: //Example
dog cat #Example
Expand Down
6 changes: 2 additions & 4 deletions README_CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -205,15 +205,13 @@ FIL: Filipino (Tagalog)
--save-text-file SAVE_TEXT_FILE Like --save-text but with a specified file path.
--filter-text FILTER_TEXT Filter regions by their text with a regex. Example
usage: --filter-text ".*badtext.*"
--filter-text FILTER_TEXT Filter regions by their text with a regex. Example
usage: --text-filter ".*badtext.*"
--pre-dict FILe_PATH Path to the pre-translation dictionary file. One entry per line,
--pre-dict FILE_PATH Path to the pre-translation dictionary file. One entry per line,
Comments can be added with `#` and `//`.
usage: //Example
dog cat #Example
abc def
abc
--post-dict file_path Path to the post-translation dictionary file. Same as above.
--post-dict FILE_PATH Path to the post-translation dictionary file. Same as above.
--skip-lang Skip translation if source image is one of the provided languages,
use comma to separate multiple languages. Example: JPN,ENG
--prep-manual Prepare for manual typesetting by outputting blank,
Expand Down
43 changes: 43 additions & 0 deletions execute_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import asyncio
import json
import pickle
import requests
from PIL import Image

def _split_frames(buffer):
    """Split every complete protocol frame off the front of ``buffer``.

    A frame is ``status`` (1 byte) + ``size`` (4 bytes, big-endian) followed
    by ``size`` payload bytes.

    Returns:
        A ``(frames, rest)`` tuple. ``frames`` is a list of
        ``(status, payload)`` pairs in arrival order; ``rest`` holds the
        trailing bytes that do not form a complete frame yet.
    """
    frames = []
    offset = 0
    while len(buffer) - offset >= 5:
        status = buffer[offset]
        size = int.from_bytes(buffer[offset + 1:offset + 5], byteorder='big')
        end = offset + 5 + size
        if len(buffer) < end:
            # Header received but payload still incomplete: wait for more data.
            break
        frames.append((status, bytes(buffer[offset + 5:end])))
        offset = end
    return frames, buffer[offset:]


async def execute_method(method_name, attributes):
    """Call ``method_name`` on the local shared-mode server and print its output.

    The keyword arguments in ``attributes`` are pickled and POSTed to
    ``/execute/{method_name}``. On HTTP 200 the body is a stream of frames
    which are printed as they arrive: status 0 is the pickled result, 1 a
    progress message, 2 an error message. Any other HTTP status prints the
    error body instead.

    Args:
        method_name: Name of the remote method to run.
        attributes: Dict of keyword arguments for that method.
    """
    url = f"http://127.0.0.1:5003/execute/{method_name}"
    headers = {'Content-Type': 'application/octet-stream'}

    # The context manager closes the streamed connection on every exit path.
    with requests.post(url, data=pickle.dumps(attributes), headers=headers, stream=True) as response:
        if response.status_code != 200:
            try:
                print(json.loads(response.content))
            except ValueError:
                # Error bodies are not guaranteed to be JSON (e.g. a plain-text 500).
                print(response.status_code, response.text)
            return

        buffer = b''
        for chunk in response.iter_content(chunk_size=None):
            if not chunk:
                continue
            buffer += chunk
            frames, buffer = _split_frames(buffer)
            for status, payload in frames:
                if status == 0:
                    # SECURITY: pickle.loads can execute arbitrary code; only
                    # point this script at a server you trust.
                    print("data", pickle.loads(payload))
                elif status == 1:
                    # Progress and error payloads are UTF-8 text on the server side.
                    print("log", payload.decode("utf-8", errors="replace"))
                elif status == 2:
                    print("error", payload.decode("utf-8", errors="replace"))

        if buffer:
            # The stream ended in the middle of a frame; report it instead of dropping it.
            print("incomplete frame", len(buffer), "bytes left unparsed")



if __name__ == '__main__':
    # Manual smoke test: send one sample page to a running shared-mode server
    # with translation and inpainting both set to "none".
    sample_page = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png")
    call_params = {"translator": "none", "inpainter": "none"}
    asyncio.run(
        execute_method(
            "translate",
            {"image": sample_page, "params": call_params},
        )
    )
10 changes: 7 additions & 3 deletions manga_translator/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import logging
from argparse import Namespace

from manga_translator.share import MangaShare
from .manga_translator import (
MangaTranslator,
MangaTranslatorWeb,
Expand Down Expand Up @@ -84,6 +85,11 @@ async def dispatch(args: Namespace):
elif args.mode == 'api':
translator = MangaTranslatorAPI(args_dict)
await translator.listen(args_dict)
elif args.mode == 'shared':
translator = MangaShare(args_dict)
await translator.listen(args_dict)



if __name__ == '__main__':
args = None
Expand All @@ -96,9 +102,7 @@ async def dispatch(args: Namespace):
if args.mode != 'web':
logger.debug(args)

loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
loop.run_until_complete(dispatch(args))
asyncio.run(dispatch(args))
except KeyboardInterrupt:
if not args or args.mode != 'web':
print()
Expand Down
2 changes: 1 addition & 1 deletion manga_translator/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def _format_action_invocation(self, action: argparse.Action) -> str:


parser = argparse.ArgumentParser(prog='manga_translator', description='Seamlessly translate mangas into a chosen language', formatter_class=HelpFormatter)
parser.add_argument('-m', '--mode', default='batch', type=str, choices=['demo', 'batch', 'web', 'web_client', 'ws', 'api'], help='Run demo in single image demo mode (demo), batch translation mode (batch), web service mode (web)')
parser.add_argument('-m', '--mode', default='batch', type=str, choices=['demo', 'batch', 'web', 'web_client', 'ws', 'api', 'shared'], help='Run demo in single image demo mode (demo), batch translation mode (batch), web service mode (web)')
parser.add_argument('-i', '--input', default=None, type=path, nargs='+', help='Path to an image file if using demo mode, or path to an image folder if using batch mode')
parser.add_argument('-o', '--dest', default='', type=str, help='Path to the destination folder for translated images in batch mode')
parser.add_argument('-l', '--target-lang', default='CHS', type=str, choices=VALID_LANGUAGES, help='Destination language')
Expand Down
46 changes: 36 additions & 10 deletions manga_translator/manga_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class TranslationInterrupt(Exception):
pass


class MangaTranslator():
class MangaTranslator:

def __init__(self, params: dict = None):
self._progress_hooks = []
Expand Down Expand Up @@ -620,28 +620,38 @@ async def _run_text_translation(self, ctx: Context):
diff_target_regions = [] # Target language regions with different translation
same_non_target_regions = [] # Non-target language regions with identical translation
diff_non_target_regions = [] # Non-target language regions with different translation
has_target_lang_in_translation_regions = []

for region in ctx.text_regions:
text_equal = region.text.lower().strip() == region.translation.lower().strip()
has_target_lang = False
has_target_lang_in_translation = False

# Target language detection
if ctx.target_lang in ['CHS', 'CHT']: # Chinese
has_target_lang = bool(re.search('[\u4e00-\u9fff]', region.text))
has_target_lang = bool(re.search('[\u4e00-\u9fff]', region.text))
has_target_lang_in_translation = bool(re.search('[\u4e00-\u9fff]', region.translation))
elif ctx.target_lang == 'JPN': # Japanese
has_target_lang = bool(re.search('[\u3040-\u309f\u30a0-\u30ff\u4e00-\u9fff]', region.text))
has_target_lang_in_translation = bool(re.search('[\u3040-\u309f\u30a0-\u30ff\u4e00-\u9fff]', region.translation))
elif ctx.target_lang == 'KOR': # Korean
has_target_lang = bool(re.search('[\uac00-\ud7af\u1100-\u11ff]', region.text))
has_target_lang_in_translation = bool(re.search('[\uac00-\ud7af\u1100-\u11ff]', region.translation))
elif ctx.target_lang == 'ARA': # Arabic
has_target_lang = bool(re.search('[\u0600-\u06ff]', region.text))
has_target_lang_in_translation = bool(re.search('[\u0600-\u06ff]', region.translation))
elif ctx.target_lang == 'THA': # Thai
has_target_lang = bool(re.search('[\u0e00-\u0e7f]', region.text))
has_target_lang_in_translation = bool(re.search('[\u0e00-\u0e7f]', region.translation))
elif ctx.target_lang == 'RUS': # Russian
has_target_lang = bool(re.search('[\u0400-\u04ff]', region.text))
has_target_lang_in_translation = bool(re.search('[\u0400-\u04ff]', region.translation))
elif ctx.target_lang == 'UKR': # Ukrainian
has_target_lang = bool(re.search('[\u0400-\u04ff]', region.text))
has_target_lang_in_translation = bool(re.search('[\u0400-\u04ff]', region.translation))
elif ctx.target_lang == 'IND': # Indonesian
has_target_lang = bool(re.search('[A-Za-z]', region.text))
has_target_lang_in_translation = bool(re.search('[A-Za-z]', region.translation))

# Skip numeric translations and filtered text
if region.translation.isnumeric():
Expand All @@ -655,27 +665,43 @@ async def _run_text_translation(self, ctx: Context):
continue

if has_target_lang:
if text_equal:
logger.info(f'Filtered out: {region.translation}')
logger.info('Reason: Translation identical to original')
if text_equal:
same_target_regions.append(region)
else:
diff_target_regions.append(region)
else:
if text_equal:
logger.info(f'Filtered out: {region.translation}')
logger.info('Reason: Translation identical to original')
same_non_target_regions.append(region)
else:
diff_non_target_regions.append(region)


if has_target_lang_in_translation:
has_target_lang_in_translation_regions.append(region)

# If any different translations exist, retain all target language regions
if diff_target_regions or diff_non_target_regions:
new_text_regions.extend(same_target_regions)
new_text_regions.extend(diff_target_regions)

# Keep all non_target_lang regions with different translations (if translation contains target language characters)
for region in diff_non_target_regions:
if region in has_target_lang_in_translation_regions:
new_text_regions.append(region)
else:
logger.info(f'Filtered out: {region.translation}')
logger.info('Reason: Translation does not contain target language characters')

# Retain all non-target language regions with different translations (It appears empty, it clears all contents.)
new_text_regions.extend(diff_non_target_regions)
# No different translations exist, clear all content.
if not (diff_target_regions or diff_non_target_regions):
for region in same_target_regions:
logger.info(f'Filtered out: {region.translation}')
logger.info('Reason: Translation identical to original -the whole page-')

# Clear non_target_lang_regions with identical translations.
for region in same_non_target_regions:
logger.info(f'Filtered out: {region.translation}')
logger.info('Reason: Translation identical to original -one textine-')


else:
# Process non-special language scenarios using original logic
Expand Down
137 changes: 137 additions & 0 deletions manga_translator/share.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import asyncio
import pickle
from threading import Lock

import uvicorn
from fastapi import FastAPI, HTTPException, Path, Request, Response
from pydantic import BaseModel
import inspect

from starlette.responses import StreamingResponse

from manga_translator import MangaTranslator

class MethodCall(BaseModel):
    """Pydantic model pairing a method name with its serialized arguments.

    NOTE(review): not referenced by the visible part of this module; the HTTP
    routes read the method name from the URL and the arguments from the raw
    request body instead.
    """

    # Name of the method to invoke.
    method_name: str
    # Arguments as raw bytes (presumably a pickle, as elsewhere in this module; confirm).
    attributes: bytes


async def load_data(request: Request, method):
    """Decode the pickled keyword arguments of a request and validate them.

    Args:
        request: Incoming request whose raw body is a pickled dict mapping
            argument names to values.
        method: Callable the arguments are intended for.

    Returns:
        The unpickled dict, suitable for ``method(**attributes)``.

    Raises:
        HTTPException: 400 when the body cannot be unpickled, is not a dict,
            or its keys are not exactly the parameter names of ``method``.
    """
    attributes_bytes = await request.body()

    # SECURITY: pickle.loads can execute arbitrary code contained in the
    # payload. Only expose this endpoint to trusted clients.
    try:
        attributes = pickle.loads(attributes_bytes)
    except Exception as exc:
        # Unpickling can raise many exception types; all of them mean the
        # client sent a bad body, so answer 400 rather than crash with a 500.
        raise HTTPException(status_code=400, detail="Request body is not a valid pickle payload") from exc

    if not isinstance(attributes, dict):
        raise HTTPException(status_code=400, detail="Pickled payload must be a dict of keyword arguments")

    # Every parameter must be supplied by name, defaults included.
    expected_args = set(inspect.signature(method).parameters)
    provided_args = set(attributes)
    if expected_args != provided_args:
        raise HTTPException(status_code=400, detail="Incorrect number or names of arguments")
    return attributes


class MangaShare:
    """Expose the methods of one ``MangaTranslator`` instance over HTTP.

    Arguments and results travel as pickles. Only one remote call runs at a
    time: ``self.lock`` is taken when a request is accepted and released once
    its method has finished (or the request was rejected).

    Streamed responses (``/execute``) are a sequence of frames, each laid out
    as ``status`` (1 byte) + ``length`` (4 bytes, big-endian) + payload, where
    status 0 is the pickled result, 1 a UTF-8 progress message and 2 a UTF-8
    error message.
    """

    def __init__(self, params: dict = None):
        """Create the translator and read ``host``, ``port`` and ``nonce``.

        Args:
            params: Options forwarded unchanged to ``MangaTranslator``; the
                keys ``host``, ``port`` and ``nonce`` configure the server.
        """
        self.manga = MangaTranslator(params)
        # ``params`` defaults to None, so fall back to an empty dict for lookups.
        options = params or {}
        self.host = options.get('host', '127.0.0.1')
        self.port = int(options.get('port', '5003'))
        self.nonce = options.get('nonce', None)

        # Frames waiting to be streamed to the client of the current call.
        self.progress_queue = asyncio.Queue()
        self.lock = Lock()
        # Strong references to running ``run_method`` tasks; the event loop
        # itself only keeps weak references to tasks.
        self._tasks = set()

        async def hook(state: str, finished: bool):
            await self.progress_queue.put(self._frame(1, state.encode("utf-8")))
            # Yield to the event loop, presumably so the stream can forward
            # the frame before the translator continues.
            await asyncio.sleep(0)

        self.manga.add_progress_hook(hook)

    @staticmethod
    def _frame(status: int, payload: bytes) -> bytes:
        """Encode one frame: status byte, 4-byte big-endian length, payload."""
        return bytes([status]) + len(payload).to_bytes(4, 'big') + payload

    def _discard_stale_frames(self):
        """Empty the queue of frames left behind by an earlier call."""
        while True:
            try:
                self.progress_queue.get_nowait()
            except asyncio.QueueEmpty:
                return

    async def progress_stream(self):
        """
        Yield queued frames until one with a status other than 1 was sent,
        which is either an error or the result.
        """
        while True:
            progress = await self.progress_queue.get()
            yield progress
            if progress[0] != 1:
                break

    async def run_method(self, method, **attributes):
        """Run ``method`` and queue its result (status 0) or error (status 2).

        The caller must hold ``self.lock``; it is released here once the
        terminal frame has been queued.
        """
        try:
            if asyncio.iscoroutinefunction(method):
                result = await method(**attributes)
            else:
                result = method(**attributes)
            await self.progress_queue.put(self._frame(0, pickle.dumps(result)))
        except Exception as e:
            await self.progress_queue.put(self._frame(2, str(e).encode("utf-8")))
        finally:
            self.lock.release()

    def check_nonce(self, request: Request):
        """Raise 401 unless no nonce is configured or ``X-Nonce`` matches it."""
        if self.nonce:
            nonce = request.headers.get('X-Nonce')
            if nonce != self.nonce:
                raise HTTPException(401, detail="Nonce does not match")

    def check_lock(self):
        """Take the execution lock, or raise 429 if another call holds it."""
        if not self.lock.acquire(blocking=False):
            raise HTTPException(status_code=429, detail="some Method is already being executed.")

    def get_fn(self, method_name: str):
        """Resolve ``method_name`` on the translator.

        Raises:
            HTTPException: 403 for names starting with ``__``; 404 when the
                attribute is missing or is not callable.
        """
        if method_name.startswith("__"):
            raise HTTPException(status_code=403, detail="These functions are not allowed to be executed remotely")
        method = getattr(self.manga, method_name, None)
        # Plain data attributes cannot be executed, so treat them as missing.
        if not method or not callable(method):
            raise HTTPException(status_code=404, detail="Method not found")
        return method

    async def listen(self, translation_params: dict = None):
        """Build the FastAPI app and serve it on ``self.host:self.port``.

        Args:
            translation_params: Accepted for parity with the other modes'
                ``listen`` call; not used here.
        """
        app = FastAPI()

        @app.get("/is_locked")
        async def is_locked():
            return {"locked": self.lock.locked()}

        @app.post("/simple_execute/{method_name}")
        async def simple_execute_method(request: Request, method_name: str = Path(...)):
            self.check_nonce(request)
            self.check_lock()
            # From here on the lock is held: release it exactly once on every
            # exit path, including rejected requests (403/404/400).
            try:
                method = self.get_fn(method_name)
                attr = await load_data(request, method)
                try:
                    if asyncio.iscoroutinefunction(method):
                        result = await method(**attr)
                    else:
                        result = method(**attr)
                    result_bytes = pickle.dumps(result)
                except Exception as e:
                    raise HTTPException(status_code=500, detail=str(e)) from e
            finally:
                self.lock.release()
            return Response(content=result_bytes, media_type="application/octet-stream")

        @app.post("/execute/{method_name}")
        async def execute_method(request: Request, method_name: str = Path(...)):
            self.check_nonce(request)
            self.check_lock()
            try:
                method = self.get_fn(method_name)
                attr = await load_data(request, method)

                # Frames from a /simple_execute call or from a client that
                # disconnected mid-stream would otherwise be replayed here.
                # NOTE: an earlier response that is still being streamed
                # shares this queue.
                self._discard_stale_frames()

                # streaming response
                streaming_response = StreamingResponse(self.progress_stream(), media_type="application/octet-stream")
                task = asyncio.create_task(self.run_method(method, **attr))
            except BaseException:
                # run_method never started, so nothing else will release the lock.
                self.lock.release()
                raise
            self._tasks.add(task)
            task.add_done_callback(self._tasks.discard)
            return streaming_response

        config = uvicorn.Config(app, host=self.host, port=self.port)
        server = uvicorn.Server(config)
        await server.serve()
5 changes: 4 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,7 @@ manga-ocr
langdetect
pydensecrf@https://github.com/lucasb-eyer/pydensecrf/archive/refs/heads/master.zip
accelerate
bitsandbytes
bitsandbytes
uvicorn
fastapi
pydantic

0 comments on commit 361e0ba

Please sign in to comment.