From 04581221f502ff0a0d1048e30e3ac63de9020beb Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 16 Nov 2024 23:15:22 +0100 Subject: [PATCH 01/75] config structure --- manga_translator/args.py | 124 ++++++++++++--------------------- manga_translator/config.py | 137 +++++++++++++++++++++++++++++++++++++ 2 files changed, 179 insertions(+), 82 deletions(-) create mode 100644 manga_translator/config.py diff --git a/manga_translator/args.py b/manga_translator/args.py index 3933b04ba..0ac6a65fa 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -87,98 +87,58 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser = argparse.ArgumentParser(prog='manga_translator', description='Seamlessly translate mangas into a chosen language', formatter_class=HelpFormatter) -parser.add_argument('-m', '--mode', default='batch', type=str, choices=['demo', 'batch', 'web', 'web_client', 'ws', 'api'], help='Run demo in single image demo mode (demo), batch translation mode (batch), web service mode (web)') -parser.add_argument('-i', '--input', default=None, type=path, nargs='+', help='Path to an image file if using demo mode, or path to an image folder if using batch mode') -parser.add_argument('-o', '--dest', default='', type=str, help='Path to the destination folder for translated images in batch mode') -parser.add_argument('-l', '--target-lang', default='CHS', type=str, choices=VALID_LANGUAGES, help='Destination language') parser.add_argument('-v', '--verbose', action='store_true', help='Print debug info and save intermediate images in result folder') -parser.add_argument('-f', '--format', default=None, choices=OUTPUT_FORMATS, help='Output format of the translation.') parser.add_argument('--attempts', default=0, type=int, help='Retry attempts on encountered error. 
-1 means infinite times.') parser.add_argument('--ignore-errors', action='store_true', help='Skip image on encountered error.') -parser.add_argument('--overwrite', action='store_true', help='Overwrite already translated images in batch mode.') -parser.add_argument('--skip-no-text', action='store_true', help='Skip image without text (Will not be saved).') parser.add_argument('--model-dir', default=None, type=dir_path, help='Model directory (by default ./models in project root)') -parser.add_argument('--skip-lang', default=None, type=str, help='Skip translation if source image is one of the provide languages, use comma to separate multiple languages. Example: JPN,ENG') - g = parser.add_mutually_exclusive_group() g.add_argument('--use-gpu', action='store_true', help='Turn on/off gpu (auto switch between mps and cuda)') g.add_argument('--use-gpu-limited', action='store_true', help='Turn on/off gpu (excluding offline translator)') - -parser.add_argument('--detector', default='default', type=str, choices=DETECTORS, help='Text detector used for creating a text mask from an image, DO NOT use craft for manga, it\'s not designed for it') -parser.add_argument('--ocr', default='48px', type=str, choices=OCRS, help='Optical character recognition (OCR) model to use') -parser.add_argument('--use-mocr-merge', action='store_true', help='Use bbox merge when Manga OCR inference.') -parser.add_argument('--inpainter', default='lama_large', type=str, choices=INPAINTERS, help='Inpainting model to use') -parser.add_argument('--upscaler', default='esrgan', type=str, choices=UPSCALERS, help='Upscaler to use. --upscale-ratio has to be set for it to take effect') -parser.add_argument('--upscale-ratio', default=None, type=float, help='Image upscale ratio applied before detection. 
Can improve text detection.') -parser.add_argument('--colorizer', default=None, type=str, choices=COLORIZERS, help='Colorization model to use.') - -g = parser.add_mutually_exclusive_group() -g.add_argument('--translator', default='google', type=str, choices=TRANSLATORS, help='Language translator to use') -g.add_argument('--translator-chain', default=None, type=translator_chain, help='Output of one translator goes in another. Example: --translator-chain "google:JPN;sugoi:ENG".') -g.add_argument('--selective-translation', default=None, type=translator_chain, help='Select a translator based on detected language in image. Note the first translation service acts as default if the language isn\'t defined. Example: --translator-chain "google:JPN;sugoi:ENG".') - -parser.add_argument('--revert-upscaling', action='store_true', help='Downscales the previously upscaled image after translation back to original size (Use with --upscale-ratio).') -parser.add_argument('--detection-size', default=1536, type=int, help='Size of image used for detection') -parser.add_argument('--det-rotate', action='store_true', help='Rotate the image for detection. Might improve detection.') -parser.add_argument('--det-auto-rotate', action='store_true', help='Rotate the image for detection to prefer vertical textlines. Might improve detection.') -parser.add_argument('--det-invert', action='store_true', help='Invert the image colors for detection. Might improve detection.') -parser.add_argument('--det-gamma-correct', action='store_true', help='Applies gamma correction for detection. 
Might improve detection.') -parser.add_argument('--unclip-ratio', default=2.3, type=float, help='How much to extend text skeleton to form bounding box') -parser.add_argument('--box-threshold', default=0.7, type=float, help='Threshold for bbox generation') -parser.add_argument('--text-threshold', default=0.5, type=float, help='Threshold for text detection') -parser.add_argument('--min-text-length', default=0, type=int, help='Minimum text length of a text region') -parser.add_argument('--no-text-lang-skip', action='store_true', help='Dont skip text that is seemingly already in the target language.') -parser.add_argument('--inpainting-size', default=2048, type=int, help='Size of image used for inpainting (too large will result in OOM)') -parser.add_argument('--inpainting-precision', default='fp32', type=str, help='Inpainting precision for lama, use bf16 while you can.', choices=['fp32', 'fp16', 'bf16']) -parser.add_argument('--colorization-size', default=576, type=int, help='Size of image used for colorization. Set to -1 to use full image size') -parser.add_argument('--denoise-sigma', default=30, type=int, help='Used by colorizer and affects color strength, range from 0 to 255 (default 30). -1 turns it off.') -parser.add_argument('--mask-dilation-offset', default=0, type=int, help='By how much to extend the text mask to remove left-over text pixels of the original image.') - -parser.add_argument('--disable-font-border', action='store_true', help='Disable font border') -parser.add_argument('--font-size', default=None, type=int, help='Use fixed font size for rendering') -parser.add_argument('--font-size-offset', default=0, type=int, help='Offset font size by a given amount, positive number increase font size and vice versa') -parser.add_argument('--font-size-minimum', default=-1, type=int, help='Minimum output font size. 
Default is image_sides_sum/200') -parser.add_argument('--font-color', default=None, type=str, help='Overwrite the text fg/bg color detected by the OCR model. Use hex string without the "#" such as FFFFFF for a white foreground or FFFFFF:000000 to also have a black background around the text.') -parser.add_argument('--line-spacing', default=None, type=float, help='Line spacing is font_size * this value. Default is 0.01 for horizontal text and 0.2 for vertical.') - -g = parser.add_mutually_exclusive_group() -g.add_argument('--force-horizontal', action='store_true', help='Force text to be rendered horizontally') -g.add_argument('--force-vertical', action='store_true', help='Force text to be rendered vertically') - -g = parser.add_mutually_exclusive_group() -g.add_argument('--align-left', action='store_true', help='Align rendered text left') -g.add_argument('--align-center', action='store_true', help='Align rendered text centered') -g.add_argument('--align-right', action='store_true', help='Align rendered text right') - -g = parser.add_mutually_exclusive_group() -g.add_argument('--uppercase', action='store_true', help='Change text to uppercase') -g.add_argument('--lowercase', action='store_true', help='Change text to lowercase') - -parser.add_argument('--no-hyphenation', action='store_true', help='If renderer should be splitting up words using a hyphen character (-)') -parser.add_argument('--manga2eng', action='store_true', help='Render english text translated from manga with some additional typesetting. 
Ignores some other argument options') -parser.add_argument('--gpt-config', type=file_path, help='Path to GPT config file, more info in README') -parser.add_argument('--use-mtpe', action='store_true', help='Turn on/off machine translation post editing (MTPE) on the command line (works only on linux right now)') - -g = parser.add_mutually_exclusive_group() -g.add_argument('--save-text', action='store_true', help='Save extracted text and translations into a text file.') -g.add_argument('--save-text-file', default='', type=str, help='Like --save-text but with a specified file path.') - -parser.add_argument('--filter-text', default=None, type=str, help='Filter regions by their text with a regex. Example usage: --text-filter ".*badtext.*"') -parser.add_argument('--prep-manual', action='store_true', help='Prepare for manual typesetting by outputting blank, inpainted images, plus copies of the original for reference') parser.add_argument('--font-path', default='', type=file_path, help='Path to font file') -parser.add_argument('--gimp-font', default='Sans-serif', type=str, help='Font family to use for gimp rendering.') -parser.add_argument('--host', default='127.0.0.1', type=str, help='Used by web module to decide which host to attach to') -parser.add_argument('--port', default=5003, type=int, help='Used by web module to decide which port to attach to') -parser.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Used by web module as secret for securing internal web server communication') -# parser.add_argument('--log-web', action='store_true', help='Used by web module to decide if web logs should be surfaced') -parser.add_argument('--ws-url', default='ws://localhost:5000', type=str, help='Server URL for WebSocket mode') -parser.add_argument('--save-quality', default=100, type=int, help='Quality of saved JPEG image, range from 0 to 100 with 100 being best') -parser.add_argument('--ignore-bubble', default=0, type=int, help='The threshold for 
ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles') - -parser.add_argument('--kernel-size', default=3, type=int, help='Set the convolution kernel size of the text erasure area to completely clean up text residues') - parser.add_argument('--pre-dict', default=None, type=file_path, help='Path to the pre-translation dictionary file') parser.add_argument('--post-dict', default=None, type=file_path, help='Path to the post-translation dictionary file') +subparsers = parser.add_subparsers(dest='mode', required=True, help='Mode of operation') + +# Batch mode +parser_batch = subparsers.add_parser('batch', help='Run in batch translation mode') +parser_batch.add_argument('-i', '--input', required=True, type=dir_path, help='Path to an image folder') +parser_batch.add_argument('-o', '--dest', default='', type=str, help='Path to the destination folder for translated images') +parser_batch.add_argument('-f', '--format', default=None, choices=OUTPUT_FORMATS, help='Output format of the translation.') +parser_batch.add_argument('--overwrite', action='store_true', help='Overwrite already translated images') +parser_batch.add_argument('--skip-no-text', action='store_true', help='Skip image without text (Will not be saved).') +parser_batch.add_argument('--use-mtpe', action='store_true', help='Turn on/off machine translation post editing (MTPE) on the command line (works only on linux right now)') +g_batch = parser_batch.add_mutually_exclusive_group() +g_batch.add_argument('--save-text', action='store_true', help='Save extracted text and translations into a text file.') +g_batch.add_argument('--save-text-file', default='', type=str, help='Like --save-text but with a specified file path.') +parser_batch.add_argument('--prep-manual', action='store_true', help='Prepare for manual typesetting 
by outputting blank, inpainted images, plus copies of the original for reference') +parser_batch.add_argument('--save-quality', default=100, type=int, help='Quality of saved JPEG image, range from 0 to 100 with 100 being best') + +# Web mode +parser_web = subparsers.add_parser('web', help='Run in web service mode') +parser_web.add_argument('--host', default='127.0.0.1', type=str, help='Host for web service') +parser_web.add_argument('--port', default=5003, type=int, help='Port for web service') +parser_web.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal web server communication') + +# Web client mode +parser_web_client = subparsers.add_parser('web_client', help='Run in web client mode') +parser_web_client.add_argument('--host', default='127.0.0.1', type=str, help='Host for web client') +parser_web_client.add_argument('--port', default=5003, type=int, help='Port for web client') +parser_web_client.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal web client communication') + +# WebSocket mode +parser_ws = subparsers.add_parser('ws', help='Run in WebSocket mode') +parser_ws.add_argument('--host', default='127.0.0.1', type=str, help='Host for WebSocket service') +parser_ws.add_argument('--port', default=5003, type=int, help='Port for WebSocket service') +parser_ws.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal WebSocket communication') +parser_ws.add_argument('--ws-url', default='ws://localhost:5000', type=str, help='Server URL for WebSocket mode') + +# API mode +parser_api = subparsers.add_parser('api', help='Run in API mode') +parser_api.add_argument('--host', default='127.0.0.1', type=str, help='Host for API service') +parser_api.add_argument('--port', default=5003, type=int, help='Port for API service') +parser_api.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, 
help='Nonce for securing internal API server communication') + + # Generares dict with a default value for each argument DEFAULT_ARGS = vars(parser.parse_args([])) diff --git a/manga_translator/config.py b/manga_translator/config.py new file mode 100644 index 000000000..abb7b9268 --- /dev/null +++ b/manga_translator/config.py @@ -0,0 +1,137 @@ +from pydantic import BaseModel +from typing import Optional + +class RenderConfig(BaseModel): + """""" + """Render english text translated from manga with some additional typesetting. Ignores some other argument options""" + renderer: str = 'default' #todo: validate {"default", "manga2eng"} #todo: convert to enum + """Align rendered text""" + alignment: str = 'auto' # todo: validate {'left','center','right'} #todo: convert to enum + """Disable font border""" + disable_font_border: bool = False + """Offset font size by a given amount, positive number increase font size and vice versa""" + font_size_offset: int = 0 + """Minimum output font size. Default is image_sides_sum/200""" + font_size_minimum: int = -1 + """Force text to be rendered horizontally/vertically/none""" + direction: str = 'auto' # todo: validate {'auto', 'h', 'v'} #todo: convert to enum + """Change text to uppercase""" + uppercase: bool = False + """Change text to lowercase""" + lowercase: bool = False + """Font family to use for gimp rendering.""" + gimp_font: str = 'Sans-serif' + """If renderer should be splitting up words using a hyphen character (-)""" + no_hyphenation: bool = False + """Overwrite the text fg/bg color detected by the OCR model. Use hex string without the "#" such as FFFFFF for a white foreground or FFFFFF:000000 to also have a black background around the text.""" + font_color: Optional[str] = None + """Line spacing is font_size * this value. 
Default is 0.01 for horizontal text and 0.2 for vertical.""" + line_spacing: Optional[float] = None + """Use fixed font size for rendering""" + font_size: Optional[int] = None + +class UpscaleConfig(BaseModel): + """""" + """Upscaler to use. --upscale-ratio has to be set for it to take effect""" + upscaler: str = 'esrgan' #todo: validate UPSCALERS #todo: convert to enum + """Downscales the previously upscaled image after translation back to original size (Use with --upscale-ratio).""" + revert_upscaling: bool = False + """Image upscale ratio applied before detection. Can improve text detection.""" + upscale_ratio: Optional[float] = None + + +class TranslatorConfig(BaseModel): + """""" + """Language translator to use""" + translator: str = "google" #todo: validate TRANSLATORS todo: convert to enum + """Destination language""" + target_lang: str = 'ENG' #todo: validate VALID_LANGUAGES #todo: convert to enum + """Dont skip text that is seemingly already in the target language.""" + no_text_lang_skip: bool = False + """Skip translation if source image is one of the provide languages, use comma to separate multiple languages. Example: JPN,ENG""" + skip_lang: Optional[str] = None + """Path to GPT config file, more info in README""" + gpt_config: Optional[str] = None # todo: no more path + """Output of one translator goes in another. Example: --translator-chain "google:JPN;sugoi:ENG".""" + translator_chain: Optional[str] = None # todo: add parser translator_chain #todo: merge into one + """Select a translator based on detected language in image. Note the first translation service acts as default if the language isn\'t defined. 
Example: --translator-chain "google:JPN;sugoi:ENG".'""" + selective_translation: Optional[str] = None # todo: add parser translator_chain #todo: merge into one + +class DetectorConfig(BaseModel): + """""" + """"Text detector used for creating a text mask from an image, DO NOT use craft for manga, it\'s not designed for it""" + detector: str = 'default' #todo: validate DETECTORS #todo: convert to enum + """Size of image used for detection""" + detection_size: int = 1536 + """Threshold for text detection""" + text_threshold: float = 0.5 + """Rotate the image for detection. Might improve detection.""" + det_rotate: bool = False + """Rotate the image for detection to prefer vertical textlines. Might improve detection.""" + det_auto_rotate: bool = False + """Invert the image colors for detection. Might improve detection.""" + det_invert: bool = False + """Applies gamma correction for detection. Might improve detection.""" + det_gamma_correct: bool = False + """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" + ignore_bubble: int = 0 + +class InpainterConfig(BaseModel): + """""" + """Inpainting model to use""" + inpainter: str = 'lama_large' #todo: validate INPAINTERS #todo: convert to enum + """Size of image used for inpainting (too large will result in OOM)""" + inpainting_size: int = 2048 + """Inpainting precision for lama, use bf16 while you can.""" + inpainting_precision: str = 'fp32' #todo: validate ['fp32', 'fp16', 'bf16'] #todo: convert to enum + +class ColorizerConfig(BaseModel): + """""" + """Size of image used for colorization. Set to -1 to use full image size""" + colorization_size: int = 576 + """Used by colorizer and affects color strength, range from 0 to 255 (default 30). 
-1 turns it off.""" + denoise_sigma: int = 30 + """Colorization model to use.""" + colorizer: Optional[str] = None # todo: validate COLORIZERS #todo: convert to enum + + +class OcrConfig(BaseModel): + """""" + """Use bbox merge when Manga OCR inference.""" + use_mocr_merge: bool = False + """Optical character recognition (OCR) model to use""" + ocr: str = '48px' #todo: validate OCRS #todo: convert to enum + """Minimum text length of a text region""" + min_text_length: int = 0 + +class Config(BaseModel): + # unclear + pre_dict: Optional[str] = None + post_dict: Optional[str] = None + + # json + """Filter regions by their text with a regex. Example usage: '.*badtext.*'""" + filter_text: Optional[str] = None + """render configs""" + render: RenderConfig + """upscaler configs""" + upscale: UpscaleConfig + """tanslator configs""" + translator: TranslatorConfig + """detector configs""" + detector: DetectorConfig + """colorizer configs""" + colorizer: ColorizerConfig + """inpainter configs""" + inpainter: InpainterConfig + """Ocr configs""" + ocr: OcrConfig + # ? + """How much to extend text skeleton to form bounding box""" + unclip_ratio: float = 2.3 + """Set the convolution kernel size of the text erasure area to completely clean up text residues""" + kernel_size: int = 3 + """By how much to extend the text mask to remove left-over text pixels of the original image.""" + mask_dilation_offset: int = 0 + """Threshold for bbox generation""" + box_threshold: float = 0.7 \ No newline at end of file From 5711acc08a2d073bf4b9d66c4128c60e00ca5dc3 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 16 Nov 2024 23:37:17 +0100 Subject: [PATCH 02/75] i have python. why bellow? 
--- manga_translator/config.py | 136 +++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 59 deletions(-) diff --git a/manga_translator/config.py b/manga_translator/config.py index abb7b9268..5340b2be9 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -1,108 +1,126 @@ +from enum import Enum, IntEnum + from pydantic import BaseModel from typing import Optional +class Renderer(IntEnum): + default = 0 + manga2Eng = 1 + +class Alignment(IntEnum): + auto = 0 + left = 1 + center = 2 + right = 3 + +class Direction(IntEnum): + auto = 0 + h = 1 + v = 2 + +class InpaintPrecision(IntEnum): + fp32 = 0 + fp16 = 1 + bf16 = 2 + class RenderConfig(BaseModel): - """""" + renderer: Renderer = Renderer.default """Render english text translated from manga with some additional typesetting. Ignores some other argument options""" - renderer: str = 'default' #todo: validate {"default", "manga2eng"} #todo: convert to enum + alignment: Alignment = Alignment.auto """Align rendered text""" - alignment: str = 'auto' # todo: validate {'left','center','right'} #todo: convert to enum - """Disable font border""" disable_font_border: bool = False - """Offset font size by a given amount, positive number increase font size and vice versa""" + """Disable font border""" font_size_offset: int = 0 - """Minimum output font size. Default is image_sides_sum/200""" + """Offset font size by a given amount, positive number increase font size and vice versa""" font_size_minimum: int = -1 + """Minimum output font size. 
Default is image_sides_sum/200""" + direction: Direction = Direction.auto """Force text to be rendered horizontally/vertically/none""" - direction: str = 'auto' # todo: validate {'auto', 'h', 'v'} #todo: convert to enum - """Change text to uppercase""" uppercase: bool = False - """Change text to lowercase""" + """Change text to uppercase""" lowercase: bool = False - """Font family to use for gimp rendering.""" + """Change text to lowercase""" gimp_font: str = 'Sans-serif' - """If renderer should be splitting up words using a hyphen character (-)""" + """Font family to use for gimp rendering.""" no_hyphenation: bool = False - """Overwrite the text fg/bg color detected by the OCR model. Use hex string without the "#" such as FFFFFF for a white foreground or FFFFFF:000000 to also have a black background around the text.""" + """If renderer should be splitting up words using a hyphen character (-)""" font_color: Optional[str] = None - """Line spacing is font_size * this value. Default is 0.01 for horizontal text and 0.2 for vertical.""" + """Overwrite the text fg/bg color detected by the OCR model. Use hex string without the "#" such as FFFFFF for a white foreground or FFFFFF:000000 to also have a black background around the text.""" line_spacing: Optional[float] = None - """Use fixed font size for rendering""" + """Line spacing is font_size * this value. Default is 0.01 for horizontal text and 0.2 for vertical.""" font_size: Optional[int] = None + """Use fixed font size for rendering""" + class UpscaleConfig(BaseModel): - """""" - """Upscaler to use. --upscale-ratio has to be set for it to take effect""" upscaler: str = 'esrgan' #todo: validate UPSCALERS #todo: convert to enum - """Downscales the previously upscaled image after translation back to original size (Use with --upscale-ratio).""" + """Upscaler to use. --upscale-ratio has to be set for it to take effect""" revert_upscaling: bool = False - """Image upscale ratio applied before detection. 
Can improve text detection.""" + """Downscales the previously upscaled image after translation back to original size (Use with --upscale-ratio).""" upscale_ratio: Optional[float] = None - + """Image upscale ratio applied before detection. Can improve text detection.""" class TranslatorConfig(BaseModel): - """""" - """Language translator to use""" translator: str = "google" #todo: validate TRANSLATORS todo: convert to enum - """Destination language""" + """Language translator to use""" target_lang: str = 'ENG' #todo: validate VALID_LANGUAGES #todo: convert to enum - """Dont skip text that is seemingly already in the target language.""" + """Destination language""" no_text_lang_skip: bool = False - """Skip translation if source image is one of the provide languages, use comma to separate multiple languages. Example: JPN,ENG""" + """Dont skip text that is seemingly already in the target language.""" skip_lang: Optional[str] = None - """Path to GPT config file, more info in README""" + """Skip translation if source image is one of the provide languages, use comma to separate multiple languages. Example: JPN,ENG""" gpt_config: Optional[str] = None # todo: no more path - """Output of one translator goes in another. Example: --translator-chain "google:JPN;sugoi:ENG".""" + """Path to GPT config file, more info in README""" translator_chain: Optional[str] = None # todo: add parser translator_chain #todo: merge into one - """Select a translator based on detected language in image. Note the first translation service acts as default if the language isn\'t defined. Example: --translator-chain "google:JPN;sugoi:ENG".'""" + """Output of one translator goes in another. Example: --translator-chain "google:JPN;sugoi:ENG".""" selective_translation: Optional[str] = None # todo: add parser translator_chain #todo: merge into one + """Select a translator based on detected language in image. Note the first translation service acts as default if the language isn\'t defined. 
Example: --translator-chain "google:JPN;sugoi:ENG".'""" class DetectorConfig(BaseModel): """""" - """"Text detector used for creating a text mask from an image, DO NOT use craft for manga, it\'s not designed for it""" detector: str = 'default' #todo: validate DETECTORS #todo: convert to enum - """Size of image used for detection""" + """"Text detector used for creating a text mask from an image, DO NOT use craft for manga, it\'s not designed for it""" detection_size: int = 1536 - """Threshold for text detection""" + """Size of image used for detection""" text_threshold: float = 0.5 - """Rotate the image for detection. Might improve detection.""" + """Threshold for text detection""" det_rotate: bool = False - """Rotate the image for detection to prefer vertical textlines. Might improve detection.""" + """Rotate the image for detection. Might improve detection.""" det_auto_rotate: bool = False - """Invert the image colors for detection. Might improve detection.""" + """Rotate the image for detection to prefer vertical textlines. Might improve detection.""" det_invert: bool = False - """Applies gamma correction for detection. Might improve detection.""" + """Invert the image colors for detection. Might improve detection.""" det_gamma_correct: bool = False - """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" + """Applies gamma correction for detection. Might improve detection.""" ignore_bubble: int = 0 + """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. 
If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" class InpainterConfig(BaseModel): - """""" - """Inpainting model to use""" inpainter: str = 'lama_large' #todo: validate INPAINTERS #todo: convert to enum - """Size of image used for inpainting (too large will result in OOM)""" + """Inpainting model to use""" inpainting_size: int = 2048 + """Size of image used for inpainting (too large will result in OOM)""" + inpainting_precision: InpaintPrecision = InpaintPrecision.fp32 """Inpainting precision for lama, use bf16 while you can.""" - inpainting_precision: str = 'fp32' #todo: validate ['fp32', 'fp16', 'bf16'] #todo: convert to enum + class ColorizerConfig(BaseModel): - """""" - """Size of image used for colorization. Set to -1 to use full image size""" colorization_size: int = 576 - """Used by colorizer and affects color strength, range from 0 to 255 (default 30). -1 turns it off.""" + """Size of image used for colorization. Set to -1 to use full image size""" denoise_sigma: int = 30 - """Colorization model to use.""" + """Used by colorizer and affects color strength, range from 0 to 255 (default 30). -1 turns it off.""" colorizer: Optional[str] = None # todo: validate COLORIZERS #todo: convert to enum + """Colorization model to use.""" + class OcrConfig(BaseModel): - """""" - """Use bbox merge when Manga OCR inference.""" use_mocr_merge: bool = False - """Optical character recognition (OCR) model to use""" + """Use bbox merge when Manga OCR inference.""" ocr: str = '48px' #todo: validate OCRS #todo: convert to enum - """Minimum text length of a text region""" + """Optical character recognition (OCR) model to use""" min_text_length: int = 0 + """Minimum text length of a text region""" class Config(BaseModel): # unclear @@ -110,28 +128,28 @@ class Config(BaseModel): post_dict: Optional[str] = None # json - """Filter regions by their text with a regex. 
Example usage: '.*badtext.*'""" filter_text: Optional[str] = None - """render configs""" + """Filter regions by their text with a regex. Example usage: '.*badtext.*'""" render: RenderConfig - """upscaler configs""" + """render configs""" upscale: UpscaleConfig - """tanslator configs""" + """upscaler configs""" translator: TranslatorConfig - """detector configs""" + """tanslator configs""" detector: DetectorConfig - """colorizer configs""" + """detector configs""" colorizer: ColorizerConfig - """inpainter configs""" + """colorizer configs""" inpainter: InpainterConfig - """Ocr configs""" + """inpainter configs""" ocr: OcrConfig + """Ocr configs""" # ? - """How much to extend text skeleton to form bounding box""" unclip_ratio: float = 2.3 - """Set the convolution kernel size of the text erasure area to completely clean up text residues""" + """How much to extend text skeleton to form bounding box""" kernel_size: int = 3 - """By how much to extend the text mask to remove left-over text pixels of the original image.""" + """Set the convolution kernel size of the text erasure area to completely clean up text residues""" mask_dilation_offset: int = 0 + """By how much to extend the text mask to remove left-over text pixels of the original image.""" + box_threshold: float = 0.7 """Threshold for bbox generation""" - box_threshold: float = 0.7 \ No newline at end of file From c5feafea5ead4b2426e801b87a70909351b0d650 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 16 Nov 2024 23:50:18 +0100 Subject: [PATCH 03/75] update to enum --- manga_translator/colorization/__init__.py | 3 +- manga_translator/config.py | 66 ++++++++++++++++++++--- manga_translator/detection/__init__.py | 11 ++-- manga_translator/inpainting/__init__.py | 13 ++--- manga_translator/ocr/__init__.py | 9 ++-- manga_translator/translators/__init__.py | 49 ++++++++--------- manga_translator/upscaling/__init__.py | 7 +-- 7 files changed, 109 insertions(+), 
49 deletions(-) diff --git a/manga_translator/colorization/__init__.py b/manga_translator/colorization/__init__.py index aef0dcd0e..d7423286c 100644 --- a/manga_translator/colorization/__init__.py +++ b/manga_translator/colorization/__init__.py @@ -2,9 +2,10 @@ from .common import CommonColorizer, OfflineColorizer from .manga_colorization_v2 import MangaColorizationV2 +from ..config import Colorizer COLORIZERS = { - 'mc2': MangaColorizationV2, + Colorizer.mc2: MangaColorizationV2, } colorizer_cache = {} diff --git a/manga_translator/config.py b/manga_translator/config.py index 5340b2be9..30c44193c 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -23,6 +23,60 @@ class InpaintPrecision(IntEnum): fp16 = 1 bf16 = 2 +class Detector(IntEnum): + default = 0 + dbconvnext = 1 + ctd = 2 + craft = 3 + none = 4 + +class Inpainter(IntEnum): + default = 0 + lama_large = 1 + lama_mpe = 2 + sd = 3 + none = 4 + original = 5 + +class Colorizer(IntEnum): + none = 0 + mc2 = 1 + +class Ocr(IntEnum): + ocr32px = 0 + ocr48px = 1 + ocr48px_ctc = 2 + mocr = 3 +class Translator(IntEnum): + youdao = 0 + baidu = 1 + deepl = 2 + papago = 3 + caiyun = 4 + gpt3 = 5 + gpt3_5 = 6 + gpt4 = 7 + none = 8 + original = 9 + sakura = 10 + deepseek = 11 + groq = 12 + offline = 13 + nllb = 14 + nllb_big = 15 + sugoi = 16 + jparacrawl = 17 + jparacrawl_big = 18 + m2m100 = 19 + m2m100_big = 20 + mbart50 = 21 + qwen2 = 22 + qwen2_big = 23 +class Upscaler(IntEnum): + waifu2x = 0 + esrgan = 1 + upscler4xultrasharp = 2 + class RenderConfig(BaseModel): renderer: Renderer = Renderer.default """Render english text translated from manga with some additional typesetting. Ignores some other argument options""" @@ -53,7 +107,7 @@ class RenderConfig(BaseModel): class UpscaleConfig(BaseModel): - upscaler: str = 'esrgan' #todo: validate UPSCALERS #todo: convert to enum + upscaler: Upscaler = Upscaler.esrgan """Upscaler to use. 
--upscale-ratio has to be set for it to take effect""" revert_upscaling: bool = False """Downscales the previously upscaled image after translation back to original size (Use with --upscale-ratio).""" @@ -61,7 +115,7 @@ class UpscaleConfig(BaseModel): """Image upscale ratio applied before detection. Can improve text detection.""" class TranslatorConfig(BaseModel): - translator: str = "google" #todo: validate TRANSLATORS todo: convert to enum + translator: Translator = Translator.sugoi """Language translator to use""" target_lang: str = 'ENG' #todo: validate VALID_LANGUAGES #todo: convert to enum """Destination language""" @@ -78,7 +132,7 @@ class TranslatorConfig(BaseModel): class DetectorConfig(BaseModel): """""" - detector: str = 'default' #todo: validate DETECTORS #todo: convert to enum + detector: Detector =Detector.default """"Text detector used for creating a text mask from an image, DO NOT use craft for manga, it\'s not designed for it""" detection_size: int = 1536 """Size of image used for detection""" @@ -96,7 +150,7 @@ class DetectorConfig(BaseModel): """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" class InpainterConfig(BaseModel): - inpainter: str = 'lama_large' #todo: validate INPAINTERS #todo: convert to enum + inpainter: Inpainter = Inpainter.lama_large """Inpainting model to use""" inpainting_size: int = 2048 """Size of image used for inpainting (too large will result in OOM)""" @@ -109,7 +163,7 @@ class ColorizerConfig(BaseModel): """Size of image used for colorization. Set to -1 to use full image size""" denoise_sigma: int = 30 """Used by colorizer and affects color strength, range from 0 to 255 (default 30). 
-1 turns it off.""" - colorizer: Optional[str] = None # todo: validate COLORIZERS #todo: convert to enum + colorizer: Colorizer = Colorizer.none """Colorization model to use.""" @@ -117,7 +171,7 @@ class ColorizerConfig(BaseModel): class OcrConfig(BaseModel): use_mocr_merge: bool = False """Use bbox merge when Manga OCR inference.""" - ocr: str = '48px' #todo: validate OCRS #todo: convert to enum + ocr: Ocr = Ocr.ocr48px """Optical character recognition (OCR) model to use""" min_text_length: int = 0 """Minimum text length of a text region""" diff --git a/manga_translator/detection/__init__.py b/manga_translator/detection/__init__.py index 96e930f54..2f8bd24e4 100644 --- a/manga_translator/detection/__init__.py +++ b/manga_translator/detection/__init__.py @@ -6,13 +6,14 @@ from .craft import CRAFTDetector from .none import NoneDetector from .common import CommonDetector, OfflineDetector +from ..config import Detector DETECTORS = { - 'default': DefaultDetector, - 'dbconvnext': DBConvNextDetector, - 'ctd': ComicTextDetector, - 'craft': CRAFTDetector, - 'none': NoneDetector, + Detector.default: DefaultDetector, + Detector.dbconvnext: DBConvNextDetector, + Detector.ctd: ComicTextDetector, + Detector.craft: CRAFTDetector, + Detector.none: NoneDetector, } detector_cache = {} diff --git a/manga_translator/inpainting/__init__.py b/manga_translator/inpainting/__init__.py index 5b7a9b161..16f242126 100644 --- a/manga_translator/inpainting/__init__.py +++ b/manga_translator/inpainting/__init__.py @@ -6,14 +6,15 @@ from .inpainting_sd import StableDiffusionInpainter from .none import NoneInpainter from .original import OriginalInpainter +from ..config import Inpainter INPAINTERS = { - 'default': AotInpainter, - 'lama_large': LamaLargeInpainter, - 'lama_mpe': LamaMPEInpainter, - 'sd': StableDiffusionInpainter, - 'none': NoneInpainter, - 'original': OriginalInpainter, + Inpainter.default: AotInpainter, + Inpainter.lama_large: LamaLargeInpainter, + Inpainter.lama_mpe: 
LamaMPEInpainter, + Inpainter.sd: StableDiffusionInpainter, + Inpainter.none: NoneInpainter, + Inpainter.original: OriginalInpainter, } inpainter_cache = {} diff --git a/manga_translator/ocr/__init__.py b/manga_translator/ocr/__init__.py index 15d7a0187..75390d764 100644 --- a/manga_translator/ocr/__init__.py +++ b/manga_translator/ocr/__init__.py @@ -6,13 +6,14 @@ from .model_48px import Model48pxOCR from .model_48px_ctc import Model48pxCTCOCR from .model_manga_ocr import ModelMangaOCR +from ..config import Ocr from ..utils import Quadrilateral OCRS = { - '32px': Model32pxOCR, - '48px': Model48pxOCR, - '48px_ctc': Model48pxCTCOCR, - 'mocr': ModelMangaOCR, + Ocr.ocr32px: Model32pxOCR, + Ocr.ocr48px: Model48pxOCR, + Ocr.ocr48px_ctc: Model48pxCTCOCR, + Ocr.mocr: ModelMangaOCR, } ocr_cache = {} diff --git a/manga_translator/translators/__init__.py b/manga_translator/translators/__init__.py index 2ff45ea9c..30bff773e 100644 --- a/manga_translator/translators/__init__.py +++ b/manga_translator/translators/__init__.py @@ -19,36 +19,37 @@ from .sakura import SakuraTranslator from .qwen2 import Qwen2Translator, Qwen2BigTranslator from .groq import GroqTranslator +from ..config import Translator OFFLINE_TRANSLATORS = { - 'offline': SelectiveOfflineTranslator, - 'nllb': NLLBTranslator, - 'nllb_big': NLLBBigTranslator, - 'sugoi': SugoiTranslator, - 'jparacrawl': JparacrawlTranslator, - 'jparacrawl_big': JparacrawlBigTranslator, - 'm2m100': M2M100Translator, - 'm2m100_big': M2M100BigTranslator, - 'mbart50': MBart50Translator, - 'qwen2': Qwen2Translator, - 'qwen2_big': Qwen2BigTranslator, + Translator.offline: SelectiveOfflineTranslator, + Translator.nllb: NLLBTranslator, + Translator.nllb_big: NLLBBigTranslator, + Translator.sugoi: SugoiTranslator, + Translator.jparacrawl: JparacrawlTranslator, + Translator.jparacrawl_big: JparacrawlBigTranslator, + Translator.m2m100: M2M100Translator, + Translator.m2m100_big: M2M100BigTranslator, + Translator.mbart50: MBart50Translator, + 
Translator.qwen2: Qwen2Translator, + Translator.qwen2_big: Qwen2BigTranslator, } TRANSLATORS = { # 'google': GoogleTranslator, - 'youdao': YoudaoTranslator, - 'baidu': BaiduTranslator, - 'deepl': DeeplTranslator, - 'papago': PapagoTranslator, - 'caiyun': CaiyunTranslator, - 'gpt3': GPT3Translator, - 'gpt3.5': GPT35TurboTranslator, - 'gpt4': GPT4Translator, - 'none': NoneTranslator, - 'original': OriginalTranslator, - 'sakura': SakuraTranslator, - 'deepseek': DeepseekTranslator, - 'groq':GroqTranslator, + Translator.youdao: YoudaoTranslator, + Translator.baidu: BaiduTranslator, + Translator.deepl: DeeplTranslator, + Translator.papago: PapagoTranslator, + Translator.caiyun: CaiyunTranslator, + Translator.gpt3: GPT3Translator, + Translator.gpt3_5: GPT35TurboTranslator, + Translator.gpt4: GPT4Translator, + Translator.none: NoneTranslator, + Translator.original: OriginalTranslator, + Translator.sakura: SakuraTranslator, + Translator.deepseek: DeepseekTranslator, + Translator.groq:GroqTranslator, **OFFLINE_TRANSLATORS, } translator_cache = {} diff --git a/manga_translator/upscaling/__init__.py b/manga_translator/upscaling/__init__.py index b5a972dd6..768527890 100644 --- a/manga_translator/upscaling/__init__.py +++ b/manga_translator/upscaling/__init__.py @@ -5,11 +5,12 @@ from .waifu2x import Waifu2xUpscaler from .esrgan import ESRGANUpscaler from .esrgan_pytorch import ESRGANUpscalerPytorch +from ..config import Upscaler UPSCALERS = { - 'waifu2x': Waifu2xUpscaler, - 'esrgan': ESRGANUpscaler, - '4xultrasharp': ESRGANUpscalerPytorch, + Upscaler.waifu2x: Waifu2xUpscaler, + Upscaler.esrgan: ESRGANUpscaler, + Upscaler.upscler4xultrasharp: ESRGANUpscalerPytorch, } upscaler_cache = {} From 2ccd8023aa5b96b5ad8d3ae655f2dae497417c39 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 00:40:57 +0100 Subject: [PATCH 04/75] split manga_translator.py into multiple files --- manga_translator/__main__.py | 6 +- 
manga_translator/manga_translator.py | 891 +-------------------------- manga_translator/mode/api.py | 290 +++++++++ manga_translator/mode/local.py | 201 ++++++ manga_translator/mode/web.py | 151 +++++ manga_translator/mode/ws.py | 264 ++++++++ 6 files changed, 916 insertions(+), 887 deletions(-) create mode 100644 manga_translator/mode/api.py create mode 100644 manga_translator/mode/local.py create mode 100644 manga_translator/mode/web.py create mode 100644 manga_translator/mode/ws.py diff --git a/manga_translator/__main__.py b/manga_translator/__main__.py index c4f580031..c0e01e508 100644 --- a/manga_translator/__main__.py +++ b/manga_translator/__main__.py @@ -5,9 +5,6 @@ from .manga_translator import ( MangaTranslator, - MangaTranslatorWeb, - MangaTranslatorWS, - MangaTranslatorAPI, set_main_logger, ) from .args import parser @@ -71,14 +68,17 @@ async def dispatch(args: Namespace): await dispatch(args.host, args.port, translation_params=args_dict) elif args.mode == 'web_client': + from manga_translator.mode.web import MangaTranslatorWeb translator = MangaTranslatorWeb(args_dict) await translator.listen(args_dict) elif args.mode == 'ws': + from manga_translator.mode.ws import MangaTranslatorWS translator = MangaTranslatorWS(args_dict) await translator.listen(args_dict) elif args.mode == 'api': + from manga_translator.mode.api import MangaTranslatorAPI translator = MangaTranslatorAPI(args_dict) await translator.listen(args_dict) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 85dbe430b..cb17850e2 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -1,65 +1,44 @@ -import asyncio -import base64 -import io - import cv2 -from aiohttp.web_middlewares import middleware from omegaconf import OmegaConf import langcodes import langdetect -import requests import os import re import torch -import time import logging import numpy as np from PIL import Image -from typing import List, 
Tuple, Union -from aiohttp import web -from marshmallow import Schema, fields, ValidationError +from typing import Union -from manga_translator.utils.threading import Throttler -from .args import DEFAULT_ARGS, translator_chain +from .args import DEFAULT_ARGS from .utils import ( BASE_PATH, LANGUAGE_ORIENTATION_PRESETS, ModelWrapper, Context, - PriorityLock, load_image, dump_image, - replace_prefix, visualize_textblocks, - add_file_logger, - remove_file_logger, is_valuable_text, - rgb2hex, hex2rgb, - get_color_name, - natural_sort, sort_regions, ) -from .detection import DETECTORS, dispatch as dispatch_detection, prepare as prepare_detection +from .detection import dispatch as dispatch_detection, prepare as prepare_detection from .upscaling import dispatch as dispatch_upscaling, prepare as prepare_upscaling, UPSCALERS -from .ocr import OCRS, dispatch as dispatch_ocr, prepare as prepare_ocr +from .ocr import dispatch as dispatch_ocr, prepare as prepare_ocr from .textline_merge import dispatch as dispatch_textline_merge from .mask_refinement import dispatch as dispatch_mask_refinement -from .inpainting import INPAINTERS, dispatch as dispatch_inpainting, prepare as prepare_inpainting +from .inpainting import dispatch as dispatch_inpainting, prepare as prepare_inpainting from .translators import ( - TRANSLATORS, - VALID_LANGUAGES, LANGDETECT_MAP, - LanguageUnsupportedException, TranslatorChain, dispatch as dispatch_translation, prepare as prepare_translation, ) from .colorization import dispatch as dispatch_colorization, prepare as prepare_colorization from .rendering import dispatch as dispatch_rendering, dispatch_eng_render -from .save import save_result # Will be overwritten by __main__.py if module is being run directly (with python -m) logger = logging.getLogger('manga_translator') @@ -78,7 +57,7 @@ class TranslationInterrupt(Exception): pass -class MangaTranslator(): +class MangaTranslator: def __init__(self, params: dict = None): self._progress_hooks = [] @@ 
-117,158 +96,6 @@ def parse_init_params(self, params: dict): def using_gpu(self): return self.device.startswith('cuda') or self.device == 'mps' - async def translate_path(self, path: str, dest: str = None, params: dict[str, Union[int, str]] = None): - """ - Translates an image or folder (recursively) specified through the path. - """ - if not os.path.exists(path): - raise FileNotFoundError(path) - path = os.path.abspath(os.path.expanduser(path)) - dest = os.path.abspath(os.path.expanduser(dest)) if dest else '' - params = params or {} - - # Handle format - file_ext = params.get('format') - if params.get('save_quality', 100) < 100: - if not params.get('format'): - file_ext = 'jpg' - elif params.get('format') != 'jpg': - raise ValueError('--save-quality of lower than 100 is only supported for .jpg files') - - if os.path.isfile(path): - # Determine destination file path - if not dest: - # Use the same folder as the source - p, ext = os.path.splitext(path) - _dest = f'{p}-translated.{file_ext or ext[1:]}' - elif not os.path.basename(dest): - p, ext = os.path.splitext(os.path.basename(path)) - # If the folders differ use the original filename from the source - if os.path.dirname(path) != dest: - _dest = os.path.join(dest, f'{p}.{file_ext or ext[1:]}') - else: - _dest = os.path.join(dest, f'{p}-translated.{file_ext or ext[1:]}') - else: - p, ext = os.path.splitext(dest) - _dest = f'{p}.{file_ext or ext[1:]}' - await self.translate_file(path, _dest, params) - - elif os.path.isdir(path): - # Determine destination folder path - if path[-1] == '\\' or path[-1] == '/': - path = path[:-1] - _dest = dest or path + '-translated' - if os.path.exists(_dest) and not os.path.isdir(_dest): - raise FileExistsError(_dest) - - translated_count = 0 - for root, subdirs, files in os.walk(path): - files = natural_sort(files) - dest_root = replace_prefix(root, path, _dest) - os.makedirs(dest_root, exist_ok=True) - for f in files: - if f.lower() == '.thumb': - continue - - file_path = 
os.path.join(root, f) - output_dest = replace_prefix(file_path, path, _dest) - p, ext = os.path.splitext(output_dest) - output_dest = f'{p}.{file_ext or ext[1:]}' - - if await self.translate_file(file_path, output_dest, params): - translated_count += 1 - if translated_count == 0: - logger.info('No further untranslated files found. Use --overwrite to write over existing translations.') - else: - logger.info(f'Done. Translated {translated_count} image{"" if translated_count == 1 else "s"}') - - async def translate_file(self, path: str, dest: str, params: dict): - if not params.get('overwrite') and os.path.exists(dest): - logger.info( - f'Skipping as already translated: "{dest}". Use --overwrite to overwrite existing translations.') - await self._report_progress('saved', True) - return True - - logger.info(f'Translating: "{path}"') - - # Turn dict to context to make values also accessible through params. - params = params or {} - ctx = Context(**params) - self._preprocess_params(ctx) - - attempts = 0 - while ctx.attempts == -1 or attempts < ctx.attempts + 1: - if attempts > 0: - logger.info(f'Retrying translation! 
Attempt {attempts}' - + (f' of {ctx.attempts}' if ctx.attempts != -1 else '')) - try: - return await self._translate_file(path, dest, ctx) - - except TranslationInterrupt: - break - except Exception as e: - if isinstance(e, LanguageUnsupportedException): - await self._report_progress('error-lang', True) - else: - await self._report_progress('error', True) - if not self.ignore_errors and not (ctx.attempts == -1 or attempts < ctx.attempts): - raise - else: - logger.error(f'{e.__class__.__name__}: {e}', - exc_info=e if self.verbose else None) - attempts += 1 - return False - - async def _translate_file(self, path: str, dest: str, ctx: Context) -> bool: - if path.endswith('.txt'): - with open(path, 'r') as f: - queries = f.read().split('\n') - translated_sentences = \ - await dispatch_translation(ctx.translator, queries, ctx.use_mtpe, ctx, - 'cpu' if self._gpu_limited_memory else self.device) - p, ext = os.path.splitext(dest) - if ext != '.txt': - dest = p + '.txt' - logger.info(f'Saving "{dest}"') - with open(dest, 'w') as f: - f.write('\n'.join(translated_sentences)) - return True - - # TODO: Add .gif handler - - else: # Treat as image - try: - img = Image.open(path) - img.verify() - img = Image.open(path) - except Exception: - logger.warn(f'Failed to open image: {path}') - return False - - ctx = await self.translate(img, ctx) - result = ctx.result - - # Save result - if ctx.skip_no_text and not ctx.text_regions: - logger.debug('Not saving due to --skip-no-text') - return True - if result: - logger.info(f'Saving "{dest}"') - save_result(result, dest, ctx) - await self._report_progress('saved', True) - - if ctx.save_text or ctx.save_text_file or ctx.prep_manual: - if ctx.prep_manual: - # Save original image next to translated - p, ext = os.path.splitext(dest) - img_filename = p + '-orig' + ext - img_path = os.path.join(os.path.dirname(dest), img_filename) - img.save(img_path, quality=ctx.save_quality) - if ctx.text_regions: - self._save_text_to_file(path, ctx) - 
return True - return False - async def translate(self, image: Image.Image, params: Union[dict, Context] = None) -> Context: """ Translates a PIL image from a manga. Returns dict with result and intermediates of translation. @@ -771,708 +598,4 @@ async def ph(state, finished): elif state in LOG_MESSAGES_ERROR: logger.error(LOG_MESSAGES_ERROR[state]) - self.add_progress_hook(ph) - - def _save_text_to_file(self, image_path: str, ctx: Context): - cached_colors = [] - - def identify_colors(fg_rgb: List[int]): - idx = 0 - for rgb, _ in cached_colors: - # If similar color already saved - if abs(rgb[0] - fg_rgb[0]) + abs(rgb[1] - fg_rgb[1]) + abs(rgb[2] - fg_rgb[2]) < 50: - break - else: - idx += 1 - else: - cached_colors.append((fg_rgb, get_color_name(fg_rgb))) - return idx + 1, cached_colors[idx][1] - - s = f'\n[{image_path}]\n' - for i, region in enumerate(ctx.text_regions): - fore, back = region.get_font_colors() - color_id, color_name = identify_colors(fore) - - s += f'\n-- {i + 1} --\n' - s += f'color: #{color_id}: {color_name} (fg, bg: {rgb2hex(*fore)} {rgb2hex(*back)})\n' - s += f'text: {region.text}\n' - s += f'trans: {region.translation}\n' - for line in region.lines: - s += f'coords: {list(line.ravel())}\n' - s += '\n' - - text_output_file = ctx.text_output_file - if not text_output_file: - text_output_file = os.path.splitext(image_path)[0] + '_translations.txt' - - with open(text_output_file, 'a', encoding='utf-8') as f: - f.write(s) - - -class MangaTranslatorWeb(MangaTranslator): - """ - Translator client that executes tasks on behalf of the webserver in web_main.py. 
- """ - - def __init__(self, params: dict = None): - super().__init__(params) - self.host = params.get('host', '127.0.0.1') - if self.host == '0.0.0.0': - self.host = '127.0.0.1' - self.port = params.get('port', 5003) - self.nonce = params.get('nonce', '') - self.ignore_errors = params.get('ignore_errors', True) - self._task_id = None - self._params = None - - async def _init_connection(self): - available_translators = [] - from .translators import MissingAPIKeyException, get_translator - for key in TRANSLATORS: - try: - get_translator(key) - available_translators.append(key) - except MissingAPIKeyException: - pass - - data = { - 'nonce': self.nonce, - 'capabilities': { - 'translators': available_translators, - }, - } - requests.post(f'http://{self.host}:{self.port}/connect-internal', json=data) - - async def _send_state(self, state: str, finished: bool): - # wait for translation to be saved first (bad solution?) - finished = finished and not state == 'finished' - while True: - try: - data = { - 'task_id': self._task_id, - 'nonce': self.nonce, - 'state': state, - 'finished': finished, - } - requests.post(f'http://{self.host}:{self.port}/task-update-internal', json=data, timeout=20) - break - except Exception: - # if translation is finished server has to know - if finished: - continue - else: - break - - def _get_task(self): - try: - rjson = requests.get(f'http://{self.host}:{self.port}/task-internal?nonce={self.nonce}', - timeout=3600).json() - return rjson.get('task_id'), rjson.get('data') - except Exception: - return None, None - - async def listen(self, translation_params: dict = None): - """ - Listens for translation tasks from web server. 
- """ - logger.info('Waiting for translation tasks') - - await self._init_connection() - self.add_progress_hook(self._send_state) - - while True: - self._task_id, self._params = self._get_task() - if self._params and 'exit' in self._params: - break - if not (self._task_id and self._params): - await asyncio.sleep(0.1) - continue - - self.result_sub_folder = self._task_id - logger.info(f'Processing task {self._task_id}') - if translation_params is not None: - # Combine default params with params chosen by webserver - for p, default_value in translation_params.items(): - current_value = self._params.get(p) - self._params[p] = current_value if current_value is not None else default_value - if self.verbose: - # Write log file - log_file = self._result_path('log.txt') - add_file_logger(log_file) - - # final.png will be renamed if format param is set - await self.translate_path(self._result_path('input.png'), self._result_path('final.png'), - params=self._params) - print() - - if self.verbose: - remove_file_logger(log_file) - self._task_id = None - self._params = None - self.result_sub_folder = '' - - async def _run_text_translation(self, ctx: Context): - # Run machine translation as reference for manual translation (if `--translator=none` is not set) - text_regions = await super()._run_text_translation(ctx) - - if ctx.get('manual', False): - logger.info('Waiting for user input from manual translation') - requests.post(f'http://{self.host}:{self.port}/request-manual-internal', json={ - 'task_id': self._task_id, - 'nonce': self.nonce, - 'texts': [r.text for r in text_regions], - 'translations': [r.translation for r in text_regions], - }, timeout=20) - - # wait for at most 1 hour for manual translation - wait_until = time.time() + 3600 - while time.time() < wait_until: - ret = requests.post(f'http://{self.host}:{self.port}/get-manual-result-internal', json={ - 'task_id': self._task_id, - 'nonce': self.nonce - }, timeout=20).json() - if 'result' in ret: - manual_translations 
= ret['result'] - if isinstance(manual_translations, str): - if manual_translations == 'error': - return [] - i = 0 - for translation in manual_translations: - if not translation.strip(): - text_regions.pop(i) - i = i - 1 - else: - text_regions[i].translation = translation - text_regions[i].target_lang = ctx.translator.langs[-1] - i = i + 1 - break - elif 'cancel' in ret: - return 'cancel' - await asyncio.sleep(0.1) - return text_regions - - -class MangaTranslatorWS(MangaTranslator): - def __init__(self, params: dict = None): - super().__init__(params) - self.url = params.get('ws_url') - self.secret = params.get('ws_secret', os.getenv('WS_SECRET', '')) - self.ignore_errors = params.get('ignore_errors', True) - - self._task_id = None - self._websocket = None - - async def listen(self, translation_params: dict = None): - from threading import Thread - import io - import aioshutil - from aiofiles import os - import websockets - from .server import ws_pb2 - - self._server_loop = asyncio.new_event_loop() - self.task_lock = PriorityLock() - self.counter = 0 - - async def _send_and_yield(websocket, msg): - # send message and yield control to the event loop (to actually send the message) - await websocket.send(msg) - await asyncio.sleep(0) - - send_throttler = Throttler(0.2) - send_and_yield = send_throttler.wrap(_send_and_yield) - - async def sync_state(state, finished): - if self._websocket is None: - return - msg = ws_pb2.WebSocketMessage() - msg.status.id = self._task_id - msg.status.status = state - self._server_loop.call_soon_threadsafe( - asyncio.create_task, - send_and_yield(self._websocket, msg.SerializeToString()) - ) - - self.add_progress_hook(sync_state) - - async def translate(task_id, websocket, image, params): - async with self.task_lock((1 << 31) - params['ws_count']): - self._task_id = task_id - self._websocket = websocket - result = await self.translate(image, params) - self._task_id = None - self._websocket = None - return result - - async def 
server_send_status(websocket, task_id, status): - msg = ws_pb2.WebSocketMessage() - msg.status.id = task_id - msg.status.status = status - await websocket.send(msg.SerializeToString()) - await asyncio.sleep(0) - - async def server_process_inner(main_loop, logger_task, session, websocket, task) -> Tuple[bool, bool]: - logger_task.info(f'-- Processing task {task.id}') - await server_send_status(websocket, task.id, 'pending') - - if self.verbose: - await aioshutil.rmtree(f'result/{task.id}', ignore_errors=True) - await os.makedirs(f'result/{task.id}', exist_ok=True) - - params = { - 'target_lang': task.target_language, - 'skip_lang': task.skip_language, - 'detector': task.detector, - 'direction': task.direction, - 'translator': task.translator, - 'size': task.size, - 'ws_event_loop': asyncio.get_event_loop(), - 'ws_count': self.counter, - } - self.counter += 1 - - logger_task.info(f'-- Downloading image from {task.source_image}') - await server_send_status(websocket, task.id, 'downloading') - async with session.get(task.source_image) as resp: - if resp.status == 200: - source_image = await resp.read() - else: - msg = ws_pb2.WebSocketMessage() - msg.status.id = task.id - msg.status.status = 'error-download' - await websocket.send(msg.SerializeToString()) - await asyncio.sleep(0) - return False, False - - logger_task.info(f'-- Translating image') - if translation_params: - for p, default_value in translation_params.items(): - current_value = params.get(p) - params[p] = current_value if current_value is not None else default_value - - image = Image.open(io.BytesIO(source_image)) - - (ori_w, ori_h) = image.size - if max(ori_h, ori_w) > 1200: - params['upscale_ratio'] = 1 - - await server_send_status(websocket, task.id, 'preparing') - # translation_dict = await self.translate(image, params) - translation_dict = await asyncio.wrap_future( - asyncio.run_coroutine_threadsafe( - translate(task.id, websocket, image, params), - main_loop - ) - ) - await send_throttler.flush() - 
- output: Image.Image = translation_dict.result - if output is not None: - await server_send_status(websocket, task.id, 'saving') - - output = output.resize((ori_w, ori_h), resample=Image.LANCZOS) - - img = io.BytesIO() - output.save(img, format='PNG') - if self.verbose: - output.save(self._result_path('ws_final.png')) - - img_bytes = img.getvalue() - logger_task.info(f'-- Uploading result to {task.translation_mask}') - await server_send_status(websocket, task.id, 'uploading') - async with session.put(task.translation_mask, data=img_bytes) as resp: - if resp.status != 200: - logger_task.error(f'-- Failed to upload result:') - logger_task.error(f'{resp.status}: {resp.reason}') - msg = ws_pb2.WebSocketMessage() - msg.status.id = task.id - msg.status.status = 'error-upload' - await websocket.send(msg.SerializeToString()) - await asyncio.sleep(0) - return False, False - - return True, output is not None - - async def server_process(main_loop, session, websocket, task) -> bool: - logger_task = logger.getChild(f'{task.id}') - try: - (success, has_translation_mask) = await server_process_inner(main_loop, logger_task, session, websocket, - task) - except Exception as e: - logger_task.error(f'-- Task failed with exception:') - logger_task.error(f'{e.__class__.__name__}: {e}', exc_info=e if self.verbose else None) - (success, has_translation_mask) = False, False - finally: - result = ws_pb2.WebSocketMessage() - result.finish_task.id = task.id - result.finish_task.success = success - result.finish_task.has_translation_mask = has_translation_mask - await websocket.send(result.SerializeToString()) - await asyncio.sleep(0) - logger_task.info(f'-- Task finished') - - async def async_server_thread(main_loop): - from aiohttp import ClientSession, ClientTimeout - timeout = ClientTimeout(total=30) - async with ClientSession(timeout=timeout) as session: - logger_conn = logger.getChild('connection') - if self.verbose: - logger_conn.setLevel(logging.DEBUG) - async for websocket in 
websockets.connect( - self.url, - extra_headers={ - 'x-secret': self.secret, - }, - max_size=1_000_000, - logger=logger_conn - ): - bg_tasks = set() - try: - logger.info('-- Connected to websocket server') - - async for raw in websocket: - # logger.info(f'Got message: {raw}') - msg = ws_pb2.WebSocketMessage() - msg.ParseFromString(raw) - if msg.WhichOneof('message') == 'new_task': - task = msg.new_task - bg_task = asyncio.create_task(server_process(main_loop, session, websocket, task)) - bg_tasks.add(bg_task) - bg_task.add_done_callback(bg_tasks.discard) - - except Exception as e: - logger.error(f'{e.__class__.__name__}: {e}', exc_info=e if self.verbose else None) - - finally: - logger.info('-- Disconnected from websocket server') - for bg_task in bg_tasks: - bg_task.cancel() - - def server_thread(future, main_loop, server_loop): - asyncio.set_event_loop(server_loop) - try: - server_loop.run_until_complete(async_server_thread(main_loop)) - finally: - future.set_result(None) - - future = asyncio.Future() - Thread( - target=server_thread, - args=(future, asyncio.get_running_loop(), self._server_loop), - daemon=True - ).start() - - # create a future that is never done - await future - - async def _run_text_translation(self, ctx: Context): - coroutine = super()._run_text_translation(ctx) - if ctx.translator.has_offline(): - return await coroutine - else: - task_id = self._task_id - websocket = self._websocket - await self.task_lock.release() - result = await asyncio.wrap_future( - asyncio.run_coroutine_threadsafe( - coroutine, - ctx.ws_event_loop - ) - ) - await self.task_lock.acquire((1 << 30) - ctx.ws_count) - self._task_id = task_id - self._websocket = websocket - return result - - async def _run_text_rendering(self, ctx: Context): - render_mask = (ctx.mask >= 127).astype(np.uint8)[:, :, None] - - output = await super()._run_text_rendering(ctx) - render_mask[np.sum(ctx.img_rgb != output, axis=2) > 0] = 1 - ctx.render_mask = render_mask - if self.verbose: - 
cv2.imwrite(self._result_path('ws_render_in.png'), cv2.cvtColor(ctx.img_rgb, cv2.COLOR_RGB2BGR)) - cv2.imwrite(self._result_path('ws_render_out.png'), cv2.cvtColor(output, cv2.COLOR_RGB2BGR)) - cv2.imwrite(self._result_path('ws_mask.png'), render_mask * 255) - - # only keep sections in mask - if self.verbose: - cv2.imwrite(self._result_path('ws_inmask.png'), cv2.cvtColor(ctx.img_rgb, cv2.COLOR_RGB2BGRA) * render_mask) - output = cv2.cvtColor(output, cv2.COLOR_RGB2RGBA) * render_mask - if self.verbose: - cv2.imwrite(self._result_path('ws_output.png'), cv2.cvtColor(output, cv2.COLOR_RGBA2BGRA) * render_mask) - - return output - - -# Experimental. May be replaced by a refactored server/web_main.py in the future. -class MangaTranslatorAPI(MangaTranslator): - def __init__(self, params: dict = None): - import nest_asyncio - nest_asyncio.apply() - super().__init__(params) - self.host = params.get('host', '127.0.0.1') - self.port = params.get('port', '5003') - self.log_web = params.get('log_web', False) - self.ignore_errors = params.get('ignore_errors', True) - self._task_id = None - self._params = None - self.params = params - self.queue = [] - - async def wait_queue(self, id: int): - while self.queue[0] != id: - await asyncio.sleep(0.05) - - def remove_from_queue(self, id: int): - self.queue.remove(id) - - def generate_id(self): - try: - x = max(self.queue) - except: - x = 0 - return x + 1 - - def middleware_factory(self): - @middleware - async def sample_middleware(request, handler): - id = self.generate_id() - self.queue.append(id) - try: - await self.wait_queue(id) - except Exception as e: - print(e) - try: - # todo make cancellable - response = await handler(request) - except: - response = web.json_response({'error': "Internal Server Error", 'status': 500}, - status=500) - # Handle cases where a user leaves the queue, request fails, or is completed - try: - self.remove_from_queue(id) - except Exception as e: - print(e) - return response - - return sample_middleware - 
- async def get_file(self, image, base64Images, url) -> Image: - if image is not None: - content = image.file.read() - elif base64Images is not None: - base64Images = base64Images - if base64Images.__contains__('base64,'): - base64Images = base64Images.split('base64,')[1] - content = base64.b64decode(base64Images) - elif url is not None: - from aiohttp import ClientSession - async with ClientSession() as session: - async with session.get(url) as resp: - if resp.status == 200: - content = await resp.read() - else: - return web.json_response({'status': 'error'}) - else: - raise ValidationError("donest exist") - img = Image.open(io.BytesIO(content)) - - img.verify() - img = Image.open(io.BytesIO(content)) - if img.width * img.height > 8000 ** 2: - raise ValidationError("to large") - return img - - async def listen(self, translation_params: dict = None): - self.params = translation_params - app = web.Application(client_max_size=1024 * 1024 * 50, middlewares=[self.middleware_factory()]) - - routes = web.RouteTableDef() - run_until_state = '' - - async def hook(state, finished): - if run_until_state and run_until_state == state and not finished: - raise TranslationInterrupt() - - self.add_progress_hook(hook) - - @routes.post("/get_text") - async def text_api(req): - nonlocal run_until_state - run_until_state = 'translating' - return await self.err_handling(self.run_translate, req, self.format_translate) - - @routes.post("/translate") - async def translate_api(req): - nonlocal run_until_state - run_until_state = 'after-translating' - return await self.err_handling(self.run_translate, req, self.format_translate) - - @routes.post("/inpaint_translate") - async def inpaint_translate_api(req): - nonlocal run_until_state - run_until_state = 'rendering' - return await self.err_handling(self.run_translate, req, self.format_translate) - - @routes.post("/colorize_translate") - async def colorize_translate_api(req): - nonlocal run_until_state - run_until_state = 'rendering' - return 
await self.err_handling(self.run_translate, req, self.format_translate, True) - - # #@routes.post("/file") - # async def file_api(req): - # #TODO: return file - # return await self.err_handling(self.file_exec, req, None) - - app.add_routes(routes) - web.run_app(app, host=self.host, port=self.port) - - async def run_translate(self, translation_params, img): - return await self.translate(img, translation_params) - - async def err_handling(self, func, req, format, ri=False): - try: - if req.content_type == 'application/json' or req.content_type == 'multipart/form-data': - if req.content_type == 'application/json': - d = await req.json() - else: - d = await req.post() - schema = self.PostSchema() - data = schema.load(d) - if 'translator_chain' in data: - data['translator_chain'] = translator_chain(data['translator_chain']) - if 'selective_translation' in data: - data['selective_translation'] = translator_chain(data['selective_translation']) - ctx = Context(**dict(self.params, **data)) - self._preprocess_params(ctx) - if data.get('image') is None and data.get('base64Images') is None and data.get('url') is None: - return web.json_response({'error': "Missing input", 'status': 422}) - fil = await self.get_file(data.get('image'), data.get('base64Images'), data.get('url')) - if 'image' in data: - del data['image'] - if 'base64Images' in data: - del data['base64Images'] - if 'url' in data: - del data['url'] - attempts = 0 - while ctx.attempts == -1 or attempts <= ctx.attempts: - if attempts > 0: - logger.info(f'Retrying translation! 
Attempt {attempts}' + ( - f' of {ctx.attempts}' if ctx.attempts != -1 else '')) - try: - await func(ctx, fil) - break - except TranslationInterrupt: - break - except Exception as e: - print(e) - attempts += 1 - if ctx.attempts != -1 and attempts > ctx.attempts: - return web.json_response({'error': "Internal Server Error", 'status': 500}, - status=500) - try: - return format(ctx, ri) - except Exception as e: - print(e) - return web.json_response({'error': "Failed to format", 'status': 500}, - status=500) - else: - return web.json_response({'error': "Wrong content type: " + req.content_type, 'status': 415}, - status=415) - except ValueError as e: - print(e) - return web.json_response({'error': "Wrong input type", 'status': 422}, status=422) - - except ValidationError as e: - print(e) - return web.json_response({'error': "Input invalid", 'status': 422}, status=422) - - def format_translate(self, ctx: Context, return_image: bool): - text_regions = ctx.text_regions - inpaint = ctx.img_inpainted - results = [] - if 'overlay_ext' in ctx: - overlay_ext = ctx['overlay_ext'] - else: - overlay_ext = 'jpg' - for i, blk in enumerate(text_regions): - minX, minY, maxX, maxY = blk.xyxy - if 'translations' in ctx: - trans = {key: value[i] for key, value in ctx['translations'].items()} - else: - trans = {} - trans["originalText"] = text_regions[i].text - if inpaint is not None: - overlay = inpaint[minY:maxY, minX:maxX] - - retval, buffer = cv2.imencode('.' 
+ overlay_ext, overlay) - jpg_as_text = base64.b64encode(buffer) - background = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") - else: - background = None - text_region = text_regions[i] - text_region.adjust_bg_color = False - color1, color2 = text_region.get_font_colors() - - results.append({ - 'text': trans, - 'minX': int(minX), - 'minY': int(minY), - 'maxX': int(maxX), - 'maxY': int(maxY), - 'textColor': { - 'fg': color1.tolist(), - 'bg': color2.tolist() - }, - 'language': text_regions[i].source_lang, - 'background': background - }) - if return_image and ctx.img_colorized is not None: - retval, buffer = cv2.imencode('.' + overlay_ext, np.array(ctx.img_colorized)) - jpg_as_text = base64.b64encode(buffer) - img = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") - else: - img = None - return web.json_response({'details': results, 'img': img}) - - class PostSchema(Schema): - target_lang = fields.Str(required=False, validate=lambda a: a.upper() in VALID_LANGUAGES) - detector = fields.Str(required=False, validate=lambda a: a.lower() in DETECTORS) - ocr = fields.Str(required=False, validate=lambda a: a.lower() in OCRS) - inpainter = fields.Str(required=False, validate=lambda a: a.lower() in INPAINTERS) - upscaler = fields.Str(required=False, validate=lambda a: a.lower() in UPSCALERS) - translator = fields.Str(required=False, validate=lambda a: a.lower() in TRANSLATORS) - direction = fields.Str(required=False, validate=lambda a: a.lower() in {'auto', 'h', 'v'}) - skip_language = fields.Str(required=False) - upscale_ratio = fields.Integer(required=False) - translator_chain = fields.Str(required=False) - selective_translation = fields.Str(required=False) - attempts = fields.Integer(required=False) - detection_size = fields.Integer(required=False) - text_threshold = fields.Float(required=False) - box_threshold = fields.Float(required=False) - unclip_ratio = fields.Float(required=False) - inpainting_size = 
fields.Integer(required=False) - det_rotate = fields.Bool(required=False) - det_auto_rotate = fields.Bool(required=False) - det_invert = fields.Bool(required=False) - det_gamma_correct = fields.Bool(required=False) - min_text_length = fields.Integer(required=False) - colorization_size = fields.Integer(required=False) - denoise_sigma = fields.Integer(required=False) - mask_dilation_offset = fields.Integer(required=False) - ignore_bubble = fields.Integer(required=False) - gpt_config = fields.String(required=False) - filter_text = fields.String(required=False) - - # api specific - overlay_ext = fields.Str(required=False) - base64Images = fields.Raw(required=False) - image = fields.Raw(required=False) - url = fields.Raw(required=False) - - # no functionality except preventing errors when given - fingerprint = fields.Raw(required=False) - clientUuid = fields.Raw(required=False) + self.add_progress_hook(ph) \ No newline at end of file diff --git a/manga_translator/mode/api.py b/manga_translator/mode/api.py new file mode 100644 index 000000000..bfe712567 --- /dev/null +++ b/manga_translator/mode/api.py @@ -0,0 +1,290 @@ +# Experimental. May be replaced by a refactored server/web_main.py in the future. 
+import asyncio +import base64 +import io + +import cv2 +import numpy as np +from PIL import Image +from aiohttp import web +from aiohttp.web_middlewares import middleware +from marshmallow import fields, Schema, ValidationError + +from manga_translator import MangaTranslator, Context, UPSCALERS, TranslationInterrupt, logger +from manga_translator.args import translator_chain +from manga_translator.detection import DETECTORS +from manga_translator.inpainting import INPAINTERS +from manga_translator.ocr import OCRS +from manga_translator.translators import VALID_LANGUAGES, TRANSLATORS + + +class MangaTranslatorAPI(MangaTranslator): + def __init__(self, params: dict = None): + import nest_asyncio + nest_asyncio.apply() + super().__init__(params) + self.host = params.get('host', '127.0.0.1') + self.port = params.get('port', '5003') + self.log_web = params.get('log_web', False) + self.ignore_errors = params.get('ignore_errors', True) + self._task_id = None + self._params = None + self.params = params + self.queue = [] + + async def wait_queue(self, id: int): + while self.queue[0] != id: + await asyncio.sleep(0.05) + + def remove_from_queue(self, id: int): + self.queue.remove(id) + + def generate_id(self): + try: + x = max(self.queue) + except: + x = 0 + return x + 1 + + def middleware_factory(self): + @middleware + async def sample_middleware(request, handler): + id = self.generate_id() + self.queue.append(id) + try: + await self.wait_queue(id) + except Exception as e: + print(e) + try: + # todo make cancellable + response = await handler(request) + except: + response = web.json_response({'error': "Internal Server Error", 'status': 500}, + status=500) + # Handle cases where a user leaves the queue, request fails, or is completed + try: + self.remove_from_queue(id) + except Exception as e: + print(e) + return response + + return sample_middleware + + async def get_file(self, image, base64Images, url) -> Image: + if image is not None: + content = image.file.read() + elif 
base64Images is not None: + base64Images = base64Images + if base64Images.__contains__('base64,'): + base64Images = base64Images.split('base64,')[1] + content = base64.b64decode(base64Images) + elif url is not None: + from aiohttp import ClientSession + async with ClientSession() as session: + async with session.get(url) as resp: + if resp.status == 200: + content = await resp.read() + else: + return web.json_response({'status': 'error'}) + else: + raise ValidationError("donest exist") + img = Image.open(io.BytesIO(content)) + + img.verify() + img = Image.open(io.BytesIO(content)) + if img.width * img.height > 8000 ** 2: + raise ValidationError("to large") + return img + + async def listen(self, translation_params: dict = None): + self.params = translation_params + app = web.Application(client_max_size=1024 * 1024 * 50, middlewares=[self.middleware_factory()]) + + routes = web.RouteTableDef() + run_until_state = '' + + async def hook(state, finished): + if run_until_state and run_until_state == state and not finished: + raise TranslationInterrupt() + + self.add_progress_hook(hook) + + @routes.post("/get_text") + async def text_api(req): + nonlocal run_until_state + run_until_state = 'translating' + return await self.err_handling(self.run_translate, req, self.format_translate) + + @routes.post("/translate") + async def translate_api(req): + nonlocal run_until_state + run_until_state = 'after-translating' + return await self.err_handling(self.run_translate, req, self.format_translate) + + @routes.post("/inpaint_translate") + async def inpaint_translate_api(req): + nonlocal run_until_state + run_until_state = 'rendering' + return await self.err_handling(self.run_translate, req, self.format_translate) + + @routes.post("/colorize_translate") + async def colorize_translate_api(req): + nonlocal run_until_state + run_until_state = 'rendering' + return await self.err_handling(self.run_translate, req, self.format_translate, True) + + # #@routes.post("/file") + # async def 
file_api(req): + # #TODO: return file + # return await self.err_handling(self.file_exec, req, None) + + app.add_routes(routes) + web.run_app(app, host=self.host, port=self.port) + + async def run_translate(self, translation_params, img): + return await self.translate(img, translation_params) + + async def err_handling(self, func, req, format, ri=False): + try: + if req.content_type == 'application/json' or req.content_type == 'multipart/form-data': + if req.content_type == 'application/json': + d = await req.json() + else: + d = await req.post() + schema = self.PostSchema() + data = schema.load(d) + if 'translator_chain' in data: + data['translator_chain'] = translator_chain(data['translator_chain']) + if 'selective_translation' in data: + data['selective_translation'] = translator_chain(data['selective_translation']) + ctx = Context(**dict(self.params, **data)) + self._preprocess_params(ctx) + if data.get('image') is None and data.get('base64Images') is None and data.get('url') is None: + return web.json_response({'error': "Missing input", 'status': 422}) + fil = await self.get_file(data.get('image'), data.get('base64Images'), data.get('url')) + if 'image' in data: + del data['image'] + if 'base64Images' in data: + del data['base64Images'] + if 'url' in data: + del data['url'] + attempts = 0 + while ctx.attempts == -1 or attempts <= ctx.attempts: + if attempts > 0: + logger.info(f'Retrying translation! 
Attempt {attempts}' + ( + f' of {ctx.attempts}' if ctx.attempts != -1 else '')) + try: + await func(ctx, fil) + break + except TranslationInterrupt: + break + except Exception as e: + print(e) + attempts += 1 + if ctx.attempts != -1 and attempts > ctx.attempts: + return web.json_response({'error': "Internal Server Error", 'status': 500}, + status=500) + try: + return format(ctx, ri) + except Exception as e: + print(e) + return web.json_response({'error': "Failed to format", 'status': 500}, + status=500) + else: + return web.json_response({'error': "Wrong content type: " + req.content_type, 'status': 415}, + status=415) + except ValueError as e: + print(e) + return web.json_response({'error': "Wrong input type", 'status': 422}, status=422) + + except ValidationError as e: + print(e) + return web.json_response({'error': "Input invalid", 'status': 422}, status=422) + + def format_translate(self, ctx: Context, return_image: bool): + text_regions = ctx.text_regions + inpaint = ctx.img_inpainted + results = [] + if 'overlay_ext' in ctx: + overlay_ext = ctx['overlay_ext'] + else: + overlay_ext = 'jpg' + for i, blk in enumerate(text_regions): + minX, minY, maxX, maxY = blk.xyxy + if 'translations' in ctx: + trans = {key: value[i] for key, value in ctx['translations'].items()} + else: + trans = {} + trans["originalText"] = text_regions[i].text + if inpaint is not None: + overlay = inpaint[minY:maxY, minX:maxX] + + retval, buffer = cv2.imencode('.' 
+ overlay_ext, overlay) + jpg_as_text = base64.b64encode(buffer) + background = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") + else: + background = None + text_region = text_regions[i] + text_region.adjust_bg_color = False + color1, color2 = text_region.get_font_colors() + + results.append({ + 'text': trans, + 'minX': int(minX), + 'minY': int(minY), + 'maxX': int(maxX), + 'maxY': int(maxY), + 'textColor': { + 'fg': color1.tolist(), + 'bg': color2.tolist() + }, + 'language': text_regions[i].source_lang, + 'background': background + }) + if return_image and ctx.img_colorized is not None: + retval, buffer = cv2.imencode('.' + overlay_ext, np.array(ctx.img_colorized)) + jpg_as_text = base64.b64encode(buffer) + img = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") + else: + img = None + return web.json_response({'details': results, 'img': img}) + + class PostSchema(Schema): + target_lang = fields.Str(required=False, validate=lambda a: a.upper() in VALID_LANGUAGES) + detector = fields.Str(required=False, validate=lambda a: a.lower() in DETECTORS) + ocr = fields.Str(required=False, validate=lambda a: a.lower() in OCRS) + inpainter = fields.Str(required=False, validate=lambda a: a.lower() in INPAINTERS) + upscaler = fields.Str(required=False, validate=lambda a: a.lower() in UPSCALERS) + translator = fields.Str(required=False, validate=lambda a: a.lower() in TRANSLATORS) + direction = fields.Str(required=False, validate=lambda a: a.lower() in {'auto', 'h', 'v'}) + skip_language = fields.Str(required=False) + upscale_ratio = fields.Integer(required=False) + translator_chain = fields.Str(required=False) + selective_translation = fields.Str(required=False) + attempts = fields.Integer(required=False) + detection_size = fields.Integer(required=False) + text_threshold = fields.Float(required=False) + box_threshold = fields.Float(required=False) + unclip_ratio = fields.Float(required=False) + inpainting_size = 
fields.Integer(required=False) + det_rotate = fields.Bool(required=False) + det_auto_rotate = fields.Bool(required=False) + det_invert = fields.Bool(required=False) + det_gamma_correct = fields.Bool(required=False) + min_text_length = fields.Integer(required=False) + colorization_size = fields.Integer(required=False) + denoise_sigma = fields.Integer(required=False) + mask_dilation_offset = fields.Integer(required=False) + ignore_bubble = fields.Integer(required=False) + gpt_config = fields.String(required=False) + filter_text = fields.String(required=False) + + # api specific + overlay_ext = fields.Str(required=False) + base64Images = fields.Raw(required=False) + image = fields.Raw(required=False) + url = fields.Raw(required=False) + + # no functionality except preventing errors when given + fingerprint = fields.Raw(required=False) + clientUuid = fields.Raw(required=False) \ No newline at end of file diff --git a/manga_translator/mode/local.py b/manga_translator/mode/local.py new file mode 100644 index 000000000..05e87b41b --- /dev/null +++ b/manga_translator/mode/local.py @@ -0,0 +1,201 @@ +import os +from typing import Union, List + +from PIL import Image + +from manga_translator import MangaTranslator, logger, Context, TranslationInterrupt +from ..save import save_result +from ..translators import ( + LanguageUnsupportedException, + dispatch as dispatch_translation, +) +from ..utils import natural_sort, replace_prefix, get_color_name, rgb2hex + + +class MangaTranslatorLocal(MangaTranslator): + async def translate_path(self, path: str, dest: str = None, params: dict[str, Union[int, str]] = None): + """ + Translates an image or folder (recursively) specified through the path. 
+ """ + if not os.path.exists(path): + raise FileNotFoundError(path) + path = os.path.abspath(os.path.expanduser(path)) + dest = os.path.abspath(os.path.expanduser(dest)) if dest else '' + params = params or {} + + # Handle format + file_ext = params.get('format') + if params.get('save_quality', 100) < 100: + if not params.get('format'): + file_ext = 'jpg' + elif params.get('format') != 'jpg': + raise ValueError('--save-quality of lower than 100 is only supported for .jpg files') + + if os.path.isfile(path): + # Determine destination file path + if not dest: + # Use the same folder as the source + p, ext = os.path.splitext(path) + _dest = f'{p}-translated.{file_ext or ext[1:]}' + elif not os.path.basename(dest): + p, ext = os.path.splitext(os.path.basename(path)) + # If the folders differ use the original filename from the source + if os.path.dirname(path) != dest: + _dest = os.path.join(dest, f'{p}.{file_ext or ext[1:]}') + else: + _dest = os.path.join(dest, f'{p}-translated.{file_ext or ext[1:]}') + else: + p, ext = os.path.splitext(dest) + _dest = f'{p}.{file_ext or ext[1:]}' + await self.translate_file(path, _dest, params) + + elif os.path.isdir(path): + # Determine destination folder path + if path[-1] == '\\' or path[-1] == '/': + path = path[:-1] + _dest = dest or path + '-translated' + if os.path.exists(_dest) and not os.path.isdir(_dest): + raise FileExistsError(_dest) + + translated_count = 0 + for root, subdirs, files in os.walk(path): + files = natural_sort(files) + dest_root = replace_prefix(root, path, _dest) + os.makedirs(dest_root, exist_ok=True) + for f in files: + if f.lower() == '.thumb': + continue + + file_path = os.path.join(root, f) + output_dest = replace_prefix(file_path, path, _dest) + p, ext = os.path.splitext(output_dest) + output_dest = f'{p}.{file_ext or ext[1:]}' + + if await self.translate_file(file_path, output_dest, params): + translated_count += 1 + if translated_count == 0: + logger.info('No further untranslated files found. 
Use --overwrite to write over existing translations.') + else: + logger.info(f'Done. Translated {translated_count} image{"" if translated_count == 1 else "s"}') + + async def translate_file(self, path: str, dest: str, params: dict): + if not params.get('overwrite') and os.path.exists(dest): + logger.info( + f'Skipping as already translated: "{dest}". Use --overwrite to overwrite existing translations.') + await self._report_progress('saved', True) + return True + + logger.info(f'Translating: "{path}"') + + # Turn dict to context to make values also accessible through params. + params = params or {} + ctx = Context(**params) + self._preprocess_params(ctx) + + attempts = 0 + while ctx.attempts == -1 or attempts < ctx.attempts + 1: + if attempts > 0: + logger.info(f'Retrying translation! Attempt {attempts}' + + (f' of {ctx.attempts}' if ctx.attempts != -1 else '')) + try: + return await self._translate_file(path, dest, ctx) + + except TranslationInterrupt: + break + except Exception as e: + if isinstance(e, LanguageUnsupportedException): + await self._report_progress('error-lang', True) + else: + await self._report_progress('error', True) + if not self.ignore_errors and not (ctx.attempts == -1 or attempts < ctx.attempts): + raise + else: + logger.error(f'{e.__class__.__name__}: {e}', + exc_info=e if self.verbose else None) + attempts += 1 + return False + + async def _translate_file(self, path: str, dest: str, ctx: Context) -> bool: + if path.endswith('.txt'): + with open(path, 'r') as f: + queries = f.read().split('\n') + translated_sentences = \ + await dispatch_translation(ctx.translator, queries, ctx.use_mtpe, ctx, + 'cpu' if self._gpu_limited_memory else self.device) + p, ext = os.path.splitext(dest) + if ext != '.txt': + dest = p + '.txt' + logger.info(f'Saving "{dest}"') + with open(dest, 'w') as f: + f.write('\n'.join(translated_sentences)) + return True + + # TODO: Add .gif handler + + else: # Treat as image + try: + img = Image.open(path) + img.verify() + 
img = Image.open(path) + except Exception: + logger.warn(f'Failed to open image: {path}') + return False + + ctx = await self.translate(img, ctx) + result = ctx.result + + # Save result + if ctx.skip_no_text and not ctx.text_regions: + logger.debug('Not saving due to --skip-no-text') + return True + if result: + logger.info(f'Saving "{dest}"') + save_result(result, dest, ctx) + await self._report_progress('saved', True) + + if ctx.save_text or ctx.save_text_file or ctx.prep_manual: + if ctx.prep_manual: + # Save original image next to translated + p, ext = os.path.splitext(dest) + img_filename = p + '-orig' + ext + img_path = os.path.join(os.path.dirname(dest), img_filename) + img.save(img_path, quality=ctx.save_quality) + if ctx.text_regions: + self._save_text_to_file(path, ctx) + return True + return False + + def _save_text_to_file(self, image_path: str, ctx: Context): + cached_colors = [] + + def identify_colors(fg_rgb: List[int]): + idx = 0 + for rgb, _ in cached_colors: + # If similar color already saved + if abs(rgb[0] - fg_rgb[0]) + abs(rgb[1] - fg_rgb[1]) + abs(rgb[2] - fg_rgb[2]) < 50: + break + else: + idx += 1 + else: + cached_colors.append((fg_rgb, get_color_name(fg_rgb))) + return idx + 1, cached_colors[idx][1] + + s = f'\n[{image_path}]\n' + for i, region in enumerate(ctx.text_regions): + fore, back = region.get_font_colors() + color_id, color_name = identify_colors(fore) + + s += f'\n-- {i + 1} --\n' + s += f'color: #{color_id}: {color_name} (fg, bg: {rgb2hex(*fore)} {rgb2hex(*back)})\n' + s += f'text: {region.text}\n' + s += f'trans: {region.translation}\n' + for line in region.lines: + s += f'coords: {list(line.ravel())}\n' + s += '\n' + + text_output_file = ctx.text_output_file + if not text_output_file: + text_output_file = os.path.splitext(image_path)[0] + '_translations.txt' + + with open(text_output_file, 'a', encoding='utf-8') as f: + f.write(s) \ No newline at end of file diff --git a/manga_translator/mode/web.py 
b/manga_translator/mode/web.py new file mode 100644 index 000000000..2acc9faf7 --- /dev/null +++ b/manga_translator/mode/web.py @@ -0,0 +1,151 @@ +import asyncio +import time + +import requests + +from manga_translator import MangaTranslator, logger, Context +from manga_translator.translators import TRANSLATORS +from manga_translator.utils import add_file_logger, remove_file_logger + + +class MangaTranslatorWeb(MangaTranslator): + """ + Translator client that executes tasks on behalf of the webserver in web_main.py. + """ + + def __init__(self, params: dict = None): + super().__init__(params) + self.host = params.get('host', '127.0.0.1') + if self.host == '0.0.0.0': + self.host = '127.0.0.1' + self.port = params.get('port', 5003) + self.nonce = params.get('nonce', '') + self.ignore_errors = params.get('ignore_errors', True) + self._task_id = None + self._params = None + + async def _init_connection(self): + available_translators = [] + from ..translators import MissingAPIKeyException, get_translator + for key in TRANSLATORS: + try: + get_translator(key) + available_translators.append(key) + except MissingAPIKeyException: + pass + + data = { + 'nonce': self.nonce, + 'capabilities': { + 'translators': available_translators, + }, + } + requests.post(f'http://{self.host}:{self.port}/connect-internal', json=data) + + async def _send_state(self, state: str, finished: bool): + # wait for translation to be saved first (bad solution?) 
+ finished = finished and not state == 'finished' + while True: + try: + data = { + 'task_id': self._task_id, + 'nonce': self.nonce, + 'state': state, + 'finished': finished, + } + requests.post(f'http://{self.host}:{self.port}/task-update-internal', json=data, timeout=20) + break + except Exception: + # if translation is finished server has to know + if finished: + continue + else: + break + + def _get_task(self): + try: + rjson = requests.get(f'http://{self.host}:{self.port}/task-internal?nonce={self.nonce}', + timeout=3600).json() + return rjson.get('task_id'), rjson.get('data') + except Exception: + return None, None + + async def listen(self, translation_params: dict = None): + """ + Listens for translation tasks from web server. + """ + logger.info('Waiting for translation tasks') + + await self._init_connection() + self.add_progress_hook(self._send_state) + + while True: + self._task_id, self._params = self._get_task() + if self._params and 'exit' in self._params: + break + if not (self._task_id and self._params): + await asyncio.sleep(0.1) + continue + + self.result_sub_folder = self._task_id + logger.info(f'Processing task {self._task_id}') + if translation_params is not None: + # Combine default params with params chosen by webserver + for p, default_value in translation_params.items(): + current_value = self._params.get(p) + self._params[p] = current_value if current_value is not None else default_value + if self.verbose: + # Write log file + log_file = self._result_path('log.txt') + add_file_logger(log_file) + + # final.png will be renamed if format param is set + await self.translate_path(self._result_path('input.png'), self._result_path('final.png'), + params=self._params) + print() + + if self.verbose: + remove_file_logger(log_file) + self._task_id = None + self._params = None + self.result_sub_folder = '' + + async def _run_text_translation(self, ctx: Context): + # Run machine translation as reference for manual translation (if `--translator=none` 
is not set) + text_regions = await super()._run_text_translation(ctx) + + if ctx.get('manual', False): + logger.info('Waiting for user input from manual translation') + requests.post(f'http://{self.host}:{self.port}/request-manual-internal', json={ + 'task_id': self._task_id, + 'nonce': self.nonce, + 'texts': [r.text for r in text_regions], + 'translations': [r.translation for r in text_regions], + }, timeout=20) + + # wait for at most 1 hour for manual translation + wait_until = time.time() + 3600 + while time.time() < wait_until: + ret = requests.post(f'http://{self.host}:{self.port}/get-manual-result-internal', json={ + 'task_id': self._task_id, + 'nonce': self.nonce + }, timeout=20).json() + if 'result' in ret: + manual_translations = ret['result'] + if isinstance(manual_translations, str): + if manual_translations == 'error': + return [] + i = 0 + for translation in manual_translations: + if not translation.strip(): + text_regions.pop(i) + i = i - 1 + else: + text_regions[i].translation = translation + text_regions[i].target_lang = ctx.translator.langs[-1] + i = i + 1 + break + elif 'cancel' in ret: + return 'cancel' + await asyncio.sleep(0.1) + return text_regions diff --git a/manga_translator/mode/ws.py b/manga_translator/mode/ws.py new file mode 100644 index 000000000..d696058f8 --- /dev/null +++ b/manga_translator/mode/ws.py @@ -0,0 +1,264 @@ +import asyncio +import logging +import os +from typing import Tuple + +import cv2 +import numpy as np +from PIL import Image + +from manga_translator import logger, Context, MangaTranslator +from manga_translator.utils import PriorityLock, Throttler + + +class MangaTranslatorWS(MangaTranslator): + def __init__(self, params: dict = None): + super().__init__(params) + self.url = params.get('ws_url') + self.secret = params.get('ws_secret', os.getenv('WS_SECRET', '')) + self.ignore_errors = params.get('ignore_errors', True) + + self._task_id = None + self._websocket = None + + async def listen(self, translation_params: 
dict = None): + from threading import Thread + import io + import aioshutil + from aiofiles import os + import websockets + from ..server import ws_pb2 + + self._server_loop = asyncio.new_event_loop() + self.task_lock = PriorityLock() + self.counter = 0 + + async def _send_and_yield(websocket, msg): + # send message and yield control to the event loop (to actually send the message) + await websocket.send(msg) + await asyncio.sleep(0) + + send_throttler = Throttler(0.2) + send_and_yield = send_throttler.wrap(_send_and_yield) + + async def sync_state(state, finished): + if self._websocket is None: + return + msg = ws_pb2.WebSocketMessage() + msg.status.id = self._task_id + msg.status.status = state + self._server_loop.call_soon_threadsafe( + asyncio.create_task, + send_and_yield(self._websocket, msg.SerializeToString()) + ) + + self.add_progress_hook(sync_state) + + async def translate(task_id, websocket, image, params): + async with self.task_lock((1 << 31) - params['ws_count']): + self._task_id = task_id + self._websocket = websocket + result = await self.translate(image, params) + self._task_id = None + self._websocket = None + return result + + async def server_send_status(websocket, task_id, status): + msg = ws_pb2.WebSocketMessage() + msg.status.id = task_id + msg.status.status = status + await websocket.send(msg.SerializeToString()) + await asyncio.sleep(0) + + async def server_process_inner(main_loop, logger_task, session, websocket, task) -> Tuple[bool, bool]: + logger_task.info(f'-- Processing task {task.id}') + await server_send_status(websocket, task.id, 'pending') + + if self.verbose: + await aioshutil.rmtree(f'result/{task.id}', ignore_errors=True) + await os.makedirs(f'result/{task.id}', exist_ok=True) + + params = { + 'target_lang': task.target_language, + 'skip_lang': task.skip_language, + 'detector': task.detector, + 'direction': task.direction, + 'translator': task.translator, + 'size': task.size, + 'ws_event_loop': asyncio.get_event_loop(), + 
'ws_count': self.counter, + } + self.counter += 1 + + logger_task.info(f'-- Downloading image from {task.source_image}') + await server_send_status(websocket, task.id, 'downloading') + async with session.get(task.source_image) as resp: + if resp.status == 200: + source_image = await resp.read() + else: + msg = ws_pb2.WebSocketMessage() + msg.status.id = task.id + msg.status.status = 'error-download' + await websocket.send(msg.SerializeToString()) + await asyncio.sleep(0) + return False, False + + logger_task.info(f'-- Translating image') + if translation_params: + for p, default_value in translation_params.items(): + current_value = params.get(p) + params[p] = current_value if current_value is not None else default_value + + image = Image.open(io.BytesIO(source_image)) + + (ori_w, ori_h) = image.size + if max(ori_h, ori_w) > 1200: + params['upscale_ratio'] = 1 + + await server_send_status(websocket, task.id, 'preparing') + # translation_dict = await self.translate(image, params) + translation_dict = await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe( + translate(task.id, websocket, image, params), + main_loop + ) + ) + await send_throttler.flush() + + output: Image.Image = translation_dict.result + if output is not None: + await server_send_status(websocket, task.id, 'saving') + + output = output.resize((ori_w, ori_h), resample=Image.LANCZOS) + + img = io.BytesIO() + output.save(img, format='PNG') + if self.verbose: + output.save(self._result_path('ws_final.png')) + + img_bytes = img.getvalue() + logger_task.info(f'-- Uploading result to {task.translation_mask}') + await server_send_status(websocket, task.id, 'uploading') + async with session.put(task.translation_mask, data=img_bytes) as resp: + if resp.status != 200: + logger_task.error(f'-- Failed to upload result:') + logger_task.error(f'{resp.status}: {resp.reason}') + msg = ws_pb2.WebSocketMessage() + msg.status.id = task.id + msg.status.status = 'error-upload' + await 
websocket.send(msg.SerializeToString()) + await asyncio.sleep(0) + return False, False + + return True, output is not None + + async def server_process(main_loop, session, websocket, task) -> bool: + logger_task = logger.getChild(f'{task.id}') + try: + (success, has_translation_mask) = await server_process_inner(main_loop, logger_task, session, websocket, + task) + except Exception as e: + logger_task.error(f'-- Task failed with exception:') + logger_task.error(f'{e.__class__.__name__}: {e}', exc_info=e if self.verbose else None) + (success, has_translation_mask) = False, False + finally: + result = ws_pb2.WebSocketMessage() + result.finish_task.id = task.id + result.finish_task.success = success + result.finish_task.has_translation_mask = has_translation_mask + await websocket.send(result.SerializeToString()) + await asyncio.sleep(0) + logger_task.info(f'-- Task finished') + + async def async_server_thread(main_loop): + from aiohttp import ClientSession, ClientTimeout + timeout = ClientTimeout(total=30) + async with ClientSession(timeout=timeout) as session: + logger_conn = logger.getChild('connection') + if self.verbose: + logger_conn.setLevel(logging.DEBUG) + async for websocket in websockets.connect( + self.url, + extra_headers={ + 'x-secret': self.secret, + }, + max_size=1_000_000, + logger=logger_conn + ): + bg_tasks = set() + try: + logger.info('-- Connected to websocket server') + + async for raw in websocket: + # logger.info(f'Got message: {raw}') + msg = ws_pb2.WebSocketMessage() + msg.ParseFromString(raw) + if msg.WhichOneof('message') == 'new_task': + task = msg.new_task + bg_task = asyncio.create_task(server_process(main_loop, session, websocket, task)) + bg_tasks.add(bg_task) + bg_task.add_done_callback(bg_tasks.discard) + + except Exception as e: + logger.error(f'{e.__class__.__name__}: {e}', exc_info=e if self.verbose else None) + + finally: + logger.info('-- Disconnected from websocket server') + for bg_task in bg_tasks: + bg_task.cancel() + + def 
server_thread(future, main_loop, server_loop): + asyncio.set_event_loop(server_loop) + try: + server_loop.run_until_complete(async_server_thread(main_loop)) + finally: + future.set_result(None) + + future = asyncio.Future() + Thread( + target=server_thread, + args=(future, asyncio.get_running_loop(), self._server_loop), + daemon=True + ).start() + + # create a future that is never done + await future + + async def _run_text_translation(self, ctx: Context): + coroutine = super()._run_text_translation(ctx) + if ctx.translator.has_offline(): + return await coroutine + else: + task_id = self._task_id + websocket = self._websocket + await self.task_lock.release() + result = await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe( + coroutine, + ctx.ws_event_loop + ) + ) + await self.task_lock.acquire((1 << 30) - ctx.ws_count) + self._task_id = task_id + self._websocket = websocket + return result + + async def _run_text_rendering(self, ctx: Context): + render_mask = (ctx.mask >= 127).astype(np.uint8)[:, :, None] + + output = await super()._run_text_rendering(ctx) + render_mask[np.sum(ctx.img_rgb != output, axis=2) > 0] = 1 + ctx.render_mask = render_mask + if self.verbose: + cv2.imwrite(self._result_path('ws_render_in.png'), cv2.cvtColor(ctx.img_rgb, cv2.COLOR_RGB2BGR)) + cv2.imwrite(self._result_path('ws_render_out.png'), cv2.cvtColor(output, cv2.COLOR_RGB2BGR)) + cv2.imwrite(self._result_path('ws_mask.png'), render_mask * 255) + + # only keep sections in mask + if self.verbose: + cv2.imwrite(self._result_path('ws_inmask.png'), cv2.cvtColor(ctx.img_rgb, cv2.COLOR_RGB2BGRA) * render_mask) + output = cv2.cvtColor(output, cv2.COLOR_RGB2RGBA) * render_mask + if self.verbose: + cv2.imwrite(self._result_path('ws_output.png'), cv2.cvtColor(output, cv2.COLOR_RGBA2BGRA) * render_mask) + + return output From ed2352adfa2ad56fb1935902a3bb0d65d0e79da8 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 00:45:42 
+0100 Subject: [PATCH 05/75] rename batch, demo to local --- manga_translator/__main__.py | 9 ++++----- manga_translator/args.py | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/manga_translator/__main__.py b/manga_translator/__main__.py index c0e01e508..8d06b9fc6 100644 --- a/manga_translator/__main__.py +++ b/manga_translator/__main__.py @@ -23,18 +23,17 @@ async def dispatch(args: Namespace): logger.info(f'Running in {args.mode} mode') - if args.mode in ('demo', 'batch'): + if args.mode == 'local': if not args.input: raise Exception('No input image was supplied. Use -i ') - translator = MangaTranslator(args_dict) + from manga_translator.mode.local import MangaTranslatorLocal + translator = MangaTranslatorLocal(args_dict) # Load pre-translation and post-translation dictionaries pre_dict = translator.load_dictionary(args.pre_dict) post_dict = translator.load_dictionary(args.post_dict) - if args.mode == 'demo': - if len(args.input) != 1 or not os.path.isfile(args.input[0]): - raise FileNotFoundError(f'Invalid single image file path for demo mode: "{" ".join(args.input)}". 
Use `-m batch`.') + if len(args.input) == 1 and os.path.isfile(args.input[0]): dest = os.path.join(BASE_PATH, 'result/final.png') args.overwrite = True # Do overwrite result/final.png file diff --git a/manga_translator/args.py b/manga_translator/args.py index 0ac6a65fa..2eb72c91d 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -101,7 +101,7 @@ def _format_action_invocation(self, action: argparse.Action) -> str: subparsers = parser.add_subparsers(dest='mode', required=True, help='Mode of operation') # Batch mode -parser_batch = subparsers.add_parser('batch', help='Run in batch translation mode') +parser_batch = subparsers.add_parser('local', help='Run in batch translation mode') parser_batch.add_argument('-i', '--input', required=True, type=dir_path, help='Path to an image folder') parser_batch.add_argument('-o', '--dest', default='', type=str, help='Path to the destination folder for translated images') parser_batch.add_argument('-f', '--format', default=None, choices=OUTPUT_FORMATS, help='Output format of the translation.') From b7c8ace4363141ac8c5769cfd2e37180c8541d52 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 01:28:57 +0100 Subject: [PATCH 06/75] use config in manga_translator --- manga_translator/config.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/manga_translator/config.py b/manga_translator/config.py index 30c44193c..ee78ddefa 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -6,6 +6,7 @@ class Renderer(IntEnum): default = 0 manga2Eng = 1 + none = 2 class Alignment(IntEnum): auto = 0 @@ -100,7 +101,7 @@ class RenderConfig(BaseModel): """If renderer should be splitting up words using a hyphen character (-)""" font_color: Optional[str] = None """Overwrite the text fg/bg color detected by the OCR model. 
Use hex string without the "#" such as FFFFFF for a white foreground or FFFFFF:000000 to also have a black background around the text.""" - line_spacing: Optional[float] = None + line_spacing: Optional[int] = None """Line spacing is font_size * this value. Default is 0.01 for horizontal text and 0.2 for vertical.""" font_size: Optional[int] = None """Use fixed font size for rendering""" @@ -111,7 +112,7 @@ class UpscaleConfig(BaseModel): """Upscaler to use. --upscale-ratio has to be set for it to take effect""" revert_upscaling: bool = False """Downscales the previously upscaled image after translation back to original size (Use with --upscale-ratio).""" - upscale_ratio: Optional[float] = None + upscale_ratio: Optional[int] = None """Image upscale ratio applied before detection. Can improve text detection.""" class TranslatorConfig(BaseModel): @@ -148,6 +149,10 @@ class DetectorConfig(BaseModel): """Applies gamma correction for detection. Might improve detection.""" ignore_bubble: int = 0 """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" + box_threshold: float = 0.7 + """Threshold for bbox generation""" + unclip_ratio: float = 2.3 + """How much to extend text skeleton to form bounding box""" class InpainterConfig(BaseModel): inpainter: Inpainter = Inpainter.lama_large @@ -199,11 +204,7 @@ class Config(BaseModel): ocr: OcrConfig """Ocr configs""" # ? 
- unclip_ratio: float = 2.3 - """How much to extend text skeleton to form bounding box""" kernel_size: int = 3 """Set the convolution kernel size of the text erasure area to completely clean up text residues""" mask_dilation_offset: int = 0 """By how much to extend the text mask to remove left-over text pixels of the original image.""" - box_threshold: float = 0.7 - """Threshold for bbox generation""" From 36a1a14d85551f66d3eebcdb9e57bc6032e7cf80 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 01:29:10 +0100 Subject: [PATCH 07/75] use config in manga_translator --- manga_translator/manga_translator.py | 228 +++++++++++++-------------- 1 file changed, 114 insertions(+), 114 deletions(-) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index cb17850e2..0f85e7a4d 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -8,10 +8,10 @@ import logging import numpy as np from PIL import Image -from typing import Union - +from typing import Optional, Any from .args import DEFAULT_ARGS +from .config import Config, Colorizer, Detector, Translator, Renderer from .utils import ( BASE_PATH, LANGUAGE_ORIENTATION_PRESETS, @@ -58,8 +58,23 @@ class TranslationInterrupt(Exception): class MangaTranslator: + verbose: bool + ignore_errors: bool + _gpu_limited_memory: bool + device: Optional[str] + kernel_size: Optional[int] + _progress_hooks: list[Any] + result_sub_folder: str def __init__(self, params: dict = None): + self.font_path = None + self.use_mtpe = False + self.kernel_size = None + self.device = None + self._gpu_limited_memory = False + self.ignore_errors = False + self.verbose = False + self._progress_hooks = [] self._add_logger_hook() @@ -76,6 +91,9 @@ def __init__(self, params: dict = None): def parse_init_params(self, params: dict): self.verbose = params.get('verbose', False) + self.use_mtpe = params.get('use_mtpe', False) + 
self.font_path = params.get('font_path', None) + self.ignore_errors = params.get('ignore_errors', False) # check mps for apple silicon or cuda for nvidia device = 'mps' if torch.backends.mps.is_available() else 'cuda' @@ -85,7 +103,7 @@ def parse_init_params(self, params: dict): self.device = device if self.using_gpu and ( not torch.cuda.is_available() and not torch.backends.mps.is_available()): raise Exception( - 'CUDA or Metal compatible device could not be found in torch whilst --use-gpu args was set.\n' \ + 'CUDA or Metal compatible device could not be found in torch whilst --use-gpu args was set.\n' 'Is the correct pytorch version installed? (See https://pytorch.org/)') if params.get('model_dir'): ModelWrapper._MODEL_DIR = params.get('model_dir') @@ -96,7 +114,7 @@ def parse_init_params(self, params: dict): def using_gpu(self): return self.device.startswith('cuda') or self.device == 'mps' - async def translate(self, image: Image.Image, params: Union[dict, Context] = None) -> Context: + async def translate(self, image: Image.Image, config: Config) -> Context: """ Translates a PIL image from a manga. Returns dict with result and intermediates of translation. Default params are taken from args.py. 
@@ -108,28 +126,29 @@ async def translate(self, image: Image.Image, params: Union[dict, Context] = Non """ # TODO: Take list of images to speed up batch processing - if not isinstance(params, Context): - params = params or {} - ctx = Context(**params) - self._preprocess_params(ctx) - else: - ctx = params + ctx = Context() ctx.input = image ctx.result = None # preload and download models (not strictly necessary, remove to lazy load) logger.info('Loading models') - if ctx.upscale_ratio: - await prepare_upscaling(ctx.upscaler) - await prepare_detection(ctx.detector) - await prepare_ocr(ctx.ocr, self.device) - await prepare_inpainting(ctx.inpainter, self.device) - await prepare_translation(ctx.translator) - if ctx.colorizer: - await prepare_colorization(ctx.colorizer) + if config.upscale.upscale_ratio: + # todo: fix + await prepare_upscaling(config.upscale.upscaler) + # todo: fix + await prepare_detection(config.detector.detector) + # todo: fix + await prepare_ocr(config.ocr.ocr, self.device) + # todo: fix + await prepare_inpainting(config.inpainter.inpainter, self.device) + # todo: fix + await prepare_translation(config.translator.translator) + if config.colorizer.colorizer != Colorizer.none: + #todo: fix + await prepare_colorization(config.colorizer.colorizer) # translate - return await self._translate(ctx) + return await self._translate(config, ctx) def load_dictionary(self, file_path): dictionary = [] @@ -161,31 +180,12 @@ def apply_dictionary(self, text, dictionary): return text def _preprocess_params(self, ctx: Context): + # todo: fix # params auto completion # TODO: Move args into ctx.args and only calculate once, or just copy into ctx for arg in DEFAULT_ARGS: ctx.setdefault(arg, DEFAULT_ARGS[arg]) - if 'direction' not in ctx: - if ctx.force_horizontal: - ctx.direction = 'h' - elif ctx.force_vertical: - ctx.direction = 'v' - else: - ctx.direction = 'auto' - if 'alignment' not in ctx: - if ctx.align_left: - ctx.alignment = 'left' - elif ctx.align_center: - 
ctx.alignment = 'center' - elif ctx.align_right: - ctx.alignment = 'right' - else: - ctx.alignment = 'auto' - if ctx.prep_manual: - ctx.renderer = 'none' - ctx.setdefault('renderer', 'manga2eng' if ctx.manga2eng else 'default') - if ctx.selective_translation is not None: ctx.selective_translation.target_lang = ctx.target_lang ctx.translator = ctx.selective_translation @@ -208,21 +208,21 @@ def _preprocess_params(self, ctx: Context): except: raise Exception(f'Invalid --font-color value: {ctx.font_color}. Use a hex value such as FF0000') - async def _translate(self, ctx: Context) -> Context: + async def _translate(self, config: Config, ctx: Context) -> Context: # -- Colorization - if ctx.colorizer: + if config.colorizer.colorizer != Colorizer.none: await self._report_progress('colorizing') - ctx.img_colorized = await self._run_colorizer(ctx) + ctx.img_colorized = await self._run_colorizer(config, ctx) else: ctx.img_colorized = ctx.input # -- Upscaling # The default text detector doesn't work very well on smaller images, might want to # consider adding automatic upscaling on certain kinds of small images. 
- if ctx.upscale_ratio: + if config.upscale.upscale_ratio: await self._report_progress('upscaling') - ctx.upscaled = await self._run_upscaling(ctx) + ctx.upscaled = await self._run_upscaling(config, ctx) else: ctx.upscaled = ctx.img_colorized @@ -230,7 +230,7 @@ async def _translate(self, ctx: Context) -> Context: # -- Detection await self._report_progress('detection') - ctx.textlines, ctx.mask_raw, ctx.mask = await self._run_detection(ctx) + ctx.textlines, ctx.mask_raw, ctx.mask = await self._run_detection(config, ctx) if self.verbose: cv2.imwrite(self._result_path('mask_raw.png'), ctx.mask_raw) @@ -238,7 +238,7 @@ async def _translate(self, ctx: Context) -> Context: await self._report_progress('skip-no-regions', True) # If no text was found result is intermediate image product ctx.result = ctx.upscaled - return await self._revert_upscale(ctx) + return await self._revert_upscale(config, ctx) if self.verbose: img_bbox_raw = np.copy(ctx.img_rgb) @@ -248,11 +248,11 @@ async def _translate(self, ctx: Context) -> Context: # -- OCR await self._report_progress('ocr') - ctx.textlines = await self._run_ocr(ctx) + ctx.textlines = await self._run_ocr(config, ctx) - if ctx.skip_lang is not None : + if config.translator.skip_lang is not None : filtered_textlines = [] - skip_langs = ctx.skip_lang.split(',') + skip_langs = config.translator.skip_lang.split(',') for txtln in ctx.textlines : try : source_language = LANGDETECT_MAP.get(langdetect.detect(txtln.text), 'UNKNOWN') @@ -266,10 +266,10 @@ async def _translate(self, ctx: Context) -> Context: await self._report_progress('skip-no-text', True) # If no text was found result is intermediate image product ctx.result = ctx.upscaled - return await self._revert_upscale(ctx) + return await self._revert_upscale(config, ctx) # Apply pre-dictionary after OCR - pre_dict = self.load_dictionary(ctx.pre_dict) + pre_dict = self.load_dictionary(config.pre_dict) pre_replacements = [] for textline in ctx.textlines: original = textline.text @@ 
-286,7 +286,7 @@ async def _translate(self, ctx: Context) -> Context: # -- Textline merge await self._report_progress('textline_merge') - ctx.text_regions = await self._run_textline_merge(ctx) + ctx.text_regions = await self._run_textline_merge(config, ctx) if self.verbose: bboxes = visualize_textblocks(cv2.cvtColor(ctx.img_rgb, cv2.COLOR_BGR2RGB), ctx.text_regions) @@ -294,34 +294,34 @@ async def _translate(self, ctx: Context) -> Context: # -- Translation await self._report_progress('translating') - ctx.text_regions = await self._run_text_translation(ctx) + ctx.text_regions = await self._run_text_translation(config, ctx) await self._report_progress('after-translating') if not ctx.text_regions: await self._report_progress('error-translating', True) ctx.result = ctx.upscaled - return await self._revert_upscale(ctx) + return await self._revert_upscale(config, ctx) elif ctx.text_regions == 'cancel': await self._report_progress('cancelled', True) ctx.result = ctx.upscaled - return await self._revert_upscale(ctx) + return await self._revert_upscale(config, ctx) # -- Mask refinement # (Delayed to take advantage of the region filtering done after ocr and translation) if ctx.mask is None: await self._report_progress('mask-generation') - ctx.mask = await self._run_mask_refinement(ctx) + ctx.mask = await self._run_mask_refinement(config, ctx) if self.verbose: - inpaint_input_img = await dispatch_inpainting('none', ctx.img_rgb, ctx.mask, ctx.inpainting_size, + inpaint_input_img = await dispatch_inpainting('none', ctx.img_rgb, ctx.mask, config.inpainter.inpainting_size, self.using_gpu, self.verbose) cv2.imwrite(self._result_path('inpaint_input.png'), cv2.cvtColor(inpaint_input_img, cv2.COLOR_RGB2BGR)) cv2.imwrite(self._result_path('mask_final.png'), ctx.mask) # -- Inpainting await self._report_progress('inpainting') - ctx.img_inpainted = await self._run_inpainting(ctx) + ctx.img_inpainted = await self._run_inpainting(config, ctx) ctx.gimp_mask = 
np.dstack((cv2.cvtColor(ctx.img_inpainted, cv2.COLOR_RGB2BGR), ctx.mask)) @@ -330,37 +330,37 @@ async def _translate(self, ctx: Context) -> Context: # -- Rendering await self._report_progress('rendering') - ctx.img_rendered = await self._run_text_rendering(ctx) + ctx.img_rendered = await self._run_text_rendering(config, ctx) await self._report_progress('finished', True) ctx.result = dump_image(ctx.input, ctx.img_rendered, ctx.img_alpha) - return await self._revert_upscale(ctx) + return await self._revert_upscale(config, ctx) # If `revert_upscaling` is True, revert to input size # Else leave `ctx` as-is - async def _revert_upscale(self, ctx: Context): - if ctx.revert_upscaling: + async def _revert_upscale(self, config: Config, ctx: Context): + if config.upscale.revert_upscaling: await self._report_progress('downscaling') ctx.result = ctx.result.resize(ctx.input.size) return ctx - async def _run_colorizer(self, ctx: Context): - return await dispatch_colorization(ctx.colorizer, device=self.device, image=ctx.input, **ctx) + async def _run_colorizer(self, config: Config, ctx: Context): + return await dispatch_colorization(config.colorizer.colorizer, device=self.device, image=ctx.input, **ctx) - async def _run_upscaling(self, ctx: Context): - return (await dispatch_upscaling(ctx.upscaler, [ctx.img_colorized], ctx.upscale_ratio, self.device))[0] + async def _run_upscaling(self, config: Config, ctx: Context): + return (await dispatch_upscaling(config.upscale.upscaler, [ctx.img_colorized], config.upscale.upscale_ratio, self.device))[0] - async def _run_detection(self, ctx: Context): - return await dispatch_detection(ctx.detector, ctx.img_rgb, ctx.detection_size, ctx.text_threshold, - ctx.box_threshold, - ctx.unclip_ratio, ctx.det_invert, ctx.det_gamma_correct, ctx.det_rotate, - ctx.det_auto_rotate, + async def _run_detection(self, config: Config, ctx: Context): + return await dispatch_detection(config.detector.detector, ctx.img_rgb, config.detector.detection_size, 
config.detector.text_threshold, + config.detector.box_threshold, + config.detector.unclip_ratio, config.detector.det_invert, config.detector.det_gamma_correct, config.detector.det_rotate, + config.detector.det_auto_rotate, self.device, self.verbose) - async def _run_ocr(self, ctx: Context): - textlines = await dispatch_ocr(ctx.ocr, ctx.img_rgb, ctx.textlines, ctx, self.device, self.verbose) + async def _run_ocr(self, config: Config, ctx: Context): + textlines = await dispatch_ocr(config.ocr.ocr, ctx.img_rgb, ctx.textlines, ctx, self.device, self.verbose) new_textlines = [] for textline in textlines: @@ -372,21 +372,21 @@ async def _run_ocr(self, ctx: Context): new_textlines.append(textline) return new_textlines - async def _run_textline_merge(self, ctx: Context): + async def _run_textline_merge(self, config: Config, ctx: Context): text_regions = await dispatch_textline_merge(ctx.textlines, ctx.img_rgb.shape[1], ctx.img_rgb.shape[0], verbose=self.verbose) new_text_regions = [] for region in text_regions: - if len(region.text) >= ctx.min_text_length \ + if len(region.text) >= config.ocr.min_text_length \ and not is_valuable_text(region.text) \ - or (not ctx.no_text_lang_skip and langcodes.tag_distance(region.source_lang, ctx.target_lang) == 0): + or (not config.translator.no_text_lang_skip and langcodes.tag_distance(region.source_lang, config.translator.target_lang) == 0): if region.text.strip(): logger.info(f'Filtered out: {region.text}') - if len(region.text) < ctx.min_text_length: + if len(region.text) < config.ocr.min_text_length: logger.info('Reason: Text length is less than the minimum required length.') elif not is_valuable_text(region.text): logger.info('Reason: Text is not considered valuable.') - elif langcodes.tag_distance(region.source_lang, ctx.target_lang) == 0: + elif langcodes.tag_distance(region.source_lang, config.translator.target_lang) == 0: logger.info('Reason: Text language matches the target language and no_text_lang_skip is False.') else: if 
ctx.font_color_fg or ctx.font_color_bg: @@ -398,28 +398,28 @@ async def _run_textline_merge(self, ctx: Context): # Sort ctd (comic text detector) regions left to right. Otherwise right to left. # Sorting will improve text translation quality. - text_regions = sort_regions(text_regions, right_to_left=True if ctx.detector != 'ctd' else False) + text_regions = sort_regions(text_regions, right_to_left=True if config.detector.detector != Detector.ctd else False) return text_regions - async def _run_text_translation(self, ctx: Context): + async def _run_text_translation(self, config: Config, ctx: Context): translated_sentences = \ - await dispatch_translation(ctx.translator, + await dispatch_translation(config.translator.translator, [region.text for region in ctx.text_regions], - ctx.use_mtpe, + self.use_mtpe, ctx, 'cpu' if self._gpu_limited_memory else self.device) for region, translation in zip(ctx.text_regions, translated_sentences): - if ctx.uppercase: + if config.render.uppercase: translation = translation.upper() - elif ctx.lowercase: + elif config.render.lowercase: translation = translation.upper() region.translation = translation - region.target_lang = ctx.target_lang - region._alignment = ctx.alignment - region._direction = ctx.direction + region.target_lang = config.translator.target_lang + region._alignment = config.render.alignment + region._direction = config.render.direction # Apply post dictionary after translating - post_dict = self.load_dictionary(ctx.post_dict) + post_dict = self.load_dictionary(config.post_dict) post_replacements = [] for region in ctx.text_regions: original = region.translation @@ -441,7 +441,7 @@ async def _run_text_translation(self, ctx: Context): special_langs = ['CHS', 'CHT', 'JPN', 'KOR', 'IND', 'UKR', 'RUS', 'THA', 'ARA'] # Process special language scenarios - if ctx.target_lang in special_langs: + if config.translator.target_lang in special_langs: # Categorize regions same_target_regions = [] # Target language regions with 
identical translation diff_target_regions = [] # Target language regions with different translation @@ -453,21 +453,21 @@ async def _run_text_translation(self, ctx: Context): has_target_lang = False # Target language detection - if ctx.target_lang in ['CHS', 'CHT']: # Chinese + if config.translator.target_lang in ['CHS', 'CHT']: # Chinese has_target_lang = bool(re.search('[\u4e00-\u9fff]', region.text)) - elif ctx.target_lang == 'JPN': # Japanese + elif config.translator.target_lang == 'JPN': # Japanese has_target_lang = bool(re.search('[\u3040-\u309f\u30a0-\u30ff\u4e00-\u9fff]', region.text)) - elif ctx.target_lang == 'KOR': # Korean + elif config.translator.target_lang == 'KOR': # Korean has_target_lang = bool(re.search('[\uac00-\ud7af\u1100-\u11ff]', region.text)) - elif ctx.target_lang == 'ARA': # Arabic + elif config.translator.target_lang == 'ARA': # Arabic has_target_lang = bool(re.search('[\u0600-\u06ff]', region.text)) - elif ctx.target_lang == 'THA': # Thai + elif config.translator.target_lang == 'THA': # Thai has_target_lang = bool(re.search('[\u0e00-\u0e7f]', region.text)) - elif ctx.target_lang == 'RUS': # Russian + elif config.translator.target_lang == 'RUS': # Russian has_target_lang = bool(re.search('[\u0400-\u04ff]', region.text)) - elif ctx.target_lang == 'UKR': # Ukrainian + elif config.translator.target_lang == 'UKR': # Ukrainian has_target_lang = bool(re.search('[\u0400-\u04ff]', region.text)) - elif ctx.target_lang == 'IND': # Indonesian + elif config.translator.target_lang == 'IND': # Indonesian has_target_lang = bool(re.search('[A-Za-z]', region.text)) # Skip numeric translations and filtered text @@ -476,9 +476,9 @@ async def _run_text_translation(self, ctx: Context): logger.info('Reason: Numeric translation') continue - if ctx.filter_text and re.search(ctx.filter_text, region.translation): + if config.filter_text and re.search(config.filter_text, region.translation): logger.info(f'Filtered out: {region.translation}') - 
logger.info(f'Reason: Matched filter text: {ctx.filter_text}') + logger.info(f'Reason: Matched filter text: {config.filter_text}') continue if has_target_lang: @@ -510,14 +510,14 @@ async def _run_text_translation(self, ctx: Context): should_filter = False filter_reason = "" - if not ctx.translator == 'none': + if not config.translator.translator == Translator.none: if region.translation.isnumeric(): should_filter = True filter_reason = "Numeric translation" - elif ctx.filter_text and re.search(ctx.filter_text, region.translation): + elif config.filter_text and re.search(config.filter_text, region.translation): should_filter = True - filter_reason = f"Matched filter text: {ctx.filter_text}" - elif not ctx.translator == 'original': + filter_reason = f"Matched filter text: {config.filter_text}" + elif not config.translator.translator == Translator.original: text_equal = region.text.lower().strip() == region.translation.lower().strip() if text_equal: should_filter = True @@ -533,25 +533,25 @@ async def _run_text_translation(self, ctx: Context): return new_text_regions - async def _run_mask_refinement(self, ctx: Context): + async def _run_mask_refinement(self, config: Config, ctx: Context): return await dispatch_mask_refinement(ctx.text_regions, ctx.img_rgb, ctx.mask_raw, 'fit_text', - ctx.mask_dilation_offset, ctx.ignore_bubble, self.verbose,self.kernel_size) + config.mask_dilation_offset, config.detector.ignore_bubble, self.verbose,self.kernel_size) - async def _run_inpainting(self, ctx: Context): - return await dispatch_inpainting(ctx.inpainter, ctx.img_rgb, ctx.mask, ctx.inpainting_size, self.device, + async def _run_inpainting(self, config: Config,ctx: Context): + return await dispatch_inpainting(config.inpainter.inpainter, ctx.img_rgb, ctx.mask, config.inpainter.inpainting_size, self.device, self.verbose) - async def _run_text_rendering(self, ctx: Context): - if ctx.renderer == 'none': + async def _run_text_rendering(self, config: Config, ctx: Context): + if 
config.render.renderer == Renderer.none: output = ctx.img_inpainted # manga2eng currently only supports horizontal left to right rendering - elif ctx.renderer == 'manga2eng' and ctx.text_regions and LANGUAGE_ORIENTATION_PRESETS.get( + elif config.render.renderer == Renderer.manga2Eng and ctx.text_regions and LANGUAGE_ORIENTATION_PRESETS.get( ctx.text_regions[0].target_lang) == 'h': - output = await dispatch_eng_render(ctx.img_inpainted, ctx.img_rgb, ctx.text_regions, ctx.font_path, ctx.line_spacing) + output = await dispatch_eng_render(ctx.img_inpainted, ctx.img_rgb, ctx.text_regions, self.font_path, config.render.line_spacing) else: - output = await dispatch_rendering(ctx.img_inpainted, ctx.text_regions, ctx.font_path, ctx.font_size, - ctx.font_size_offset, - ctx.font_size_minimum, not ctx.no_hyphenation, ctx.render_mask, ctx.line_spacing) + output = await dispatch_rendering(ctx.img_inpainted, ctx.text_regions, self.font_path, config.render.font_size, + config.render.font_size_offset, + config.render.font_size_minimum, not config.render.no_hyphenation, config.render.render_mask, config.render.line_spacing) return output def _result_path(self, path: str) -> str: From 341b5454b3d4585b70594fb21c8c6f65551d5a3f Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 01:35:52 +0100 Subject: [PATCH 08/75] more ctx fixes --- manga_translator/__main__.py | 15 ++- manga_translator/colorization/__init__.py | 6 +- manga_translator/detection/__init__.py | 6 +- manga_translator/inpainting/__init__.py | 6 +- manga_translator/manga_translator.py | 138 +++++++++++----------- manga_translator/mode/api.py | 6 +- manga_translator/mode/local.py | 3 +- manga_translator/ocr/__init__.py | 6 +- manga_translator/upscaling/__init__.py | 6 +- 9 files changed, 96 insertions(+), 96 deletions(-) diff --git a/manga_translator/__main__.py b/manga_translator/__main__.py index 8d06b9fc6..104d41fb4 100644 --- a/manga_translator/__main__.py 
+++ b/manga_translator/__main__.py @@ -4,8 +4,7 @@ from argparse import Namespace from .manga_translator import ( - MangaTranslator, - set_main_logger, + set_main_logger, load_dictionary, apply_dictionary, ) from .args import parser from .utils import ( @@ -30,8 +29,8 @@ async def dispatch(args: Namespace): translator = MangaTranslatorLocal(args_dict) # Load pre-translation and post-translation dictionaries - pre_dict = translator.load_dictionary(args.pre_dict) - post_dict = translator.load_dictionary(args.post_dict) + pre_dict = load_dictionary(args.pre_dict) + post_dict = load_dictionary(args.post_dict) if len(args.input) == 1 and os.path.isfile(args.input[0]): dest = os.path.join(BASE_PATH, 'result/final.png') @@ -40,12 +39,12 @@ async def dispatch(args: Namespace): # Apply pre-translation dictionaries await translator.translate_path(args.input[0], dest, args_dict) for textline in translator.textlines: - textline.text = translator.apply_dictionary(textline.text, pre_dict) + textline.text = apply_dictionary(textline.text, pre_dict) logger.info(f'Pre-translation dictionary applied: {textline.text}') # Apply post-translation dictionaries for textline in translator.textlines: - textline.translation = translator.apply_dictionary(textline.translation, post_dict) + textline.translation = apply_dictionary(textline.translation, post_dict) logger.info(f'Post-translation dictionary applied: {textline.translation}') else: # batch @@ -54,12 +53,12 @@ async def dispatch(args: Namespace): # Apply pre-translation dictionaries await translator.translate_path(path, dest, args_dict) for textline in translator.textlines: - textline.text = translator.apply_dictionary(textline.text, pre_dict) + textline.text = apply_dictionary(textline.text, pre_dict) logger.info(f'Pre-translation dictionary applied: {textline.text}') # Apply post-translation dictionaries for textline in translator.textlines: - textline.translation = translator.apply_dictionary(textline.translation, post_dict) + 
textline.translation = apply_dictionary(textline.translation, post_dict) logger.info(f'Post-translation dictionary applied: {textline.translation}') elif args.mode == 'web': diff --git a/manga_translator/colorization/__init__.py b/manga_translator/colorization/__init__.py index d7423286c..61a0c0997 100644 --- a/manga_translator/colorization/__init__.py +++ b/manga_translator/colorization/__init__.py @@ -9,7 +9,7 @@ } colorizer_cache = {} -def get_colorizer(key: str, *args, **kwargs) -> CommonColorizer: +def get_colorizer(key: Colorizer, *args, **kwargs) -> CommonColorizer: if key not in COLORIZERS: raise ValueError(f'Could not find colorizer for: "{key}". Choose from the following: %s' % ','.join(COLORIZERS)) if not colorizer_cache.get(key): @@ -17,12 +17,12 @@ def get_colorizer(key: str, *args, **kwargs) -> CommonColorizer: colorizer_cache[key] = upscaler(*args, **kwargs) return colorizer_cache[key] -async def prepare(key: str): +async def prepare(key: Colorizer): upscaler = get_colorizer(key) if isinstance(upscaler, OfflineColorizer): await upscaler.download() -async def dispatch(key: str, device: str = 'cpu', **kwargs) -> Image.Image: +async def dispatch(key: Colorizer, device: str = 'cpu', **kwargs) -> Image.Image: colorizer = get_colorizer(key) if isinstance(colorizer, OfflineColorizer): await colorizer.load(device) diff --git a/manga_translator/detection/__init__.py b/manga_translator/detection/__init__.py index 2f8bd24e4..c2655a83f 100644 --- a/manga_translator/detection/__init__.py +++ b/manga_translator/detection/__init__.py @@ -17,7 +17,7 @@ } detector_cache = {} -def get_detector(key: str, *args, **kwargs) -> CommonDetector: +def get_detector(key: Detector, *args, **kwargs) -> CommonDetector: if key not in DETECTORS: raise ValueError(f'Could not find detector for: "{key}". 
Choose from the following: %s' % ','.join(DETECTORS)) if not detector_cache.get(key): @@ -25,12 +25,12 @@ def get_detector(key: str, *args, **kwargs) -> CommonDetector: detector_cache[key] = detector(*args, **kwargs) return detector_cache[key] -async def prepare(detector_key: str): +async def prepare(detector_key: Detector): detector = get_detector(detector_key) if isinstance(detector, OfflineDetector): await detector.download() -async def dispatch(detector_key: str, image: np.ndarray, detect_size: int, text_threshold: float, box_threshold: float, unclip_ratio: float, +async def dispatch(detector_key: Detector, image: np.ndarray, detect_size: int, text_threshold: float, box_threshold: float, unclip_ratio: float, invert: bool, gamma_correct: bool, rotate: bool, auto_rotate: bool = False, device: str = 'cpu', verbose: bool = False): detector = get_detector(detector_key) if isinstance(detector, OfflineDetector): diff --git a/manga_translator/inpainting/__init__.py b/manga_translator/inpainting/__init__.py index 16f242126..c4ca1ca13 100644 --- a/manga_translator/inpainting/__init__.py +++ b/manga_translator/inpainting/__init__.py @@ -18,7 +18,7 @@ } inpainter_cache = {} -def get_inpainter(key: str, *args, **kwargs) -> CommonInpainter: +def get_inpainter(key: Inpainter, *args, **kwargs) -> CommonInpainter: if key not in INPAINTERS: raise ValueError(f'Could not find inpainter for: "{key}". 
Choose from the following: %s' % ','.join(INPAINTERS)) if not inpainter_cache.get(key): @@ -26,13 +26,13 @@ def get_inpainter(key: str, *args, **kwargs) -> CommonInpainter: inpainter_cache[key] = inpainter(*args, **kwargs) return inpainter_cache[key] -async def prepare(inpainter_key: str, device: str = 'cpu'): +async def prepare(inpainter_key: Inpainter, device: str = 'cpu'): inpainter = get_inpainter(inpainter_key) if isinstance(inpainter, OfflineInpainter): await inpainter.download() await inpainter.load(device) -async def dispatch(inpainter_key: str, image: np.ndarray, mask: np.ndarray, inpainting_size: int = 1024, device: str = 'cpu', verbose: bool = False) -> np.ndarray: +async def dispatch(inpainter_key: Inpainter, image: np.ndarray, mask: np.ndarray, inpainting_size: int = 1024, device: str = 'cpu', verbose: bool = False) -> np.ndarray: inpainter = get_inpainter(inpainter_key) if isinstance(inpainter, OfflineInpainter): await inpainter.load(device) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 0f85e7a4d..682216d74 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -11,7 +11,7 @@ from typing import Optional, Any from .args import DEFAULT_ARGS -from .config import Config, Colorizer, Detector, Translator, Renderer +from .config import Config, Colorizer, Detector, Translator, Renderer, Inpainter from .utils import ( BASE_PATH, LANGUAGE_ORIENTATION_PRESETS, @@ -26,7 +26,7 @@ ) from .detection import dispatch as dispatch_detection, prepare as prepare_detection -from .upscaling import dispatch as dispatch_upscaling, prepare as prepare_upscaling, UPSCALERS +from .upscaling import dispatch as dispatch_upscaling, prepare as prepare_upscaling from .ocr import dispatch as dispatch_ocr, prepare as prepare_ocr from .textline_merge import dispatch as dispatch_textline_merge from .mask_refinement import dispatch as dispatch_mask_refinement @@ -57,6 +57,67 @@ class 
TranslationInterrupt(Exception): pass +def load_dictionary(file_path): + dictionary = [] + if file_path and os.path.exists(file_path): + with open(file_path, 'r', encoding='utf-8') as file: + for line_number, line in enumerate(file, start=1): + # Ignore empty lines and lines starting with '#' or '//' + if not line.strip() or line.strip().startswith('#') or line.strip().startswith('//'): + continue + # Remove comment parts + line = line.split('#')[0].strip() + line = line.split('//')[0].strip() + parts = line.split() + if len(parts) == 1: + # If there is only the left part, the right part defaults to an empty string, meaning delete the left part + pattern = re.compile(parts[0]) + dictionary.append((pattern, '')) + elif len(parts) == 2: + # If both left and right parts are present, perform the replacement + pattern = re.compile(parts[0]) + dictionary.append((pattern, parts[1])) + else: + logger.error(f'Invalid dictionary entry at line {line_number}: {line.strip()}') + return dictionary + + +def _preprocess_params(ctx: Context): + # todo: fix + # params auto completion + # TODO: Move args into ctx.args and only calculate once, or just copy into ctx + for arg in DEFAULT_ARGS: + ctx.setdefault(arg, DEFAULT_ARGS[arg]) + + if ctx.selective_translation is not None: + ctx.selective_translation.target_lang = ctx.target_lang + ctx.translator = ctx.selective_translation + elif ctx.translator_chain is not None: + ctx.target_lang = ctx.translator_chain.langs[-1] + ctx.translator = ctx.translator_chain + else: + ctx.translator = TranslatorChain(f'{ctx.translator}:{ctx.target_lang}') + if ctx.gpt_config: + ctx.gpt_config = OmegaConf.load(ctx.gpt_config) + + if ctx.filter_text: + ctx.filter_text = re.compile(ctx.filter_text) + + if ctx.font_color: + colors = ctx.font_color.split(':') + try: + ctx.font_color_fg = hex2rgb(colors[0]) + ctx.font_color_bg = hex2rgb(colors[1]) if len(colors) > 1 else None + except: + raise Exception(f'Invalid --font-color value: {ctx.font_color}. 
Use a hex value such as FF0000') + + +def apply_dictionary(text, dictionary): + for pattern, value in dictionary: + text = pattern.sub(value, text) + return text + + class MangaTranslator: verbose: bool ignore_errors: bool @@ -134,80 +195,17 @@ async def translate(self, image: Image.Image, config: Config) -> Context: # preload and download models (not strictly necessary, remove to lazy load) logger.info('Loading models') if config.upscale.upscale_ratio: - # todo: fix await prepare_upscaling(config.upscale.upscaler) - # todo: fix await prepare_detection(config.detector.detector) - # todo: fix await prepare_ocr(config.ocr.ocr, self.device) - # todo: fix await prepare_inpainting(config.inpainter.inpainter, self.device) # todo: fix await prepare_translation(config.translator.translator) if config.colorizer.colorizer != Colorizer.none: - #todo: fix await prepare_colorization(config.colorizer.colorizer) # translate return await self._translate(config, ctx) - def load_dictionary(self, file_path): - dictionary = [] - if file_path and os.path.exists(file_path): - with open(file_path, 'r', encoding='utf-8') as file: - for line_number, line in enumerate(file, start=1): - # Ignore empty lines and lines starting with '#' or '//' - if not line.strip() or line.strip().startswith('#') or line.strip().startswith('//'): - continue - # Remove comment parts - line = line.split('#')[0].strip() - line = line.split('//')[0].strip() - parts = line.split() - if len(parts) == 1: - # If there is only the left part, the right part defaults to an empty string, meaning delete the left part - pattern = re.compile(parts[0]) - dictionary.append((pattern, '')) - elif len(parts) == 2: - # If both left and right parts are present, perform the replacement - pattern = re.compile(parts[0]) - dictionary.append((pattern, parts[1])) - else: - logger.error(f'Invalid dictionary entry at line {line_number}: {line.strip()}') - return dictionary - - def apply_dictionary(self, text, dictionary): - for pattern, 
value in dictionary: - text = pattern.sub(value, text) - return text - - def _preprocess_params(self, ctx: Context): - # todo: fix - # params auto completion - # TODO: Move args into ctx.args and only calculate once, or just copy into ctx - for arg in DEFAULT_ARGS: - ctx.setdefault(arg, DEFAULT_ARGS[arg]) - - if ctx.selective_translation is not None: - ctx.selective_translation.target_lang = ctx.target_lang - ctx.translator = ctx.selective_translation - elif ctx.translator_chain is not None: - ctx.target_lang = ctx.translator_chain.langs[-1] - ctx.translator = ctx.translator_chain - else: - ctx.translator = TranslatorChain(f'{ctx.translator}:{ctx.target_lang}') - if ctx.gpt_config: - ctx.gpt_config = OmegaConf.load(ctx.gpt_config) - - if ctx.filter_text: - ctx.filter_text = re.compile(ctx.filter_text) - - if ctx.font_color: - colors = ctx.font_color.split(':') - try: - ctx.font_color_fg = hex2rgb(colors[0]) - ctx.font_color_bg = hex2rgb(colors[1]) if len(colors) > 1 else None - except: - raise Exception(f'Invalid --font-color value: {ctx.font_color}. 
Use a hex value such as FF0000') - async def _translate(self, config: Config, ctx: Context) -> Context: # -- Colorization @@ -269,11 +267,11 @@ async def _translate(self, config: Config, ctx: Context) -> Context: return await self._revert_upscale(config, ctx) # Apply pre-dictionary after OCR - pre_dict = self.load_dictionary(config.pre_dict) + pre_dict = load_dictionary(config.pre_dict) pre_replacements = [] for textline in ctx.textlines: original = textline.text - textline.text = self.apply_dictionary(textline.text, pre_dict) + textline.text = apply_dictionary(textline.text, pre_dict) if original != textline.text: pre_replacements.append(f"{original} => {textline.text}") @@ -314,7 +312,7 @@ async def _translate(self, config: Config, ctx: Context) -> Context: ctx.mask = await self._run_mask_refinement(config, ctx) if self.verbose: - inpaint_input_img = await dispatch_inpainting('none', ctx.img_rgb, ctx.mask, config.inpainter.inpainting_size, + inpaint_input_img = await dispatch_inpainting(Inpainter.none, ctx.img_rgb, ctx.mask, config.inpainter.inpainting_size, self.using_gpu, self.verbose) cv2.imwrite(self._result_path('inpaint_input.png'), cv2.cvtColor(inpaint_input_img, cv2.COLOR_RGB2BGR)) cv2.imwrite(self._result_path('mask_final.png'), ctx.mask) @@ -419,11 +417,11 @@ async def _run_text_translation(self, config: Config, ctx: Context): region._direction = config.render.direction # Apply post dictionary after translating - post_dict = self.load_dictionary(config.post_dict) + post_dict = load_dictionary(config.post_dict) post_replacements = [] for region in ctx.text_regions: original = region.translation - region.translation = self.apply_dictionary(region.translation, post_dict) + region.translation = apply_dictionary(region.translation, post_dict) if original != region.translation: post_replacements.append(f"{original} => {region.translation}") diff --git a/manga_translator/mode/api.py b/manga_translator/mode/api.py index bfe712567..fc71d32e3 100644 --- 
a/manga_translator/mode/api.py +++ b/manga_translator/mode/api.py @@ -10,12 +10,14 @@ from aiohttp.web_middlewares import middleware from marshmallow import fields, Schema, ValidationError -from manga_translator import MangaTranslator, Context, UPSCALERS, TranslationInterrupt, logger +from manga_translator import MangaTranslator, Context, TranslationInterrupt, logger from manga_translator.args import translator_chain from manga_translator.detection import DETECTORS from manga_translator.inpainting import INPAINTERS +from manga_translator.manga_translator import _preprocess_params from manga_translator.ocr import OCRS from manga_translator.translators import VALID_LANGUAGES, TRANSLATORS +from manga_translator.upscaling import UPSCALERS class MangaTranslatorAPI(MangaTranslator): @@ -158,7 +160,7 @@ async def err_handling(self, func, req, format, ri=False): if 'selective_translation' in data: data['selective_translation'] = translator_chain(data['selective_translation']) ctx = Context(**dict(self.params, **data)) - self._preprocess_params(ctx) + _preprocess_params(ctx) if data.get('image') is None and data.get('base64Images') is None and data.get('url') is None: return web.json_response({'error': "Missing input", 'status': 422}) fil = await self.get_file(data.get('image'), data.get('base64Images'), data.get('url')) diff --git a/manga_translator/mode/local.py b/manga_translator/mode/local.py index 05e87b41b..67e67ebb5 100644 --- a/manga_translator/mode/local.py +++ b/manga_translator/mode/local.py @@ -4,6 +4,7 @@ from PIL import Image from manga_translator import MangaTranslator, logger, Context, TranslationInterrupt +from ..manga_translator import _preprocess_params from ..save import save_result from ..translators import ( LanguageUnsupportedException, @@ -90,7 +91,7 @@ async def translate_file(self, path: str, dest: str, params: dict): # Turn dict to context to make values also accessible through params. 
params = params or {} ctx = Context(**params) - self._preprocess_params(ctx) + _preprocess_params(ctx) attempts = 0 while ctx.attempts == -1 or attempts < ctx.attempts + 1: diff --git a/manga_translator/ocr/__init__.py b/manga_translator/ocr/__init__.py index 75390d764..14d580f69 100644 --- a/manga_translator/ocr/__init__.py +++ b/manga_translator/ocr/__init__.py @@ -17,7 +17,7 @@ } ocr_cache = {} -def get_ocr(key: str, *args, **kwargs) -> CommonOCR: +def get_ocr(key: Ocr, *args, **kwargs) -> CommonOCR: if key not in OCRS: raise ValueError(f'Could not find OCR for: "{key}". Choose from the following: %s' % ','.join(OCRS)) if not ocr_cache.get(key): @@ -25,13 +25,13 @@ def get_ocr(key: str, *args, **kwargs) -> CommonOCR: ocr_cache[key] = ocr(*args, **kwargs) return ocr_cache[key] -async def prepare(ocr_key: str, device: str = 'cpu'): +async def prepare(ocr_key: Ocr, device: str = 'cpu'): ocr = get_ocr(ocr_key) if isinstance(ocr, OfflineOCR): await ocr.download() await ocr.load(device) -async def dispatch(ocr_key: str, image: np.ndarray, regions: List[Quadrilateral], args = None, device: str = 'cpu', verbose: bool = False) -> List[Quadrilateral]: +async def dispatch(ocr_key: Ocr, image: np.ndarray, regions: List[Quadrilateral], args = None, device: str = 'cpu', verbose: bool = False) -> List[Quadrilateral]: ocr = get_ocr(ocr_key) if isinstance(ocr, OfflineOCR): await ocr.load(device) diff --git a/manga_translator/upscaling/__init__.py b/manga_translator/upscaling/__init__.py index 768527890..df88d93e2 100644 --- a/manga_translator/upscaling/__init__.py +++ b/manga_translator/upscaling/__init__.py @@ -14,7 +14,7 @@ } upscaler_cache = {} -def get_upscaler(key: str, *args, **kwargs) -> CommonUpscaler: +def get_upscaler(key: Upscaler, *args, **kwargs) -> CommonUpscaler: if key not in UPSCALERS: raise ValueError(f'Could not find upscaler for: "{key}". 
Choose from the following: %s' % ','.join(UPSCALERS)) if not upscaler_cache.get(key): @@ -22,12 +22,12 @@ def get_upscaler(key: str, *args, **kwargs) -> CommonUpscaler: upscaler_cache[key] = upscaler(*args, **kwargs) return upscaler_cache[key] -async def prepare(upscaler_key: str): +async def prepare(upscaler_key: Upscaler): upscaler = get_upscaler(upscaler_key) if isinstance(upscaler, OfflineUpscaler): await upscaler.download() -async def dispatch(upscaler_key: str, image_batch: List[Image.Image], upscale_ratio: int, device: str = 'cpu') -> List[Image.Image]: +async def dispatch(upscaler_key: Upscaler, image_batch: List[Image.Image], upscale_ratio: int, device: str = 'cpu') -> List[Image.Image]: if upscale_ratio == 1: return image_batch upscaler = get_upscaler(upscaler_key) From 14bc8ca1fe0cc66031ef3839f7e5b59e25ad8de9 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 11:42:52 +0100 Subject: [PATCH 09/75] move preprocessing into config --- manga_translator/config.py | 57 ++++++++++++++++++++++++ manga_translator/manga_translator.py | 53 ++++++---------------- manga_translator/translators/__init__.py | 2 +- 3 files changed, 71 insertions(+), 41 deletions(-) diff --git a/manga_translator/config.py b/manga_translator/config.py index ee78ddefa..863268522 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -1,8 +1,14 @@ +import re from enum import Enum, IntEnum +from omegaconf import OmegaConf from pydantic import BaseModel from typing import Optional +from manga_translator import TranslatorChain, hex2rgb +from manga_translator.args import translator_chain + + class Renderer(IntEnum): default = 0 manga2Eng = 1 @@ -106,6 +112,29 @@ class RenderConfig(BaseModel): font_size: Optional[int] = None """Use fixed font size for rendering""" + @property + def font_color_fg(self): + if self.font_color and not self._font_color_fg: + colors = self.font_color.split(':') + try: + 
self._font_color_fg = hex2rgb(colors[0]) + self._font_color_fg = hex2rgb(colors[1]) if len(colors) > 1 else None + except: + raise Exception( + f'Invalid --font-color value: {self.font_color}. Use a hex value such as FF0000') + return self._font_color_fg + + @property + def font_color_bg(self): + if self.font_color and not self._font_color_bg: + colors = self.font_color.split(':') + try: + self._font_color_fg = hex2rgb(colors[0]) + self._font_color_bg = hex2rgb(colors[1]) if len(colors) > 1 else None + except: + raise Exception( + f'Invalid --font-color value: {self.font_color}. Use a hex value such as FF0000') + return self._font_color_bg class UpscaleConfig(BaseModel): upscaler: Upscaler = Upscaler.esrgan @@ -131,6 +160,28 @@ class TranslatorConfig(BaseModel): selective_translation: Optional[str] = None # todo: add parser translator_chain #todo: merge into one """Select a translator based on detected language in image. Note the first translation service acts as default if the language isn\'t defined. 
Example: --translator-chain "google:JPN;sugoi:ENG".'""" + @property + def translator_gen(self): + if self._translator_gen is None: + if self.selective_translation is not None: + trans = translator_chain(self.selective_translation) + trans.target_lang = self.target_lang + self._translator_gen = trans + elif self.translator_chain is not None: + trans = translator_chain(self.translator_chain) + trans.target_lang = trans.langs[-1] + self._translator_gen = trans + else: + self._translator_gen = TranslatorChain(f'{self.translator}:{self.target_lang}') + return self._translator_gen + + def chatgpt_config(self): + if self.gpt_config is not None and self._gpt_config is None: + #todo: load from already loaded file + self._gpt_config = OmegaConf.load(self.gpt_config) + return self._gpt_config + + class DetectorConfig(BaseModel): """""" detector: Detector =Detector.default @@ -208,3 +259,9 @@ class Config(BaseModel): """Set the convolution kernel size of the text erasure area to completely clean up text residues""" mask_dilation_offset: int = 0 """By how much to extend the text mask to remove left-over text pixels of the original image.""" + + @property + def re_filter_text(self): + if self._filter_text is None: + self._filter_text = re.compile(self.filter_text) + return self._filter_text diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 682216d74..62d4d154a 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -81,37 +81,6 @@ def load_dictionary(file_path): logger.error(f'Invalid dictionary entry at line {line_number}: {line.strip()}') return dictionary - -def _preprocess_params(ctx: Context): - # todo: fix - # params auto completion - # TODO: Move args into ctx.args and only calculate once, or just copy into ctx - for arg in DEFAULT_ARGS: - ctx.setdefault(arg, DEFAULT_ARGS[arg]) - - if ctx.selective_translation is not None: - ctx.selective_translation.target_lang = ctx.target_lang - 
ctx.translator = ctx.selective_translation - elif ctx.translator_chain is not None: - ctx.target_lang = ctx.translator_chain.langs[-1] - ctx.translator = ctx.translator_chain - else: - ctx.translator = TranslatorChain(f'{ctx.translator}:{ctx.target_lang}') - if ctx.gpt_config: - ctx.gpt_config = OmegaConf.load(ctx.gpt_config) - - if ctx.filter_text: - ctx.filter_text = re.compile(ctx.filter_text) - - if ctx.font_color: - colors = ctx.font_color.split(':') - try: - ctx.font_color_fg = hex2rgb(colors[0]) - ctx.font_color_bg = hex2rgb(colors[1]) if len(colors) > 1 else None - except: - raise Exception(f'Invalid --font-color value: {ctx.font_color}. Use a hex value such as FF0000') - - def apply_dictionary(text, dictionary): for pattern, value in dictionary: text = pattern.sub(value, text) @@ -168,6 +137,7 @@ def parse_init_params(self, params: dict): 'Is the correct pytorch version installed? (See https://pytorch.org/)') if params.get('model_dir'): ModelWrapper._MODEL_DIR = params.get('model_dir') + #todo: fix self.kernel_size=int(params.get('kernel_size')) os.environ['INPAINTING_PRECISION'] = params.get('inpainting_precision', 'fp32') @@ -199,8 +169,7 @@ async def translate(self, image: Image.Image, config: Config) -> Context: await prepare_detection(config.detector.detector) await prepare_ocr(config.ocr.ocr, self.device) await prepare_inpainting(config.inpainter.inpainter, self.device) - # todo: fix - await prepare_translation(config.translator.translator) + await prepare_translation(config.translator.translator_gen) if config.colorizer.colorizer != Colorizer.none: await prepare_colorization(config.colorizer.colorizer) # translate @@ -319,6 +288,7 @@ async def _translate(self, config: Config, ctx: Context) -> Context: # -- Inpainting await self._report_progress('inpainting') + #todo: fix _run_inpainting takes ctx ctx.img_inpainted = await self._run_inpainting(config, ctx) ctx.gimp_mask = np.dstack((cv2.cvtColor(ctx.img_inpainted, cv2.COLOR_RGB2BGR), ctx.mask)) @@ 
-345,6 +315,7 @@ async def _revert_upscale(self, config: Config, ctx: Context): return ctx async def _run_colorizer(self, config: Config, ctx: Context): + #todo: fix dispatch_colorization takes ctx return await dispatch_colorization(config.colorizer.colorizer, device=self.device, image=ctx.input, **ctx) async def _run_upscaling(self, config: Config, ctx: Context): @@ -358,15 +329,16 @@ async def _run_detection(self, config: Config, ctx: Context): self.device, self.verbose) async def _run_ocr(self, config: Config, ctx: Context): + #todo: fix dispatch_ocr takes ctx textlines = await dispatch_ocr(config.ocr.ocr, ctx.img_rgb, ctx.textlines, ctx, self.device, self.verbose) new_textlines = [] for textline in textlines: if textline.text.strip(): - if ctx.font_color_fg: - textline.fg_r, textline.fg_g, textline.fg_b = ctx.font_color_fg - if ctx.font_color_bg: - textline.bg_r, textline.bg_g, textline.bg_b = ctx.font_color_bg + if config.render.font_color_fg: + textline.fg_r, textline.fg_g, textline.fg_b = config.render.font_color_fg + if config.render.font_color_bg: + textline.bg_r, textline.bg_g, textline.bg_b = config.render.font_color_bg new_textlines.append(textline) return new_textlines @@ -400,8 +372,9 @@ async def _run_textline_merge(self, config: Config, ctx: Context): return text_regions async def _run_text_translation(self, config: Config, ctx: Context): + #todo: fix dispatch_translation takes ctx translated_sentences = \ - await dispatch_translation(config.translator.translator, + await dispatch_translation(config.translator.translator_gen, [region.text for region in ctx.text_regions], self.use_mtpe, ctx, 'cpu' if self._gpu_limited_memory else self.device) @@ -474,7 +447,7 @@ async def _run_text_translation(self, config: Config, ctx: Context): logger.info('Reason: Numeric translation') continue - if config.filter_text and re.search(config.filter_text, region.translation): + if config.filter_text and re.search(config.re_filter_text, region.translation): 
logger.info(f'Filtered out: {region.translation}') logger.info(f'Reason: Matched filter text: {config.filter_text}') continue @@ -512,7 +485,7 @@ async def _run_text_translation(self, config: Config, ctx: Context): if region.translation.isnumeric(): should_filter = True filter_reason = "Numeric translation" - elif config.filter_text and re.search(config.filter_text, region.translation): + elif config.filter_text and re.search(config.re_filter_text, region.translation): should_filter = True filter_reason = f"Matched filter text: {config.filter_text}" elif not config.translator.translator == Translator.original: diff --git a/manga_translator/translators/__init__.py b/manga_translator/translators/__init__.py index 30bff773e..f17b5f685 100644 --- a/manga_translator/translators/__init__.py +++ b/manga_translator/translators/__init__.py @@ -65,7 +65,7 @@ def get_translator(key: str, *args, **kwargs) -> CommonTranslator: prepare_selective_translator(get_translator) # TODO: Refactor -class TranslatorChain(): +class TranslatorChain: def __init__(self, string: str): """ Parses string in form 'trans1:lang1;trans2:lang2' into chains, From 82f6ea4de9e4f1eaff7cdbf4282aa0377ce1d29b Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 12:19:26 +0100 Subject: [PATCH 10/75] replace ctx in components --- manga_translator/config.py | 18 ++++++++++-------- manga_translator/inpainting/__init__.py | 8 +++++--- manga_translator/inpainting/common.py | 10 +++++----- .../inpainting/inpainting_lama_mpe.py | 6 ++++-- manga_translator/manga_translator.py | 18 ++++++------------ manga_translator/ocr/__init__.py | 10 +++++----- manga_translator/ocr/common.py | 9 +++++---- manga_translator/ocr/model_32px.py | 5 +++-- manga_translator/ocr/model_48px.py | 3 ++- manga_translator/ocr/model_48px_ctc.py | 5 +++-- manga_translator/ocr/model_manga_ocr.py | 10 +++------- manga_translator/translators/__init__.py | 13 +++++++++---- 
manga_translator/translators/chatgpt.py | 7 +++++-- manga_translator/translators/deepseek.py | 7 +++++-- manga_translator/translators/groq.py | 10 ++++------ manga_translator/utils/inference.py | 3 ++- 16 files changed, 76 insertions(+), 66 deletions(-) diff --git a/manga_translator/config.py b/manga_translator/config.py index 863268522..aea9f0141 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -30,6 +30,9 @@ class InpaintPrecision(IntEnum): fp16 = 1 bf16 = 2 + def __str__(self): + return self.name + class Detector(IntEnum): default = 0 dbconvnext = 1 @@ -118,7 +121,7 @@ def font_color_fg(self): colors = self.font_color.split(':') try: self._font_color_fg = hex2rgb(colors[0]) - self._font_color_fg = hex2rgb(colors[1]) if len(colors) > 1 else None + self._font_color_bg = hex2rgb(colors[1]) if len(colors) > 1 else None except: raise Exception( f'Invalid --font-color value: {self.font_color}. Use a hex value such as FF0000') @@ -155,15 +158,16 @@ class TranslatorConfig(BaseModel): """Skip translation if source image is one of the provide languages, use comma to separate multiple languages. Example: JPN,ENG""" gpt_config: Optional[str] = None # todo: no more path """Path to GPT config file, more info in README""" - translator_chain: Optional[str] = None # todo: add parser translator_chain #todo: merge into one + translator_chain: Optional[str] = None """Output of one translator goes in another. Example: --translator-chain "google:JPN;sugoi:ENG".""" - selective_translation: Optional[str] = None # todo: add parser translator_chain #todo: merge into one + selective_translation: Optional[str] = None """Select a translator based on detected language in image. Note the first translation service acts as default if the language isn\'t defined. 
Example: --translator-chain "google:JPN;sugoi:ENG".'""" @property def translator_gen(self): if self._translator_gen is None: if self.selective_translation is not None: + #todo: refactor TranslatorChain trans = translator_chain(self.selective_translation) trans.target_lang = self.target_lang self._translator_gen = trans @@ -175,6 +179,7 @@ def translator_gen(self): self._translator_gen = TranslatorChain(f'{self.translator}:{self.target_lang}') return self._translator_gen + @property def chatgpt_config(self): if self.gpt_config is not None and self._gpt_config is None: #todo: load from already loaded file @@ -198,8 +203,6 @@ class DetectorConfig(BaseModel): """Invert the image colors for detection. Might improve detection.""" det_gamma_correct: bool = False """Applies gamma correction for detection. Might improve detection.""" - ignore_bubble: int = 0 - """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" box_threshold: float = 0.7 """Threshold for bbox generation""" unclip_ratio: float = 2.3 @@ -213,7 +216,6 @@ class InpainterConfig(BaseModel): inpainting_precision: InpaintPrecision = InpaintPrecision.fp32 """Inpainting precision for lama, use bf16 while you can.""" - class ColorizerConfig(BaseModel): colorization_size: int = 576 """Size of image used for colorization. 
Set to -1 to use full image size""" @@ -222,8 +224,6 @@ class ColorizerConfig(BaseModel): colorizer: Colorizer = Colorizer.none """Colorization model to use.""" - - class OcrConfig(BaseModel): use_mocr_merge: bool = False """Use bbox merge when Manga OCR inference.""" @@ -231,6 +231,8 @@ class OcrConfig(BaseModel): """Optical character recognition (OCR) model to use""" min_text_length: int = 0 """Minimum text length of a text region""" + ignore_bubble: int = 0 + """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" class Config(BaseModel): # unclear diff --git a/manga_translator/inpainting/__init__.py b/manga_translator/inpainting/__init__.py index c4ca1ca13..52635c39f 100644 --- a/manga_translator/inpainting/__init__.py +++ b/manga_translator/inpainting/__init__.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np from .common import CommonInpainter, OfflineInpainter @@ -6,7 +8,7 @@ from .inpainting_sd import StableDiffusionInpainter from .none import NoneInpainter from .original import OriginalInpainter -from ..config import Inpainter +from ..config import Inpainter, InpainterConfig INPAINTERS = { Inpainter.default: AotInpainter, @@ -32,8 +34,8 @@ async def prepare(inpainter_key: Inpainter, device: str = 'cpu'): await inpainter.download() await inpainter.load(device) -async def dispatch(inpainter_key: Inpainter, image: np.ndarray, mask: np.ndarray, inpainting_size: int = 1024, device: str = 'cpu', verbose: bool = False) -> np.ndarray: +async def dispatch(inpainter_key: Inpainter, image: np.ndarray, mask: np.ndarray, config: Optional[InpainterConfig], inpainting_size: int = 1024, device: str = 'cpu', verbose: bool = False) -> np.ndarray: inpainter = get_inpainter(inpainter_key) if isinstance(inpainter, OfflineInpainter): await 
inpainter.load(device) - return await inpainter.inpaint(image, mask, inpainting_size, verbose) + return await inpainter.inpaint(image, mask, config or InpainterConfig(), inpainting_size, verbose) diff --git a/manga_translator/inpainting/common.py b/manga_translator/inpainting/common.py index 208700144..6aa25da19 100644 --- a/manga_translator/inpainting/common.py +++ b/manga_translator/inpainting/common.py @@ -1,16 +1,16 @@ -import os import numpy as np from abc import abstractmethod +from ..config import InpainterConfig from ..utils import InfererModule, ModelWrapper class CommonInpainter(InfererModule): - async def inpaint(self, image: np.ndarray, mask: np.ndarray, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: - return await self._inpaint(image, mask, inpainting_size, verbose) + async def inpaint(self, image: np.ndarray, mask: np.ndarray, config: InpainterConfig, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: + return await self._inpaint(image, mask, config, inpainting_size, verbose) @abstractmethod - async def _inpaint(self, image: np.ndarray, mask: np.ndarray, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: + async def _inpaint(self, image: np.ndarray, mask: np.ndarray, config: InpainterConfig, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: pass class OfflineInpainter(CommonInpainter, ModelWrapper): @@ -20,5 +20,5 @@ async def _inpaint(self, *args, **kwargs): return await self.infer(*args, **kwargs) @abstractmethod - async def _infer(self, image: np.ndarray, mask: np.ndarray, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: + async def _infer(self, image: np.ndarray, mask: np.ndarray, config: InpainterConfig, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: pass diff --git a/manga_translator/inpainting/inpainting_lama_mpe.py b/manga_translator/inpainting/inpainting_lama_mpe.py index ae7f0ae65..ddfa0cfc8 100644 --- 
a/manga_translator/inpainting/inpainting_lama_mpe.py +++ b/manga_translator/inpainting/inpainting_lama_mpe.py @@ -12,6 +12,7 @@ from torch import Tensor from .common import OfflineInpainter +from ..config import InpainterConfig from ..utils import resize_keep_aspect @@ -52,7 +53,7 @@ async def _load(self, device: str): async def _unload(self): del self.model - async def _infer(self, image: np.ndarray, mask: np.ndarray, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: + async def _infer(self, image: np.ndarray, mask: np.ndarray, config: InpainterConfig, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: img_original = np.copy(image) mask_original = np.copy(mask) mask_original[mask_original < 127] = 0 @@ -95,7 +96,8 @@ async def _infer(self, image: np.ndarray, mask: np.ndarray, inpainting_size: int else: # Note: lama's weight shouldn't be convert to fp16 or bf16 otherwise it produces darkened results. # but it can inference under torch.autocast - precision = TORCH_DTYPE_MAP[os.environ.get("INPAINTING_PRECISION", "fp32")] + + precision = TORCH_DTYPE_MAP[str(config.inpainting_precision)] if precision == torch.float16: precision = torch.bfloat16 diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 62d4d154a..57ab5eac7 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -10,7 +10,6 @@ from PIL import Image from typing import Optional, Any -from .args import DEFAULT_ARGS from .config import Config, Colorizer, Detector, Translator, Renderer, Inpainter from .utils import ( BASE_PATH, @@ -21,7 +20,6 @@ dump_image, visualize_textblocks, is_valuable_text, - hex2rgb, sort_regions, ) @@ -33,7 +31,6 @@ from .inpainting import dispatch as dispatch_inpainting, prepare as prepare_inpainting from .translators import ( LANGDETECT_MAP, - TranslatorChain, dispatch as dispatch_translation, prepare as prepare_translation, ) @@ -137,9 +134,8 @@ def parse_init_params(self, 
params: dict): 'Is the correct pytorch version installed? (See https://pytorch.org/)') if params.get('model_dir'): ModelWrapper._MODEL_DIR = params.get('model_dir') - #todo: fix + #todo: fix why is kernel size loaded in the constructor self.kernel_size=int(params.get('kernel_size')) - os.environ['INPAINTING_PRECISION'] = params.get('inpainting_precision', 'fp32') @property def using_gpu(self): @@ -281,14 +277,13 @@ async def _translate(self, config: Config, ctx: Context) -> Context: ctx.mask = await self._run_mask_refinement(config, ctx) if self.verbose: - inpaint_input_img = await dispatch_inpainting(Inpainter.none, ctx.img_rgb, ctx.mask, config.inpainter.inpainting_size, + inpaint_input_img = await dispatch_inpainting(Inpainter.none, ctx.img_rgb, ctx.mask, config.inpainter, self.using_gpu, self.verbose) cv2.imwrite(self._result_path('inpaint_input.png'), cv2.cvtColor(inpaint_input_img, cv2.COLOR_RGB2BGR)) cv2.imwrite(self._result_path('mask_final.png'), ctx.mask) # -- Inpainting await self._report_progress('inpainting') - #todo: fix _run_inpainting takes ctx ctx.img_inpainted = await self._run_inpainting(config, ctx) ctx.gimp_mask = np.dstack((cv2.cvtColor(ctx.img_inpainted, cv2.COLOR_RGB2BGR), ctx.mask)) @@ -315,7 +310,7 @@ async def _revert_upscale(self, config: Config, ctx: Context): return ctx async def _run_colorizer(self, config: Config, ctx: Context): - #todo: fix dispatch_colorization takes ctx + #todo: im pretty sure the ctx is never used. does it need to be passed in? 
return await dispatch_colorization(config.colorizer.colorizer, device=self.device, image=ctx.input, **ctx) async def _run_upscaling(self, config: Config, ctx: Context): @@ -329,8 +324,7 @@ async def _run_detection(self, config: Config, ctx: Context): self.device, self.verbose) async def _run_ocr(self, config: Config, ctx: Context): - #todo: fix dispatch_ocr takes ctx - textlines = await dispatch_ocr(config.ocr.ocr, ctx.img_rgb, ctx.textlines, ctx, self.device, self.verbose) + textlines = await dispatch_ocr(config.ocr.ocr, ctx.img_rgb, ctx.textlines, config.ocr, self.device, self.verbose) new_textlines = [] for textline in textlines: @@ -372,10 +366,10 @@ async def _run_textline_merge(self, config: Config, ctx: Context): return text_regions async def _run_text_translation(self, config: Config, ctx: Context): - #todo: fix dispatch_translation takes ctx translated_sentences = \ await dispatch_translation(config.translator.translator_gen, [region.text for region in ctx.text_regions], + config.translator, self.use_mtpe, ctx, 'cpu' if self._gpu_limited_memory else self.device) @@ -508,7 +502,7 @@ async def _run_mask_refinement(self, config: Config, ctx: Context): return await dispatch_mask_refinement(ctx.text_regions, ctx.img_rgb, ctx.mask_raw, 'fit_text', config.mask_dilation_offset, config.detector.ignore_bubble, self.verbose,self.kernel_size) - async def _run_inpainting(self, config: Config,ctx: Context): + async def _run_inpainting(self, config: Config, ctx: Context): return await dispatch_inpainting(config.inpainter.inpainter, ctx.img_rgb, ctx.mask, config.inpainter.inpainting_size, self.device, self.verbose) diff --git a/manga_translator/ocr/__init__.py b/manga_translator/ocr/__init__.py index 14d580f69..c7bd9b8c5 100644 --- a/manga_translator/ocr/__init__.py +++ b/manga_translator/ocr/__init__.py @@ -1,12 +1,12 @@ import numpy as np -from typing import List +from typing import List, Optional from .common import CommonOCR, OfflineOCR from .model_32px import 
Model32pxOCR from .model_48px import Model48pxOCR from .model_48px_ctc import Model48pxCTCOCR from .model_manga_ocr import ModelMangaOCR -from ..config import Ocr +from ..config import Ocr, OcrConfig from ..utils import Quadrilateral OCRS = { @@ -31,9 +31,9 @@ async def prepare(ocr_key: Ocr, device: str = 'cpu'): await ocr.download() await ocr.load(device) -async def dispatch(ocr_key: Ocr, image: np.ndarray, regions: List[Quadrilateral], args = None, device: str = 'cpu', verbose: bool = False) -> List[Quadrilateral]: +async def dispatch(ocr_key: Ocr, image: np.ndarray, regions: List[Quadrilateral], config:Optional[OcrConfig] = None, device: str = 'cpu', verbose: bool = False) -> List[Quadrilateral]: ocr = get_ocr(ocr_key) if isinstance(ocr, OfflineOCR): await ocr.load(device) - args = args or {} - return await ocr.recognize(image, regions, args, verbose) + config = config or OcrConfig() + return await ocr.recognize(image, regions, config, verbose) diff --git a/manga_translator/ocr/common.py b/manga_translator/ocr/common.py index 1c056195f..ff3b088f4 100644 --- a/manga_translator/ocr/common.py +++ b/manga_translator/ocr/common.py @@ -5,6 +5,7 @@ import networkx as nx import itertools +from ..config import OcrConfig from ..utils import InfererModule, TextBlock, ModelWrapper, Quadrilateral class CommonOCR(InfererModule): @@ -37,15 +38,15 @@ def _generate_text_direction(self, bboxes: List[Union[Quadrilateral, TextBlock]] for node in nodes: yield bboxes[node], majority_dir - async def recognize(self, image: np.ndarray, textlines: List[Quadrilateral], args: dict, verbose: bool = False) -> List[Quadrilateral]: + async def recognize(self, image: np.ndarray, textlines: List[Quadrilateral], config: OcrConfig, verbose: bool = False) -> List[Quadrilateral]: ''' Performs the optical character recognition, using the `textlines` as areas of interests. Returns a `textlines` list with the `textline.text` property set to the detected text string. 
''' - return await self._recognize(image, textlines, args, verbose) + return await self._recognize(image, textlines, config, verbose) @abstractmethod - async def _recognize(self, image: np.ndarray, textlines: List[Quadrilateral], args: dict, verbose: bool = False) -> List[Quadrilateral]: + async def _recognize(self, image: np.ndarray, textlines: List[Quadrilateral], config: OcrConfig, verbose: bool = False) -> List[Quadrilateral]: pass @@ -56,5 +57,5 @@ async def _recognize(self, *args, **kwargs): return await self.infer(*args, **kwargs) @abstractmethod - async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], args: dict, verbose: bool = False) -> List[Quadrilateral]: + async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], args: OcrConfig, verbose: bool = False) -> List[Quadrilateral]: pass diff --git a/manga_translator/ocr/model_32px.py b/manga_translator/ocr/model_32px.py index 6c1b1193f..829fb31ac 100644 --- a/manga_translator/ocr/model_32px.py +++ b/manga_translator/ocr/model_32px.py @@ -11,6 +11,7 @@ import torch.nn as nn import torch.nn.functional as F +from manga_translator.config import OcrConfig from .common import OfflineOCR from ..utils import TextBlock, Quadrilateral, chunks from ..utils.bubble import is_ignore @@ -55,10 +56,10 @@ async def _load(self, device: str): async def _unload(self): del self.model - async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], args: dict, verbose: bool = False) -> List[TextBlock]: + async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], config: OcrConfig, verbose: bool = False) -> List[TextBlock]: text_height = 32 max_chunk_size = 16 - ignore_bubble = args.get('ignore_bubble', 0) + ignore_bubble = config.ignore_bubble quadrilaterals = list(self._generate_text_direction(textlines)) region_imgs = [q.get_transformed_region(image, d, text_height) for q, d in quadrilaterals] diff --git a/manga_translator/ocr/model_48px.py 
b/manga_translator/ocr/model_48px.py index fcd45eb03..029788af4 100644 --- a/manga_translator/ocr/model_48px.py +++ b/manga_translator/ocr/model_48px.py @@ -12,6 +12,7 @@ import torch.nn as nn import torch.nn.functional as F +from manga_translator.config import OcrConfig from .xpos_relative_position import XPOS # Roformer with Xpos and Local Attention ViT @@ -63,7 +64,7 @@ async def _load(self, device: str): async def _unload(self): del self.model - async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], args: dict, verbose: bool = False, ignore_bubble: int = 0) -> List[TextBlock]: + async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], config: OcrConfig, verbose: bool = False, ignore_bubble: int = 0) -> List[TextBlock]: text_height = 48 max_chunk_size = 16 diff --git a/manga_translator/ocr/model_48px_ctc.py b/manga_translator/ocr/model_48px_ctc.py index d42866e93..062b93e18 100644 --- a/manga_translator/ocr/model_48px_ctc.py +++ b/manga_translator/ocr/model_48px_ctc.py @@ -10,6 +10,7 @@ import torch.nn as nn import torch.nn.functional as F +from manga_translator.config import OcrConfig from .common import OfflineOCR from ..utils import TextBlock, Quadrilateral, AvgMeter, chunks from ..utils.bubble import is_ignore @@ -58,10 +59,10 @@ async def _load(self, device: str): async def _unload(self): del self.model - async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], args: dict, verbose: bool = False) -> List[TextBlock]: + async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], config: OcrConfig, verbose: bool = False) -> List[TextBlock]: text_height = 48 max_chunk_size = 16 - ignore_bubble = args.get('ignore_bubble', 0) + ignore_bubble = config.ignore_bubble quadrilaterals = list(self._generate_text_direction(textlines)) region_imgs = [q.get_transformed_region(image, d, text_height) for q, d in quadrilaterals] diff --git a/manga_translator/ocr/model_manga_ocr.py 
b/manga_translator/ocr/model_manga_ocr.py index ee8ba65ab..34f6146fb 100644 --- a/manga_translator/ocr/model_manga_ocr.py +++ b/manga_translator/ocr/model_manga_ocr.py @@ -12,19 +12,15 @@ from shapely.geometry import Polygon import torch -import torch.nn as nn -import torch.nn.functional as F from manga_ocr import MangaOcr -from .xpos_relative_position import XPOS - from .common import OfflineOCR from .model_48px import OCR +from ..config import OcrConfig from ..textline_merge import split_text_region from ..utils import TextBlock, Quadrilateral, quadrilateral_can_merge_region, chunks from ..utils.generic import AvgMeter -from ..utils.bubble import is_ignore async def merge_bboxes(bboxes: List[Quadrilateral], width: int, height: int) -> Tuple[List[Quadrilateral], int]: # step 1: divide into multiple text region candidates @@ -133,7 +129,7 @@ async def _unload(self): del self.model del self.mocr - async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], args: dict, verbose: bool = False, ignore_bubble: int = 0) -> List[TextBlock]: + async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], config: OcrConfig, verbose: bool = False, ignore_bubble: int = 0) -> List[TextBlock]: text_height = 48 max_chunk_size = 16 @@ -147,7 +143,7 @@ async def _infer(self, image: np.ndarray, textlines: List[Quadrilateral], args: is_quadrilaterals = True texts = {} - if args.get('use_mocr_merge', False): + if config.use_mocr_merge: merged_textlines, merged_idx = await merge_bboxes(textlines, image.shape[1], image.shape[0]) merged_quadrilaterals = list(self._generate_text_direction(merged_textlines)) else: diff --git a/manga_translator/translators/__init__.py b/manga_translator/translators/__init__.py index f17b5f685..ab2445f3e 100644 --- a/manga_translator/translators/__init__.py +++ b/manga_translator/translators/__init__.py @@ -1,3 +1,5 @@ +from typing import Optional + import py3langid as langid from .common import * @@ -19,7 +21,8 @@ from .sakura 
import SakuraTranslator from .qwen2 import Qwen2Translator, Qwen2BigTranslator from .groq import GroqTranslator -from ..config import Translator +from .. import Context +from ..config import Translator, TranslatorConfig OFFLINE_TRANSLATORS = { Translator.offline: SelectiveOfflineTranslator, @@ -103,7 +106,7 @@ async def prepare(chain: TranslatorChain): await translator.download() # TODO: Optionally take in strings instead of TranslatorChain for simplicity -async def dispatch(chain: TranslatorChain, queries: List[str], use_mtpe: bool = False, args = None, device: str = 'cpu') -> List[str]: +async def dispatch(chain: TranslatorChain, queries: List[str], translator_config: Optional[TranslatorConfig] = None, use_mtpe: bool = False, args:Optional[Context] = None, device: str = 'cpu') -> List[str]: if not queries: return queries @@ -118,7 +121,8 @@ async def dispatch(chain: TranslatorChain, queries: List[str], use_mtpe: bool = translator = get_translator(chain.langs[0]) if isinstance(translator, OfflineTranslator): await translator.load('auto', chain.target_lang, device) - translator.parse_args(args) + if translator_config: + translator.parse_args(translator_config) queries = await translator.translate('auto', chain.target_lang, queries, use_mtpe) return queries if args is not None: @@ -127,7 +131,8 @@ async def dispatch(chain: TranslatorChain, queries: List[str], use_mtpe: bool = translator = get_translator(key) if isinstance(translator, OfflineTranslator): await translator.load('auto', tgt_lang, device) - translator.parse_args(args) + if translator_config: + translator.parse_args(translator_config) queries = await translator.translate('auto', tgt_lang, queries, use_mtpe) if args is not None: args['translations'][tgt_lang] = queries diff --git a/manga_translator/translators/chatgpt.py b/manga_translator/translators/chatgpt.py index b65e6461b..7763e0b41 100644 --- a/manga_translator/translators/chatgpt.py +++ b/manga_translator/translators/chatgpt.py @@ -1,4 +1,7 @@ 
import re + +from ..config import TranslatorConfig + try: import openai except ImportError: @@ -71,8 +74,8 @@ def __init__(self, check_openai_key = True): self.token_count_last = 0 self.config = None - def parse_args(self, args): - self.config = args.gpt_config + def parse_args(self, args: TranslatorConfig): + self.config = args.chatgpt_config def _config_get(self, key: str, default=None): if not self.config: diff --git a/manga_translator/translators/deepseek.py b/manga_translator/translators/deepseek.py index 7a108d4d8..7c712d7e3 100644 --- a/manga_translator/translators/deepseek.py +++ b/manga_translator/translators/deepseek.py @@ -1,4 +1,7 @@ import re + +from ..config import TranslatorConfig + try: import openai except ImportError: @@ -94,8 +97,8 @@ def __init__(self, check_openai_key=True): self.token_count_last = 0 self.config = None - def parse_args(self, args): - self.config = args.gpt_config + def parse_args(self, args: TranslatorConfig): + self.config = args.chatgpt_config def _config_get(self, key: str, default=None): if not self.config: diff --git a/manga_translator/translators/groq.py b/manga_translator/translators/groq.py index f9218b065..2de7becae 100644 --- a/manga_translator/translators/groq.py +++ b/manga_translator/translators/groq.py @@ -1,11 +1,8 @@ import groq -import asyncio -import time -import json -import re import os from typing import List +from . 
import TranslatorChain from .common import CommonTranslator, MissingAPIKeyException from .keys import GROQ_API_KEY @@ -64,8 +61,9 @@ def __init__(self, check_groq_key=True): - def parse_args(self, args): - self.config = args.groq_config + def parse_args(self, args: TranslatorChain): + #todo: is nver set + self.config = None def _config_get(self, key: str, default=None): if not self.config: diff --git a/manga_translator/utils/inference.py b/manga_translator/utils/inference.py index e18ae3b04..47fef6b01 100644 --- a/manga_translator/utils/inference.py +++ b/manga_translator/utils/inference.py @@ -19,6 +19,7 @@ get_filename_from_url, ) from .log import get_logger +from ..config import TranslatorConfig class InfererModule(ABC): @@ -26,7 +27,7 @@ def __init__(self): self.logger = get_logger(self.__class__.__name__) super().__init__() - def parse_args(self, args: Context): + def parse_args(self, args: TranslatorConfig): """May be overwritten by super classes to parse commandline arguments""" pass From f8996c5426ec718731a91aeb2dd550b2ce6c6419 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 12:21:26 +0100 Subject: [PATCH 11/75] fix imports --- manga_translator/config.py | 5 +++-- manga_translator/manga_translator.py | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/manga_translator/config.py b/manga_translator/config.py index aea9f0141..b8e4c81cb 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -1,12 +1,13 @@ import re -from enum import Enum, IntEnum +from enum import IntEnum from omegaconf import OmegaConf from pydantic import BaseModel from typing import Optional -from manga_translator import TranslatorChain, hex2rgb from manga_translator.args import translator_chain +from manga_translator.translators import TranslatorChain +from manga_translator.utils import hex2rgb class Renderer(IntEnum): diff --git a/manga_translator/manga_translator.py 
b/manga_translator/manga_translator.py index 57ab5eac7..13ad6860a 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -1,5 +1,4 @@ import cv2 -from omegaconf import OmegaConf import langcodes import langdetect import os From e23510cc935092ad9f6e0e905962a6cde1fe5ef9 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 12:24:28 +0100 Subject: [PATCH 12/75] fix stuff i forgot --- manga_translator/manga_translator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 13ad6860a..746a59be7 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -352,8 +352,8 @@ async def _run_textline_merge(self, config: Config, ctx: Context): elif langcodes.tag_distance(region.source_lang, config.translator.target_lang) == 0: logger.info('Reason: Text language matches the target language and no_text_lang_skip is False.') else: - if ctx.font_color_fg or ctx.font_color_bg: - if ctx.font_color_bg: + if config.render.font_color_fg or config.render.font_color_bg: + if config.render.font_color_bg: region.adjust_bg_color = False new_text_regions.append(region) text_regions = new_text_regions @@ -502,7 +502,7 @@ async def _run_mask_refinement(self, config: Config, ctx: Context): config.mask_dilation_offset, config.detector.ignore_bubble, self.verbose,self.kernel_size) async def _run_inpainting(self, config: Config, ctx: Context): - return await dispatch_inpainting(config.inpainter.inpainter, ctx.img_rgb, ctx.mask, config.inpainter.inpainting_size, self.device, + return await dispatch_inpainting(config.inpainter.inpainter, ctx.img_rgb, ctx.mask, config.inpainter, self.device, self.verbose) async def _run_text_rendering(self, config: Config, ctx: Context): From fa6cd8e90c4fe5714e1e8057e1687dfe1fbc5e10 Mon Sep 17 00:00:00 2001 From: frederik-uni 
<147479464+frederik-uni@users.noreply.github.com> Date: Sun, 17 Nov 2024 12:40:16 +0100 Subject: [PATCH 13/75] convert str to enum in translatorchain --- manga_translator/config.py | 7 ++++++- manga_translator/translators/__init__.py | 7 ++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/manga_translator/config.py b/manga_translator/config.py index b8e4c81cb..fef1ee371 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -58,6 +58,7 @@ class Ocr(IntEnum): ocr48px = 1 ocr48px_ctc = 2 mocr = 3 + class Translator(IntEnum): youdao = 0 baidu = 1 @@ -83,6 +84,10 @@ class Translator(IntEnum): mbart50 = 21 qwen2 = 22 qwen2_big = 23 + + def __str__(self): + return self.name + class Upscaler: waifu2x = 0 esrgan = 1 @@ -177,7 +182,7 @@ def translator_gen(self): trans.target_lang = trans.langs[-1] self._translator_gen = trans else: - self._translator_gen = TranslatorChain(f'{self.translator}:{self.target_lang}') + self._translator_gen = TranslatorChain(f'{str(self.translator)}:{self.target_lang}') return self._translator_gen @property diff --git a/manga_translator/translators/__init__.py b/manga_translator/translators/__init__.py index ab2445f3e..0ac0c00c6 100644 --- a/manga_translator/translators/__init__.py +++ b/manga_translator/translators/__init__.py @@ -57,7 +57,7 @@ } translator_cache = {} -def get_translator(key: str, *args, **kwargs) -> CommonTranslator: +def get_translator(key: Translator, *args, **kwargs) -> CommonTranslator: if key not in TRANSLATORS: raise ValueError(f'Could not find translator for: "{key}". 
Choose from the following: %s' % ','.join(TRANSLATORS)) if not translator_cache.get(key): @@ -80,11 +80,12 @@ def __init__(self, string: str): self.target_lang = None for g in string.split(';'): trans, lang = g.split(':') - if trans not in TRANSLATORS: + translator = Translator[trans] + if translator not in TRANSLATORS: raise ValueError(f'Invalid choice: %s (choose from %s)' % (trans, ', '.join(map(repr, TRANSLATORS)))) if lang not in VALID_LANGUAGES: raise ValueError(f'Invalid choice: %s (choose from %s)' % (lang, ', '.join(map(repr, VALID_LANGUAGES)))) - self.chain.append((trans, lang)) + self.chain.append((translator, lang)) self.translators, self.langs = list(zip(*self.chain)) def has_offline(self) -> bool: From 85d6739b98199afa95b581341bfc67d57fe67fa2 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Mon, 18 Nov 2024 19:44:19 +0100 Subject: [PATCH 14/75] circular import fix --- manga_translator/args.py | 9 --- manga_translator/config.py | 70 +++++++++++++++++++----- manga_translator/mode/local.py | 44 +++++++++------ manga_translator/translators/__init__.py | 36 +----------- manga_translator/translators/groq.py | 3 +- manga_translator/utils/inference.py | 1 - 6 files changed, 87 insertions(+), 76 deletions(-) diff --git a/manga_translator/args.py b/manga_translator/args.py index 2eb72c91d..c35e02f8a 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -51,15 +51,6 @@ def dir_path(string): # return string # return _func -def translator_chain(string): - try: - return TranslatorChain(string) - except ValueError as e: - raise argparse.ArgumentTypeError(e) - except Exception: - raise argparse.ArgumentTypeError(f'Invalid translator_chain value: "{string}". 
Example usage: --translator "google:sugoi" -l "JPN:ENG"') - - class HelpFormatter(argparse.HelpFormatter): INDENT_INCREMENT = 2 MAX_HELP_POSITION = 24 diff --git a/manga_translator/config.py b/manga_translator/config.py index fef1ee371..428685b82 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -1,14 +1,58 @@ +import argparse import re from enum import IntEnum -from omegaconf import OmegaConf -from pydantic import BaseModel from typing import Optional -from manga_translator.args import translator_chain -from manga_translator.translators import TranslatorChain -from manga_translator.utils import hex2rgb +from omegaconf import OmegaConf + +# TODO: Refactor +class TranslatorChain: + def __init__(self, string: str): + """ + Parses string in form 'trans1:lang1;trans2:lang2' into chains, + which will be executed one after another when passed to the dispatch function. + """ + from manga_translator.translators import TRANSLATORS, VALID_LANGUAGES + if not string: + raise Exception('Invalid translator chain') + self.chain = [] + self.target_lang = None + for g in string.split(';'): + trans, lang = g.split(':') + translator = Translator[trans] + if translator not in TRANSLATORS: + raise ValueError(f'Invalid choice: %s (choose from %s)' % (trans, ', '.join(map(repr, TRANSLATORS)))) + if lang not in VALID_LANGUAGES: + raise ValueError(f'Invalid choice: %s (choose from %s)' % (lang, ', '.join(map(repr, VALID_LANGUAGES)))) + self.chain.append((translator, lang)) + self.translators, self.langs = list(zip(*self.chain)) + + def has_offline(self) -> bool: + """ + Returns True if the chain contains offline translators. 
+ """ + from manga_translator.translators import OFFLINE_TRANSLATORS + return any(translator in OFFLINE_TRANSLATORS for translator in self.translators) + + def __eq__(self, __o: object) -> bool: + if type(__o) is str: + return __o == self.translators[0] + return super.__eq__(self, __o) + + +def translator_chain(string): + try: + return TranslatorChain(string) + except ValueError as e: + raise argparse.ArgumentTypeError(e) + except Exception: + raise argparse.ArgumentTypeError(f'Invalid translator_chain value: "{string}". Example usage: --translator "google:sugoi" -l "JPN:ENG"') + +def hex2rgb(h): + h = h.lstrip('#') + return tuple(int(h[i:i+2], 16) for i in (0, 2, 4)) class Renderer(IntEnum): default = 0 @@ -93,7 +137,7 @@ class Upscaler: esrgan = 1 upscler4xultrasharp = 2 -class RenderConfig(BaseModel): +class RenderConfig: renderer: Renderer = Renderer.default """Render english text translated from manga with some additional typesetting. Ignores some other argument options""" alignment: Alignment = Alignment.auto @@ -145,7 +189,7 @@ def font_color_bg(self): f'Invalid --font-color value: {self.font_color}. Use a hex value such as FF0000') return self._font_color_bg -class UpscaleConfig(BaseModel): +class UpscaleConfig: upscaler: Upscaler = Upscaler.esrgan """Upscaler to use. --upscale-ratio has to be set for it to take effect""" revert_upscaling: bool = False @@ -153,7 +197,7 @@ class UpscaleConfig(BaseModel): upscale_ratio: Optional[int] = None """Image upscale ratio applied before detection. 
Can improve text detection.""" -class TranslatorConfig(BaseModel): +class TranslatorConfig: translator: Translator = Translator.sugoi """Language translator to use""" target_lang: str = 'ENG' #todo: validate VALID_LANGUAGES #todo: convert to enum @@ -193,7 +237,7 @@ def chatgpt_config(self): return self._gpt_config -class DetectorConfig(BaseModel): +class DetectorConfig: """""" detector: Detector =Detector.default """"Text detector used for creating a text mask from an image, DO NOT use craft for manga, it\'s not designed for it""" @@ -214,7 +258,7 @@ class DetectorConfig(BaseModel): unclip_ratio: float = 2.3 """How much to extend text skeleton to form bounding box""" -class InpainterConfig(BaseModel): +class InpainterConfig: inpainter: Inpainter = Inpainter.lama_large """Inpainting model to use""" inpainting_size: int = 2048 @@ -222,7 +266,7 @@ class InpainterConfig(BaseModel): inpainting_precision: InpaintPrecision = InpaintPrecision.fp32 """Inpainting precision for lama, use bf16 while you can.""" -class ColorizerConfig(BaseModel): +class ColorizerConfig: colorization_size: int = 576 """Size of image used for colorization. Set to -1 to use full image size""" denoise_sigma: int = 30 @@ -230,7 +274,7 @@ class ColorizerConfig(BaseModel): colorizer: Colorizer = Colorizer.none """Colorization model to use.""" -class OcrConfig(BaseModel): +class OcrConfig: use_mocr_merge: bool = False """Use bbox merge when Manga OCR inference.""" ocr: Ocr = Ocr.ocr48px @@ -240,7 +284,7 @@ class OcrConfig(BaseModel): ignore_bubble: int = 0 """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. 
If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" -class Config(BaseModel): +class Config: # unclear pre_dict: Optional[str] = None post_dict: Optional[str] = None diff --git a/manga_translator/mode/local.py b/manga_translator/mode/local.py index 67e67ebb5..dc395cf80 100644 --- a/manga_translator/mode/local.py +++ b/manga_translator/mode/local.py @@ -3,8 +3,7 @@ from PIL import Image -from manga_translator import MangaTranslator, logger, Context, TranslationInterrupt -from ..manga_translator import _preprocess_params +from manga_translator import MangaTranslator, logger, Context, TranslationInterrupt, Config from ..save import save_result from ..translators import ( LanguageUnsupportedException, @@ -14,6 +13,17 @@ class MangaTranslatorLocal(MangaTranslator): + def __init__(self, params: dict = None): + super().__init__(params) + self.attempts = params.get('attempts', None) + self.skip_no_text = params.get('skip_no_text', False) + self.text_output_file = params.get('text_output_file', None) + self.save_quality = params.get('save_quality', None) + self.text_regions = params.get('text_regions', None) + self.save_text_file = params.get('save_text_file', None) + self.save_text = params.get('save_text', None) + self.prep_manual = params.get('prep_manual', None) + async def translate_path(self, path: str, dest: str = None, params: dict[str, Union[int, str]] = None): """ Translates an image or folder (recursively) specified through the path. @@ -79,7 +89,7 @@ async def translate_path(self, path: str, dest: str = None, params: dict[str, Un else: logger.info(f'Done. 
Translated {translated_count} image{"" if translated_count == 1 else "s"}') - async def translate_file(self, path: str, dest: str, params: dict): + async def translate_file(self, path: str, dest: str, params: dict, config: Config): if not params.get('overwrite') and os.path.exists(dest): logger.info( f'Skipping as already translated: "{dest}". Use --overwrite to overwrite existing translations.') @@ -91,15 +101,14 @@ async def translate_file(self, path: str, dest: str, params: dict): # Turn dict to context to make values also accessible through params. params = params or {} ctx = Context(**params) - _preprocess_params(ctx) attempts = 0 - while ctx.attempts == -1 or attempts < ctx.attempts + 1: + while self.attempts == -1 or attempts < self.attempts + 1: if attempts > 0: logger.info(f'Retrying translation! Attempt {attempts}' - + (f' of {ctx.attempts}' if ctx.attempts != -1 else '')) + + (f' of {self.attempts}' if self.attempts != -1 else '')) try: - return await self._translate_file(path, dest, ctx) + return await self._translate_file(path, dest, config, ctx) except TranslationInterrupt: break @@ -108,7 +117,7 @@ async def translate_file(self, path: str, dest: str, params: dict): await self._report_progress('error-lang', True) else: await self._report_progress('error', True) - if not self.ignore_errors and not (ctx.attempts == -1 or attempts < ctx.attempts): + if not self.ignore_errors and not (self.attempts == -1 or attempts < self.attempts): raise else: logger.error(f'{e.__class__.__name__}: {e}', @@ -116,12 +125,12 @@ async def translate_file(self, path: str, dest: str, params: dict): attempts += 1 return False - async def _translate_file(self, path: str, dest: str, ctx: Context) -> bool: + async def _translate_file(self, path: str, dest: str, config: Config, ctx: Context) -> bool: if path.endswith('.txt'): with open(path, 'r') as f: queries = f.read().split('\n') translated_sentences = \ - await dispatch_translation(ctx.translator, queries, ctx.use_mtpe, ctx, + 
await dispatch_translation(config.translator.translator_gen, queries, self.use_mtpe, ctx, 'cpu' if self._gpu_limited_memory else self.device) p, ext = os.path.splitext(dest) if ext != '.txt': @@ -142,26 +151,27 @@ async def _translate_file(self, path: str, dest: str, ctx: Context) -> bool: logger.warn(f'Failed to open image: {path}') return False - ctx = await self.translate(img, ctx) + ctx = await self.translate(img, config) result = ctx.result # Save result - if ctx.skip_no_text and not ctx.text_regions: + if self.skip_no_text and not ctx.text_regions: logger.debug('Not saving due to --skip-no-text') return True if result: logger.info(f'Saving "{dest}"') + ctx.save_quality = self.save_quality save_result(result, dest, ctx) await self._report_progress('saved', True) - if ctx.save_text or ctx.save_text_file or ctx.prep_manual: - if ctx.prep_manual: + if self.save_text or self.save_text_file or self.prep_manual: + if self.prep_manual: # Save original image next to translated p, ext = os.path.splitext(dest) img_filename = p + '-orig' + ext img_path = os.path.join(os.path.dirname(dest), img_filename) - img.save(img_path, quality=ctx.save_quality) - if ctx.text_regions: + img.save(img_path, quality=self.save_quality) + if self.text_regions: self._save_text_to_file(path, ctx) return True return False @@ -194,7 +204,7 @@ def identify_colors(fg_rgb: List[int]): s += f'coords: {list(line.ravel())}\n' s += '\n' - text_output_file = ctx.text_output_file + text_output_file = self.text_output_file if not text_output_file: text_output_file = os.path.splitext(image_path)[0] + '_translations.txt' diff --git a/manga_translator/translators/__init__.py b/manga_translator/translators/__init__.py index 0ac0c00c6..36005fea6 100644 --- a/manga_translator/translators/__init__.py +++ b/manga_translator/translators/__init__.py @@ -21,8 +21,8 @@ from .sakura import SakuraTranslator from .qwen2 import Qwen2Translator, Qwen2BigTranslator from .groq import GroqTranslator -from .. 
import Context -from ..config import Translator, TranslatorConfig +from ..config import Translator, TranslatorConfig, TranslatorChain +from ..utils import Context OFFLINE_TRANSLATORS = { Translator.offline: SelectiveOfflineTranslator, @@ -67,38 +67,6 @@ def get_translator(key: Translator, *args, **kwargs) -> CommonTranslator: prepare_selective_translator(get_translator) -# TODO: Refactor -class TranslatorChain: - def __init__(self, string: str): - """ - Parses string in form 'trans1:lang1;trans2:lang2' into chains, - which will be executed one after another when passed to the dispatch function. - """ - if not string: - raise Exception('Invalid translator chain') - self.chain = [] - self.target_lang = None - for g in string.split(';'): - trans, lang = g.split(':') - translator = Translator[trans] - if translator not in TRANSLATORS: - raise ValueError(f'Invalid choice: %s (choose from %s)' % (trans, ', '.join(map(repr, TRANSLATORS)))) - if lang not in VALID_LANGUAGES: - raise ValueError(f'Invalid choice: %s (choose from %s)' % (lang, ', '.join(map(repr, VALID_LANGUAGES)))) - self.chain.append((translator, lang)) - self.translators, self.langs = list(zip(*self.chain)) - - def has_offline(self) -> bool: - """ - Returns True if the chain contains offline translators. - """ - return any(translator in OFFLINE_TRANSLATORS for translator in self.translators) - - def __eq__(self, __o: object) -> bool: - if type(__o) is str: - return __o == self.translators[0] - return super.__eq__(self, __o) - async def prepare(chain: TranslatorChain): for key, tgt_lang in chain.chain: translator = get_translator(key) diff --git a/manga_translator/translators/groq.py b/manga_translator/translators/groq.py index 2de7becae..7b016ed92 100644 --- a/manga_translator/translators/groq.py +++ b/manga_translator/translators/groq.py @@ -2,7 +2,6 @@ import os from typing import List -from . 
import TranslatorChain from .common import CommonTranslator, MissingAPIKeyException from .keys import GROQ_API_KEY @@ -61,7 +60,7 @@ def __init__(self, check_groq_key=True): - def parse_args(self, args: TranslatorChain): + def parse_args(self, args): #todo: is nver set self.config = None diff --git a/manga_translator/utils/inference.py b/manga_translator/utils/inference.py index 47fef6b01..86c8e8d74 100644 --- a/manga_translator/utils/inference.py +++ b/manga_translator/utils/inference.py @@ -11,7 +11,6 @@ from .generic import ( BASE_PATH, - Context, download_url_with_progressbar, prompt_yes_no, replace_prefix, From 2d87f49e6a23905e605e568bed71db3ef7f81990 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Mon, 18 Nov 2024 20:16:33 +0100 Subject: [PATCH 15/75] load empty config --- manga_translator/args.py | 8 ++++---- manga_translator/config.py | 34 ++++++++++++++++++---------------- manga_translator/mode/local.py | 6 ++++-- 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/manga_translator/args.py b/manga_translator/args.py index c35e02f8a..761c057bd 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -2,6 +2,8 @@ import os from urllib.parse import unquote +from torch.optim.optimizer import required + from .detection import DETECTORS from .ocr import OCRS from .inpainting import INPAINTERS @@ -88,12 +90,13 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser.add_argument('--font-path', default='', type=file_path, help='Path to font file') parser.add_argument('--pre-dict', default=None, type=file_path, help='Path to the pre-translation dictionary file') parser.add_argument('--post-dict', default=None, type=file_path, help='Path to the post-translation dictionary file') +parser.add_argument('--kernel-size', default=3, type=int, help='Set the convolution kernel size of the text erasure area to completely clean up text residues') subparsers = 
parser.add_subparsers(dest='mode', required=True, help='Mode of operation') # Batch mode parser_batch = subparsers.add_parser('local', help='Run in batch translation mode') -parser_batch.add_argument('-i', '--input', required=True, type=dir_path, help='Path to an image folder') +parser_batch.add_argument('-i', '--input', required=True, type=path, nargs='+', help='Path to an image folder') parser_batch.add_argument('-o', '--dest', default='', type=str, help='Path to the destination folder for translated images') parser_batch.add_argument('-f', '--format', default=None, choices=OUTPUT_FORMATS, help='Output format of the translation.') parser_batch.add_argument('--overwrite', action='store_true', help='Overwrite already translated images') @@ -130,6 +133,3 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser_api.add_argument('--port', default=5003, type=int, help='Port for API service') parser_api.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal API server communication') - -# Generares dict with a default value for each argument -DEFAULT_ARGS = vars(parser.parse_args([])) diff --git a/manga_translator/config.py b/manga_translator/config.py index 428685b82..65df9e5f7 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -5,6 +5,8 @@ from typing import Optional from omegaconf import OmegaConf +from pydantic import BaseModel + # TODO: Refactor class TranslatorChain: @@ -132,12 +134,12 @@ class Translator(IntEnum): def __str__(self): return self.name -class Upscaler: +class Upscaler(IntEnum): waifu2x = 0 esrgan = 1 upscler4xultrasharp = 2 -class RenderConfig: +class RenderConfig(BaseModel): renderer: Renderer = Renderer.default """Render english text translated from manga with some additional typesetting. 
Ignores some other argument options""" alignment: Alignment = Alignment.auto @@ -189,7 +191,7 @@ def font_color_bg(self): f'Invalid --font-color value: {self.font_color}. Use a hex value such as FF0000') return self._font_color_bg -class UpscaleConfig: +class UpscaleConfig(BaseModel): upscaler: Upscaler = Upscaler.esrgan """Upscaler to use. --upscale-ratio has to be set for it to take effect""" revert_upscaling: bool = False @@ -197,7 +199,7 @@ class UpscaleConfig: upscale_ratio: Optional[int] = None """Image upscale ratio applied before detection. Can improve text detection.""" -class TranslatorConfig: +class TranslatorConfig(BaseModel): translator: Translator = Translator.sugoi """Language translator to use""" target_lang: str = 'ENG' #todo: validate VALID_LANGUAGES #todo: convert to enum @@ -237,7 +239,7 @@ def chatgpt_config(self): return self._gpt_config -class DetectorConfig: +class DetectorConfig(BaseModel): """""" detector: Detector =Detector.default """"Text detector used for creating a text mask from an image, DO NOT use craft for manga, it\'s not designed for it""" @@ -258,7 +260,7 @@ class DetectorConfig: unclip_ratio: float = 2.3 """How much to extend text skeleton to form bounding box""" -class InpainterConfig: +class InpainterConfig(BaseModel): inpainter: Inpainter = Inpainter.lama_large """Inpainting model to use""" inpainting_size: int = 2048 @@ -266,7 +268,7 @@ class InpainterConfig: inpainting_precision: InpaintPrecision = InpaintPrecision.fp32 """Inpainting precision for lama, use bf16 while you can.""" -class ColorizerConfig: +class ColorizerConfig(BaseModel): colorization_size: int = 576 """Size of image used for colorization. 
Set to -1 to use full image size""" denoise_sigma: int = 30 @@ -274,7 +276,7 @@ class ColorizerConfig: colorizer: Colorizer = Colorizer.none """Colorization model to use.""" -class OcrConfig: +class OcrConfig(BaseModel): use_mocr_merge: bool = False """Use bbox merge when Manga OCR inference.""" ocr: Ocr = Ocr.ocr48px @@ -284,7 +286,7 @@ class OcrConfig: ignore_bubble: int = 0 """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" -class Config: +class Config(BaseModel): # unclear pre_dict: Optional[str] = None post_dict: Optional[str] = None @@ -292,19 +294,19 @@ class Config: # json filter_text: Optional[str] = None """Filter regions by their text with a regex. Example usage: '.*badtext.*'""" - render: RenderConfig + render: RenderConfig = RenderConfig() """render configs""" - upscale: UpscaleConfig + upscale: UpscaleConfig = UpscaleConfig() """upscaler configs""" - translator: TranslatorConfig + translator: TranslatorConfig = TranslatorConfig() """tanslator configs""" - detector: DetectorConfig + detector: DetectorConfig = DetectorConfig() """detector configs""" - colorizer: ColorizerConfig + colorizer: ColorizerConfig = ColorizerConfig() """colorizer configs""" - inpainter: InpainterConfig + inpainter: InpainterConfig = InpainterConfig() """inpainter configs""" - ocr: OcrConfig + ocr: OcrConfig = OcrConfig() """Ocr configs""" # ? 
kernel_size: int = 3 diff --git a/manga_translator/mode/local.py b/manga_translator/mode/local.py index dc395cf80..f81673f83 100644 --- a/manga_translator/mode/local.py +++ b/manga_translator/mode/local.py @@ -1,3 +1,4 @@ +import json import os from typing import Union, List @@ -28,6 +29,7 @@ async def translate_path(self, path: str, dest: str = None, params: dict[str, Un """ Translates an image or folder (recursively) specified through the path. """ + config = Config(**json.loads('{}')) if not os.path.exists(path): raise FileNotFoundError(path) path = os.path.abspath(os.path.expanduser(path)) @@ -58,7 +60,7 @@ async def translate_path(self, path: str, dest: str = None, params: dict[str, Un else: p, ext = os.path.splitext(dest) _dest = f'{p}.{file_ext or ext[1:]}' - await self.translate_file(path, _dest, params) + await self.translate_file(path, _dest, params,config) elif os.path.isdir(path): # Determine destination folder path @@ -82,7 +84,7 @@ async def translate_path(self, path: str, dest: str = None, params: dict[str, Un p, ext = os.path.splitext(output_dest) output_dest = f'{p}.{file_ext or ext[1:]}' - if await self.translate_file(file_path, output_dest, params): + if await self.translate_file(file_path, output_dest, params, config): translated_count += 1 if translated_count == 0: logger.info('No further untranslated files found. 
Use --overwrite to write over existing translations.') From c6b39d3526172c39e2447d4e3acb2b4610fcb0ea Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 13:52:01 +0100 Subject: [PATCH 16/75] fix inpainting --- manga_translator/config.py | 13 ++++++------- manga_translator/inpainting/__init__.py | 3 ++- manga_translator/inpainting/none.py | 4 +++- manga_translator/inpainting/original.py | 4 +++- manga_translator/mode/local.py | 9 ++++++--- 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/manga_translator/config.py b/manga_translator/config.py index 65df9e5f7..4c34c5d36 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -166,7 +166,8 @@ class RenderConfig(BaseModel): """Line spacing is font_size * this value. Default is 0.01 for horizontal text and 0.2 for vertical.""" font_size: Optional[int] = None """Use fixed font size for rendering""" - + _font_color_fg = None + _font_color_bg = None @property def font_color_fg(self): if self.font_color and not self._font_color_fg: @@ -214,6 +215,8 @@ class TranslatorConfig(BaseModel): """Output of one translator goes in another. Example: --translator-chain "google:JPN;sugoi:ENG".""" selective_translation: Optional[str] = None """Select a translator based on detected language in image. Note the first translation service acts as default if the language isn\'t defined. 
Example: --translator-chain "google:JPN;sugoi:ENG".'""" + _translator_gen = None + _gpt_config = None @property def translator_gen(self): @@ -261,7 +264,7 @@ class DetectorConfig(BaseModel): """How much to extend text skeleton to form bounding box""" class InpainterConfig(BaseModel): - inpainter: Inpainter = Inpainter.lama_large + inpainter: Inpainter = Inpainter.none """Inpainting model to use""" inpainting_size: int = 2048 """Size of image used for inpainting (too large will result in OOM)""" @@ -287,11 +290,6 @@ class OcrConfig(BaseModel): """The threshold for ignoring text in non bubble areas, with valid values ranging from 1 to 50, does not ignore others. Recommendation 5 to 10. If it is too low, normal bubble areas may be ignored, and if it is too large, non bubble areas may be considered normal bubbles""" class Config(BaseModel): - # unclear - pre_dict: Optional[str] = None - post_dict: Optional[str] = None - - # json filter_text: Optional[str] = None """Filter regions by their text with a regex. 
Example usage: '.*badtext.*'""" render: RenderConfig = RenderConfig() @@ -313,6 +311,7 @@ class Config(BaseModel): """Set the convolution kernel size of the text erasure area to completely clean up text residues""" mask_dilation_offset: int = 0 """By how much to extend the text mask to remove left-over text pixels of the original image.""" + _filter_text = None @property def re_filter_text(self): diff --git a/manga_translator/inpainting/__init__.py b/manga_translator/inpainting/__init__.py index 52635c39f..9148f7640 100644 --- a/manga_translator/inpainting/__init__.py +++ b/manga_translator/inpainting/__init__.py @@ -38,4 +38,5 @@ async def dispatch(inpainter_key: Inpainter, image: np.ndarray, mask: np.ndarray inpainter = get_inpainter(inpainter_key) if isinstance(inpainter, OfflineInpainter): await inpainter.load(device) - return await inpainter.inpaint(image, mask, config or InpainterConfig(), inpainting_size, verbose) + config = config or InpainterConfig() + return await inpainter.inpaint(image, mask, config, inpainting_size, verbose) diff --git a/manga_translator/inpainting/none.py b/manga_translator/inpainting/none.py index 38d1c2a4c..39ef7090f 100644 --- a/manga_translator/inpainting/none.py +++ b/manga_translator/inpainting/none.py @@ -1,10 +1,12 @@ import numpy as np from .common import CommonInpainter +from ..config import InpainterConfig + class NoneInpainter(CommonInpainter): - async def _inpaint(self, image: np.ndarray, mask: np.ndarray, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: + async def _inpaint(self, image: np.ndarray, mask: np.ndarray, config: InpainterConfig, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: img_inpainted = np.copy(image) img_inpainted[mask > 0] = np.array([255, 255, 255], np.uint8) return img_inpainted diff --git a/manga_translator/inpainting/original.py b/manga_translator/inpainting/original.py index 464a1f766..c4639a2c1 100644 --- a/manga_translator/inpainting/original.py +++ 
b/manga_translator/inpainting/original.py @@ -1,8 +1,10 @@ import numpy as np from .common import CommonInpainter +from ..config import InpainterConfig + class OriginalInpainter(CommonInpainter): - async def _inpaint(self, image: np.ndarray, mask: np.ndarray, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: + async def _inpaint(self, image: np.ndarray, mask: np.ndarray, config: InpainterConfig, inpainting_size: int = 1024, verbose: bool = False) -> np.ndarray: return np.copy(image) diff --git a/manga_translator/mode/local.py b/manga_translator/mode/local.py index f81673f83..55fe76e8c 100644 --- a/manga_translator/mode/local.py +++ b/manga_translator/mode/local.py @@ -83,9 +83,12 @@ async def translate_path(self, path: str, dest: str = None, params: dict[str, Un output_dest = replace_prefix(file_path, path, _dest) p, ext = os.path.splitext(output_dest) output_dest = f'{p}.{file_ext or ext[1:]}' - - if await self.translate_file(file_path, output_dest, params, config): - translated_count += 1 + try: + if await self.translate_file(file_path, output_dest, params, config): + translated_count += 1 + except Exception as e: + logger.error(e) + raise e if translated_count == 0: logger.info('No further untranslated files found. 
Use --overwrite to write over existing translations.') else: From 43625c6e5c2f90e2d3e1fe5e07ec890a3927fce6 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 13:52:13 +0100 Subject: [PATCH 17/75] fix inpainting --- manga_translator/manga_translator.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 746a59be7..9bc471454 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -276,7 +276,7 @@ async def _translate(self, config: Config, ctx: Context) -> Context: ctx.mask = await self._run_mask_refinement(config, ctx) if self.verbose: - inpaint_input_img = await dispatch_inpainting(Inpainter.none, ctx.img_rgb, ctx.mask, config.inpainter, + inpaint_input_img = await dispatch_inpainting(Inpainter.none, ctx.img_rgb, ctx.mask, config.inpainter,config.inpainter.inpainting_size, self.using_gpu, self.verbose) cv2.imwrite(self._result_path('inpaint_input.png'), cv2.cvtColor(inpaint_input_img, cv2.COLOR_RGB2BGR)) cv2.imwrite(self._result_path('mask_final.png'), ctx.mask) @@ -284,12 +284,10 @@ async def _translate(self, config: Config, ctx: Context) -> Context: # -- Inpainting await self._report_progress('inpainting') ctx.img_inpainted = await self._run_inpainting(config, ctx) - ctx.gimp_mask = np.dstack((cv2.cvtColor(ctx.img_inpainted, cv2.COLOR_RGB2BGR), ctx.mask)) if self.verbose: cv2.imwrite(self._result_path('inpainted.png'), cv2.cvtColor(ctx.img_inpainted, cv2.COLOR_RGB2BGR)) - # -- Rendering await self._report_progress('rendering') ctx.img_rendered = await self._run_text_rendering(config, ctx) @@ -499,10 +497,10 @@ async def _run_text_translation(self, config: Config, ctx: Context): async def _run_mask_refinement(self, config: Config, ctx: Context): return await dispatch_mask_refinement(ctx.text_regions, ctx.img_rgb, ctx.mask_raw, 'fit_text', - 
config.mask_dilation_offset, config.detector.ignore_bubble, self.verbose,self.kernel_size) + config.mask_dilation_offset, config.ocr.ignore_bubble, self.verbose,self.kernel_size) async def _run_inpainting(self, config: Config, ctx: Context): - return await dispatch_inpainting(config.inpainter.inpainter, ctx.img_rgb, ctx.mask, config.inpainter, self.device, + return await dispatch_inpainting(config.inpainter.inpainter, ctx.img_rgb, ctx.mask, self.device, self.verbose) async def _run_text_rendering(self, config: Config, ctx: Context): @@ -515,7 +513,7 @@ async def _run_text_rendering(self, config: Config, ctx: Context): else: output = await dispatch_rendering(ctx.img_inpainted, ctx.text_regions, self.font_path, config.render.font_size, config.render.font_size_offset, - config.render.font_size_minimum, not config.render.no_hyphenation, config.render.render_mask, config.render.line_spacing) + config.render.font_size_minimum, not config.render.no_hyphenation, ctx.render_mask, config.render.line_spacing) return output def _result_path(self, path: str) -> str: From b73d1dd410b88e23cb48f5dfc1808c73b315a460 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 14:05:24 +0100 Subject: [PATCH 18/75] load from config --- manga_translator/args.py | 1 + manga_translator/mode/local.py | 12 +++++++++++- manga_translator/mode/web.py | 12 +++++++----- manga_translator/mode/ws.py | 14 +++++++------- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/manga_translator/args.py b/manga_translator/args.py index 761c057bd..e59f514a9 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -107,6 +107,7 @@ def _format_action_invocation(self, action: argparse.Action) -> str: g_batch.add_argument('--save-text-file', default='', type=str, help='Like --save-text but with a specified file path.') parser_batch.add_argument('--prep-manual', action='store_true', help='Prepare for manual typesetting by 
outputting blank, inpainted images, plus copies of the original for reference')
 parser_batch.add_argument('--save-quality', default=100, type=int, help='Quality of saved JPEG image, range from 0 to 100 with 100 being best')
+parser.add_argument('--config-file', default=None, type=str, help='Path to the config file')
 
 # Web mode
 parser_web = subparsers.add_parser('web', help='Run in web service mode')
diff --git a/manga_translator/mode/local.py b/manga_translator/mode/local.py
index 55fe76e8c..564efbc0f 100644
--- a/manga_translator/mode/local.py
+++ b/manga_translator/mode/local.py
@@ -29,12 +29,22 @@ async def translate_path(self, path: str, dest: str = None, params: dict[str, Un
         """
         Translates an image or folder (recursively) specified through the path.
         """
-        config = Config(**json.loads('{}'))
         if not os.path.exists(path):
             raise FileNotFoundError(path)
         path = os.path.abspath(os.path.expanduser(path))
         dest = os.path.abspath(os.path.expanduser(dest)) if dest else ''
         params = params or {}
+        config_file_path = params.get("config_file", None)
+        config_content = "{}"
+
+        if config_file_path:
+            try:
+                with open(config_file_path, 'r') as file:
+                    config_content = file.read()
+            except Exception as e:
+                print("Couldn't read file")
+                pass
+        config = Config(**json.loads(config_content))
 
         # Handle format
         file_ext = params.get('format')
diff --git a/manga_translator/mode/web.py b/manga_translator/mode/web.py
index 2acc9faf7..3f0ccb889 100644
--- a/manga_translator/mode/web.py
+++ b/manga_translator/mode/web.py
@@ -3,12 +3,13 @@
 
 import requests
 
-from manga_translator import MangaTranslator, logger, Context
+from manga_translator import logger, Context, Config
+from manga_translator.mode.local import MangaTranslatorLocal
 from manga_translator.translators import TRANSLATORS
 from manga_translator.utils import add_file_logger, remove_file_logger
 
 
-class MangaTranslatorWeb(MangaTranslator):
+class MangaTranslatorWeb(MangaTranslatorLocal):
     """
     Translator client that executes tasks on 
behalf of the webserver in web_main.py. """ @@ -110,10 +111,11 @@ async def listen(self, translation_params: dict = None): self._params = None self.result_sub_folder = '' - async def _run_text_translation(self, ctx: Context): + async def _run_text_translation(self, config: Config, ctx: Context): # Run machine translation as reference for manual translation (if `--translator=none` is not set) - text_regions = await super()._run_text_translation(ctx) + text_regions = await super()._run_text_translation(config, ctx) + #todo: manual never set if ctx.get('manual', False): logger.info('Waiting for user input from manual translation') requests.post(f'http://{self.host}:{self.port}/request-manual-internal', json={ @@ -142,7 +144,7 @@ async def _run_text_translation(self, ctx: Context): i = i - 1 else: text_regions[i].translation = translation - text_regions[i].target_lang = ctx.translator.langs[-1] + text_regions[i].target_lang = config.translator.translator_gen.langs[-1] i = i + 1 break elif 'cancel' in ret: diff --git a/manga_translator/mode/ws.py b/manga_translator/mode/ws.py index d696058f8..9c6067eb6 100644 --- a/manga_translator/mode/ws.py +++ b/manga_translator/mode/ws.py @@ -7,7 +7,7 @@ import numpy as np from PIL import Image -from manga_translator import logger, Context, MangaTranslator +from manga_translator import logger, Context, MangaTranslator, Config from manga_translator.utils import PriorityLock, Throttler @@ -152,7 +152,7 @@ async def server_process_inner(main_loop, logger_task, session, websocket, task) return True, output is not None - async def server_process(main_loop, session, websocket, task) -> bool: + async def server_process(main_loop, session, websocket, task): logger_task = logger.getChild(f'{task.id}') try: (success, has_translation_mask) = await server_process_inner(main_loop, logger_task, session, websocket, @@ -224,9 +224,9 @@ def server_thread(future, main_loop, server_loop): # create a future that is never done await future - async def 
_run_text_translation(self, ctx: Context): - coroutine = super()._run_text_translation(ctx) - if ctx.translator.has_offline(): + async def _run_text_translation(self, config: Config, ctx: Context): + coroutine = super()._run_text_translation(config, ctx) + if config.translator.translator_gen.has_offline(): return await coroutine else: task_id = self._task_id @@ -243,10 +243,10 @@ async def _run_text_translation(self, ctx: Context): self._websocket = websocket return result - async def _run_text_rendering(self, ctx: Context): + async def _run_text_rendering(self, config: Config, ctx: Context): render_mask = (ctx.mask >= 127).astype(np.uint8)[:, :, None] - output = await super()._run_text_rendering(ctx) + output = await super()._run_text_rendering(config, ctx) render_mask[np.sum(ctx.img_rgb != output, axis=2) > 0] = 1 ctx.render_mask = render_mask if self.verbose: From f473ed7a197f01aeefe70e603b33a924b42cef0f Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 18:38:34 +0100 Subject: [PATCH 19/75] server --- manga_translator/args.py | 2 +- server/args.py | 9 +++++ server/instance.py | 66 ++++++++++++++++++++++++++++++++++++ server/main.py | 73 ++++++++++++++++++++++++++++++++++++++++ server/myqueue.py | 25 ++++++++++++++ server/sent_data.py | 50 +++++++++++++++++++++++++++ 6 files changed, 224 insertions(+), 1 deletion(-) create mode 100644 server/args.py create mode 100644 server/instance.py create mode 100644 server/main.py create mode 100644 server/myqueue.py create mode 100644 server/sent_data.py diff --git a/manga_translator/args.py b/manga_translator/args.py index e59f514a9..0516c075b 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -129,7 +129,7 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser_ws.add_argument('--ws-url', default='ws://localhost:5000', type=str, help='Server URL for WebSocket mode') # API mode -parser_api = 
subparsers.add_parser('api', help='Run in API mode') +parser_api = subparsers.add_parser('shared', help='Run in API mode') parser_api.add_argument('--host', default='127.0.0.1', type=str, help='Host for API service') parser_api.add_argument('--port', default=5003, type=int, help='Port for API service') parser_api.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal API server communication') diff --git a/server/args.py b/server/args.py new file mode 100644 index 000000000..ac3d92b33 --- /dev/null +++ b/server/args.py @@ -0,0 +1,9 @@ +import argparse + + +def parse_arguments(): + parser = argparse.ArgumentParser(description="Specify host and port for the server.") + parser.add_argument('--host', type=str, default='127.0.0.1', help='The host address (default: 127.0.0.1)') + parser.add_argument('--port', type=int, default=8080, help='The port number (default: 8080)') + + return parser.parse_args() \ No newline at end of file diff --git a/server/instance.py b/server/instance.py new file mode 100644 index 000000000..68260c962 --- /dev/null +++ b/server/instance.py @@ -0,0 +1,66 @@ +from asyncio import Event, Lock +from typing import List, Optional + +from PIL import Image +from pydantic import BaseModel + +from manga_translator import Config +from server.sent_data import fetch_data_stream, NotifyType + + +class ExecutorInstance(BaseModel): + ip: str + port: int + busy: bool = False + + def free_executor(self): + self.busy = False + + async def sent(self, image: Image, config: Config, sender: NotifyType): + await fetch_data_stream("http://"+self.ip+":"+str(self.port)+"/execute/translate", image, config, sender) + +class Executors: + def __init__(self): + self.list: List[ExecutorInstance] = [] + self.lock: Lock = Lock() + self.event = Event() + + def register(self, instance: ExecutorInstance): + self.list.append(instance) + + def free_executors(self) -> int: + return len([item for item in self.list if not item.busy]) + 
+ async def _find_instance(self): + while True: + instance = next((x for x in self.list if x.busy == False), None) + if instance is not None: + return instance + #todo: cricial error: warn should never happen + await self.event.wait() + + async def find_executor(self) -> ExecutorInstance: + async with self.lock: # Using async with for lock management + instance = await self._find_instance() + instance.busy = True + return instance + + def free_executor(self, instance: ExecutorInstance): + instance.free_executor() + self.event.set() + self.event.clear() + +def example_notify(a: int, b: Optional[int] = None) -> None: + print(f"Notify called with a={a} and b={b}") + +async def main(): + executor = ExecutorInstance(ip="127.0.0.1", port=5003) + + image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") + config = Config() + + await executor.sent(image, config, example_notify) + +if __name__ == "__main__": + import asyncio + asyncio.run(main()) \ No newline at end of file diff --git a/server/main.py b/server/main.py new file mode 100644 index 000000000..8da8c2677 --- /dev/null +++ b/server/main.py @@ -0,0 +1,73 @@ +import asyncio +from typing import List, Dict, Optional, Callable + +from fastapi import FastAPI, Request, HTTPException + +from server.instance import ExecutorInstance, Executors +from server.myqueue import TaskQueue +from server.sent_data import NotifyType + +app = FastAPI() +executor_instances: Executors = Executors() +task_queue = TaskQueue() + +@app.post("/register") +async def register_instance(instance: ExecutorInstance, request: Request): + instance.ip = request.client.host + executor_instances.register(instance) + return {"code": 0} + + + +async def wait(task, notify: NotifyType): + """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. 
when done the item will be removed from the queue and result will be returned""" + while True: + queue_pos = task_queue.get_pos(task) + if notify: + notify(3, queue_pos) + if queue_pos < executor_instances.free_executors(): + instance = await executor_instances.find_executor() + task_queue.remove(task) + if notify: + notify(4, 0) + result = "" #todo: implement logic + instance.busy = False + instance.event.set() + + if notify: + return + else: + return result + else: + if queue_pos == 0: + raise HTTPException(500, detail="No translator registered") + await task_queue.wait_for_event() +@app.post("/json") +async def json(req: TranslateRequest): + pass + +@app.post("/bytes") +async def bytes(req: TranslateRequest): + pass + +@app.post("/image") +async def image(req: TranslateRequest): + pass + +@app.post("/stream_json") +async def image(req: TranslateRequest): + pass + +@app.post("/stream_bytes") +async def image(req: TranslateRequest): + pass + +@app.post("/stream_image") +async def image(req: TranslateRequest): + pass + +if __name__ == '__main__': + import uvicorn + from args import parse_arguments + args = parse_arguments() + uvicorn.run(app, host=args.host, port=args.port) \ No newline at end of file diff --git a/server/myqueue.py b/server/myqueue.py new file mode 100644 index 000000000..61c1abac3 --- /dev/null +++ b/server/myqueue.py @@ -0,0 +1,25 @@ +import asyncio +from typing import List, Dict + + +class TaskQueue: + def __init__(self): + self.queue: List[Dict] = [] + self.queue_event: asyncio.Event = asyncio.Event() + + def add_task(self, task): + self.queue.append(task) + + def get_pos(self, task): + return self.queue.index(task) + + def update_event(self): + self.queue_event.set() + self.queue_event.clear() + + def remove(self, task): + self.queue.remove(task) + self.update_event() + + async def wait_for_event(self): + await self.queue_event.wait() \ No newline at end of file diff --git a/server/sent_data.py b/server/sent_data.py new file mode 100644 index 
000000000..619dca71b --- /dev/null +++ b/server/sent_data.py @@ -0,0 +1,50 @@ +import pickle +from typing import Mapping, Optional, Callable + +import aiohttp +from PIL.Image import Image +from fastapi import HTTPException + +from manga_translator import Config + +NotifyType = Optional[Callable[[int, Optional[int]], None]] + +async def fetch_data_stream(url, image: Image, config: Config, sender: NotifyType, headers: Mapping[str, str] = {}): + attributes = {"image": image, "config": config} + data = pickle.dumps(attributes) + + async with aiohttp.ClientSession() as session: + async with session.post(url, data=data, headers=headers) as response: + if response.status == 200: + await process_stream(response, sender) + else: + raise HTTPException(response.status, detail=response.text()) + + +async def process_stream(response, sender: NotifyType): + buffer = b'' + + async for chunk in response.content.iter_any(): + if chunk: + buffer += chunk + await handle_buffer(buffer, sender) + + +async def handle_buffer(buffer, sender: NotifyType): + while len(buffer) >= 5: + status, expected_size = extract_header(buffer) + + if len(buffer) >= 5 + expected_size: + data = buffer[5:5 + expected_size] + sender(status, data) + buffer = buffer[5 + expected_size:] + else: + break + + +def extract_header(buffer): + """Extract the status and expected size from the buffer.""" + status = int.from_bytes(buffer[0:1], byteorder='big') + expected_size = int.from_bytes(buffer[1:5], byteorder='big') + return status, expected_size + From f8095685d44a1d5f133b0f3ffbfd14f858a93449 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 18:40:02 +0100 Subject: [PATCH 20/75] fix pre & post dict --- manga_translator/manga_translator.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 9bc471454..ffd750c82 100644 --- 
a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -93,6 +93,8 @@ class MangaTranslator: result_sub_folder: str def __init__(self, params: dict = None): + self.pre_dict = params.get('pre_dict', None) + self.post_dict = params.get('post_dict', None) self.font_path = None self.use_mtpe = False self.kernel_size = None @@ -231,7 +233,7 @@ async def _translate(self, config: Config, ctx: Context) -> Context: return await self._revert_upscale(config, ctx) # Apply pre-dictionary after OCR - pre_dict = load_dictionary(config.pre_dict) + pre_dict = load_dictionary(self.pre_dict) pre_replacements = [] for textline in ctx.textlines: original = textline.text @@ -381,7 +383,7 @@ async def _run_text_translation(self, config: Config, ctx: Context): region._direction = config.render.direction # Apply post dictionary after translating - post_dict = load_dictionary(config.post_dict) + post_dict = load_dictionary(self.post_dict) post_replacements = [] for region in ctx.text_regions: original = region.translation From bc7f2fcf528834d9b80e6f21f41bd988828b6503 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 19:04:12 +0100 Subject: [PATCH 21/75] python sucks --- execute_test.py | 5 ++++- server/instance.py | 12 ++++++++---- server/sent_data.py | 7 +++++-- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/execute_test.py b/execute_test.py index 3a1d0e8af..8705fb2b1 100644 --- a/execute_test.py +++ b/execute_test.py @@ -4,6 +4,9 @@ import requests from PIL import Image +from manga_translator import Config + + async def execute_method(method_name, attributes): url = f"http://127.0.0.1:5003/execute/{method_name}" headers = {'Content-Type': 'application/octet-stream'} @@ -39,5 +42,5 @@ async def execute_method(method_name, attributes): if __name__ == '__main__': image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") - attributes = {"image": image, 
"params": {"translator": "none", "inpainter": "none"}} + attributes = {"image": image, "config": Config()} asyncio.run(execute_method("translate", attributes)) \ No newline at end of file diff --git a/server/instance.py b/server/instance.py index 68260c962..da550613c 100644 --- a/server/instance.py +++ b/server/instance.py @@ -1,3 +1,4 @@ +import pickle from asyncio import Event, Lock from typing import List, Optional @@ -16,7 +17,7 @@ class ExecutorInstance(BaseModel): def free_executor(self): self.busy = False - async def sent(self, image: Image, config: Config, sender: NotifyType): + async def sent_stream(self, image: Image, config: Config, sender: NotifyType): await fetch_data_stream("http://"+self.ip+":"+str(self.port)+"/execute/translate", image, config, sender) class Executors: @@ -50,8 +51,11 @@ def free_executor(self, instance: ExecutorInstance): self.event.set() self.event.clear() -def example_notify(a: int, b: Optional[int] = None) -> None: - print(f"Notify called with a={a} and b={b}") +def example_notify(a: int, b) -> None: + if a == 0: + print(pickle.loads(b)) + else: + print(f"Notify called with a={a} and b={b}") async def main(): executor = ExecutorInstance(ip="127.0.0.1", port=5003) @@ -59,7 +63,7 @@ async def main(): image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") config = Config() - await executor.sent(image, config, example_notify) + await executor.sent_stream(image, config, example_notify) if __name__ == "__main__": import asyncio diff --git a/server/sent_data.py b/server/sent_data.py index 619dca71b..02053bf80 100644 --- a/server/sent_data.py +++ b/server/sent_data.py @@ -1,3 +1,4 @@ +import asyncio import pickle from typing import Mapping, Optional, Callable @@ -27,10 +28,11 @@ async def process_stream(response, sender: NotifyType): async for chunk in response.content.iter_any(): if chunk: buffer += chunk - await handle_buffer(buffer, sender) + buffer = handle_buffer(buffer, sender) -async def 
handle_buffer(buffer, sender: NotifyType): + +def handle_buffer(buffer, sender: NotifyType): while len(buffer) >= 5: status, expected_size = extract_header(buffer) @@ -40,6 +42,7 @@ async def handle_buffer(buffer, sender: NotifyType): buffer = buffer[5 + expected_size:] else: break + return buffer def extract_header(buffer): From 30d9b3559d74a233f99f9e89d22f3fba522e9fa8 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 22:02:42 +0100 Subject: [PATCH 22/75] api logic --- requirements.txt | 3 +- server/args.py | 2 +- server/instance.py | 4 ++ server/main.py | 151 ++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 143 insertions(+), 17 deletions(-) diff --git a/requirements.txt b/requirements.txt index 25c2e953b..e843bf354 100644 --- a/requirements.txt +++ b/requirements.txt @@ -51,4 +51,5 @@ accelerate bitsandbytes uvicorn fastapi -pydantic \ No newline at end of file +pydantic +python-multipart \ No newline at end of file diff --git a/server/args.py b/server/args.py index ac3d92b33..a00e24a22 100644 --- a/server/args.py +++ b/server/args.py @@ -4,6 +4,6 @@ def parse_arguments(): parser = argparse.ArgumentParser(description="Specify host and port for the server.") parser.add_argument('--host', type=str, default='127.0.0.1', help='The host address (default: 127.0.0.1)') - parser.add_argument('--port', type=int, default=8080, help='The port number (default: 8080)') + parser.add_argument('--port', type=int, default=8000, help='The port number (default: 8080)') return parser.parse_args() \ No newline at end of file diff --git a/server/instance.py b/server/instance.py index da550613c..0b426b606 100644 --- a/server/instance.py +++ b/server/instance.py @@ -7,6 +7,7 @@ from manga_translator import Config from server.sent_data import fetch_data_stream, NotifyType +from fastapi import Response class ExecutorInstance(BaseModel): @@ -17,6 +18,9 @@ class ExecutorInstance(BaseModel): def 
free_executor(self): self.busy = False + async def sent(self) -> Response: + pass + async def sent_stream(self, image: Image, config: Config, sender: NotifyType): await fetch_data_stream("http://"+self.ip+":"+str(self.port)+"/execute/translate", image, config, sender) diff --git a/server/main.py b/server/main.py index 8da8c2677..3c3507312 100644 --- a/server/main.py +++ b/server/main.py @@ -1,16 +1,34 @@ import asyncio -from typing import List, Dict, Optional, Callable +import builtins +import io +import re +from base64 import b64decode +from typing import Union +import requests +from PIL import Image from fastapi import FastAPI, Request, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from starlette.responses import StreamingResponse +from manga_translator import Config from server.instance import ExecutorInstance, Executors from server.myqueue import TaskQueue from server.sent_data import NotifyType app = FastAPI() + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) executor_instances: Executors = Executors() task_queue = TaskQueue() + @app.post("/register") async def register_instance(instance: ExecutorInstance, request: Request): instance.ip = request.client.host @@ -18,6 +36,44 @@ async def register_instance(instance: ExecutorInstance, request: Request): return {"code": 0} +async def to_pil_image(image: Union[str, bytes]) -> Image.Image: + try: + if isinstance(image, builtins.bytes): + image = Image.open(io.BytesIO(image)) + return image + else: + if re.match(r'^data:image/.+;base64,', image): + value = image.split(',', 1)[1] + image_data = b64decode(value) + image = Image.open(io.BytesIO(image_data)) + return image + else: + response = requests.get(image) + image = Image.open(io.BytesIO(response.content)) + return image + except Exception as e: + raise HTTPException(status_code=422, detail=str(e)) + + +async def multi_content_type(request: Request): + 
content_type = request.headers.get("content-type") + + if content_type and content_type.startswith("multipart/form-data"): + form = await request.form() + config = form.get("config", "{}") + image = form.get("image") + image_content = await image.read() + config = Config.parse_raw(config) + return config, image_content + elif content_type and content_type.startswith("application/json"): + body = await request.json() + config = Config(**body.get("config", {})) + image = body.get("image") + return config, image + + else: + raise HTTPException(status_code=400, detail="Unsupported Content-Type") + async def wait(task, notify: NotifyType): """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. when done the item will be removed from the queue and result will be returned""" @@ -30,7 +86,11 @@ async def wait(task, notify: NotifyType): task_queue.remove(task) if notify: notify(4, 0) - result = "" #todo: implement logic + if notify: + await instance.sent_stream(task.image, task.config, notify) + else: + result = await instance.sent(task.image, task.config) + instance.busy = False instance.event.set() @@ -42,32 +102,93 @@ async def wait(task, notify: NotifyType): if queue_pos == 0: raise HTTPException(500, detail="No translator registered") await task_queue.wait_for_event() + + +async def stream(messages): + while True: + message = await messages.get() + yield message + if message[0] == 0 or message[0] == 2: + break + +def notify(code, data, transform_to_bytes, messages): + if code == 0: + result_bytes = transform_to_bytes(data) + encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes + messages.put_nowait(encoded_result) + else: + result_bytes = str(data).encode("utf-8") + encoded_result =code.to_bytes(1, 'big') + len(result_bytes).to_bytes(4, 'big') + result_bytes + messages.put_nowait(encoded_result) + +def transform_to_image(data): + return b"" + @app.post("/json") 
-async def json(req: TranslateRequest): - pass +async def json(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + data = await wait((data, img), None) @app.post("/bytes") -async def bytes(req: TranslateRequest): - pass +async def bytes(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + data = await wait((data, img), None) + @app.post("/image") -async def image(req: TranslateRequest): - pass +async def image(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + data = await wait((data, img), None) + @app.post("/stream_json") -async def image(req: TranslateRequest): - pass +async def stream_json(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + + messages = asyncio.Queue() + + def example_notify(code: int, data) -> None: + notify(code, data, transform_to_image, messages) + + streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") + asyncio.create_task(wait((data, img), example_notify)) + return streaming_response @app.post("/stream_bytes") -async def image(req: TranslateRequest): - pass +async def stream_bytes(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + + messages = asyncio.Queue() + + def example_notify(code: int, data) -> None: + notify(code, data, transform_to_image, messages) + + streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") + asyncio.create_task(wait((data, img), example_notify)) + return streaming_response @app.post("/stream_image") -async def image(req: TranslateRequest): - pass +async def stream_image(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + + messages = asyncio.Queue() + + def example_notify(code: int, data) -> None: + notify(code, data, transform_to_image, messages) + + streaming_response = 
StreamingResponse(stream(messages), media_type="application/octet-stream") + asyncio.create_task(wait((data, img), example_notify)) + return streaming_response if __name__ == '__main__': import uvicorn from args import parse_arguments + args = parse_arguments() - uvicorn.run(app, host=args.host, port=args.port) \ No newline at end of file + uvicorn.run(app, host=args.host, port=args.port) From b66237ef6892458bea88a85f5c47f943e0991bfa Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 22:52:22 +0100 Subject: [PATCH 23/75] replace api --- manga_translator/__main__.py | 4 - manga_translator/mode/api.py | 292 ----------------------------------- server/instance.py | 9 +- server/main.py | 36 ++++- server/sent_data.py | 10 ++ server/to_json.py | 66 ++++++++ 6 files changed, 108 insertions(+), 309 deletions(-) delete mode 100644 manga_translator/mode/api.py create mode 100644 server/to_json.py diff --git a/manga_translator/__main__.py b/manga_translator/__main__.py index db711b2c8..eb348798f 100644 --- a/manga_translator/__main__.py +++ b/manga_translator/__main__.py @@ -79,10 +79,6 @@ async def dispatch(args: Namespace): translator = MangaTranslatorWS(args_dict) await translator.listen(args_dict) - elif args.mode == 'api': - from manga_translator.mode.api import MangaTranslatorAPI - translator = MangaTranslatorAPI(args_dict) - await translator.listen(args_dict) elif args.mode == 'shared': translator = MangaShare(args_dict) await translator.listen(args_dict) diff --git a/manga_translator/mode/api.py b/manga_translator/mode/api.py deleted file mode 100644 index fc71d32e3..000000000 --- a/manga_translator/mode/api.py +++ /dev/null @@ -1,292 +0,0 @@ -# Experimental. May be replaced by a refactored server/web_main.py in the future. 
-import asyncio -import base64 -import io - -import cv2 -import numpy as np -from PIL import Image -from aiohttp import web -from aiohttp.web_middlewares import middleware -from marshmallow import fields, Schema, ValidationError - -from manga_translator import MangaTranslator, Context, TranslationInterrupt, logger -from manga_translator.args import translator_chain -from manga_translator.detection import DETECTORS -from manga_translator.inpainting import INPAINTERS -from manga_translator.manga_translator import _preprocess_params -from manga_translator.ocr import OCRS -from manga_translator.translators import VALID_LANGUAGES, TRANSLATORS -from manga_translator.upscaling import UPSCALERS - - -class MangaTranslatorAPI(MangaTranslator): - def __init__(self, params: dict = None): - import nest_asyncio - nest_asyncio.apply() - super().__init__(params) - self.host = params.get('host', '127.0.0.1') - self.port = params.get('port', '5003') - self.log_web = params.get('log_web', False) - self.ignore_errors = params.get('ignore_errors', True) - self._task_id = None - self._params = None - self.params = params - self.queue = [] - - async def wait_queue(self, id: int): - while self.queue[0] != id: - await asyncio.sleep(0.05) - - def remove_from_queue(self, id: int): - self.queue.remove(id) - - def generate_id(self): - try: - x = max(self.queue) - except: - x = 0 - return x + 1 - - def middleware_factory(self): - @middleware - async def sample_middleware(request, handler): - id = self.generate_id() - self.queue.append(id) - try: - await self.wait_queue(id) - except Exception as e: - print(e) - try: - # todo make cancellable - response = await handler(request) - except: - response = web.json_response({'error': "Internal Server Error", 'status': 500}, - status=500) - # Handle cases where a user leaves the queue, request fails, or is completed - try: - self.remove_from_queue(id) - except Exception as e: - print(e) - return response - - return sample_middleware - - async def 
get_file(self, image, base64Images, url) -> Image: - if image is not None: - content = image.file.read() - elif base64Images is not None: - base64Images = base64Images - if base64Images.__contains__('base64,'): - base64Images = base64Images.split('base64,')[1] - content = base64.b64decode(base64Images) - elif url is not None: - from aiohttp import ClientSession - async with ClientSession() as session: - async with session.get(url) as resp: - if resp.status == 200: - content = await resp.read() - else: - return web.json_response({'status': 'error'}) - else: - raise ValidationError("donest exist") - img = Image.open(io.BytesIO(content)) - - img.verify() - img = Image.open(io.BytesIO(content)) - if img.width * img.height > 8000 ** 2: - raise ValidationError("to large") - return img - - async def listen(self, translation_params: dict = None): - self.params = translation_params - app = web.Application(client_max_size=1024 * 1024 * 50, middlewares=[self.middleware_factory()]) - - routes = web.RouteTableDef() - run_until_state = '' - - async def hook(state, finished): - if run_until_state and run_until_state == state and not finished: - raise TranslationInterrupt() - - self.add_progress_hook(hook) - - @routes.post("/get_text") - async def text_api(req): - nonlocal run_until_state - run_until_state = 'translating' - return await self.err_handling(self.run_translate, req, self.format_translate) - - @routes.post("/translate") - async def translate_api(req): - nonlocal run_until_state - run_until_state = 'after-translating' - return await self.err_handling(self.run_translate, req, self.format_translate) - - @routes.post("/inpaint_translate") - async def inpaint_translate_api(req): - nonlocal run_until_state - run_until_state = 'rendering' - return await self.err_handling(self.run_translate, req, self.format_translate) - - @routes.post("/colorize_translate") - async def colorize_translate_api(req): - nonlocal run_until_state - run_until_state = 'rendering' - return await 
self.err_handling(self.run_translate, req, self.format_translate, True) - - # #@routes.post("/file") - # async def file_api(req): - # #TODO: return file - # return await self.err_handling(self.file_exec, req, None) - - app.add_routes(routes) - web.run_app(app, host=self.host, port=self.port) - - async def run_translate(self, translation_params, img): - return await self.translate(img, translation_params) - - async def err_handling(self, func, req, format, ri=False): - try: - if req.content_type == 'application/json' or req.content_type == 'multipart/form-data': - if req.content_type == 'application/json': - d = await req.json() - else: - d = await req.post() - schema = self.PostSchema() - data = schema.load(d) - if 'translator_chain' in data: - data['translator_chain'] = translator_chain(data['translator_chain']) - if 'selective_translation' in data: - data['selective_translation'] = translator_chain(data['selective_translation']) - ctx = Context(**dict(self.params, **data)) - _preprocess_params(ctx) - if data.get('image') is None and data.get('base64Images') is None and data.get('url') is None: - return web.json_response({'error': "Missing input", 'status': 422}) - fil = await self.get_file(data.get('image'), data.get('base64Images'), data.get('url')) - if 'image' in data: - del data['image'] - if 'base64Images' in data: - del data['base64Images'] - if 'url' in data: - del data['url'] - attempts = 0 - while ctx.attempts == -1 or attempts <= ctx.attempts: - if attempts > 0: - logger.info(f'Retrying translation! 
Attempt {attempts}' + ( - f' of {ctx.attempts}' if ctx.attempts != -1 else '')) - try: - await func(ctx, fil) - break - except TranslationInterrupt: - break - except Exception as e: - print(e) - attempts += 1 - if ctx.attempts != -1 and attempts > ctx.attempts: - return web.json_response({'error': "Internal Server Error", 'status': 500}, - status=500) - try: - return format(ctx, ri) - except Exception as e: - print(e) - return web.json_response({'error': "Failed to format", 'status': 500}, - status=500) - else: - return web.json_response({'error': "Wrong content type: " + req.content_type, 'status': 415}, - status=415) - except ValueError as e: - print(e) - return web.json_response({'error': "Wrong input type", 'status': 422}, status=422) - - except ValidationError as e: - print(e) - return web.json_response({'error': "Input invalid", 'status': 422}, status=422) - - def format_translate(self, ctx: Context, return_image: bool): - text_regions = ctx.text_regions - inpaint = ctx.img_inpainted - results = [] - if 'overlay_ext' in ctx: - overlay_ext = ctx['overlay_ext'] - else: - overlay_ext = 'jpg' - for i, blk in enumerate(text_regions): - minX, minY, maxX, maxY = blk.xyxy - if 'translations' in ctx: - trans = {key: value[i] for key, value in ctx['translations'].items()} - else: - trans = {} - trans["originalText"] = text_regions[i].text - if inpaint is not None: - overlay = inpaint[minY:maxY, minX:maxX] - - retval, buffer = cv2.imencode('.' 
+ overlay_ext, overlay) - jpg_as_text = base64.b64encode(buffer) - background = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") - else: - background = None - text_region = text_regions[i] - text_region.adjust_bg_color = False - color1, color2 = text_region.get_font_colors() - - results.append({ - 'text': trans, - 'minX': int(minX), - 'minY': int(minY), - 'maxX': int(maxX), - 'maxY': int(maxY), - 'textColor': { - 'fg': color1.tolist(), - 'bg': color2.tolist() - }, - 'language': text_regions[i].source_lang, - 'background': background - }) - if return_image and ctx.img_colorized is not None: - retval, buffer = cv2.imencode('.' + overlay_ext, np.array(ctx.img_colorized)) - jpg_as_text = base64.b64encode(buffer) - img = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") - else: - img = None - return web.json_response({'details': results, 'img': img}) - - class PostSchema(Schema): - target_lang = fields.Str(required=False, validate=lambda a: a.upper() in VALID_LANGUAGES) - detector = fields.Str(required=False, validate=lambda a: a.lower() in DETECTORS) - ocr = fields.Str(required=False, validate=lambda a: a.lower() in OCRS) - inpainter = fields.Str(required=False, validate=lambda a: a.lower() in INPAINTERS) - upscaler = fields.Str(required=False, validate=lambda a: a.lower() in UPSCALERS) - translator = fields.Str(required=False, validate=lambda a: a.lower() in TRANSLATORS) - direction = fields.Str(required=False, validate=lambda a: a.lower() in {'auto', 'h', 'v'}) - skip_language = fields.Str(required=False) - upscale_ratio = fields.Integer(required=False) - translator_chain = fields.Str(required=False) - selective_translation = fields.Str(required=False) - attempts = fields.Integer(required=False) - detection_size = fields.Integer(required=False) - text_threshold = fields.Float(required=False) - box_threshold = fields.Float(required=False) - unclip_ratio = fields.Float(required=False) - inpainting_size = 
fields.Integer(required=False) - det_rotate = fields.Bool(required=False) - det_auto_rotate = fields.Bool(required=False) - det_invert = fields.Bool(required=False) - det_gamma_correct = fields.Bool(required=False) - min_text_length = fields.Integer(required=False) - colorization_size = fields.Integer(required=False) - denoise_sigma = fields.Integer(required=False) - mask_dilation_offset = fields.Integer(required=False) - ignore_bubble = fields.Integer(required=False) - gpt_config = fields.String(required=False) - filter_text = fields.String(required=False) - - # api specific - overlay_ext = fields.Str(required=False) - base64Images = fields.Raw(required=False) - image = fields.Raw(required=False) - url = fields.Raw(required=False) - - # no functionality except preventing errors when given - fingerprint = fields.Raw(required=False) - clientUuid = fields.Raw(required=False) \ No newline at end of file diff --git a/server/instance.py b/server/instance.py index 0b426b606..c27a0c415 100644 --- a/server/instance.py +++ b/server/instance.py @@ -6,8 +6,7 @@ from pydantic import BaseModel from manga_translator import Config -from server.sent_data import fetch_data_stream, NotifyType -from fastapi import Response +from server.sent_data import fetch_data_stream, NotifyType, fetch_data class ExecutorInstance(BaseModel): @@ -18,15 +17,15 @@ class ExecutorInstance(BaseModel): def free_executor(self): self.busy = False - async def sent(self) -> Response: - pass + async def sent(self, image: Image, config: Config): + return await fetch_data("http://"+self.ip+":"+str(self.port)+"/simple_execute/translate", image, config) async def sent_stream(self, image: Image, config: Config, sender: NotifyType): await fetch_data_stream("http://"+self.ip+":"+str(self.port)+"/execute/translate", image, config, sender) class Executors: def __init__(self): - self.list: List[ExecutorInstance] = [] + self.list: List[ExecutorInstance] = [ExecutorInstance(ip="127.0.0.1", port=5003)] self.lock: Lock = 
Lock() self.event = Event() diff --git a/server/main.py b/server/main.py index 3c3507312..cda01c0a9 100644 --- a/server/main.py +++ b/server/main.py @@ -9,12 +9,13 @@ from PIL import Image from fastapi import FastAPI, Request, HTTPException from fastapi.middleware.cors import CORSMiddleware -from starlette.responses import StreamingResponse +from starlette.responses import StreamingResponse, JSONResponse -from manga_translator import Config +from manga_translator import Config, Context from server.instance import ExecutorInstance, Executors from server.myqueue import TaskQueue from server.sent_data import NotifyType +from server.to_json import to_json app = FastAPI() @@ -91,8 +92,7 @@ async def wait(task, notify: NotifyType): else: result = await instance.sent(task.image, task.config) - instance.busy = False - instance.event.set() + executor_instances.free_executor(instance) if notify: return @@ -127,22 +127,42 @@ def transform_to_image(data): @app.post("/json") async def json(req: Request): data, img = await multi_content_type(req) - img = await to_pil_image(img) - data = await wait((data, img), None) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) + + data = await wait(ctx, None) + json = to_json(data) + return JSONResponse(content=json) @app.post("/bytes") async def bytes(req: Request): data, img = await multi_content_type(req) - img = await to_pil_image(img) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) data = await wait((data, img), None) @app.post("/image") async def image(req: Request): data, img = await multi_content_type(req) - img = await to_pil_image(img) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) + data = await wait((data, img), None) + img_byte_arr = io.BytesIO() + data.result.save(img_byte_arr, format="PNG") + img_byte_arr.seek(0) + return StreamingResponse(img_byte_arr, 
media_type="image/png") @app.post("/stream_json") async def stream_json(req: Request): diff --git a/server/sent_data.py b/server/sent_data.py index 02053bf80..2945cc9f7 100644 --- a/server/sent_data.py +++ b/server/sent_data.py @@ -21,6 +21,16 @@ async def fetch_data_stream(url, image: Image, config: Config, sender: NotifyTyp else: raise HTTPException(response.status, detail=response.text()) +async def fetch_data(url, image: Image, config: Config, headers: Mapping[str, str] = {}): + attributes = {"image": image, "config": config} + data = pickle.dumps(attributes) + + async with aiohttp.ClientSession() as session: + async with session.post(url, data=data, headers=headers) as response: + if response.status == 200: + return pickle.loads(await response.read()) + else: + raise HTTPException(response.status, detail=response.text()) async def process_stream(response, sender: NotifyType): buffer = b'' diff --git a/server/to_json.py b/server/to_json.py new file mode 100644 index 000000000..356e74104 --- /dev/null +++ b/server/to_json.py @@ -0,0 +1,66 @@ +import base64 +from typing import Dict, List + +import cv2 + +from manga_translator import Context +from manga_translator.utils import TextBlock + + +#input:PIL, +#result:PIL +#img_colorized: PIL +#upscaled:PIL +#img_rgb:array +#img_alpha:None +#textlines:list[Quadrilateral] +#text_regions:list[TextBlock] +#translations: map[str, arr[str]] +#img_inpainted: array +#gimp_mask:array +#img_rendered: array +#mask_raw: array +#mask:array +def to_json(ctx: Context): + text_regions:list[TextBlock] = ctx.text_regions + inpaint = ctx.img_inpainted + translations:Dict[str, List[str]] = ctx.translations + results = [] + if 'overlay_ext' in ctx: + #todo: unreachable + overlay_ext = ctx['overlay_ext'] + else: + overlay_ext = 'jpg' + for i, blk in enumerate(text_regions): + minX, minY, maxX, maxY = blk.xyxy + if 'translations' in ctx: + trans = {key: value[i] for key, value in translations.items()} + else: + trans = {} + 
trans["originalText"] = text_regions[i].text + if inpaint is not None: + overlay = inpaint[minY:maxY, minX:maxX] + + retval, buffer = cv2.imencode('.' + overlay_ext, overlay) + jpg_as_text = base64.b64encode(buffer) + background = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") + else: + background = None + text_region = text_regions[i] + text_region.adjust_bg_color = False + color1, color2 = text_region.get_font_colors() + + results.append({ + 'text': trans, + 'minX': int(minX), + 'minY': int(minY), + 'maxX': int(maxX), + 'maxY': int(maxY), + 'textColor': { + 'fg': color1.tolist(), + 'bg': color2.tolist() + }, + 'language': text_regions[i].source_lang, + 'background': background + }) + return results \ No newline at end of file From 897070cd691ee2e906faae87f4b528fa2620fe6e Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 23:03:20 +0100 Subject: [PATCH 24/75] server refactor --- server/instance.py | 19 +-- server/main.py | 140 ++---------------- server/myqueue.py | 35 ++++- server/request_extraction.py | 61 ++++++++ .../{sent_data.py => sent_data_internal.py} | 0 server/streaming.py | 16 ++ 6 files changed, 126 insertions(+), 145 deletions(-) create mode 100644 server/request_extraction.py rename server/{sent_data.py => sent_data_internal.py} (100%) create mode 100644 server/streaming.py diff --git a/server/instance.py b/server/instance.py index c27a0c415..7c02e695f 100644 --- a/server/instance.py +++ b/server/instance.py @@ -6,7 +6,7 @@ from pydantic import BaseModel from manga_translator import Config -from server.sent_data import fetch_data_stream, NotifyType, fetch_data +from server.sent_data_internal import fetch_data_stream, NotifyType, fetch_data class ExecutorInstance(BaseModel): @@ -54,20 +54,5 @@ def free_executor(self, instance: ExecutorInstance): self.event.set() self.event.clear() -def example_notify(a: int, b) -> None: - if a == 0: - print(pickle.loads(b)) - 
else: - print(f"Notify called with a={a} and b={b}") -async def main(): - executor = ExecutorInstance(ip="127.0.0.1", port=5003) - - image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") - config = Config() - - await executor.sent_stream(image, config, example_notify) - -if __name__ == "__main__": - import asyncio - asyncio.run(main()) \ No newline at end of file +executor_instances: Executors = Executors() diff --git a/server/main.py b/server/main.py index cda01c0a9..257be7cf5 100644 --- a/server/main.py +++ b/server/main.py @@ -1,20 +1,14 @@ import asyncio -import builtins import io -import re -from base64 import b64decode -from typing import Union -import requests -from PIL import Image -from fastapi import FastAPI, Request, HTTPException +from fastapi import FastAPI, Request from fastapi.middleware.cors import CORSMiddleware from starlette.responses import StreamingResponse, JSONResponse -from manga_translator import Config, Context -from server.instance import ExecutorInstance, Executors -from server.myqueue import TaskQueue -from server.sent_data import NotifyType +from server.instance import ExecutorInstance, executor_instances +from server.myqueue import wait_in_queue +from server.request_extraction import multi_content_type, to_pil_image, get_ctx +from server.streaming import notify, stream from server.to_json import to_json app = FastAPI() @@ -26,9 +20,6 @@ allow_methods=["*"], allow_headers=["*"], ) -executor_instances: Executors = Executors() -task_queue = TaskQueue() - @app.post("/register") async def register_instance(instance: ExecutorInstance, request: Request): @@ -36,130 +27,25 @@ async def register_instance(instance: ExecutorInstance, request: Request): executor_instances.register(instance) return {"code": 0} - -async def to_pil_image(image: Union[str, bytes]) -> Image.Image: - try: - if isinstance(image, builtins.bytes): - image = Image.open(io.BytesIO(image)) - return image - else: - if 
re.match(r'^data:image/.+;base64,', image): - value = image.split(',', 1)[1] - image_data = b64decode(value) - image = Image.open(io.BytesIO(image_data)) - return image - else: - response = requests.get(image) - image = Image.open(io.BytesIO(response.content)) - return image - except Exception as e: - raise HTTPException(status_code=422, detail=str(e)) - - -async def multi_content_type(request: Request): - content_type = request.headers.get("content-type") - - if content_type and content_type.startswith("multipart/form-data"): - form = await request.form() - config = form.get("config", "{}") - image = form.get("image") - image_content = await image.read() - config = Config.parse_raw(config) - return config, image_content - elif content_type and content_type.startswith("application/json"): - body = await request.json() - config = Config(**body.get("config", {})) - image = body.get("image") - return config, image - - else: - raise HTTPException(status_code=400, detail="Unsupported Content-Type") - - -async def wait(task, notify: NotifyType): - """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. 
when done the item will be removed from the queue and result will be returned""" - while True: - queue_pos = task_queue.get_pos(task) - if notify: - notify(3, queue_pos) - if queue_pos < executor_instances.free_executors(): - instance = await executor_instances.find_executor() - task_queue.remove(task) - if notify: - notify(4, 0) - if notify: - await instance.sent_stream(task.image, task.config, notify) - else: - result = await instance.sent(task.image, task.config) - - executor_instances.free_executor(instance) - - if notify: - return - else: - return result - else: - if queue_pos == 0: - raise HTTPException(500, detail="No translator registered") - await task_queue.wait_for_event() - - -async def stream(messages): - while True: - message = await messages.get() - yield message - if message[0] == 0 or message[0] == 2: - break - -def notify(code, data, transform_to_bytes, messages): - if code == 0: - result_bytes = transform_to_bytes(data) - encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes - messages.put_nowait(encoded_result) - else: - result_bytes = str(data).encode("utf-8") - encoded_result =code.to_bytes(1, 'big') + len(result_bytes).to_bytes(4, 'big') + result_bytes - messages.put_nowait(encoded_result) - def transform_to_image(data): return b"" @app.post("/json") async def json(req: Request): - data, img = await multi_content_type(req) - ctx = Context() - - ctx.image = await to_pil_image(img) - ctx.config = data - task_queue.add_task(ctx) - - data = await wait(ctx, None) - json = to_json(data) + ctx = await get_ctx(req) + json = to_json(ctx) return JSONResponse(content=json) @app.post("/bytes") async def bytes(req: Request): - data, img = await multi_content_type(req) - ctx = Context() - - ctx.image = await to_pil_image(img) - ctx.config = data - task_queue.add_task(ctx) - data = await wait((data, img), None) + ctx = await get_ctx(req) @app.post("/image") async def image(req: Request): - data, img = await multi_content_type(req) - ctx 
= Context() - - ctx.image = await to_pil_image(img) - ctx.config = data - task_queue.add_task(ctx) - - data = await wait((data, img), None) + ctx = await get_ctx(req) img_byte_arr = io.BytesIO() - data.result.save(img_byte_arr, format="PNG") + ctx.result.save(img_byte_arr, format="PNG") img_byte_arr.seek(0) return StreamingResponse(img_byte_arr, media_type="image/png") @@ -175,7 +61,7 @@ def example_notify(code: int, data) -> None: notify(code, data, transform_to_image, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait((data, img), example_notify)) + asyncio.create_task(wait_in_queue((data, img), example_notify)) return streaming_response @app.post("/stream_bytes") @@ -189,7 +75,7 @@ def example_notify(code: int, data) -> None: notify(code, data, transform_to_image, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait((data, img), example_notify)) + asyncio.create_task(wait_in_queue((data, img), example_notify)) return streaming_response @app.post("/stream_image") @@ -203,7 +89,7 @@ def example_notify(code: int, data) -> None: notify(code, data, transform_to_image, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait((data, img), example_notify)) + asyncio.create_task(wait_in_queue((data, img), example_notify)) return streaming_response if __name__ == '__main__': diff --git a/server/myqueue.py b/server/myqueue.py index 61c1abac3..cb11db50d 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -1,6 +1,10 @@ import asyncio from typing import List, Dict +from fastapi import HTTPException + +from server.instance import executor_instances +from server.sent_data_internal import NotifyType class TaskQueue: def __init__(self): @@ -22,4 +26,33 @@ def remove(self, task): self.update_event() async def wait_for_event(self): - await 
self.queue_event.wait() \ No newline at end of file + await self.queue_event.wait() + +task_queue = TaskQueue() + +async def wait_in_queue(task, notify: NotifyType): + """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. when done the item will be removed from the queue and result will be returned""" + while True: + queue_pos = task_queue.get_pos(task) + if notify: + notify(3, queue_pos) + if queue_pos < executor_instances.free_executors(): + instance = await executor_instances.find_executor() + task_queue.remove(task) + if notify: + notify(4, 0) + if notify: + await instance.sent_stream(task.image, task.config, notify) + else: + result = await instance.sent(task.image, task.config) + + executor_instances.free_executor(instance) + + if notify: + return + else: + return result + else: + if queue_pos == 0: + raise HTTPException(500, detail="No translator registered") + await task_queue.wait_for_event() \ No newline at end of file diff --git a/server/request_extraction.py b/server/request_extraction.py new file mode 100644 index 000000000..559127760 --- /dev/null +++ b/server/request_extraction.py @@ -0,0 +1,61 @@ +import builtins +import io +import re +from base64 import b64decode +from typing import Union + +import requests +from PIL import Image +from fastapi import Request, HTTPException + +from manga_translator import Config, Context +from server.myqueue import task_queue, wait_in_queue + + +async def to_pil_image(image: Union[str, bytes]) -> Image.Image: + try: + if isinstance(image, builtins.bytes): + image = Image.open(io.BytesIO(image)) + return image + else: + if re.match(r'^data:image/.+;base64,', image): + value = image.split(',', 1)[1] + image_data = b64decode(value) + image = Image.open(io.BytesIO(image_data)) + return image + else: + response = requests.get(image) + image = Image.open(io.BytesIO(response.content)) + return image + except Exception as e: + raise 
HTTPException(status_code=422, detail=str(e)) + + +async def multi_content_type(request: Request): + content_type = request.headers.get("content-type") + + if content_type and content_type.startswith("multipart/form-data"): + form = await request.form() + config = form.get("config", "{}") + image = form.get("image") + image_content = await image.read() + config = Config.parse_raw(config) + return config, image_content + elif content_type and content_type.startswith("application/json"): + body = await request.json() + config = Config(**body.get("config", {})) + image = body.get("image") + return config, image + + else: + raise HTTPException(status_code=400, detail="Unsupported Content-Type") + +async def get_ctx(req: Request): + data, img = await multi_content_type(req) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) + + data = await wait_in_queue(ctx, None) \ No newline at end of file diff --git a/server/sent_data.py b/server/sent_data_internal.py similarity index 100% rename from server/sent_data.py rename to server/sent_data_internal.py diff --git a/server/streaming.py b/server/streaming.py new file mode 100644 index 000000000..809d58e05 --- /dev/null +++ b/server/streaming.py @@ -0,0 +1,16 @@ +async def stream(messages): + while True: + message = await messages.get() + yield message + if message[0] == 0 or message[0] == 2: + break + +def notify(code, data, transform_to_bytes, messages): + if code == 0: + result_bytes = transform_to_bytes(data) + encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes + messages.put_nowait(encoded_result) + else: + result_bytes = str(data).encode("utf-8") + encoded_result =code.to_bytes(1, 'big') + len(result_bytes).to_bytes(4, 'big') + result_bytes + messages.put_nowait(encoded_result) \ No newline at end of file From d0e48b9f086371f8a9be2d48a88aab65a88b4873 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> 
Date: Tue, 19 Nov 2024 23:11:07 +0100 Subject: [PATCH 25/75] stream update --- server/main.py | 48 ++++++------------------------------ server/request_extraction.py | 22 ++++++++++++++++- server/streaming.py | 4 ++- 3 files changed, 32 insertions(+), 42 deletions(-) diff --git a/server/main.py b/server/main.py index 257be7cf5..665b61af6 100644 --- a/server/main.py +++ b/server/main.py @@ -1,4 +1,3 @@ -import asyncio import io from fastapi import FastAPI, Request @@ -6,9 +5,7 @@ from starlette.responses import StreamingResponse, JSONResponse from server.instance import ExecutorInstance, executor_instances -from server.myqueue import wait_in_queue -from server.request_extraction import multi_content_type, to_pil_image, get_ctx -from server.streaming import notify, stream +from server.request_extraction import get_ctx, while_streaming from server.to_json import to_json app = FastAPI() @@ -27,8 +24,10 @@ async def register_instance(instance: ExecutorInstance, request: Request): executor_instances.register(instance) return {"code": 0} -def transform_to_image(data): - return b"" +def transform_to_image(ctx): + img_byte_arr = io.BytesIO() + ctx.result.save(img_byte_arr, format="PNG") + return img_byte_arr.getvalue() @app.post("/json") async def json(req: Request): @@ -40,7 +39,6 @@ async def json(req: Request): async def bytes(req: Request): ctx = await get_ctx(req) - @app.post("/image") async def image(req: Request): ctx = await get_ctx(req) @@ -52,45 +50,15 @@ async def image(req: Request): @app.post("/stream_json") async def stream_json(req: Request): - data, img = await multi_content_type(req) - img = await to_pil_image(img) - - messages = asyncio.Queue() - - def example_notify(code: int, data) -> None: - notify(code, data, transform_to_image, messages) - - streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait_in_queue((data, img), example_notify)) - return streaming_response + return 
while_streaming(req, transform_to_image) @app.post("/stream_bytes") async def stream_bytes(req: Request): - data, img = await multi_content_type(req) - img = await to_pil_image(img) - - messages = asyncio.Queue() - - def example_notify(code: int, data) -> None: - notify(code, data, transform_to_image, messages) - - streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait_in_queue((data, img), example_notify)) - return streaming_response + return while_streaming(req, transform_to_image) @app.post("/stream_image") async def stream_image(req: Request): - data, img = await multi_content_type(req) - img = await to_pil_image(img) - - messages = asyncio.Queue() - - def example_notify(code: int, data) -> None: - notify(code, data, transform_to_image, messages) - - streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait_in_queue((data, img), example_notify)) - return streaming_response + return while_streaming(req, transform_to_image) if __name__ == '__main__': import uvicorn diff --git a/server/request_extraction.py b/server/request_extraction.py index 559127760..62b2f9fd8 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -1,3 +1,4 @@ +import asyncio import builtins import io import re @@ -7,9 +8,11 @@ import requests from PIL import Image from fastapi import Request, HTTPException +from starlette.responses import StreamingResponse from manga_translator import Config, Context from server.myqueue import task_queue, wait_in_queue +from server.streaming import notify, stream async def to_pil_image(image: Union[str, bytes]) -> Image.Image: @@ -58,4 +61,21 @@ async def get_ctx(req: Request): ctx.config = data task_queue.add_task(ctx) - data = await wait_in_queue(ctx, None) \ No newline at end of file + data = await wait_in_queue(ctx, None) + +async def while_streaming(req: Request, transform): + data, img = await 
multi_content_type(req) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) + + messages = asyncio.Queue() + + def notify_internal(code: int, data) -> None: + notify(code, data, transform, messages) + + streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") + asyncio.create_task(wait_in_queue((data, img), notify_internal)) + return streaming_response \ No newline at end of file diff --git a/server/streaming.py b/server/streaming.py index 809d58e05..907fed3d7 100644 --- a/server/streaming.py +++ b/server/streaming.py @@ -1,3 +1,5 @@ +import pickle + async def stream(messages): while True: message = await messages.get() @@ -7,7 +9,7 @@ async def stream(messages): def notify(code, data, transform_to_bytes, messages): if code == 0: - result_bytes = transform_to_bytes(data) + result_bytes = transform_to_bytes(pickle.loads(data)) encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes messages.put_nowait(encoded_result) else: From 3be082d89ee1cdd8ba07459856b504577695cf1a Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 17:47:52 +0100 Subject: [PATCH 26/75] stream_image,stream_json now working json&image most likely too --- api-test.html | 153 +++++++++++++++++++++++++++++++++++ server/main.py | 9 ++- server/myqueue.py | 4 +- server/request_extraction.py | 4 +- server/streaming.py | 2 +- 5 files changed, 164 insertions(+), 8 deletions(-) create mode 100644 api-test.html diff --git a/api-test.html b/api-test.html new file mode 100644 index 000000000..f07b865fb --- /dev/null +++ b/api-test.html @@ -0,0 +1,153 @@ + + + + + + Image Upload and Translation + + + +

Upload Image and Translate

+ + +

+

+ + + + diff --git a/server/main.py b/server/main.py index 665b61af6..7a8cf48f2 100644 --- a/server/main.py +++ b/server/main.py @@ -29,6 +29,9 @@ def transform_to_image(ctx): ctx.result.save(img_byte_arr, format="PNG") return img_byte_arr.getvalue() +def transform_to_json(ctx): + return str(to_json(ctx)).encode("utf-8") + @app.post("/json") async def json(req: Request): ctx = await get_ctx(req) @@ -50,15 +53,15 @@ async def image(req: Request): @app.post("/stream_json") async def stream_json(req: Request): - return while_streaming(req, transform_to_image) + return await while_streaming(req, transform_to_json) @app.post("/stream_bytes") async def stream_bytes(req: Request): - return while_streaming(req, transform_to_image) + return await while_streaming(req, transform_to_image) @app.post("/stream_image") async def stream_image(req: Request): - return while_streaming(req, transform_to_image) + return await while_streaming(req, transform_to_image) if __name__ == '__main__': import uvicorn diff --git a/server/myqueue.py b/server/myqueue.py index cb11db50d..ca21698ed 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -35,12 +35,12 @@ async def wait_in_queue(task, notify: NotifyType): while True: queue_pos = task_queue.get_pos(task) if notify: - notify(3, queue_pos) + notify(3, str(queue_pos)) if queue_pos < executor_instances.free_executors(): instance = await executor_instances.find_executor() task_queue.remove(task) if notify: - notify(4, 0) + notify(4, "0") if notify: await instance.sent_stream(task.image, task.config, notify) else: diff --git a/server/request_extraction.py b/server/request_extraction.py index 62b2f9fd8..9290ecc07 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -61,7 +61,7 @@ async def get_ctx(req: Request): ctx.config = data task_queue.add_task(ctx) - data = await wait_in_queue(ctx, None) + return await wait_in_queue(ctx, None) async def while_streaming(req: Request, transform): data, img = await 
multi_content_type(req) @@ -77,5 +77,5 @@ def notify_internal(code: int, data) -> None: notify(code, data, transform, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait_in_queue((data, img), notify_internal)) + asyncio.create_task(wait_in_queue(ctx, notify_internal)) return streaming_response \ No newline at end of file diff --git a/server/streaming.py b/server/streaming.py index 907fed3d7..c65a897d2 100644 --- a/server/streaming.py +++ b/server/streaming.py @@ -10,7 +10,7 @@ async def stream(messages): def notify(code, data, transform_to_bytes, messages): if code == 0: result_bytes = transform_to_bytes(pickle.loads(data)) - encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes + encoded_result = b'\x00' + len(result_bytes).to_bytes(4, 'big') + result_bytes messages.put_nowait(encoded_result) else: result_bytes = str(data).encode("utf-8") From 8aa1f2b6931a64e87ad1ed949b848d09cd952693 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 18:00:15 +0100 Subject: [PATCH 27/75] remove old tasks on interrupt connection --- server/myqueue.py | 15 +++++++++++++++ server/request_extraction.py | 2 ++ 2 files changed, 17 insertions(+) diff --git a/server/myqueue.py b/server/myqueue.py index ca21698ed..9ae865063 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -2,6 +2,7 @@ from typing import List, Dict from fastapi import HTTPException +from starlette.requests import Request from server.instance import executor_instances from server.sent_data_internal import NotifyType @@ -30,6 +31,11 @@ async def wait_for_event(self): task_queue = TaskQueue() +async def is_client_disconnected(request: Request) -> bool: + if await request.is_disconnected(): + return True + return False + async def wait_in_queue(task, notify: NotifyType): """Will get task position report it. 
If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. when done the item will be removed from the queue and result will be returned""" while True: @@ -37,6 +43,10 @@ async def wait_in_queue(task, notify: NotifyType): if notify: notify(3, str(queue_pos)) if queue_pos < executor_instances.free_executors(): + if is_client_disconnected(task.req): + task_queue.remove(task) + task_queue.update_event() + raise HTTPException(500, detail="User is no longer connected") #just for the logs instance = await executor_instances.find_executor() task_queue.remove(task) if notify: @@ -47,6 +57,7 @@ async def wait_in_queue(task, notify: NotifyType): result = await instance.sent(task.image, task.config) executor_instances.free_executor(instance) + task_queue.update_event() if notify: return @@ -55,4 +66,8 @@ async def wait_in_queue(task, notify: NotifyType): else: if queue_pos == 0: raise HTTPException(500, detail="No translator registered") + if is_client_disconnected(task.req): + task_queue.remove(task) + task_queue.update_event() + raise HTTPException(500, detail="User is no longer connected") #just for the logs await task_queue.wait_for_event() \ No newline at end of file diff --git a/server/request_extraction.py b/server/request_extraction.py index 9290ecc07..02dc85e70 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -59,6 +59,7 @@ async def get_ctx(req: Request): ctx.image = await to_pil_image(img) ctx.config = data + ctx.req = req task_queue.add_task(ctx) return await wait_in_queue(ctx, None) @@ -69,6 +70,7 @@ async def while_streaming(req: Request, transform): ctx.image = await to_pil_image(img) ctx.config = data + ctx.req = req task_queue.add_task(ctx) messages = asyncio.Queue() From 0160a3e2e5ec2178565082a4126bc0032b850cd2 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:32:11 +0100 Subject: [PATCH 28/75] change routes --- 
execute_test.py | 46 ----------------------------- server/main.py | 28 ++++++++++++++---- api-test.html => server/manual.html | 3 +- 3 files changed, 23 insertions(+), 54 deletions(-) delete mode 100644 execute_test.py rename api-test.html => server/manual.html (97%) diff --git a/execute_test.py b/execute_test.py deleted file mode 100644 index 8705fb2b1..000000000 --- a/execute_test.py +++ /dev/null @@ -1,46 +0,0 @@ -import asyncio -import json -import pickle -import requests -from PIL import Image - -from manga_translator import Config - - -async def execute_method(method_name, attributes): - url = f"http://127.0.0.1:5003/execute/{method_name}" - headers = {'Content-Type': 'application/octet-stream'} - - response = requests.post(url, data=pickle.dumps(attributes), headers=headers, stream=True) - - if response.status_code == 200: - buffer = b'' - for chunk in response.iter_content(chunk_size=None): - if chunk: - buffer += chunk - while True: - if len(buffer) >= 5: - status = int.from_bytes(buffer[0:1], byteorder='big') - expected_size = int.from_bytes(buffer[1:5], byteorder='big') - if len(buffer) >= 5 + expected_size: - data = buffer[5:5 + expected_size] - if status == 0: - print("data", pickle.loads(data)) - elif status == 1: - print("log", data) - elif status == 2: - print("error", data) - buffer = buffer[5 + expected_size:] - else: - break - else: - break - else: - print(json.loads(response.content)) - - - -if __name__ == '__main__': - image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") - attributes = {"image": image, "config": Config()} - asyncio.run(execute_method("translate", attributes)) \ No newline at end of file diff --git a/server/main.py b/server/main.py index 7a8cf48f2..49ef9f5a9 100644 --- a/server/main.py +++ b/server/main.py @@ -5,6 +5,7 @@ from starlette.responses import StreamingResponse, JSONResponse from server.instance import ExecutorInstance, executor_instances +from server.myqueue import task_queue from 
server.request_extraction import get_ctx, while_streaming from server.to_json import to_json @@ -32,17 +33,17 @@ def transform_to_image(ctx): def transform_to_json(ctx): return str(to_json(ctx)).encode("utf-8") -@app.post("/json") +@app.post("/translate/json") async def json(req: Request): ctx = await get_ctx(req) json = to_json(ctx) return JSONResponse(content=json) -@app.post("/bytes") +@app.post("/translate/bytes") async def bytes(req: Request): ctx = await get_ctx(req) -@app.post("/image") +@app.post("/translate/image") async def image(req: Request): ctx = await get_ctx(req) img_byte_arr = io.BytesIO() @@ -51,18 +52,33 @@ async def image(req: Request): return StreamingResponse(img_byte_arr, media_type="image/png") -@app.post("/stream_json") +@app.post("/translate/json/stream") async def stream_json(req: Request): return await while_streaming(req, transform_to_json) -@app.post("/stream_bytes") +@app.post("/translate/bytes/stream") async def stream_bytes(req: Request): return await while_streaming(req, transform_to_image) -@app.post("/stream_image") +@app.post("/translate/image/stream") async def stream_image(req: Request): return await while_streaming(req, transform_to_image) +@app.post("/queue-size") +async def queue_size() -> int: + return len(task_queue.queue) + +@app.post("/") +async def index(): + # ui.html + pass + +@app.post("/manual") +async def manual(): + # manual.html + pass + + if __name__ == '__main__': import uvicorn from args import parse_arguments diff --git a/api-test.html b/server/manual.html similarity index 97% rename from api-test.html rename to server/manual.html index f07b865fb..1335b0c51 100644 --- a/api-test.html +++ b/server/manual.html @@ -72,7 +72,7 @@

formData.append('image', file); try { - const response = await fetch( generateImage.checked ? 'http://127.0.0.1:8000/stream_image' : 'http://127.0.0.1:8000/stream_json', { + const response = await fetch( generateImage.checked ? '/translate/image/stream' : '/translate/json/stream', { method: 'POST', body: formData, /*headers: { @@ -142,7 +142,6 @@

buffer = buffer.slice(totalSize); } } - console.log(buffer.length) } else { errorField.innerHTML = response.statusText; From aa9492724c7225fbc76385d45012e77977fad9bd Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 21:10:35 +0100 Subject: [PATCH 29/75] api goals --- server/args.py | 7 ++++++- server/instance.py | 2 +- server/main.py | 51 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/server/args.py b/server/args.py index a00e24a22..a27ba5ffd 100644 --- a/server/args.py +++ b/server/args.py @@ -5,5 +5,10 @@ def parse_arguments(): parser = argparse.ArgumentParser(description="Specify host and port for the server.") parser.add_argument('--host', type=str, default='127.0.0.1', help='The host address (default: 127.0.0.1)') parser.add_argument('--port', type=int, default=8000, help='The port number (default: 8080)') - + #use_gpu + #use_gpu_limited + #ignore_errors + #verbose + #nonce + #start_instance return parser.parse_args() \ No newline at end of file diff --git a/server/instance.py b/server/instance.py index 7c02e695f..e844289dd 100644 --- a/server/instance.py +++ b/server/instance.py @@ -25,7 +25,7 @@ async def sent_stream(self, image: Image, config: Config, sender: NotifyType): class Executors: def __init__(self): - self.list: List[ExecutorInstance] = [ExecutorInstance(ip="127.0.0.1", port=5003)] + self.list: List[ExecutorInstance] = [] self.lock: Lock = Lock() self.event = Event() diff --git a/server/main.py b/server/main.py index 49ef9f5a9..dd6f6ebd6 100644 --- a/server/main.py +++ b/server/main.py @@ -1,6 +1,10 @@ import io +import os +import secrets +import subprocess +import sys -from fastapi import FastAPI, Request +from fastapi import FastAPI, Request, HTTPException from fastapi.middleware.cors import CORSMiddleware from starlette.responses import StreamingResponse, JSONResponse @@ -10,6 +14,7 @@ from server.to_json import 
to_json app = FastAPI() +nonce = None app.add_middleware( CORSMiddleware, @@ -21,6 +26,9 @@ @app.post("/register") async def register_instance(instance: ExecutorInstance, request: Request): + req_nonce = request.headers.get('X-Nonce') + if req_nonce != nonce: + raise HTTPException(401, detail="Invalid nonce") instance.ip = request.client.host executor_instances.register(instance) return {"code": 0} @@ -78,10 +86,51 @@ async def manual(): # manual.html pass +def generate_nonce(): + return secrets.token_hex(16) + +def start_translator_client_proc(host: str, port: int, nonce: str, params: dict): + cmds = [ + sys.executable, + '-m', 'manga_translator', + '--mode', 'shared', + '--host', host, + '--port', str(port), + '--nonce', nonce, + '--no-report' + ] + if params.get('use_gpu', False): + cmds.append('--use-gpu') + if params.get('use_gpu_limited', False): + cmds.append('--use-gpu-limited') + if params.get('ignore_errors', False): + cmds.append('--ignore-errors') + if params.get('verbose', False): + cmds.append('--verbose') + + proc = subprocess.Popen(cmds, cwd=BASE_PATH) + executor_instances.register(ExecutorInstance(ip=host, port=port)) + return proc + +def prepare(args): + global nonce + if args.get("nonce", None) is None: + nonce = os.getenv('MT_WEB_NONCE', generate_nonce()) + else: + nonce = args.get("nonce", None) + if args.get("start_instance", None): + start_translator_client_proc(args.get("host", "0.0.0.0"), args.get("port",8000) + 1, nonce, args) + +#todo: restart if crash +#todo: cache results +#todo: cleanup cache +#todo: store images while in queue if __name__ == '__main__': import uvicorn from args import parse_arguments args = parse_arguments() + prepare(args) + print("Nonce: "+nonce) uvicorn.run(app, host=args.host, port=args.port) From 4dfe04c58f06625940a8e9062d8dba8441a7529c Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 21:14:31 +0100 Subject: [PATCH 30/75] more todos & args --- 
server/args.py | 16 ++++++++++------ server/main.py | 7 ++++--- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/server/args.py b/server/args.py index a27ba5ffd..5c7294ca9 100644 --- a/server/args.py +++ b/server/args.py @@ -1,14 +1,18 @@ import argparse +import os def parse_arguments(): parser = argparse.ArgumentParser(description="Specify host and port for the server.") parser.add_argument('--host', type=str, default='127.0.0.1', help='The host address (default: 127.0.0.1)') parser.add_argument('--port', type=int, default=8000, help='The port number (default: 8080)') - #use_gpu - #use_gpu_limited - #ignore_errors - #verbose - #nonce - #start_instance + parser.add_argument('-v', '--verbose', action='store_true', + help='Print debug info and save intermediate images in result folder') + parser.add_argument('--start-instance', action='store_true', + help='If a translator should be launched automatically') + parser.add_argument('--ignore-errors', action='store_true', help='Skip image on encountered error.') + parser.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal web server communication') + g = parser.add_mutually_exclusive_group() + g.add_argument('--use-gpu', action='store_true', help='Turn on/off gpu (auto switch between mps and cuda)') + g.add_argument('--use-gpu-limited', action='store_true', help='Turn on/off gpu (excluding offline translator)') return parser.parse_args() \ No newline at end of file diff --git a/server/main.py b/server/main.py index dd6f6ebd6..f5b5d105a 100644 --- a/server/main.py +++ b/server/main.py @@ -78,12 +78,12 @@ async def queue_size() -> int: @app.post("/") async def index(): - # ui.html + # todo:ui.html pass @app.post("/manual") async def manual(): - # manual.html + # todo:manual.html pass def generate_nonce(): @@ -107,7 +107,7 @@ def start_translator_client_proc(host: str, port: int, nonce: str, params: dict) cmds.append('--ignore-errors') if 
params.get('verbose', False): cmds.append('--verbose') - + #todo: cwd proc = subprocess.Popen(cmds, cwd=BASE_PATH) executor_instances.register(ExecutorInstance(ip=host, port=port)) return proc @@ -125,6 +125,7 @@ def prepare(args): #todo: cache results #todo: cleanup cache #todo: store images while in queue +#todo: add docs if __name__ == '__main__': import uvicorn From 29a7625123fd23e6074b946f5268c1adc827d15f Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 15:03:29 +0100 Subject: [PATCH 31/75] some docs & queue fix --- manga_translator/utils/generic.py | 2 +- server/main.py | 83 ++++++++++++++++++++----------- server/myqueue.py | 20 +++++--- server/request_extraction.py | 8 ++- server/to_json.py | 17 ++++++- 5 files changed, 91 insertions(+), 39 deletions(-) diff --git a/manga_translator/utils/generic.py b/manga_translator/utils/generic.py index 67163b69b..966e6a370 100644 --- a/manga_translator/utils/generic.py +++ b/manga_translator/utils/generic.py @@ -37,7 +37,7 @@ def __init__(self, **kwargs): def __eq__(self, other): if not isinstance(other, Context): return NotImplemented - return vars(self) == vars(other) + return dict(self) == dict(other) def __contains__(self, key): return key in self.keys() diff --git a/server/main.py b/server/main.py index f5b5d105a..185e59f6a 100644 --- a/server/main.py +++ b/server/main.py @@ -3,15 +3,21 @@ import secrets import subprocess import sys +from builtins import bytes +from typing import Union -from fastapi import FastAPI, Request, HTTPException +from fastapi import FastAPI, Request, HTTPException, Header, Form, UploadFile from fastapi.middleware.cors import CORSMiddleware -from starlette.responses import StreamingResponse, JSONResponse +from fastapi.responses import StreamingResponse, JSONResponse, HTMLResponse +from pathlib import Path +from pydantic import BaseModel + +from manga_translator import Config from server.instance import ExecutorInstance, 
executor_instances from server.myqueue import task_queue -from server.request_extraction import get_ctx, while_streaming -from server.to_json import to_json +from server.request_extraction import get_ctx, while_streaming, TranslateRequest +from server.to_json import to_json, Translation app = FastAPI() nonce = None @@ -24,14 +30,19 @@ allow_headers=["*"], ) -@app.post("/register") -async def register_instance(instance: ExecutorInstance, request: Request): - req_nonce = request.headers.get('X-Nonce') +class TranslateRequestForm(BaseModel): + """This request can be a multipart or a json request""" + image: UploadFile + """can be a url, base64 encoded image or a multipart image""" + config: str + """in case it is a multipart this needs to be a string(json.stringify)""" + +@app.post("/register", response_description="no response") +async def register_instance(instance: ExecutorInstance, req: Request, req_nonce: str = Header(alias="X-Nonce")): if req_nonce != nonce: raise HTTPException(401, detail="Invalid nonce") - instance.ip = request.client.host + instance.ip = req.client.host executor_instances.register(instance) - return {"code": 0} def transform_to_image(ctx): img_byte_arr = io.BytesIO() @@ -41,18 +52,29 @@ def transform_to_image(ctx): def transform_to_json(ctx): return str(to_json(ctx)).encode("utf-8") -@app.post("/translate/json") +async def parse_request( + req: Request, + image: Union[str, bytes] = Form(...), + config: str = Form(...), +): + if req.headers.get('content-type').startswith('multipart'): + config = json.loads(config) + return TranslateRequest(image=image, config=Config(**config)) + else: + return None + +@app.post("/translate/json", response_model=list[Translation], response_description="json strucure inspired by the ichigo translator extension") async def json(req: Request): ctx = await get_ctx(req) json = to_json(ctx) return JSONResponse(content=json) -@app.post("/translate/bytes") +@app.post("/translate/bytes", 
response_class=StreamingResponse, response_description="custom byte structure following the stream encoding, but with json first and then the image bytes as chunks") async def bytes(req: Request): ctx = await get_ctx(req) -@app.post("/translate/image") -async def image(req: Request): +@app.post("/translate/image", response_description="the result image", response_class=StreamingResponse) +async def image(req: Request) -> StreamingResponse: ctx = await get_ctx(req) img_byte_arr = io.BytesIO() ctx.result.save(img_byte_arr, format="PNG") @@ -60,31 +82,32 @@ async def image(req: Request): return StreamingResponse(img_byte_arr, media_type="image/png") -@app.post("/translate/json/stream") -async def stream_json(req: Request): +@app.post("/translate/json/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_json(req: Request) -> StreamingResponse: return await while_streaming(req, transform_to_json) -@app.post("/translate/bytes/stream") -async def stream_bytes(req: Request): +@app.post("/translate/bytes/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_bytes(req: Request)-> StreamingResponse: return await while_streaming(req, transform_to_image) -@app.post("/translate/image/stream") -async def stream_image(req: Request): +@app.post("/translate/image/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is 
waiting queue position, 4 is waiting for translator instance") +async def stream_image(req: Request) -> StreamingResponse: return await while_streaming(req, transform_to_image) -@app.post("/queue-size") +@app.post("/queue-size", response_model=int) async def queue_size() -> int: return len(task_queue.queue) -@app.post("/") -async def index(): +@app.get("/", response_class=HTMLResponse) +async def index() -> HTMLResponse: # todo:ui.html pass -@app.post("/manual") +@app.get("/manual", response_class=HTMLResponse) async def manual(): - # todo:manual.html - pass + html_file = Path("manual.html") + html_content = html_file.read_text() + return HTMLResponse(content=html_content) def generate_nonce(): return secrets.token_hex(16) @@ -114,18 +137,19 @@ def start_translator_client_proc(host: str, port: int, nonce: str, params: dict) def prepare(args): global nonce - if args.get("nonce", None) is None: + if args.nonce is None: nonce = os.getenv('MT_WEB_NONCE', generate_nonce()) else: - nonce = args.get("nonce", None) - if args.get("start_instance", None): - start_translator_client_proc(args.get("host", "0.0.0.0"), args.get("port",8000) + 1, nonce, args) + nonce = args.nonce + if args.start_instance: + start_translator_client_proc(args.host, args.port + 1, nonce, args) #todo: restart if crash #todo: cache results #todo: cleanup cache #todo: store images while in queue #todo: add docs +#todo: index doesnt work properly in the list(is_client_disconnected is not executed immediatly/does not update the index) if __name__ == '__main__': import uvicorn @@ -134,4 +158,5 @@ def prepare(args): args = parse_arguments() prepare(args) print("Nonce: "+nonce) + executor_instances.register(ExecutorInstance(ip="127.0.0.1", port=5003)) uvicorn.run(app, host=args.host, port=args.port) diff --git a/server/myqueue.py b/server/myqueue.py index 9ae865063..446541fc2 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -3,13 +3,15 @@ from fastapi import HTTPException from starlette.requests 
import Request +from starlette.responses import StreamingResponse +from manga_translator import Context from server.instance import executor_instances from server.sent_data_internal import NotifyType class TaskQueue: def __init__(self): - self.queue: List[Dict] = [] + self.queue: List[Context] = [] self.queue_event: asyncio.Event = asyncio.Event() def add_task(self, task): @@ -43,10 +45,13 @@ async def wait_in_queue(task, notify: NotifyType): if notify: notify(3, str(queue_pos)) if queue_pos < executor_instances.free_executors(): - if is_client_disconnected(task.req): + if await is_client_disconnected(task.req): task_queue.remove(task) task_queue.update_event() - raise HTTPException(500, detail="User is no longer connected") #just for the logs + if notify: + return + else: + raise HTTPException(500, detail="User is no longer connected") #just for the logs instance = await executor_instances.find_executor() task_queue.remove(task) if notify: @@ -64,10 +69,11 @@ async def wait_in_queue(task, notify: NotifyType): else: return result else: - if queue_pos == 0: - raise HTTPException(500, detail="No translator registered") - if is_client_disconnected(task.req): + if await is_client_disconnected(task.req): task_queue.remove(task) task_queue.update_event() - raise HTTPException(500, detail="User is no longer connected") #just for the logs + if notify: + return + else: + raise HTTPException(500, detail="User is no longer connected") #just for the logs await task_queue.wait_for_event() \ No newline at end of file diff --git a/server/request_extraction.py b/server/request_extraction.py index 02dc85e70..442bc0e68 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -8,12 +8,19 @@ import requests from PIL import Image from fastapi import Request, HTTPException +from pydantic import BaseModel from starlette.responses import StreamingResponse from manga_translator import Config, Context from server.myqueue import task_queue, wait_in_queue from 
server.streaming import notify, stream +class TranslateRequest(BaseModel): + """This request can be a multipart or a json request""" + image: bytes|str + """can be a url, base64 encoded image or a multipart image""" + config: Config + """in case it is a multipart this needs to be a string(json.stringify)""" async def to_pil_image(image: Union[str, bytes]) -> Image.Image: try: @@ -36,7 +43,6 @@ async def to_pil_image(image: Union[str, bytes]) -> Image.Image: async def multi_content_type(request: Request): content_type = request.headers.get("content-type") - if content_type and content_type.startswith("multipart/form-data"): form = await request.form() config = form.get("config", "{}") diff --git a/server/to_json.py b/server/to_json.py index 356e74104..911718d8d 100644 --- a/server/to_json.py +++ b/server/to_json.py @@ -2,6 +2,7 @@ from typing import Dict, List import cv2 +from pydantic import BaseModel from manga_translator import Context from manga_translator.utils import TextBlock @@ -63,4 +64,18 @@ def to_json(ctx: Context): 'language': text_regions[i].source_lang, 'background': background }) - return results \ No newline at end of file + return results + +class TextColor(BaseModel): + fg: tuple[int, int, int] + bg: tuple[int, int, int] + +class Translation(BaseModel): + text: dict[str, str] + minX: int + minY: int + maxX: int + maxY: int + textColor:TextColor + language: str + background: str From 5e1638779e2e883229677369410d63c98e46405c Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:35:25 +0100 Subject: [PATCH 32/75] stream type fix --- server/myqueue.py | 4 ++-- server/request_extraction.py | 2 +- server/sent_data_internal.py | 2 +- server/streaming.py | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/server/myqueue.py b/server/myqueue.py index 446541fc2..b9174d936 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -43,7 +43,7 @@ async def wait_in_queue(task, 
notify: NotifyType): while True: queue_pos = task_queue.get_pos(task) if notify: - notify(3, str(queue_pos)) + notify(3, str(queue_pos).encode('utf-8')) if queue_pos < executor_instances.free_executors(): if await is_client_disconnected(task.req): task_queue.remove(task) @@ -55,7 +55,7 @@ async def wait_in_queue(task, notify: NotifyType): instance = await executor_instances.find_executor() task_queue.remove(task) if notify: - notify(4, "0") + notify(4, b"") if notify: await instance.sent_stream(task.image, task.config, notify) else: diff --git a/server/request_extraction.py b/server/request_extraction.py index 442bc0e68..080d84cc4 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -81,7 +81,7 @@ async def while_streaming(req: Request, transform): messages = asyncio.Queue() - def notify_internal(code: int, data) -> None: + def notify_internal(code: int, data: bytes) -> None: notify(code, data, transform, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") diff --git a/server/sent_data_internal.py b/server/sent_data_internal.py index 2945cc9f7..5720cb6c5 100644 --- a/server/sent_data_internal.py +++ b/server/sent_data_internal.py @@ -8,7 +8,7 @@ from manga_translator import Config -NotifyType = Optional[Callable[[int, Optional[int]], None]] +NotifyType = Optional[Callable[[int, Optional[bytes]], None]] async def fetch_data_stream(url, image: Image, config: Config, sender: NotifyType, headers: Mapping[str, str] = {}): attributes = {"image": image, "config": config} diff --git a/server/streaming.py b/server/streaming.py index c65a897d2..611771853 100644 --- a/server/streaming.py +++ b/server/streaming.py @@ -1,3 +1,4 @@ +import asyncio import pickle async def stream(messages): @@ -7,12 +8,11 @@ async def stream(messages): if message[0] == 0 or message[0] == 2: break -def notify(code, data, transform_to_bytes, messages): +def notify(code: int, data: bytes, transform_to_bytes, messages: 
asyncio.Queue): if code == 0: result_bytes = transform_to_bytes(pickle.loads(data)) encoded_result = b'\x00' + len(result_bytes).to_bytes(4, 'big') + result_bytes messages.put_nowait(encoded_result) else: - result_bytes = str(data).encode("utf-8") - encoded_result =code.to_bytes(1, 'big') + len(result_bytes).to_bytes(4, 'big') + result_bytes + encoded_result =code.to_bytes(1, 'big') + len(data).to_bytes(4, 'big') + data messages.put_nowait(encoded_result) \ No newline at end of file From 0c3908e0cd8b62c8faa8a5196a5cd23120bdd7d8 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:35:49 +0100 Subject: [PATCH 33/75] add server ui --- manga_translator/args.py | 2 +- server/index.html | 393 +++++++++++++++++++++++++++++++++++++++ server/main.py | 46 +++-- 3 files changed, 425 insertions(+), 16 deletions(-) create mode 100644 server/index.html diff --git a/manga_translator/args.py b/manga_translator/args.py index 0516c075b..8795b4e2f 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -133,4 +133,4 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser_api.add_argument('--host', default='127.0.0.1', type=str, help='Host for API service') parser_api.add_argument('--port', default=5003, type=int, help='Port for API service') parser_api.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal API server communication') - +parser_api.add_argument("--report", default=None,type=str, help='reports to server to register instance') diff --git a/server/index.html b/server/index.html new file mode 100644 index 000000000..edf617609 --- /dev/null +++ b/server/index.html @@ -0,0 +1,393 @@ + + + + + Image/Manga Translator + + + + + + + +
+
+

Image/Manga Translator

+
+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+
+
+ + +
+
+
+
{{ statusText }}
+ +
+
+ +
{{ statusText }}
+
+
+ +
+
+ Please consider supporting us by + Ko-fi + or + Patreon! +
+ Source Code +
+
+
+ + + diff --git a/server/main.py b/server/main.py index 185e59f6a..773908f41 100644 --- a/server/main.py +++ b/server/main.py @@ -1,8 +1,10 @@ import io import os import secrets +import signal import subprocess import sys +from argparse import Namespace from builtins import bytes from typing import Union @@ -100,8 +102,9 @@ async def queue_size() -> int: @app.get("/", response_class=HTMLResponse) async def index() -> HTMLResponse: - # todo:ui.html - pass + html_file = Path("index.html") + html_content = html_file.read_text() + return HTMLResponse(content=html_content) @app.get("/manual", response_class=HTMLResponse) async def manual(): @@ -112,27 +115,35 @@ async def manual(): def generate_nonce(): return secrets.token_hex(16) -def start_translator_client_proc(host: str, port: int, nonce: str, params: dict): +def start_translator_client_proc(host: str, port: int, nonce: str, params: Namespace): cmds = [ sys.executable, '-m', 'manga_translator', - '--mode', 'shared', + 'shared', '--host', host, '--port', str(port), '--nonce', nonce, - '--no-report' ] - if params.get('use_gpu', False): + if params.use_gpu: cmds.append('--use-gpu') - if params.get('use_gpu_limited', False): + if params.use_gpu_limited: cmds.append('--use-gpu-limited') - if params.get('ignore_errors', False): + if params.ignore_errors: cmds.append('--ignore-errors') - if params.get('verbose', False): + if params.verbose: cmds.append('--verbose') - #todo: cwd - proc = subprocess.Popen(cmds, cwd=BASE_PATH) + base_path = os.path.dirname(os.path.abspath(__file__)) + parent = os.path.dirname(base_path) + proc = subprocess.Popen(cmds, cwd=parent) executor_instances.register(ExecutorInstance(ip=host, port=port)) + + def handle_exit_signals(signal, frame): + proc.terminate() + sys.exit(0) + + signal.signal(signal.SIGINT, handle_exit_signals) + signal.signal(signal.SIGTERM, handle_exit_signals) + return proc def prepare(args): @@ -142,7 +153,7 @@ def prepare(args): else: nonce = args.nonce if 
args.start_instance: - start_translator_client_proc(args.host, args.port + 1, nonce, args) + return start_translator_client_proc(args.host, args.port + 1, nonce, args) #todo: restart if crash #todo: cache results @@ -150,13 +161,18 @@ def prepare(args): #todo: store images while in queue #todo: add docs #todo: index doesnt work properly in the list(is_client_disconnected is not executed immediatly/does not update the index) +#todo: enable config in html pages if __name__ == '__main__': import uvicorn from args import parse_arguments args = parse_arguments() - prepare(args) + args.start_instance = True + proc = prepare(args) print("Nonce: "+nonce) - executor_instances.register(ExecutorInstance(ip="127.0.0.1", port=5003)) - uvicorn.run(app, host=args.host, port=args.port) + try: + uvicorn.run(app, host=args.host, port=args.port) + except Exception: + if proc: + proc.terminate() From b462d12d5d891d8197f8d3e93f5be5609d8f4f64 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:36:36 +0100 Subject: [PATCH 34/75] delete old code --- manga_translator/mode/web.py | 153 ------- manga_translator/server/manual.html | 352 ---------------- manga_translator/server/ui.html | 377 ----------------- manga_translator/server/web_main.py | 629 ---------------------------- manga_translator/server/ws.proto | 36 -- manga_translator/server/ws_pb2.py | 32 -- 6 files changed, 1579 deletions(-) delete mode 100644 manga_translator/mode/web.py delete mode 100644 manga_translator/server/manual.html delete mode 100644 manga_translator/server/ui.html delete mode 100644 manga_translator/server/web_main.py delete mode 100644 manga_translator/server/ws.proto delete mode 100644 manga_translator/server/ws_pb2.py diff --git a/manga_translator/mode/web.py b/manga_translator/mode/web.py deleted file mode 100644 index 3f0ccb889..000000000 --- a/manga_translator/mode/web.py +++ /dev/null @@ -1,153 +0,0 @@ -import asyncio -import time - 
-import requests - -from manga_translator import logger, Context, Config -from manga_translator.mode.local import MangaTranslatorLocal -from manga_translator.translators import TRANSLATORS -from manga_translator.utils import add_file_logger, remove_file_logger - - -class MangaTranslatorWeb(MangaTranslatorLocal): - """ - Translator client that executes tasks on behalf of the webserver in web_main.py. - """ - - def __init__(self, params: dict = None): - super().__init__(params) - self.host = params.get('host', '127.0.0.1') - if self.host == '0.0.0.0': - self.host = '127.0.0.1' - self.port = params.get('port', 5003) - self.nonce = params.get('nonce', '') - self.ignore_errors = params.get('ignore_errors', True) - self._task_id = None - self._params = None - - async def _init_connection(self): - available_translators = [] - from ..translators import MissingAPIKeyException, get_translator - for key in TRANSLATORS: - try: - get_translator(key) - available_translators.append(key) - except MissingAPIKeyException: - pass - - data = { - 'nonce': self.nonce, - 'capabilities': { - 'translators': available_translators, - }, - } - requests.post(f'http://{self.host}:{self.port}/connect-internal', json=data) - - async def _send_state(self, state: str, finished: bool): - # wait for translation to be saved first (bad solution?) 
- finished = finished and not state == 'finished' - while True: - try: - data = { - 'task_id': self._task_id, - 'nonce': self.nonce, - 'state': state, - 'finished': finished, - } - requests.post(f'http://{self.host}:{self.port}/task-update-internal', json=data, timeout=20) - break - except Exception: - # if translation is finished server has to know - if finished: - continue - else: - break - - def _get_task(self): - try: - rjson = requests.get(f'http://{self.host}:{self.port}/task-internal?nonce={self.nonce}', - timeout=3600).json() - return rjson.get('task_id'), rjson.get('data') - except Exception: - return None, None - - async def listen(self, translation_params: dict = None): - """ - Listens for translation tasks from web server. - """ - logger.info('Waiting for translation tasks') - - await self._init_connection() - self.add_progress_hook(self._send_state) - - while True: - self._task_id, self._params = self._get_task() - if self._params and 'exit' in self._params: - break - if not (self._task_id and self._params): - await asyncio.sleep(0.1) - continue - - self.result_sub_folder = self._task_id - logger.info(f'Processing task {self._task_id}') - if translation_params is not None: - # Combine default params with params chosen by webserver - for p, default_value in translation_params.items(): - current_value = self._params.get(p) - self._params[p] = current_value if current_value is not None else default_value - if self.verbose: - # Write log file - log_file = self._result_path('log.txt') - add_file_logger(log_file) - - # final.png will be renamed if format param is set - await self.translate_path(self._result_path('input.png'), self._result_path('final.png'), - params=self._params) - print() - - if self.verbose: - remove_file_logger(log_file) - self._task_id = None - self._params = None - self.result_sub_folder = '' - - async def _run_text_translation(self, config: Config, ctx: Context): - # Run machine translation as reference for manual translation (if 
`--translator=none` is not set) - text_regions = await super()._run_text_translation(config, ctx) - - #todo: manual never set - if ctx.get('manual', False): - logger.info('Waiting for user input from manual translation') - requests.post(f'http://{self.host}:{self.port}/request-manual-internal', json={ - 'task_id': self._task_id, - 'nonce': self.nonce, - 'texts': [r.text for r in text_regions], - 'translations': [r.translation for r in text_regions], - }, timeout=20) - - # wait for at most 1 hour for manual translation - wait_until = time.time() + 3600 - while time.time() < wait_until: - ret = requests.post(f'http://{self.host}:{self.port}/get-manual-result-internal', json={ - 'task_id': self._task_id, - 'nonce': self.nonce - }, timeout=20).json() - if 'result' in ret: - manual_translations = ret['result'] - if isinstance(manual_translations, str): - if manual_translations == 'error': - return [] - i = 0 - for translation in manual_translations: - if not translation.strip(): - text_regions.pop(i) - i = i - 1 - else: - text_regions[i].translation = translation - text_regions[i].target_lang = config.translator.translator_gen.langs[-1] - i = i + 1 - break - elif 'cancel' in ret: - return 'cancel' - await asyncio.sleep(0.1) - return text_regions diff --git a/manga_translator/server/manual.html b/manga_translator/server/manual.html deleted file mode 100644 index 9e4a2befb..000000000 --- a/manga_translator/server/manual.html +++ /dev/null @@ -1,352 +0,0 @@ - - - - - - Manga Image Translator - - - - -
- - -
-
-
-

Raw Text

-

Translation

-
-
- - -
-
- -
-
-
-

Upload image by dragging and dropping images onto the dashed region

- - -
- - -
-
- -
-
- - - - - - \ No newline at end of file diff --git a/manga_translator/server/ui.html b/manga_translator/server/ui.html deleted file mode 100644 index 88dcb9fc3..000000000 --- a/manga_translator/server/ui.html +++ /dev/null @@ -1,377 +0,0 @@ - - - - - Image/Manga Translator - - - - - - - -
-
-

Image/Manga Translator

-
-
-
- -
- - -
-
-
- -
- - -
-
-
- -
- - -
-
-
- -
- - -
-
-
- -
- - -
-
-
-
-
- - -
-
-
-
{{ statusText }}
- -
-
- -
{{ statusText }}
-
-
- -
-
- Please consider supporting us by - Ko-fi - or - Patreon! -
- Source Code -
-
-
- - - diff --git a/manga_translator/server/web_main.py b/manga_translator/server/web_main.py deleted file mode 100644 index 9cc61c0f4..000000000 --- a/manga_translator/server/web_main.py +++ /dev/null @@ -1,629 +0,0 @@ -import io -import os -import sys -import re -import shutil -import mimetypes -import time -import asyncio -import subprocess -import secrets -from io import BytesIO -from PIL import Image -from aiohttp import web -from collections import deque -from imagehash import phash - -SERVER_DIR_PATH = os.path.dirname(os.path.realpath(__file__)) -BASE_PATH = os.path.dirname(os.path.dirname(SERVER_DIR_PATH)) - -# TODO: Get capabilities through api -VALID_LANGUAGES = { - 'CHS': 'Chinese (Simplified)', - 'CHT': 'Chinese (Traditional)', - 'CSY': 'Czech', - 'NLD': 'Dutch', - 'ENG': 'English', - 'FRA': 'French', - 'DEU': 'German', - 'HUN': 'Hungarian', - 'ITA': 'Italian', - 'JPN': 'Japanese', - 'KOR': 'Korean', - 'PLK': 'Polish', - 'PTB': 'Portuguese (Brazil)', - 'ROM': 'Romanian', - 'RUS': 'Russian', - 'ESP': 'Spanish', - 'TRK': 'Turkish', - 'UKR': 'Ukrainian', - 'VIN': 'Vietnamese', - 'ARA': 'Arabic', -} -# Whitelists -VALID_DETECTORS = set(['default', 'ctd']) -VALID_DIRECTIONS = set(['auto', 'h', 'v']) -VALID_TRANSLATORS = [ - 'youdao', - 'baidu', - 'google', - 'deepl', - 'deepseek', - 'papago', - 'caiyun', - 'gpt3.5', - 'gpt4', - 'nllb', - 'nllb_big', - 'sugoi', - 'jparacrawl', - 'jparacrawl_big', - 'm2m100', - 'm2m100_big', - 'qwen2', - 'qwen2_big', - 'sakura', - 'none', - 'original', -] - -MAX_ONGOING_TASKS = 1 -MAX_IMAGE_SIZE_PX = 8000**2 - -# Time to wait for web client to send a request to /task-state request -# before that web clients task gets removed from the queue -WEB_CLIENT_TIMEOUT = -1 - -# Time before finished tasks get removed from memory -FINISHED_TASK_REMOVE_TIMEOUT = 1800 - -# Auto deletes old task folders upon reaching this disk space limit -DISK_SPACE_LIMIT = 5e7 # 50mb - -# TODO: Turn into dict with translator client id as key for support of 
multiple translator clients -ONGOING_TASKS = [] -FINISHED_TASKS = [] -NONCE = '' -QUEUE = deque() -TASK_DATA = {} -TASK_STATES = {} -DEFAULT_TRANSLATION_PARAMS = {} -AVAILABLE_TRANSLATORS = [] -FORMAT = '' - -app = web.Application(client_max_size = 1024 * 1024 * 50) -routes = web.RouteTableDef() - - -def constant_compare(a, b): - if isinstance(a, str): - a = a.encode('utf-8') - if isinstance(b, str): - b = b.encode('utf-8') - if not isinstance(a, bytes) or not isinstance(b, bytes): - return False - if len(a) != len(b): - return False - - result = 0 - for x, y in zip(a, b): - result |= x ^ y - return result == 0 - -@routes.get("/") -async def index_async(request): - global AVAILABLE_TRANSLATORS - with open(os.path.join(SERVER_DIR_PATH, 'ui.html'), 'r', encoding='utf8') as fp: - content = fp.read() - if AVAILABLE_TRANSLATORS: - content = re.sub(r'(?<=translator: )(.*)(?=,)', repr(AVAILABLE_TRANSLATORS[0]), content) - content = re.sub(r'(?<=validTranslators: )(\[.*\])(?=,)', repr(AVAILABLE_TRANSLATORS), content) - return web.Response(text=content, content_type='text/html') - -@routes.get("/manual") -async def index_async(request): - with open(os.path.join(SERVER_DIR_PATH, 'manual.html'), 'r', encoding='utf8') as fp: - return web.Response(text=fp.read(), content_type='text/html') - -@routes.get("/result/{taskid}") -async def result_async(request): - global FORMAT - filepath = os.path.join('result', request.match_info.get('taskid'), f'final.{FORMAT}') - if not os.path.exists(filepath): - return web.Response(status=404, text='Not Found') - stream = BytesIO() - with open(filepath, 'rb') as f: - stream.write(f.read()) - mime = mimetypes.guess_type(filepath)[0] or 'application/octet-stream' - return web.Response(body=stream.getvalue(), content_type=mime) - -@routes.get("/result-type") -async def file_type_async(request): - global FORMAT - return web.Response(text=f'{FORMAT}') - -@routes.get("/queue-size") -async def queue_size_async(request): - return 
web.json_response({'size' : len(QUEUE)}) - -async def handle_post(request): - data = await request.post() - detection_size = None - selected_translator = 'youdao' - target_language = 'CHS' - detector = 'default' - direction = 'auto' - if 'target_lang' in data: - target_language = data['target_lang'].upper() - # TODO: move dicts to their own files to reduce load time - if target_language not in VALID_LANGUAGES: - target_language = 'CHS' - if 'detector' in data: - detector = data['detector'].lower() - if detector not in VALID_DETECTORS: - detector = 'default' - if 'direction' in data: - direction = data['direction'].lower() - if direction not in VALID_DIRECTIONS: - direction = 'auto' - if 'translator' in data: - selected_translator = data['translator'].lower() - if selected_translator not in AVAILABLE_TRANSLATORS: - selected_translator = AVAILABLE_TRANSLATORS[0] - if 'size' in data: - size_text = data['size'].upper() - if size_text == 'S': - detection_size = 1024 - elif size_text == 'M': - detection_size = 1536 - elif size_text == 'L': - detection_size = 2048 - elif size_text == 'X': - detection_size = 2560 - if 'file' in data: - file_field = data['file'] - content = file_field.file.read() - elif 'url' in data: - from aiohttp import ClientSession - async with ClientSession() as session: - async with session.get(data['url']) as resp: - if resp.status == 200: - content = await resp.read() - else: - return web.json_response({'status': 'error'}) - else: - return web.json_response({'status': 'error'}) - try: - img = Image.open(io.BytesIO(content)) - img.verify() - img = Image.open(io.BytesIO(content)) - if img.width * img.height > MAX_IMAGE_SIZE_PX: - return web.json_response({'status': 'error-too-large'}) - except Exception: - return web.json_response({'status': 'error-img-corrupt'}) - return img, detection_size, selected_translator, target_language, detector, direction - -@routes.post("/run") -async def run_async(request): - global FORMAT - x = await 
handle_post(request) - if isinstance(x, tuple): - img, size, selected_translator, target_language, detector, direction = x - else: - return x - task_id = f'{phash(img, hash_size = 16)}-{size}-{selected_translator}-{target_language}-{detector}-{direction}' - print(f'New `run` task {task_id}') - if os.path.exists(f'result/{task_id}/final.{FORMAT}'): - # Add a console output prompt to avoid the console from appearing to be stuck without execution when the translated image is hit consecutively. - print(f'Using cached result for {task_id}') - return web.json_response({'task_id' : task_id, 'status': 'successful'}) - # elif os.path.exists(f'result/{task_id}'): - # # either image is being processed or error occurred - # if task_id not in TASK_STATES: - # # error occurred - # return web.json_response({'state': 'error'}) - else: - os.makedirs(f'result/{task_id}/', exist_ok=True) - img.save(f'result/{task_id}/input.png') - QUEUE.append(task_id) - now = time.time() - TASK_DATA[task_id] = { - 'detection_size': size, - 'translator': selected_translator, - 'target_lang': target_language, - 'detector': detector, - 'direction': direction, - 'created_at': now, - 'requested_at': now, - } - TASK_STATES[task_id] = { - 'info': 'pending', - 'finished': False, - } - while True: - await asyncio.sleep(0.1) - if task_id not in TASK_STATES: - break - state = TASK_STATES[task_id] - if state['finished']: - break - return web.json_response({'task_id': task_id, 'status': 'successful' if state['finished'] else state['info']}) - - -@routes.post("/connect-internal") -async def index_async(request): - global NONCE, VALID_TRANSLATORS, AVAILABLE_TRANSLATORS - # Can be extended to allow support for multiple translators - rqjson = await request.json() - if constant_compare(rqjson.get('nonce'), NONCE): - capabilities = rqjson.get('capabilities') - if capabilities: - translators = capabilities.get('translators') - AVAILABLE_TRANSLATORS.clear() - for key in VALID_TRANSLATORS: - if key in translators: - 
AVAILABLE_TRANSLATORS.append(key) - return web.json_response({}) - -@routes.get("/task-internal") -async def get_task_async(request): - """ - Called by the translator to get a translation task. - """ - global NONCE, ONGOING_TASKS, DEFAULT_TRANSLATION_PARAMS - if constant_compare(request.rel_url.query.get('nonce'), NONCE): - if len(QUEUE) > 0 and len(ONGOING_TASKS) < MAX_ONGOING_TASKS: - task_id = QUEUE.popleft() - if task_id in TASK_DATA: - data = TASK_DATA[task_id] - for p, default_value in DEFAULT_TRANSLATION_PARAMS.items(): - current_value = data.get(p) - data[p] = current_value if current_value is not None else default_value - if not TASK_DATA[task_id].get('manual', False): - ONGOING_TASKS.append(task_id) - return web.json_response({'task_id': task_id, 'data': data}) - else: - return web.json_response({}) - else: - return web.json_response({}) - return web.json_response({}) - -async def manual_trans_task(task_id, texts, translations): - if task_id not in TASK_DATA: - TASK_DATA[task_id] = {} - if texts and translations: - TASK_DATA[task_id]['trans_request'] = [{'s': txt, 't': trans} for txt, trans in zip(texts, translations)] - else: - TASK_DATA[task_id]['trans_result'] = [] - print('Manual translation complete') - -@routes.post("/cancel-manual-request") -async def cancel_manual_translation(request): - rqjson = (await request.json()) - if 'task_id' in rqjson: - task_id = rqjson['task_id'] - if task_id in TASK_DATA: - TASK_DATA[task_id]['cancel'] = ' ' - while True: - await asyncio.sleep(0.1) - if TASK_STATES[task_id]['info'].startswith('error'): - ret = web.json_response({'task_id': task_id, 'status': 'error'}) - break - if TASK_STATES[task_id]['finished']: - ret = web.json_response({'task_id': task_id, 'status': 'cancelled'}) - break - del TASK_STATES[task_id] - del TASK_DATA[task_id] - return ret - return web.json_response({}) - -@routes.post("/post-manual-result") -async def post_translation_result(request): - rqjson = (await request.json()) - if 
'trans_result' in rqjson and 'task_id' in rqjson: - task_id = rqjson['task_id'] - if task_id in TASK_DATA: - trans_result = [r['t'] for r in rqjson['trans_result']] - TASK_DATA[task_id]['trans_result'] = trans_result - while True: - await asyncio.sleep(0.1) - if TASK_STATES[task_id]['info'].startswith('error'): - ret = web.json_response({'task_id': task_id, 'status': 'error'}) - break - if TASK_STATES[task_id]['finished']: - ret = web.json_response({'task_id': task_id, 'status': 'successful'}) - break - # remove old tasks - del TASK_STATES[task_id] - del TASK_DATA[task_id] - return ret - return web.json_response({}) - -@routes.post("/request-manual-internal") -async def request_translation_internal(request): - global NONCE - rqjson = await request.json() - if constant_compare(rqjson.get('nonce'), NONCE): - task_id = rqjson['task_id'] - if task_id in TASK_DATA: - if TASK_DATA[task_id].get('manual', False): - # manual translation - asyncio.gather(manual_trans_task(task_id, rqjson['texts'], rqjson['translations'])) - return web.json_response({}) - -@routes.post("/get-manual-result-internal") -async def get_translation_internal(request): - global NONCE - rqjson = (await request.json()) - if constant_compare(rqjson.get('nonce'), NONCE): - task_id = rqjson['task_id'] - if task_id in TASK_DATA: - if 'trans_result' in TASK_DATA[task_id]: - return web.json_response({'result': TASK_DATA[task_id]['trans_result']}) - elif 'cancel' in TASK_DATA[task_id]: - return web.json_response({'cancel':''}) - return web.json_response({}) - -@routes.get("/task-state") -async def get_task_state_async(request): - """ - Web API for getting the state of an on-going translation task from the website. - - Is periodically called from ui.html. 
Once it returns a finished state, - the web client will try to fetch the corresponding image through /result/ - """ - task_id = request.query.get('taskid') - if task_id and task_id in TASK_STATES and task_id in TASK_DATA: - state = TASK_STATES[task_id] - data = TASK_DATA[task_id] - res_dict = { - 'state': state['info'], - 'finished': state['finished'], - } - data['requested_at'] = time.time() - try: - res_dict['waiting'] = QUEUE.index(task_id) + 1 - except Exception: - res_dict['waiting'] = 0 - res = web.json_response(res_dict) - - return res - return web.json_response({'state': 'error'}) - -@routes.post("/task-update-internal") -async def post_task_update_async(request): - """ - Lets the translator update the task state it is working on. - """ - global NONCE, ONGOING_TASKS, FINISHED_TASKS - rqjson = (await request.json()) - if constant_compare(rqjson.get('nonce'), NONCE): - task_id = rqjson['task_id'] - if task_id in TASK_STATES and task_id in TASK_DATA: - TASK_STATES[task_id] = { - 'info': rqjson['state'], - 'finished': rqjson['finished'], - } - if rqjson['finished'] and not TASK_DATA[task_id].get('manual', False): - try: - i = ONGOING_TASKS.index(task_id) - FINISHED_TASKS.append(ONGOING_TASKS.pop(i)) - except ValueError: - pass - print(f'Task state {task_id} to {TASK_STATES[task_id]}') - return web.json_response({}) - -@routes.post("/submit") -async def submit_async(request): - """Adds new task to the queue. 
Called by web client in ui.html when submitting an image.""" - global FORMAT - x = await handle_post(request) - if isinstance(x, tuple): - img, size, selected_translator, target_language, detector, direction = x - else: - return x - task_id = f'{phash(img, hash_size = 16)}-{size}-{selected_translator}-{target_language}-{detector}-{direction}' - now = time.time() - print(f'New `submit` task {task_id}') - if os.path.exists(f'result/{task_id}/final.{FORMAT}'): - TASK_STATES[task_id] = { - 'info': 'saved', - 'finished': True, - } - TASK_DATA[task_id] = { - 'detection_size': size, - 'translator': selected_translator, - 'target_lang': target_language, - 'detector': detector, - 'direction': direction, - 'created_at': now, - 'requested_at': now, - } - elif task_id not in TASK_DATA or task_id not in TASK_STATES: - os.makedirs(f'result/{task_id}/', exist_ok=True) - img.save(f'result/{task_id}/input.png') - QUEUE.append(task_id) - TASK_STATES[task_id] = { - 'info': 'pending', - 'finished': False, - } - TASK_DATA[task_id] = { - 'detection_size': size, - 'translator': selected_translator, - 'target_lang': target_language, - 'detector': detector, - 'direction': direction, - 'created_at': now, - 'requested_at': now, - } - return web.json_response({'task_id': task_id, 'status': 'successful'}) - -@routes.post("/manual-translate") -async def manual_translate_async(request): - x = await handle_post(request) - if isinstance(x, tuple): - img, size, selected_translator, target_language, detector, direction = x - else: - return x - task_id = secrets.token_hex(16) - print(f'New `manual-translate` task {task_id}') - os.makedirs(f'result/{task_id}/', exist_ok=True) - img.save(f'result/{task_id}/input.png') - now = time.time() - QUEUE.append(task_id) - # TODO: Add form fields to manual translate website - TASK_DATA[task_id] = { - # 'detection_size': size, - 'manual': True, - # 'detector': detector, - # 'direction': direction, - 'created_at': now, - 'requested_at': now, - } - 
print(TASK_DATA[task_id]) - TASK_STATES[task_id] = { - 'info': 'pending', - 'finished': False, - } - while True: - await asyncio.sleep(1) - if 'trans_request' in TASK_DATA[task_id]: - return web.json_response({'task_id' : task_id, 'status': 'pending', 'trans_result': TASK_DATA[task_id]['trans_request']}) - if TASK_STATES[task_id]['info'].startswith('error'): - break - if TASK_STATES[task_id]['finished']: - # no texts detected - return web.json_response({'task_id' : task_id, 'status': 'successful'}) - return web.json_response({'task_id' : task_id, 'status': 'error'}) - -app.add_routes(routes) - - -def generate_nonce(): - return secrets.token_hex(16) - -def start_translator_client_proc(host: str, port: int, nonce: str, params: dict): - os.environ['MT_WEB_NONCE'] = nonce - cmds = [ - sys.executable, - '-m', 'manga_translator', - '--mode', 'web_client', - '--host', host, - '--port', str(port), - ] - if params.get('use_gpu', False): - cmds.append('--use-gpu') - if params.get('use_gpu_limited', False): - cmds.append('--use-gpu-limited') - if params.get('ignore_errors', False): - cmds.append('--ignore-errors') - if params.get('verbose', False): - cmds.append('--verbose') - - proc = subprocess.Popen(cmds, cwd=BASE_PATH) - return proc - -async def start_async_app(host: str, port: int, nonce: str, translation_params: dict = None): - global NONCE, DEFAULT_TRANSLATION_PARAMS, FORMAT - # Secret to secure communication between webserver and translator clients - NONCE = nonce - DEFAULT_TRANSLATION_PARAMS = translation_params or {} - FORMAT = DEFAULT_TRANSLATION_PARAMS.get('format') or 'jpg' - DEFAULT_TRANSLATION_PARAMS['format'] = FORMAT - - # Schedule web server to run - runner = web.AppRunner(app) - await runner.setup() - site = web.TCPSite(runner, host, port) - await site.start() - print(f'Serving up app on http://{host}:{port}') - - return runner, site - -async def dispatch(host: str, port: int, nonce: str = None, translation_params: dict = None): - global ONGOING_TASKS, 
FINISHED_TASKS - - if nonce is None: - nonce = os.getenv('MT_WEB_NONCE', generate_nonce()) - - # Start web service - runner, site = await start_async_app(host, port, nonce, translation_params) - - # Create client process that will execute translation tasks - print() - client_process = start_translator_client_proc(host, port, nonce, translation_params) - - # Get all prior finished tasks - os.makedirs('result/', exist_ok=True) - for f in os.listdir('result/'): - if os.path.isdir(f'result/{f}') and re.search(r'^\w+-\d+-\w+-\w+-\w+-\w+$', f): - FINISHED_TASKS.append(f) - FINISHED_TASKS = list(sorted(FINISHED_TASKS, key=lambda task_id: os.path.getmtime(f'result/{task_id}'))) - - try: - while True: - await asyncio.sleep(1) - - # Restart client if OOM or similar errors occurred - if client_process.poll() is not None: - # if client_process.poll() == 0: - # break - print('Restarting translator process') - if len(ONGOING_TASKS) > 0: - tid = ONGOING_TASKS.pop(0) - state = TASK_STATES[tid] - state['info'] = 'error' - state['finished'] = True - client_process = start_translator_client_proc(host, port, nonce, translation_params) - - # Filter queued and finished tasks - now = time.time() - to_del_task_ids = set() - for tid, s in TASK_STATES.items(): - d = TASK_DATA[tid] - # Remove finished tasks after 30 minutes - if s['finished'] and now - d['created_at'] > FINISHED_TASK_REMOVE_TIMEOUT: - to_del_task_ids.add(tid) - - # Remove queued tasks without web client - elif WEB_CLIENT_TIMEOUT >= 0: - if tid not in ONGOING_TASKS and not s['finished'] and now - d['requested_at'] > WEB_CLIENT_TIMEOUT: - print('REMOVING TASK', tid) - to_del_task_ids.add(tid) - try: - QUEUE.remove(tid) - except Exception: - pass - - for tid in to_del_task_ids: - del TASK_STATES[tid] - del TASK_DATA[tid] - - # Delete oldest folder if disk space is becoming sparse - if DISK_SPACE_LIMIT >= 0 and len(FINISHED_TASKS) > 0 and shutil.disk_usage('result/')[2] < DISK_SPACE_LIMIT: - tid = FINISHED_TASKS.pop(0) - try: - 
p = f'result/{tid}' - print(f'REMOVING OLD TASK RESULT: {p}') - shutil.rmtree(p) - except FileNotFoundError: - pass - except: - if client_process.poll() is None: - # client_process.terminate() - client_process.kill() - await runner.cleanup() - raise - -if __name__ == '__main__': - from ..args import parser - - args = parser.parse_args() - - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - try: - runner, site = loop.run_until_complete(dispatch(args.host, args.port, translation_params=vars(args))) - except KeyboardInterrupt: - pass diff --git a/manga_translator/server/ws.proto b/manga_translator/server/ws.proto deleted file mode 100644 index 61367b988..000000000 --- a/manga_translator/server/ws.proto +++ /dev/null @@ -1,36 +0,0 @@ -syntax = "proto3"; - -package ws; - -message NewTask { - string id = 1; - - string source_image = 2; - - string target_language = 3; - string detector = 4; - string direction = 5; - string translator = 6; - string size = 7; - - string translation_mask = 8; -} - -message Status { - string id = 1; - string status = 2; -} - -message FinishTask { - string id = 1; - bool success = 2; - bool has_translation_mask = 3; -} - -message WebSocketMessage { - oneof message { - NewTask new_task = 1; - Status status = 2; - FinishTask finish_task = 3; - } -} diff --git a/manga_translator/server/ws_pb2.py b/manga_translator/server/ws_pb2.py deleted file mode 100644 index 7fcfddfd8..000000000 --- a/manga_translator/server/ws_pb2.py +++ /dev/null @@ -1,32 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: ws.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x08ws.proto\x12\x02ws\"\xe3\x01\n\x07NewTask\x12\n\n\x02id\x18\x01 \x01(\t\x12\x14\n\x0csource_image\x18\x02 \x01(\t\x12\x1b\n\x13source_image_bearer\x18\t \x01(\t\x12\x17\n\x0ftarget_language\x18\x03 \x01(\t\x12\x10\n\x08\x64\x65tector\x18\x04 \x01(\t\x12\x11\n\tdirection\x18\x05 \x01(\t\x12\x12\n\ntranslator\x18\x06 \x01(\t\x12\x0c\n\x04size\x18\x07 \x01(\t\x12\x18\n\x10translation_mask\x18\x08 \x01(\t\x12\x1f\n\x17translation_mask_bearer\x18\n \x01(\t\"$\n\x06Status\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06status\x18\x02 \x01(\t\"G\n\nFinishTask\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\x12\x1c\n\x14has_translation_mask\x18\x03 \x01(\x08\"\x83\x01\n\x10WebSocketMessage\x12\x1f\n\x08new_task\x18\x01 \x01(\x0b\x32\x0b.ws.NewTaskH\x00\x12\x1c\n\x06status\x18\x02 \x01(\x0b\x32\n.ws.StatusH\x00\x12%\n\x0b\x66inish_task\x18\x03 \x01(\x0b\x32\x0e.ws.FinishTaskH\x00\x42\t\n\x07messageb\x06proto3') - -_globals = globals() -_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'ws_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - _globals['_NEWTASK']._serialized_start=17 - _globals['_NEWTASK']._serialized_end=244 - _globals['_STATUS']._serialized_start=246 - _globals['_STATUS']._serialized_end=282 - _globals['_FINISHTASK']._serialized_start=284 - _globals['_FINISHTASK']._serialized_end=355 - _globals['_WEBSOCKETMESSAGE']._serialized_start=358 - _globals['_WEBSOCKETMESSAGE']._serialized_end=489 -# 
@@protoc_insertion_point(module_scope) From 75f07f4a8a2d44f45816294f9902025374b443fc Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:55:54 +0100 Subject: [PATCH 35/75] changed queue a bit --- server/instance.py | 8 ++++---- server/myqueue.py | 39 +++++++++++++++++++-------------------- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/server/instance.py b/server/instance.py index e844289dd..90d6c4a1a 100644 --- a/server/instance.py +++ b/server/instance.py @@ -1,6 +1,5 @@ -import pickle from asyncio import Event, Lock -from typing import List, Optional +from typing import List from PIL import Image from pydantic import BaseModel @@ -49,10 +48,11 @@ async def find_executor(self) -> ExecutorInstance: instance.busy = True return instance - def free_executor(self, instance: ExecutorInstance): + async def free_executor(self, instance: ExecutorInstance): + from server.myqueue import task_queue instance.free_executor() self.event.set() self.event.clear() - + await task_queue.update_event() executor_instances: Executors = Executors() diff --git a/server/myqueue.py b/server/myqueue.py index b9174d936..267a97302 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -1,9 +1,8 @@ import asyncio -from typing import List, Dict +from typing import List, Dict, Optional from fastapi import HTTPException from starlette.requests import Request -from starlette.responses import StreamingResponse from manga_translator import Context from server.instance import executor_instances @@ -17,16 +16,19 @@ def __init__(self): def add_task(self, task): self.queue.append(task) - def get_pos(self, task): - return self.queue.index(task) - - def update_event(self): + def get_pos(self, task) -> Optional[int]: + try: + return self.queue.index(task) + except ValueError: + return None + async def update_event(self): + self.queue = [ctx for ctx in self.queue if not await is_client_disconnected(ctx.req)] 
self.queue_event.set() self.queue_event.clear() - def remove(self, task): + async def remove(self, task): self.queue.remove(task) - self.update_event() + await self.update_event() async def wait_for_event(self): await self.queue_event.wait() @@ -42,18 +44,23 @@ async def wait_in_queue(task, notify: NotifyType): """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. when done the item will be removed from the queue and result will be returned""" while True: queue_pos = task_queue.get_pos(task) + if not queue_pos: + if notify: + return + else: + raise HTTPException(500, detail="User is no longer connected") # just for the logs if notify: notify(3, str(queue_pos).encode('utf-8')) if queue_pos < executor_instances.free_executors(): if await is_client_disconnected(task.req): - task_queue.remove(task) - task_queue.update_event() + await task_queue.update_event() if notify: return else: raise HTTPException(500, detail="User is no longer connected") #just for the logs + instance = await executor_instances.find_executor() - task_queue.remove(task) + await task_queue.remove(task) if notify: notify(4, b"") if notify: @@ -61,19 +68,11 @@ async def wait_in_queue(task, notify: NotifyType): else: result = await instance.sent(task.image, task.config) - executor_instances.free_executor(instance) - task_queue.update_event() + await executor_instances.free_executor(instance) if notify: return else: return result else: - if await is_client_disconnected(task.req): - task_queue.remove(task) - task_queue.update_event() - if notify: - return - else: - raise HTTPException(500, detail="User is no longer connected") #just for the logs await task_queue.wait_for_event() \ No newline at end of file From bba5fc64a1b895080b9cdd39cfd765b9fb27b85f Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 18:17:49 +0100 Subject: [PATCH 36/75] custom queue 
element with deconstructor --- server/main.py | 28 ++++++----------- server/myqueue.py | 59 ++++++++++++++++++++++++++---------- server/request_extraction.py | 24 ++++++--------- 3 files changed, 62 insertions(+), 49 deletions(-) diff --git a/server/main.py b/server/main.py index 773908f41..5797a610e 100644 --- a/server/main.py +++ b/server/main.py @@ -1,24 +1,22 @@ import io import os import secrets +import shutil import signal import subprocess import sys from argparse import Namespace -from builtins import bytes -from typing import Union -from fastapi import FastAPI, Request, HTTPException, Header, Form, UploadFile +from fastapi import FastAPI, Request, HTTPException, Header, UploadFile from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import StreamingResponse, JSONResponse, HTMLResponse from pathlib import Path from pydantic import BaseModel -from manga_translator import Config from server.instance import ExecutorInstance, executor_instances from server.myqueue import task_queue -from server.request_extraction import get_ctx, while_streaming, TranslateRequest +from server.request_extraction import get_ctx, while_streaming from server.to_json import to_json, Translation app = FastAPI() @@ -54,22 +52,11 @@ def transform_to_image(ctx): def transform_to_json(ctx): return str(to_json(ctx)).encode("utf-8") -async def parse_request( - req: Request, - image: Union[str, bytes] = Form(...), - config: str = Form(...), -): - if req.headers.get('content-type').startswith('multipart'): - config = json.loads(config) - return TranslateRequest(image=image, config=Config(**config)) - else: - return None @app.post("/translate/json", response_model=list[Translation], response_description="json strucure inspired by the ichigo translator extension") async def json(req: Request): ctx = await get_ctx(req) - json = to_json(ctx) - return JSONResponse(content=json) + return JSONResponse(content=to_json(ctx)) @app.post("/translate/bytes", 
response_class=StreamingResponse, response_description="custom byte structure following the stream encoding, but with json first and then the image bytes as chunks") async def bytes(req: Request): @@ -154,13 +141,16 @@ def prepare(args): nonce = args.nonce if args.start_instance: return start_translator_client_proc(args.host, args.port + 1, nonce, args) + folder_name= "upload-cache" + if os.path.exists(folder_name): + shutil.rmtree(folder_name) + os.makedirs(folder_name) #todo: restart if crash #todo: cache results #todo: cleanup cache -#todo: store images while in queue + #todo: add docs -#todo: index doesnt work properly in the list(is_client_disconnected is not executed immediatly/does not update the index) #todo: enable config in html pages if __name__ == '__main__': diff --git a/server/myqueue.py b/server/myqueue.py index 267a97302..77f681d60 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -1,32 +1,64 @@ import asyncio -from typing import List, Dict, Optional +import os +from typing import List, Optional +from PIL import Image from fastapi import HTTPException -from starlette.requests import Request +from fastapi.requests import Request -from manga_translator import Context +from manga_translator import Config from server.instance import executor_instances from server.sent_data_internal import NotifyType +class QueueElement: + req: Request + image: Image.Image | str + config: Config + + def __init__(self, req: Request, image: Image.Image, config: Config, length): + self.req = req + if length > 10: + #todo: store image in "upload-cache" folder + self.image = image + else: + self.image = image + self.config = config + + def get_image(self)-> Image: + if isinstance(self.image, str): + return Image.open(self.image) + else: + return self.image + + def __del__(self): + if isinstance(self.image, str): + os.remove(self.image) + + async def is_client_disconnected(self) -> bool: + if await self.req.is_disconnected(): + return True + return False + + class 
TaskQueue: def __init__(self): - self.queue: List[Context] = [] + self.queue: List[QueueElement] = [] self.queue_event: asyncio.Event = asyncio.Event() - def add_task(self, task): + def add_task(self, task: QueueElement): self.queue.append(task) - def get_pos(self, task) -> Optional[int]: + def get_pos(self, task: QueueElement) -> Optional[int]: try: return self.queue.index(task) except ValueError: return None async def update_event(self): - self.queue = [ctx for ctx in self.queue if not await is_client_disconnected(ctx.req)] + self.queue = [task for task in self.queue if not await task.is_client_disconnected()] self.queue_event.set() self.queue_event.clear() - async def remove(self, task): + async def remove(self, task: QueueElement): self.queue.remove(task) await self.update_event() @@ -35,16 +67,11 @@ async def wait_for_event(self): task_queue = TaskQueue() -async def is_client_disconnected(request: Request) -> bool: - if await request.is_disconnected(): - return True - return False - -async def wait_in_queue(task, notify: NotifyType): +async def wait_in_queue(task: QueueElement, notify: NotifyType): """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. 
when done the item will be removed from the queue and result will be returned""" while True: queue_pos = task_queue.get_pos(task) - if not queue_pos: + if queue_pos is None: if notify: return else: @@ -52,7 +79,7 @@ async def wait_in_queue(task, notify: NotifyType): if notify: notify(3, str(queue_pos).encode('utf-8')) if queue_pos < executor_instances.free_executors(): - if await is_client_disconnected(task.req): + if await task.is_client_disconnected(): await task_queue.update_event() if notify: return diff --git a/server/request_extraction.py b/server/request_extraction.py index 080d84cc4..decb64bff 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -12,7 +12,7 @@ from starlette.responses import StreamingResponse from manga_translator import Config, Context -from server.myqueue import task_queue, wait_in_queue +from server.myqueue import task_queue, wait_in_queue, QueueElement from server.streaming import notify, stream class TranslateRequest(BaseModel): @@ -61,29 +61,25 @@ async def multi_content_type(request: Request): async def get_ctx(req: Request): data, img = await multi_content_type(req) - ctx = Context() - ctx.image = await to_pil_image(img) - ctx.config = data - ctx.req = req - task_queue.add_task(ctx) + image = await to_pil_image(img) + task = QueueElement(req, image, data, 0) - return await wait_in_queue(ctx, None) + task_queue.add_task(task) + + return await wait_in_queue(task, None) async def while_streaming(req: Request, transform): data, img = await multi_content_type(req) - ctx = Context() - ctx.image = await to_pil_image(img) - ctx.config = data - ctx.req = req - task_queue.add_task(ctx) + image = await to_pil_image(img) + task = QueueElement(req, image, data, 0) + task_queue.add_task(task) messages = asyncio.Queue() def notify_internal(code: int, data: bytes) -> None: notify(code, data, transform, messages) - streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - 
asyncio.create_task(wait_in_queue(ctx, notify_internal)) + asyncio.create_task(wait_in_queue(task, notify_internal)) return streaming_response \ No newline at end of file From 259b807266406edf65139b886ecd93f174f2222d Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Fri, 22 Nov 2024 12:14:05 +0100 Subject: [PATCH 37/75] byte response with cpp&rs examples --- manga_translator/utils/textblock.py | 2 +- server/main.py | 13 ++-- server/response.cpp | 116 +++++++++++++++++++++++++++ server/response.rs | 101 ++++++++++++++++++++++++ server/to_json.py | 117 +++++++++++++++++----------- 5 files changed, 297 insertions(+), 52 deletions(-) create mode 100644 server/response.cpp create mode 100644 server/response.rs diff --git a/manga_translator/utils/textblock.py b/manga_translator/utils/textblock.py index de3c8a4cd..358ce4da5 100644 --- a/manga_translator/utils/textblock.py +++ b/manga_translator/utils/textblock.py @@ -46,7 +46,7 @@ def __init__(self, lines: List[Tuple[int, int, int, int]], texts: List[str] = None, language: str = 'unknown', font_size: float = -1, - angle: int = 0, + angle: float = 0, translation: str = "", fg_color: Tuple[float] = (0, 0, 0), bg_color: Tuple[float] = (0, 0, 0), diff --git a/server/main.py b/server/main.py index 5797a610e..44f7b77a3 100644 --- a/server/main.py +++ b/server/main.py @@ -17,7 +17,7 @@ from server.instance import ExecutorInstance, executor_instances from server.myqueue import task_queue from server.request_extraction import get_ctx, while_streaming -from server.to_json import to_json, Translation +from server.to_json import to_translation, TranslationResponse app = FastAPI() nonce = None @@ -50,17 +50,20 @@ def transform_to_image(ctx): return img_byte_arr.getvalue() def transform_to_json(ctx): - return str(to_json(ctx)).encode("utf-8") + return to_translation(ctx).model_dump_json().encode("utf-8") +def transform_to_bytes(ctx): + return to_translation(ctx).to_bytes() 
-@app.post("/translate/json", response_model=list[Translation], response_description="json strucure inspired by the ichigo translator extension") +@app.post("/translate/json", response_model=TranslationResponse, response_description="json strucure inspired by the ichigo translator extension") async def json(req: Request): ctx = await get_ctx(req) - return JSONResponse(content=to_json(ctx)) + return to_translation(ctx) @app.post("/translate/bytes", response_class=StreamingResponse, response_description="custom byte structure following the stream encoding, but with json first and then the image bytes as chunks") async def bytes(req: Request): ctx = await get_ctx(req) + return StreamingResponse(content=to_translation(ctx).to_bytes()) @app.post("/translate/image", response_description="the result image", response_class=StreamingResponse) async def image(req: Request) -> StreamingResponse: @@ -77,7 +80,7 @@ async def stream_json(req: Request) -> StreamingResponse: @app.post("/translate/bytes/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") async def stream_bytes(req: Request)-> StreamingResponse: - return await while_streaming(req, transform_to_image) + return await while_streaming(req, transform_to_bytes) @app.post("/translate/image/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") async def stream_image(req: Request) -> StreamingResponse: diff --git a/server/response.cpp b/server/response.cpp new file mode 100644 index 000000000..3595856a3 --- /dev/null +++ b/server/response.cpp @@ -0,0 +1,116 @@ 
+#include +#include +#include +#include +#include +#include +#include + +struct Color { + uint8_t fg[3]; + uint8_t bg[3]; +}; + +struct Translation { + uint32_t min_x; + uint32_t min_y; + uint32_t max_x; + uint32_t max_y; + bool is_bulleted_list; + uint32_t angle; + float prob; + Color text_color; + std::unordered_map text; + std::vector background; +}; + +struct TranslationResponse { + std::vector translations; +}; + +uint32_t read_u32(const std::vector& bytes, size_t& offset) { + uint32_t value; + std::memcpy(&value, &bytes[offset], sizeof(value)); + value = value; + offset += 4; + return value; +} + +float read_f32(const std::vector& bytes, size_t& offset) { + float value; + std::memcpy(&value, &bytes[offset], sizeof(value)); + value = *reinterpret_cast(&value); + offset += 4; + return value; +} + +uint8_t read_u8(const std::vector& bytes, size_t& offset) { + uint8_t value = bytes[offset]; + offset += 1; + return value; +} + +bool read_bool(const std::vector& bytes, size_t& offset) { + bool value = bytes[offset] != 0; + offset += 1; + return value; +} + +std::vector read_chunk(const std::vector& bytes, size_t& offset) { + uint32_t size = read_u32(bytes, offset); + std::vector value(bytes.begin() + offset, bytes.begin() + offset + size); + offset += size; + return value; +} + +std::string read_str(const std::vector& bytes, size_t& offset) { + std::vector chunk = read_chunk(bytes, offset); + std::string result(chunk.begin(), chunk.end()); + return result; +} + +std::unordered_map read_map(const std::vector& bytes, size_t& offset) { + uint32_t count = read_u32(bytes, offset); + std::unordered_map map; + for (uint32_t i = 0; i < count; ++i) { + std::string key = read_str(bytes, offset); + std::string value = read_str(bytes, offset); + map[key] = value; + } + return map; +} + +Translation from_bytes(const std::vector& bytes, size_t& offset) { + Translation translation; + translation.min_x = read_u32(bytes, offset); + translation.min_y = read_u32(bytes, offset); + 
translation.max_x = read_u32(bytes, offset); + translation.max_y = read_u32(bytes, offset); + translation.is_bulleted_list = read_bool(bytes, offset); + translation.angle = read_u32(bytes, offset); + translation.prob = read_f32(bytes, offset); + for (int i = 0; i < 3; ++i) { + translation.text_color.fg[i] = read_u8(bytes, offset); + translation.text_color.bg[i] = read_u8(bytes, offset); + } + translation.text = read_map(bytes, offset); + translation.background = read_chunk(bytes, offset); + return translation; +} + +TranslationResponse from_bytes_response(const std::vector& bytes) { + size_t offset = 0; + uint32_t count = read_u32(bytes, offset); + TranslationResponse response; + + for (uint32_t i = 0; i < count; ++i) { + response.translations.push_back(from_bytes(bytes, offset)); + } + return response; +} + +int main() { + std::vector bytes = {/* byte data here */}; + TranslationResponse data = from_bytes_response(bytes); + return 0; +} diff --git a/server/response.rs b/server/response.rs new file mode 100644 index 000000000..81fa07810 --- /dev/null +++ b/server/response.rs @@ -0,0 +1,101 @@ +#[derive(Debug)] +struct Color { + fg: [u8; 3], + bg: [u8; 3], +} + +#[derive(Debug)] +struct Translation { + min_x: u32, + min_y: u32, + max_x: u32, + max_y: u32, + is_bulleted_list: bool, + angle: u32, + prob: f32, + text_color: Color, + text: HashMap, + background: Vec, +} + +#[derive(Debug)] +struct TranslationResponse { + translations: Vec, +} + +impl TranslationResponse { + fn from_bytes(bytes: &[u8]) -> Self { + let mut offset = 0; + let v = (0..read_u32(bytes, &mut offset)) + .map(|_| Translation::from_bytes(bytes, &mut offset)) + .collect::>(); + Self { translations: v } + } +} + +fn read_u32(bytes: &[u8], offset: &mut usize) -> u32 { + let value = u32::from_le_bytes(bytes[*offset..*offset + 4].try_into().unwrap()); + *offset += 4; + value +} + +impl Translation { + fn from_bytes(bytes: &[u8], offset: &mut usize) -> Self { + let read_f32 = |bytes: &[u8], offset: &mut 
usize| -> f32 { + let value = f32::from_le_bytes(bytes[*offset..*offset + 4].try_into().unwrap()); + *offset += 4; + value + }; + + let read_u8 = |bytes: &[u8], offset: &mut usize| -> u8 { + let value = bytes[*offset]; + *offset += 1; + value + }; + + let read_bool = |bytes: &[u8], offset: &mut usize| -> bool { + let value = bytes[*offset] != 0; + *offset += 1; + value + }; + + let read_chunk = |bytes: &[u8], offset: &mut usize| -> Vec { + let size = read_u32(bytes, offset); + let value = &bytes[*offset..*offset + size as usize]; + *offset += size as usize; + value.to_vec() + }; + let read_str = |bytes: &[u8], offset: &mut usize| -> String { + String::from_utf8(read_chunk(bytes, offset)).expect("Invalid UTF-8") + }; + let read_map = |bytes: &[u8], offset: &mut usize| -> HashMap { + (0..read_u32(bytes, offset)) + .into_iter() + .map(|_| (read_str(bytes, offset), read_str(bytes, offset))) + .collect::>() + }; + Self { + min_x: read_u32(bytes, offset), + min_y: read_u32(bytes, offset), + max_x: read_u32(bytes, offset), + max_y: read_u32(bytes, offset), + is_bulleted_list: read_bool(bytes, offset), + angle: read_u32(bytes, offset), + prob: read_f32(bytes, offset), + text_color: Color { + fg: [ + read_u8(bytes, offset), + read_u8(bytes, offset), + read_u8(bytes, offset), + ], + bg: [ + read_u8(bytes, offset), + read_u8(bytes, offset), + read_u8(bytes, offset), + ], + }, + text: read_map(bytes, offset), + background: read_chunk(bytes, offset), + } + } +} diff --git a/server/to_json.py b/server/to_json.py index 911718d8d..f6bcdc6e2 100644 --- a/server/to_json.py +++ b/server/to_json.py @@ -1,8 +1,10 @@ import base64 +import struct from typing import Dict, List import cv2 -from pydantic import BaseModel +import numpy as np +from pydantic import BaseModel, Field from manga_translator import Context from manga_translator.utils import TextBlock @@ -22,60 +24,83 @@ #img_rendered: array #mask_raw: array #mask:array -def to_json(ctx: Context): + +class TextColor(BaseModel): + 
fg: tuple[int, int, int] + bg: tuple[int, int, int] + +class Translation(BaseModel): + minX: int + minY: int + maxX: int + maxY: int + is_bulleted_list: bool + angle: float | int + prob: float + text_color: TextColor + text: dict[str, str] + background: np.ndarray = Field( + ..., + description="Background image encoded as a base64 string", + examples=["data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA..."] + ) + + class Config: + arbitrary_types_allowed = True + json_encoders = { + np.ndarray: lambda array: Translation.encode_background(array) + } + + @staticmethod + def encode_background(array: np.ndarray) -> str: + retval, buffer = cv2.imencode('.png', array) + jpg_as_text = base64.b64encode(buffer).decode("utf-8") + background = f"data:image/png;base64,{jpg_as_text}" + return background + + def to_bytes(self): + coords_bytes = struct.pack('4i', self.minX, self.minY, self.maxX, self.maxY) + is_bulleted_list_byte = struct.pack('?', self.is_bulleted_list) + angle_bytes = struct.pack('f', float(self.angle) if isinstance(self.angle, int) else self.angle) + prob_bytes = struct.pack('f', self.prob) + fg = struct.pack('3B', self.text_color.fg[0], self.text_color.fg[1], self.text_color.fg[2]) + bg = struct.pack('3B', self.text_color.bg[0], self.text_color.bg[1], self.text_color.bg[2]) + text_bytes = struct.pack('i', len(self.text.items())) + for key, value in self.text.items(): + text_bytes += struct.pack('I', len(key.encode('utf-8'))) + key.encode('utf-8') + text_bytes += struct.pack('I', len(value.encode('utf-8'))) + value.encode('utf-8') + background_bytes = struct.pack('I', len(self.background.tobytes())) + self.background.tobytes() + return coords_bytes +is_bulleted_list_byte+ angle_bytes+prob_bytes+fg + bg + text_bytes + background_bytes + +class TranslationResponse(BaseModel): + translations: List[Translation] + + def to_bytes(self): + items= [v.to_bytes() for v in self.translations] + return struct.pack('i', len(items)) + b''.join(items) + +def to_translation(ctx: 
Context) -> TranslationResponse: text_regions:list[TextBlock] = ctx.text_regions inpaint = ctx.img_inpainted translations:Dict[str, List[str]] = ctx.translations results = [] - if 'overlay_ext' in ctx: - #todo: unreachable - overlay_ext = ctx['overlay_ext'] - else: - overlay_ext = 'jpg' for i, blk in enumerate(text_regions): minX, minY, maxX, maxY = blk.xyxy + text_region = text_regions[i] if 'translations' in ctx: trans = {key: value[i] for key, value in translations.items()} else: trans = {} - trans["originalText"] = text_regions[i].text - if inpaint is not None: - overlay = inpaint[minY:maxY, minX:maxX] - - retval, buffer = cv2.imencode('.' + overlay_ext, overlay) - jpg_as_text = base64.b64encode(buffer) - background = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") - else: - background = None - text_region = text_regions[i] + trans[text_region.source_lang] = text_regions[i].text text_region.adjust_bg_color = False color1, color2 = text_region.get_font_colors() - - results.append({ - 'text': trans, - 'minX': int(minX), - 'minY': int(minY), - 'maxX': int(maxX), - 'maxY': int(maxY), - 'textColor': { - 'fg': color1.tolist(), - 'bg': color2.tolist() - }, - 'language': text_regions[i].source_lang, - 'background': background - }) - return results - -class TextColor(BaseModel): - fg: tuple[int, int, int] - bg: tuple[int, int, int] - -class Translation(BaseModel): - text: dict[str, str] - minX: int - minY: int - maxX: int - maxY: int - textColor:TextColor - language: str - background: str + results.append(Translation(text=trans, + minX=int(minX),minY=int(minY),maxX=int(maxX),maxY=int(maxY), + background=inpaint[minY:maxY, minX:maxX], + is_bulleted_list=text_region.is_bulleted_list, + text_color=TextColor(fg=color1.tolist(), bg=color2.tolist()), + prob=text_region.prob, + angle=text_region.angle + )) + #todo: background angle + return TranslationResponse(translations=results) From cbf902d408db1fa8d805bf0258c9b9be01fc3b7f Mon Sep 17 00:00:00 2001 
From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Fri, 22 Nov 2024 12:15:38 +0100 Subject: [PATCH 38/75] move files into examples --- {server => examples}/response.cpp | 0 {server => examples}/response.rs | 0 server/main.py | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) rename {server => examples}/response.cpp (100%) rename {server => examples}/response.rs (100%) diff --git a/server/response.cpp b/examples/response.cpp similarity index 100% rename from server/response.cpp rename to examples/response.cpp diff --git a/server/response.rs b/examples/response.rs similarity index 100% rename from server/response.rs rename to examples/response.rs diff --git a/server/main.py b/server/main.py index 44f7b77a3..3beef79c5 100644 --- a/server/main.py +++ b/server/main.py @@ -60,7 +60,7 @@ async def json(req: Request): ctx = await get_ctx(req) return to_translation(ctx) -@app.post("/translate/bytes", response_class=StreamingResponse, response_description="custom byte structure following the stream encoding, but with json first and then the image bytes as chunks") +@app.post("/translate/bytes", response_class=StreamingResponse, response_description="custom byte structure for decoding look at examples in 'examples/response.*'") async def bytes(req: Request): ctx = await get_ctx(req) return StreamingResponse(content=to_translation(ctx).to_bytes()) From a2f98e6b171a35e37c3ac94fe3e4adbe573617a8 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Fri, 22 Nov 2024 15:44:01 +0100 Subject: [PATCH 39/75] fix docs --- server/index.html | 2 +- server/main.py | 98 +++++++++++++++++++++++------------- server/manual.html | 2 +- server/request_extraction.py | 40 ++++----------- server/sent_data_internal.py | 1 - server/to_json.py | 10 ++-- 6 files changed, 81 insertions(+), 72 deletions(-) diff --git a/server/index.html b/server/index.html index edf617609..fd1a49a11 100644 --- a/server/index.html +++ 
b/server/index.html @@ -357,7 +357,7 @@

Image/Manga Translator

} const uploadWithProgress = async (formData) => { try { - const response = await fetch(`${BASE_URI}translate/image/stream`, { + const response = await fetch(`${BASE_URI}translate/with-form/image/stream`, { method: 'POST', body: formData, }); diff --git a/server/main.py b/server/main.py index 3beef79c5..5563f0ce5 100644 --- a/server/main.py +++ b/server/main.py @@ -7,16 +7,15 @@ import sys from argparse import Namespace -from fastapi import FastAPI, Request, HTTPException, Header, UploadFile +from fastapi import FastAPI, Request, HTTPException, Header, UploadFile, File, Form from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import StreamingResponse, JSONResponse, HTMLResponse +from fastapi.responses import StreamingResponse, HTMLResponse from pathlib import Path -from pydantic import BaseModel - +from manga_translator import Config from server.instance import ExecutorInstance, executor_instances from server.myqueue import task_queue -from server.request_extraction import get_ctx, while_streaming +from server.request_extraction import get_ctx, while_streaming, TranslateRequest from server.to_json import to_translation, TranslationResponse app = FastAPI() @@ -30,14 +29,7 @@ allow_headers=["*"], ) -class TranslateRequestForm(BaseModel): - """This request can be a multipart or a json request""" - image: UploadFile - """can be a url, base64 encoded image or a multipart image""" - config: str - """in case it is a multipart this needs to be a string(json.stringify)""" - -@app.post("/register", response_description="no response") +@app.post("/register", response_description="no response", tags=["internal-api"]) async def register_instance(instance: ExecutorInstance, req: Request, req_nonce: str = Header(alias="X-Nonce")): if req_nonce != nonce: raise HTTPException(401, detail="Invalid nonce") @@ -55,48 +47,85 @@ def transform_to_json(ctx): def transform_to_bytes(ctx): return to_translation(ctx).to_bytes() -@app.post("/translate/json", 
response_model=TranslationResponse, response_description="json strucure inspired by the ichigo translator extension") -async def json(req: Request): - ctx = await get_ctx(req) +@app.post("/translate/json", response_model=TranslationResponse, tags=["api", "json"],response_description="json strucure inspired by the ichigo translator extension") +async def json(req: Request, data: TranslateRequest): + ctx = await get_ctx(req, data.config, data.image) + return to_translation(ctx) + +@app.post("/translate/bytes", response_class=StreamingResponse, tags=["api", "json"],response_description="custom byte structure for decoding look at examples in 'examples/response.*'") +async def bytes(req: Request, data: TranslateRequest): + ctx = await get_ctx(req, data.config, data.image) + return StreamingResponse(content=to_translation(ctx).to_bytes()) + +@app.post("/translate/image", response_description="the result image", tags=["api", "json"],response_class=StreamingResponse) +async def image(req: Request, data: TranslateRequest) -> StreamingResponse: + ctx = await get_ctx(req, data.config, data.image) + img_byte_arr = io.BytesIO() + ctx.result.save(img_byte_arr, format="PNG") + img_byte_arr.seek(0) + + return StreamingResponse(img_byte_arr, media_type="image/png") + +@app.post("/translate/json/stream", response_class=StreamingResponse,tags=["api", "json"], response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_json(req: Request, data: TranslateRequest) -> StreamingResponse: + return await while_streaming(req, transform_to_json, data.config, data.image) + +@app.post("/translate/bytes/stream", response_class=StreamingResponse, tags=["api", "json"],response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is 
result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_bytes(req: Request, data: TranslateRequest)-> StreamingResponse: + return await while_streaming(req, transform_to_bytes,data.config, data.image) + +@app.post("/translate/image/stream", response_class=StreamingResponse, tags=["api", "json"], response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_image(req: Request, data: TranslateRequest) -> StreamingResponse: + return await while_streaming(req, transform_to_image, data.config, data.image) + +@app.post("/translate/with-form/json", response_model=TranslationResponse, tags=["api", "form"],response_description="json strucure inspired by the ichigo translator extension") +async def json_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")): + img = await image.read() + ctx = await get_ctx(req, Config.parse_raw(config), img) return to_translation(ctx) -@app.post("/translate/bytes", response_class=StreamingResponse, response_description="custom byte structure for decoding look at examples in 'examples/response.*'") -async def bytes(req: Request): - ctx = await get_ctx(req) +@app.post("/translate/with-form/bytes", response_class=StreamingResponse, tags=["api", "form"],response_description="custom byte structure for decoding look at examples in 'examples/response.*'") +async def bytes_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")): + img = await image.read() + ctx = await get_ctx(req, Config.parse_raw(config), img) return StreamingResponse(content=to_translation(ctx).to_bytes()) -@app.post("/translate/image", response_description="the result image", response_class=StreamingResponse) -async def image(req: Request) -> StreamingResponse: - 
ctx = await get_ctx(req) +@app.post("/translate/with-form/image", response_description="the result image", tags=["api", "form"],response_class=StreamingResponse) +async def image_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")) -> StreamingResponse: + img = await image.read() + ctx = await get_ctx(req, Config.parse_raw(config), img) img_byte_arr = io.BytesIO() ctx.result.save(img_byte_arr, format="PNG") img_byte_arr.seek(0) return StreamingResponse(img_byte_arr, media_type="image/png") -@app.post("/translate/json/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") -async def stream_json(req: Request) -> StreamingResponse: - return await while_streaming(req, transform_to_json) +@app.post("/translate/with-form/json/stream", response_class=StreamingResponse, tags=["api", "form"],response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_json_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")) -> StreamingResponse: + img = await image.read() + return await while_streaming(req, transform_to_json, Config.parse_raw(config), img) -@app.post("/translate/bytes/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") -async def stream_bytes(req: Request)-> StreamingResponse: - return await while_streaming(req, transform_to_bytes) 
+@app.post("/translate/with-form/bytes/stream", response_class=StreamingResponse,tags=["api", "form"], response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_bytes_form(req: Request, image: UploadFile = File(...), config: str = Form("{}"))-> StreamingResponse: + img = await image.read() + return await while_streaming(req, transform_to_bytes, Config.parse_raw(config), img) -@app.post("/translate/image/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") -async def stream_image(req: Request) -> StreamingResponse: - return await while_streaming(req, transform_to_image) +@app.post("/translate/with-form/image/stream", response_class=StreamingResponse, tags=["api", "form"], response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_image_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")) -> StreamingResponse: + img = await image.read() + return await while_streaming(req, transform_to_image, Config.parse_raw(config), img) -@app.post("/queue-size", response_model=int) +@app.post("/queue-size", response_model=int, tags=["api", "json"]) async def queue_size() -> int: return len(task_queue.queue) -@app.get("/", response_class=HTMLResponse) +@app.get("/", response_class=HTMLResponse,tags=["ui"]) async def index() -> HTMLResponse: html_file = Path("index.html") html_content = html_file.read_text() return 
HTMLResponse(content=html_content) -@app.get("/manual", response_class=HTMLResponse) +@app.get("/manual", response_class=HTMLResponse, tags=["ui"]) async def manual(): html_file = Path("manual.html") html_content = html_file.read_text() @@ -153,7 +182,6 @@ def prepare(args): #todo: cache results #todo: cleanup cache -#todo: add docs #todo: enable config in html pages if __name__ == '__main__': diff --git a/server/manual.html b/server/manual.html index 1335b0c51..2983feb64 100644 --- a/server/manual.html +++ b/server/manual.html @@ -72,7 +72,7 @@

formData.append('image', file); try { - const response = await fetch( generateImage.checked ? '/translate/image/stream' : '/translate/json/stream', { + const response = await fetch( generateImage.checked ? '/translate/with-form/image/stream' : '/translate/with-form/json/stream', { method: 'POST', body: formData, /*headers: { diff --git a/server/request_extraction.py b/server/request_extraction.py index decb64bff..54516fa39 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -9,9 +9,9 @@ from PIL import Image from fastapi import Request, HTTPException from pydantic import BaseModel -from starlette.responses import StreamingResponse +from fastapi.responses import StreamingResponse -from manga_translator import Config, Context +from manga_translator import Config from server.myqueue import task_queue, wait_in_queue, QueueElement from server.streaming import notify, stream @@ -19,7 +19,7 @@ class TranslateRequest(BaseModel): """This request can be a multipart or a json request""" image: bytes|str """can be a url, base64 encoded image or a multipart image""" - config: Config + config: Config = Config() """in case it is a multipart this needs to be a string(json.stringify)""" async def to_pil_image(image: Union[str, bytes]) -> Image.Image: @@ -41,39 +41,17 @@ async def to_pil_image(image: Union[str, bytes]) -> Image.Image: raise HTTPException(status_code=422, detail=str(e)) -async def multi_content_type(request: Request): - content_type = request.headers.get("content-type") - if content_type and content_type.startswith("multipart/form-data"): - form = await request.form() - config = form.get("config", "{}") - image = form.get("image") - image_content = await image.read() - config = Config.parse_raw(config) - return config, image_content - elif content_type and content_type.startswith("application/json"): - body = await request.json() - config = Config(**body.get("config", {})) - image = body.get("image") - return config, image - - else: - 
raise HTTPException(status_code=400, detail="Unsupported Content-Type") - -async def get_ctx(req: Request): - data, img = await multi_content_type(req) - - image = await to_pil_image(img) - task = QueueElement(req, image, data, 0) +async def get_ctx(req: Request, config: Config, image: str|bytes): + image = await to_pil_image(image) + task = QueueElement(req, image, config, 0) task_queue.add_task(task) return await wait_in_queue(task, None) -async def while_streaming(req: Request, transform): - data, img = await multi_content_type(req) - - image = await to_pil_image(img) - task = QueueElement(req, image, data, 0) +async def while_streaming(req: Request, transform, config: Config, image: bytes | str): + image = await to_pil_image(image) + task = QueueElement(req, image, config, 0) task_queue.add_task(task) messages = asyncio.Queue() diff --git a/server/sent_data_internal.py b/server/sent_data_internal.py index 5720cb6c5..99f8c3730 100644 --- a/server/sent_data_internal.py +++ b/server/sent_data_internal.py @@ -1,4 +1,3 @@ -import asyncio import pickle from typing import Mapping, Optional, Callable diff --git a/server/to_json.py b/server/to_json.py index f6bcdc6e2..bdf1d4cab 100644 --- a/server/to_json.py +++ b/server/to_json.py @@ -1,10 +1,10 @@ import base64 import struct -from typing import Dict, List +from typing import Dict, List, Annotated import cv2 import numpy as np -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, WithJsonSchema from manga_translator import Context from manga_translator.utils import TextBlock @@ -24,6 +24,10 @@ #img_rendered: array #mask_raw: array #mask:array +NumpyNdarray = Annotated[ + np.ndarray, + WithJsonSchema({'type': 'string', "format": "base64","examples": ["data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA..."]}), +] class TextColor(BaseModel): fg: tuple[int, int, int] @@ -39,7 +43,7 @@ class Translation(BaseModel): prob: float text_color: TextColor text: dict[str, str] - background: np.ndarray = Field( 
+ background: NumpyNdarray = Field( ..., description="Background image encoded as a base64 string", examples=["data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA..."] From a0525ddfcf70d6460906ccfe52029fd0d8d1c372 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Fri, 22 Nov 2024 15:50:44 +0100 Subject: [PATCH 40/75] convert to normal enum --- manga_translator/config.py | 144 ++++++++++++++++++------------------- 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/manga_translator/config.py b/manga_translator/config.py index 4c34c5d36..adfe367bd 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -1,6 +1,6 @@ import argparse import re -from enum import IntEnum +from enum import Enum from typing import Optional @@ -56,88 +56,88 @@ def hex2rgb(h): h = h.lstrip('#') return tuple(int(h[i:i+2], 16) for i in (0, 2, 4)) -class Renderer(IntEnum): - default = 0 - manga2Eng = 1 - none = 2 +class Renderer(str, Enum): + default = "default" + manga2Eng = "manga2eng" + none = "none" -class Alignment(IntEnum): - auto = 0 - left = 1 - center = 2 - right = 3 +class Alignment(str, Enum): + auto = "auto" + left = "left" + center = "center" + right = "right" -class Direction(IntEnum): - auto = 0 - h = 1 - v = 2 +class Direction(str, Enum): + auto = "auto" + h = "horizontal" + v = "vertical" -class InpaintPrecision(IntEnum): - fp32 = 0 - fp16 = 1 - bf16 = 2 +class InpaintPrecision(str, Enum): + fp32 = "fp32" + fp16 = "fp16" + bf16 = "bf16" def __str__(self): return self.name -class Detector(IntEnum): - default = 0 - dbconvnext = 1 - ctd = 2 - craft = 3 - none = 4 - -class Inpainter(IntEnum): - default = 0 - lama_large = 1 - lama_mpe = 2 - sd = 3 - none = 4 - original = 5 - -class Colorizer(IntEnum): - none = 0 - mc2 = 1 - -class Ocr(IntEnum): - ocr32px = 0 - ocr48px = 1 - ocr48px_ctc = 2 - mocr = 3 - -class Translator(IntEnum): - youdao = 0 - baidu = 1 - deepl = 2 - papago = 3 - caiyun = 4 - gpt3 = 5 - 
gpt3_5 = 6 - gpt4 = 7 - none = 8 - original = 9 - sakura = 10 - deepseek = 11 - groq = 12 - offline = 13 - nllb = 14 - nllb_big = 15 - sugoi = 16 - jparacrawl = 17 - jparacrawl_big = 18 - m2m100 = 19 - m2m100_big = 20 - mbart50 = 21 - qwen2 = 22 - qwen2_big = 23 +class Detector(str, Enum): + default = "default" + dbconvnext = "dbconvnext" + ctd = "ctd" + craft = "craft" + none = "none" + +class Inpainter(str, Enum): + default = "default" + lama_large = "lama_large" + lama_mpe = "lama_mpe" + sd = "sd" + none = "none" + original = "original" + +class Colorizer(str, Enum): + none = "none" + mc2 = "mc2" + +class Ocr(str, Enum): + ocr32px = "32px" + ocr48px = "48px" + ocr48px_ctc = "48px_ctc" + mocr = "mocr" + +class Translator(str, Enum): + youdao = "youdao" + baidu = "baidu" + deepl = "deepl" + papago = "papago" + caiyun = "caiyun" + gpt3 = "gpt3" + gpt3_5 = "gpt3.5" + gpt4 = "gpt4" + none = "none" + original = "original" + sakura = "sakura" + deepseek = "deepseek" + groq = "groq" + offline = "offline" + nllb = "nllb" + nllb_big = "nllb_big" + sugoi = "sugoi" + jparacrawl = "jparacrawl" + jparacrawl_big = "jparacrawl_big" + m2m100 = "m2m100" + m2m100_big = "m2m100_big" + mbart50 = "mbart50" + qwen2 = "qwen2" + qwen2_big = "qwen2_big" def __str__(self): return self.name -class Upscaler(IntEnum): - waifu2x = 0 - esrgan = 1 - upscler4xultrasharp = 2 +class Upscaler(str, Enum): + waifu2x = "waifu2x" + esrgan = "esrgan" + upscler4xultrasharp = "4xultrasharp" class RenderConfig(BaseModel): renderer: Renderer = Renderer.default From 322ea7893484747878247c8b759e0bb2694a0150 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Fri, 22 Nov 2024 16:08:02 +0100 Subject: [PATCH 41/75] configs --- server/index.html | 43 +++++++++++++++++++++++++++---------------- server/main.py | 2 -- server/manual.html | 9 +++++++++ 3 files changed, 36 insertions(+), 18 deletions(-) diff --git a/server/index.html b/server/index.html index 
fd1a49a11..177885d79 100644 --- a/server/index.html +++ b/server/index.html @@ -33,10 +33,10 @@

Image/Manga Translator

@@ -47,7 +47,7 @@

Image/Manga Translator

Image/Manga Translator @@ -199,13 +199,13 @@

Image/Manga Translator

get fileUri() { return this.file ? URL.createObjectURL(this.file) : null }, - detectionResolution: 'M', - textDetector: 'auto', + detectionResolution: '1536', + textDetector: 'default', renderTextDirection: 'auto', translator: 'youdao', - validTranslators: ['youdao', 'baidu', 'google', 'deepl', 'papago', 'caiyun', 'offline', 'gpt3.5', 'none'], + validTranslators: ['youdao', 'baidu', 'deepl', 'papago', 'caiyun', 'offline', 'gpt3.5', 'none'], getTranslatorName(key) { - if (key == 'none') + if (key === 'none') return "No Text" return key ? key[0].toUpperCase() + key.slice(1) : ""; }, @@ -307,11 +307,22 @@

Image/Manga Translator

const formData = new FormData() formData.append('image', this.file) - /*formData.append('size', this.detectionResolution) - formData.append('detector', this.textDetector) - formData.append('direction', this.renderTextDirection) - formData.append('translator', this.translator) - formData.append('target_lang', this.targetLanguage)*/ + const config = `{ + "detector": { + "detector": "${this.textDetector}", + "detection_size": ${this.detectionResolution} + }, + "render": { + "direction": "${this.renderTextDirection}" + }, + "translator": { + "translator": "${this.translator}", + "target_lang": "${this.targetLanguage}" + } + }`; + + formData.append('config', config) + const processChunk = (value) => { if (this.error) return; diff --git a/server/main.py b/server/main.py index 5563f0ce5..20d3d5833 100644 --- a/server/main.py +++ b/server/main.py @@ -182,8 +182,6 @@ def prepare(args): #todo: cache results #todo: cleanup cache -#todo: enable config in html pages - if __name__ == '__main__': import uvicorn from args import parse_arguments diff --git a/server/manual.html b/server/manual.html index 2983feb64..45c801070 100644 --- a/server/manual.html +++ b/server/manual.html @@ -15,6 +15,8 @@

Upload Image and Translate

+
+

@@ -55,10 +57,12 @@

let submitButton = document.getElementById("submit-button") const fileInput = document.getElementById('fileInput'); const generateImage = document.getElementById('generate-image'); + const configField = document.getElementById('config-json'); submitButton.classList.add("hidden"); fileInput.classList.add("hidden"); generateImage.classList.add("hidden"); + configField.classList.add("hidden"); statusField.innerHTML = ''; errorField.innerHTML = ''; const file = fileInput.files[0]; @@ -70,6 +74,10 @@

const formData = new FormData(); formData.append('image', file); + const text = configField.innerText; + if (text.length > 2) { + formData.append('config', text) + } try { const response = await fetch( generateImage.checked ? '/translate/with-form/image/stream' : '/translate/with-form/json/stream', { @@ -94,6 +102,7 @@

submitButton.classList.remove("hidden"); fileInput.classList.remove("hidden"); generateImage.classList.remove("hidden"); + configField.classList.remove("hidden"); } } From c9d8cbe7e46af15d7f5f7e3526a54f9f6e1dff13 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 23 Nov 2024 13:01:42 +0100 Subject: [PATCH 42/75] fix ctx -> config after merge --- manga_translator/manga_translator.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 098b68563..7fa85c7b2 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -421,35 +421,32 @@ async def _run_text_translation(self, config: Config, ctx: Context): # Target language detection if config.translator.target_lang in ['CHS', 'CHT']: # Chinese has_target_lang = bool(re.search('[\u4e00-\u9fff]', region.text)) - elif config.translator.target_lang == 'JPN': # Japanese - if ctx.target_lang in ['CHS', 'CHT']: # Chinese - has_target_lang = bool(re.search('[\u4e00-\u9fff]', region.text)) has_target_lang_in_translation = bool(re.search('[\u4e00-\u9fff]', region.translation)) - elif ctx.target_lang == 'JPN': # Japanese + elif config.translator.target_lang == 'JPN': # Japanese has_target_lang = bool(re.search('[\u3040-\u309f\u30a0-\u30ff\u4e00-\u9fff]', region.text)) elif config.translator.target_lang == 'KOR': # Korean has_target_lang_in_translation = bool(re.search('[\u3040-\u309f\u30a0-\u30ff\u4e00-\u9fff]', region.translation)) - elif ctx.target_lang == 'KOR': # Korean + elif config.translator.target_lang == 'KOR': # Korean has_target_lang = bool(re.search('[\uac00-\ud7af\u1100-\u11ff]', region.text)) elif config.translator.target_lang == 'ARA': # Arabic has_target_lang_in_translation = bool(re.search('[\uac00-\ud7af\u1100-\u11ff]', region.translation)) - elif ctx.target_lang == 'ARA': # Arabic + elif 
config.translator.target_lang == 'ARA': # Arabic has_target_lang = bool(re.search('[\u0600-\u06ff]', region.text)) elif config.translator.target_lang == 'THA': # Thai has_target_lang_in_translation = bool(re.search('[\u0600-\u06ff]', region.translation)) - elif ctx.target_lang == 'THA': # Thai + elif config.translator.target_lang == 'THA': # Thai has_target_lang = bool(re.search('[\u0e00-\u0e7f]', region.text)) elif config.translator.target_lang == 'RUS': # Russian has_target_lang_in_translation = bool(re.search('[\u0e00-\u0e7f]', region.translation)) - elif ctx.target_lang == 'RUS': # Russian + elif config.translator.target_lang == 'RUS': # Russian has_target_lang = bool(re.search('[\u0400-\u04ff]', region.text)) elif config.translator.target_lang == 'UKR': # Ukrainian has_target_lang_in_translation = bool(re.search('[\u0400-\u04ff]', region.translation)) - elif ctx.target_lang == 'UKR': # Ukrainian + elif config.translator.target_lang == 'UKR': # Ukrainian has_target_lang = bool(re.search('[\u0400-\u04ff]', region.text)) elif config.translator.target_lang == 'IND': # Indonesian has_target_lang_in_translation = bool(re.search('[\u0400-\u04ff]', region.translation)) - elif ctx.target_lang == 'IND': # Indonesian + elif config.translator.target_lang == 'IND': # Indonesian has_target_lang = bool(re.search('[A-Za-z]', region.text)) has_target_lang_in_translation = bool(re.search('[A-Za-z]', region.translation)) From 510db7f418e3c8e5f651269513b7aaf5ee929a92 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 18:38:34 +0100 Subject: [PATCH 43/75] server --- manga_translator/args.py | 2 +- server/args.py | 9 +++++ server/instance.py | 66 ++++++++++++++++++++++++++++++++++++ server/main.py | 73 ++++++++++++++++++++++++++++++++++++++++ server/myqueue.py | 25 ++++++++++++++ server/sent_data.py | 50 +++++++++++++++++++++++++++ 6 files changed, 224 insertions(+), 1 deletion(-) create mode 100644 server/args.py 
create mode 100644 server/instance.py create mode 100644 server/main.py create mode 100644 server/myqueue.py create mode 100644 server/sent_data.py diff --git a/manga_translator/args.py b/manga_translator/args.py index 92e70ff07..ef84073d6 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -127,7 +127,7 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser_ws.add_argument('--ws-url', default='ws://localhost:5000', type=str, help='Server URL for WebSocket mode') # API mode -parser_api = subparsers.add_parser('api', help='Run in API mode') +parser_api = subparsers.add_parser('shared', help='Run in API mode') parser_api.add_argument('--host', default='127.0.0.1', type=str, help='Host for API service') parser_api.add_argument('--port', default=5003, type=int, help='Port for API service') parser_api.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal API server communication') diff --git a/server/args.py b/server/args.py new file mode 100644 index 000000000..ac3d92b33 --- /dev/null +++ b/server/args.py @@ -0,0 +1,9 @@ +import argparse + + +def parse_arguments(): + parser = argparse.ArgumentParser(description="Specify host and port for the server.") + parser.add_argument('--host', type=str, default='127.0.0.1', help='The host address (default: 127.0.0.1)') + parser.add_argument('--port', type=int, default=8080, help='The port number (default: 8080)') + + return parser.parse_args() \ No newline at end of file diff --git a/server/instance.py b/server/instance.py new file mode 100644 index 000000000..68260c962 --- /dev/null +++ b/server/instance.py @@ -0,0 +1,66 @@ +from asyncio import Event, Lock +from typing import List, Optional + +from PIL import Image +from pydantic import BaseModel + +from manga_translator import Config +from server.sent_data import fetch_data_stream, NotifyType + + +class ExecutorInstance(BaseModel): + ip: str + port: int + busy: bool = False + + def 
free_executor(self): + self.busy = False + + async def sent(self, image: Image, config: Config, sender: NotifyType): + await fetch_data_stream("http://"+self.ip+":"+str(self.port)+"/execute/translate", image, config, sender) + +class Executors: + def __init__(self): + self.list: List[ExecutorInstance] = [] + self.lock: Lock = Lock() + self.event = Event() + + def register(self, instance: ExecutorInstance): + self.list.append(instance) + + def free_executors(self) -> int: + return len([item for item in self.list if not item.busy]) + + async def _find_instance(self): + while True: + instance = next((x for x in self.list if x.busy == False), None) + if instance is not None: + return instance + #todo: cricial error: warn should never happen + await self.event.wait() + + async def find_executor(self) -> ExecutorInstance: + async with self.lock: # Using async with for lock management + instance = await self._find_instance() + instance.busy = True + return instance + + def free_executor(self, instance: ExecutorInstance): + instance.free_executor() + self.event.set() + self.event.clear() + +def example_notify(a: int, b: Optional[int] = None) -> None: + print(f"Notify called with a={a} and b={b}") + +async def main(): + executor = ExecutorInstance(ip="127.0.0.1", port=5003) + + image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") + config = Config() + + await executor.sent(image, config, example_notify) + +if __name__ == "__main__": + import asyncio + asyncio.run(main()) \ No newline at end of file diff --git a/server/main.py b/server/main.py new file mode 100644 index 000000000..8da8c2677 --- /dev/null +++ b/server/main.py @@ -0,0 +1,73 @@ +import asyncio +from typing import List, Dict, Optional, Callable + +from fastapi import FastAPI, Request, HTTPException + +from server.instance import ExecutorInstance, Executors +from server.myqueue import TaskQueue +from server.sent_data import NotifyType + +app = FastAPI() +executor_instances: Executors = 
Executors() +task_queue = TaskQueue() + +@app.post("/register") +async def register_instance(instance: ExecutorInstance, request: Request): + instance.ip = request.client.host + executor_instances.register(instance) + return {"code": 0} + + + +async def wait(task, notify: NotifyType): + """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. when done the item will be removed from the queue and result will be returned""" + while True: + queue_pos = task_queue.get_pos(task) + if notify: + notify(3, queue_pos) + if queue_pos < executor_instances.free_executors(): + instance = await executor_instances.find_executor() + task_queue.remove(task) + if notify: + notify(4, 0) + result = "" #todo: implement logic + instance.busy = False + instance.event.set() + + if notify: + return + else: + return result + else: + if queue_pos == 0: + raise HTTPException(500, detail="No translator registered") + await task_queue.wait_for_event() +@app.post("/json") +async def json(req: TranslateRequest): + pass + +@app.post("/bytes") +async def bytes(req: TranslateRequest): + pass + +@app.post("/image") +async def image(req: TranslateRequest): + pass + +@app.post("/stream_json") +async def image(req: TranslateRequest): + pass + +@app.post("/stream_bytes") +async def image(req: TranslateRequest): + pass + +@app.post("/stream_image") +async def image(req: TranslateRequest): + pass + +if __name__ == '__main__': + import uvicorn + from args import parse_arguments + args = parse_arguments() + uvicorn.run(app, host=args.host, port=args.port) \ No newline at end of file diff --git a/server/myqueue.py b/server/myqueue.py new file mode 100644 index 000000000..61c1abac3 --- /dev/null +++ b/server/myqueue.py @@ -0,0 +1,25 @@ +import asyncio +from typing import List, Dict + + +class TaskQueue: + def __init__(self): + self.queue: List[Dict] = [] + self.queue_event: asyncio.Event = asyncio.Event() + + def 
add_task(self, task): + self.queue.append(task) + + def get_pos(self, task): + return self.queue.index(task) + + def update_event(self): + self.queue_event.set() + self.queue_event.clear() + + def remove(self, task): + self.queue.remove(task) + self.update_event() + + async def wait_for_event(self): + await self.queue_event.wait() \ No newline at end of file diff --git a/server/sent_data.py b/server/sent_data.py new file mode 100644 index 000000000..619dca71b --- /dev/null +++ b/server/sent_data.py @@ -0,0 +1,50 @@ +import pickle +from typing import Mapping, Optional, Callable + +import aiohttp +from PIL.Image import Image +from fastapi import HTTPException + +from manga_translator import Config + +NotifyType = Optional[Callable[[int, Optional[int]], None]] + +async def fetch_data_stream(url, image: Image, config: Config, sender: NotifyType, headers: Mapping[str, str] = {}): + attributes = {"image": image, "config": config} + data = pickle.dumps(attributes) + + async with aiohttp.ClientSession() as session: + async with session.post(url, data=data, headers=headers) as response: + if response.status == 200: + await process_stream(response, sender) + else: + raise HTTPException(response.status, detail=response.text()) + + +async def process_stream(response, sender: NotifyType): + buffer = b'' + + async for chunk in response.content.iter_any(): + if chunk: + buffer += chunk + await handle_buffer(buffer, sender) + + +async def handle_buffer(buffer, sender: NotifyType): + while len(buffer) >= 5: + status, expected_size = extract_header(buffer) + + if len(buffer) >= 5 + expected_size: + data = buffer[5:5 + expected_size] + sender(status, data) + buffer = buffer[5 + expected_size:] + else: + break + + +def extract_header(buffer): + """Extract the status and expected size from the buffer.""" + status = int.from_bytes(buffer[0:1], byteorder='big') + expected_size = int.from_bytes(buffer[1:5], byteorder='big') + return status, expected_size + From 
55c59b6c8bccc5d002e335b005e207fb21c91489 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 19:04:12 +0100 Subject: [PATCH 44/75] python sucks --- execute_test.py | 5 ++++- server/instance.py | 12 ++++++++---- server/sent_data.py | 7 +++++-- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/execute_test.py b/execute_test.py index 3a1d0e8af..8705fb2b1 100644 --- a/execute_test.py +++ b/execute_test.py @@ -4,6 +4,9 @@ import requests from PIL import Image +from manga_translator import Config + + async def execute_method(method_name, attributes): url = f"http://127.0.0.1:5003/execute/{method_name}" headers = {'Content-Type': 'application/octet-stream'} @@ -39,5 +42,5 @@ async def execute_method(method_name, attributes): if __name__ == '__main__': image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") - attributes = {"image": image, "params": {"translator": "none", "inpainter": "none"}} + attributes = {"image": image, "config": Config()} asyncio.run(execute_method("translate", attributes)) \ No newline at end of file diff --git a/server/instance.py b/server/instance.py index 68260c962..da550613c 100644 --- a/server/instance.py +++ b/server/instance.py @@ -1,3 +1,4 @@ +import pickle from asyncio import Event, Lock from typing import List, Optional @@ -16,7 +17,7 @@ class ExecutorInstance(BaseModel): def free_executor(self): self.busy = False - async def sent(self, image: Image, config: Config, sender: NotifyType): + async def sent_stream(self, image: Image, config: Config, sender: NotifyType): await fetch_data_stream("http://"+self.ip+":"+str(self.port)+"/execute/translate", image, config, sender) class Executors: @@ -50,8 +51,11 @@ def free_executor(self, instance: ExecutorInstance): self.event.set() self.event.clear() -def example_notify(a: int, b: Optional[int] = None) -> None: - print(f"Notify called with a={a} and b={b}") +def example_notify(a: int, b) -> None: 
+ if a == 0: + print(pickle.loads(b)) + else: + print(f"Notify called with a={a} and b={b}") async def main(): executor = ExecutorInstance(ip="127.0.0.1", port=5003) @@ -59,7 +63,7 @@ async def main(): image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") config = Config() - await executor.sent(image, config, example_notify) + await executor.sent_stream(image, config, example_notify) if __name__ == "__main__": import asyncio diff --git a/server/sent_data.py b/server/sent_data.py index 619dca71b..02053bf80 100644 --- a/server/sent_data.py +++ b/server/sent_data.py @@ -1,3 +1,4 @@ +import asyncio import pickle from typing import Mapping, Optional, Callable @@ -27,10 +28,11 @@ async def process_stream(response, sender: NotifyType): async for chunk in response.content.iter_any(): if chunk: buffer += chunk - await handle_buffer(buffer, sender) + buffer = handle_buffer(buffer, sender) -async def handle_buffer(buffer, sender: NotifyType): + +def handle_buffer(buffer, sender: NotifyType): while len(buffer) >= 5: status, expected_size = extract_header(buffer) @@ -40,6 +42,7 @@ async def handle_buffer(buffer, sender: NotifyType): buffer = buffer[5 + expected_size:] else: break + return buffer def extract_header(buffer): From 0d8d5825e8deed320ddc49daf27d4fb9fe1cc62c Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 22:02:42 +0100 Subject: [PATCH 45/75] api logic --- requirements.txt | 3 +- server/args.py | 2 +- server/instance.py | 4 ++ server/main.py | 151 ++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 143 insertions(+), 17 deletions(-) diff --git a/requirements.txt b/requirements.txt index 25c2e953b..e843bf354 100644 --- a/requirements.txt +++ b/requirements.txt @@ -51,4 +51,5 @@ accelerate bitsandbytes uvicorn fastapi -pydantic \ No newline at end of file +pydantic +python-multipart \ No newline at end of file diff --git a/server/args.py b/server/args.py index 
ac3d92b33..a00e24a22 100644 --- a/server/args.py +++ b/server/args.py @@ -4,6 +4,6 @@ def parse_arguments(): parser = argparse.ArgumentParser(description="Specify host and port for the server.") parser.add_argument('--host', type=str, default='127.0.0.1', help='The host address (default: 127.0.0.1)') - parser.add_argument('--port', type=int, default=8080, help='The port number (default: 8080)') + parser.add_argument('--port', type=int, default=8000, help='The port number (default: 8080)') return parser.parse_args() \ No newline at end of file diff --git a/server/instance.py b/server/instance.py index da550613c..0b426b606 100644 --- a/server/instance.py +++ b/server/instance.py @@ -7,6 +7,7 @@ from manga_translator import Config from server.sent_data import fetch_data_stream, NotifyType +from fastapi import Response class ExecutorInstance(BaseModel): @@ -17,6 +18,9 @@ class ExecutorInstance(BaseModel): def free_executor(self): self.busy = False + async def sent(self) -> Response: + pass + async def sent_stream(self, image: Image, config: Config, sender: NotifyType): await fetch_data_stream("http://"+self.ip+":"+str(self.port)+"/execute/translate", image, config, sender) diff --git a/server/main.py b/server/main.py index 8da8c2677..3c3507312 100644 --- a/server/main.py +++ b/server/main.py @@ -1,16 +1,34 @@ import asyncio -from typing import List, Dict, Optional, Callable +import builtins +import io +import re +from base64 import b64decode +from typing import Union +import requests +from PIL import Image from fastapi import FastAPI, Request, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from starlette.responses import StreamingResponse +from manga_translator import Config from server.instance import ExecutorInstance, Executors from server.myqueue import TaskQueue from server.sent_data import NotifyType app = FastAPI() + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + 
allow_headers=["*"], +) executor_instances: Executors = Executors() task_queue = TaskQueue() + @app.post("/register") async def register_instance(instance: ExecutorInstance, request: Request): instance.ip = request.client.host @@ -18,6 +36,44 @@ async def register_instance(instance: ExecutorInstance, request: Request): return {"code": 0} +async def to_pil_image(image: Union[str, bytes]) -> Image.Image: + try: + if isinstance(image, builtins.bytes): + image = Image.open(io.BytesIO(image)) + return image + else: + if re.match(r'^data:image/.+;base64,', image): + value = image.split(',', 1)[1] + image_data = b64decode(value) + image = Image.open(io.BytesIO(image_data)) + return image + else: + response = requests.get(image) + image = Image.open(io.BytesIO(response.content)) + return image + except Exception as e: + raise HTTPException(status_code=422, detail=str(e)) + + +async def multi_content_type(request: Request): + content_type = request.headers.get("content-type") + + if content_type and content_type.startswith("multipart/form-data"): + form = await request.form() + config = form.get("config", "{}") + image = form.get("image") + image_content = await image.read() + config = Config.parse_raw(config) + return config, image_content + elif content_type and content_type.startswith("application/json"): + body = await request.json() + config = Config(**body.get("config", {})) + image = body.get("image") + return config, image + + else: + raise HTTPException(status_code=400, detail="Unsupported Content-Type") + async def wait(task, notify: NotifyType): """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. 
when done the item will be removed from the queue and result will be returned""" @@ -30,7 +86,11 @@ async def wait(task, notify: NotifyType): task_queue.remove(task) if notify: notify(4, 0) - result = "" #todo: implement logic + if notify: + await instance.sent_stream(task.image, task.config, notify) + else: + result = await instance.sent(task.image, task.config) + instance.busy = False instance.event.set() @@ -42,32 +102,93 @@ async def wait(task, notify: NotifyType): if queue_pos == 0: raise HTTPException(500, detail="No translator registered") await task_queue.wait_for_event() + + +async def stream(messages): + while True: + message = await messages.get() + yield message + if message[0] == 0 or message[0] == 2: + break + +def notify(code, data, transform_to_bytes, messages): + if code == 0: + result_bytes = transform_to_bytes(data) + encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes + messages.put_nowait(encoded_result) + else: + result_bytes = str(data).encode("utf-8") + encoded_result =code.to_bytes(1, 'big') + len(result_bytes).to_bytes(4, 'big') + result_bytes + messages.put_nowait(encoded_result) + +def transform_to_image(data): + return b"" + @app.post("/json") -async def json(req: TranslateRequest): - pass +async def json(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + data = await wait((data, img), None) @app.post("/bytes") -async def bytes(req: TranslateRequest): - pass +async def bytes(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + data = await wait((data, img), None) + @app.post("/image") -async def image(req: TranslateRequest): - pass +async def image(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + data = await wait((data, img), None) + @app.post("/stream_json") -async def image(req: TranslateRequest): - pass +async def stream_json(req: Request): + data, img = await multi_content_type(req) + 
img = await to_pil_image(img) + + messages = asyncio.Queue() + + def example_notify(code: int, data) -> None: + notify(code, data, transform_to_image, messages) + + streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") + asyncio.create_task(wait((data, img), example_notify)) + return streaming_response @app.post("/stream_bytes") -async def image(req: TranslateRequest): - pass +async def stream_bytes(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + + messages = asyncio.Queue() + + def example_notify(code: int, data) -> None: + notify(code, data, transform_to_image, messages) + + streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") + asyncio.create_task(wait((data, img), example_notify)) + return streaming_response @app.post("/stream_image") -async def image(req: TranslateRequest): - pass +async def stream_image(req: Request): + data, img = await multi_content_type(req) + img = await to_pil_image(img) + + messages = asyncio.Queue() + + def example_notify(code: int, data) -> None: + notify(code, data, transform_to_image, messages) + + streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") + asyncio.create_task(wait((data, img), example_notify)) + return streaming_response if __name__ == '__main__': import uvicorn from args import parse_arguments + args = parse_arguments() - uvicorn.run(app, host=args.host, port=args.port) \ No newline at end of file + uvicorn.run(app, host=args.host, port=args.port) From bc3f02e367c76dc3612313839280fce9b75929f9 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 22:52:22 +0100 Subject: [PATCH 46/75] replace api --- manga_translator/__main__.py | 4 - manga_translator/mode/api.py | 292 ----------------------------------- server/instance.py | 9 +- server/main.py | 36 ++++- server/sent_data.py | 10 ++ 
server/to_json.py | 66 ++++++++ 6 files changed, 108 insertions(+), 309 deletions(-) delete mode 100644 manga_translator/mode/api.py create mode 100644 server/to_json.py diff --git a/manga_translator/__main__.py b/manga_translator/__main__.py index db711b2c8..eb348798f 100644 --- a/manga_translator/__main__.py +++ b/manga_translator/__main__.py @@ -79,10 +79,6 @@ async def dispatch(args: Namespace): translator = MangaTranslatorWS(args_dict) await translator.listen(args_dict) - elif args.mode == 'api': - from manga_translator.mode.api import MangaTranslatorAPI - translator = MangaTranslatorAPI(args_dict) - await translator.listen(args_dict) elif args.mode == 'shared': translator = MangaShare(args_dict) await translator.listen(args_dict) diff --git a/manga_translator/mode/api.py b/manga_translator/mode/api.py deleted file mode 100644 index fc71d32e3..000000000 --- a/manga_translator/mode/api.py +++ /dev/null @@ -1,292 +0,0 @@ -# Experimental. May be replaced by a refactored server/web_main.py in the future. 
-import asyncio -import base64 -import io - -import cv2 -import numpy as np -from PIL import Image -from aiohttp import web -from aiohttp.web_middlewares import middleware -from marshmallow import fields, Schema, ValidationError - -from manga_translator import MangaTranslator, Context, TranslationInterrupt, logger -from manga_translator.args import translator_chain -from manga_translator.detection import DETECTORS -from manga_translator.inpainting import INPAINTERS -from manga_translator.manga_translator import _preprocess_params -from manga_translator.ocr import OCRS -from manga_translator.translators import VALID_LANGUAGES, TRANSLATORS -from manga_translator.upscaling import UPSCALERS - - -class MangaTranslatorAPI(MangaTranslator): - def __init__(self, params: dict = None): - import nest_asyncio - nest_asyncio.apply() - super().__init__(params) - self.host = params.get('host', '127.0.0.1') - self.port = params.get('port', '5003') - self.log_web = params.get('log_web', False) - self.ignore_errors = params.get('ignore_errors', True) - self._task_id = None - self._params = None - self.params = params - self.queue = [] - - async def wait_queue(self, id: int): - while self.queue[0] != id: - await asyncio.sleep(0.05) - - def remove_from_queue(self, id: int): - self.queue.remove(id) - - def generate_id(self): - try: - x = max(self.queue) - except: - x = 0 - return x + 1 - - def middleware_factory(self): - @middleware - async def sample_middleware(request, handler): - id = self.generate_id() - self.queue.append(id) - try: - await self.wait_queue(id) - except Exception as e: - print(e) - try: - # todo make cancellable - response = await handler(request) - except: - response = web.json_response({'error': "Internal Server Error", 'status': 500}, - status=500) - # Handle cases where a user leaves the queue, request fails, or is completed - try: - self.remove_from_queue(id) - except Exception as e: - print(e) - return response - - return sample_middleware - - async def 
get_file(self, image, base64Images, url) -> Image: - if image is not None: - content = image.file.read() - elif base64Images is not None: - base64Images = base64Images - if base64Images.__contains__('base64,'): - base64Images = base64Images.split('base64,')[1] - content = base64.b64decode(base64Images) - elif url is not None: - from aiohttp import ClientSession - async with ClientSession() as session: - async with session.get(url) as resp: - if resp.status == 200: - content = await resp.read() - else: - return web.json_response({'status': 'error'}) - else: - raise ValidationError("donest exist") - img = Image.open(io.BytesIO(content)) - - img.verify() - img = Image.open(io.BytesIO(content)) - if img.width * img.height > 8000 ** 2: - raise ValidationError("to large") - return img - - async def listen(self, translation_params: dict = None): - self.params = translation_params - app = web.Application(client_max_size=1024 * 1024 * 50, middlewares=[self.middleware_factory()]) - - routes = web.RouteTableDef() - run_until_state = '' - - async def hook(state, finished): - if run_until_state and run_until_state == state and not finished: - raise TranslationInterrupt() - - self.add_progress_hook(hook) - - @routes.post("/get_text") - async def text_api(req): - nonlocal run_until_state - run_until_state = 'translating' - return await self.err_handling(self.run_translate, req, self.format_translate) - - @routes.post("/translate") - async def translate_api(req): - nonlocal run_until_state - run_until_state = 'after-translating' - return await self.err_handling(self.run_translate, req, self.format_translate) - - @routes.post("/inpaint_translate") - async def inpaint_translate_api(req): - nonlocal run_until_state - run_until_state = 'rendering' - return await self.err_handling(self.run_translate, req, self.format_translate) - - @routes.post("/colorize_translate") - async def colorize_translate_api(req): - nonlocal run_until_state - run_until_state = 'rendering' - return await 
self.err_handling(self.run_translate, req, self.format_translate, True) - - # #@routes.post("/file") - # async def file_api(req): - # #TODO: return file - # return await self.err_handling(self.file_exec, req, None) - - app.add_routes(routes) - web.run_app(app, host=self.host, port=self.port) - - async def run_translate(self, translation_params, img): - return await self.translate(img, translation_params) - - async def err_handling(self, func, req, format, ri=False): - try: - if req.content_type == 'application/json' or req.content_type == 'multipart/form-data': - if req.content_type == 'application/json': - d = await req.json() - else: - d = await req.post() - schema = self.PostSchema() - data = schema.load(d) - if 'translator_chain' in data: - data['translator_chain'] = translator_chain(data['translator_chain']) - if 'selective_translation' in data: - data['selective_translation'] = translator_chain(data['selective_translation']) - ctx = Context(**dict(self.params, **data)) - _preprocess_params(ctx) - if data.get('image') is None and data.get('base64Images') is None and data.get('url') is None: - return web.json_response({'error': "Missing input", 'status': 422}) - fil = await self.get_file(data.get('image'), data.get('base64Images'), data.get('url')) - if 'image' in data: - del data['image'] - if 'base64Images' in data: - del data['base64Images'] - if 'url' in data: - del data['url'] - attempts = 0 - while ctx.attempts == -1 or attempts <= ctx.attempts: - if attempts > 0: - logger.info(f'Retrying translation! 
Attempt {attempts}' + ( - f' of {ctx.attempts}' if ctx.attempts != -1 else '')) - try: - await func(ctx, fil) - break - except TranslationInterrupt: - break - except Exception as e: - print(e) - attempts += 1 - if ctx.attempts != -1 and attempts > ctx.attempts: - return web.json_response({'error': "Internal Server Error", 'status': 500}, - status=500) - try: - return format(ctx, ri) - except Exception as e: - print(e) - return web.json_response({'error': "Failed to format", 'status': 500}, - status=500) - else: - return web.json_response({'error': "Wrong content type: " + req.content_type, 'status': 415}, - status=415) - except ValueError as e: - print(e) - return web.json_response({'error': "Wrong input type", 'status': 422}, status=422) - - except ValidationError as e: - print(e) - return web.json_response({'error': "Input invalid", 'status': 422}, status=422) - - def format_translate(self, ctx: Context, return_image: bool): - text_regions = ctx.text_regions - inpaint = ctx.img_inpainted - results = [] - if 'overlay_ext' in ctx: - overlay_ext = ctx['overlay_ext'] - else: - overlay_ext = 'jpg' - for i, blk in enumerate(text_regions): - minX, minY, maxX, maxY = blk.xyxy - if 'translations' in ctx: - trans = {key: value[i] for key, value in ctx['translations'].items()} - else: - trans = {} - trans["originalText"] = text_regions[i].text - if inpaint is not None: - overlay = inpaint[minY:maxY, minX:maxX] - - retval, buffer = cv2.imencode('.' 
+ overlay_ext, overlay) - jpg_as_text = base64.b64encode(buffer) - background = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") - else: - background = None - text_region = text_regions[i] - text_region.adjust_bg_color = False - color1, color2 = text_region.get_font_colors() - - results.append({ - 'text': trans, - 'minX': int(minX), - 'minY': int(minY), - 'maxX': int(maxX), - 'maxY': int(maxY), - 'textColor': { - 'fg': color1.tolist(), - 'bg': color2.tolist() - }, - 'language': text_regions[i].source_lang, - 'background': background - }) - if return_image and ctx.img_colorized is not None: - retval, buffer = cv2.imencode('.' + overlay_ext, np.array(ctx.img_colorized)) - jpg_as_text = base64.b64encode(buffer) - img = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") - else: - img = None - return web.json_response({'details': results, 'img': img}) - - class PostSchema(Schema): - target_lang = fields.Str(required=False, validate=lambda a: a.upper() in VALID_LANGUAGES) - detector = fields.Str(required=False, validate=lambda a: a.lower() in DETECTORS) - ocr = fields.Str(required=False, validate=lambda a: a.lower() in OCRS) - inpainter = fields.Str(required=False, validate=lambda a: a.lower() in INPAINTERS) - upscaler = fields.Str(required=False, validate=lambda a: a.lower() in UPSCALERS) - translator = fields.Str(required=False, validate=lambda a: a.lower() in TRANSLATORS) - direction = fields.Str(required=False, validate=lambda a: a.lower() in {'auto', 'h', 'v'}) - skip_language = fields.Str(required=False) - upscale_ratio = fields.Integer(required=False) - translator_chain = fields.Str(required=False) - selective_translation = fields.Str(required=False) - attempts = fields.Integer(required=False) - detection_size = fields.Integer(required=False) - text_threshold = fields.Float(required=False) - box_threshold = fields.Float(required=False) - unclip_ratio = fields.Float(required=False) - inpainting_size = 
fields.Integer(required=False) - det_rotate = fields.Bool(required=False) - det_auto_rotate = fields.Bool(required=False) - det_invert = fields.Bool(required=False) - det_gamma_correct = fields.Bool(required=False) - min_text_length = fields.Integer(required=False) - colorization_size = fields.Integer(required=False) - denoise_sigma = fields.Integer(required=False) - mask_dilation_offset = fields.Integer(required=False) - ignore_bubble = fields.Integer(required=False) - gpt_config = fields.String(required=False) - filter_text = fields.String(required=False) - - # api specific - overlay_ext = fields.Str(required=False) - base64Images = fields.Raw(required=False) - image = fields.Raw(required=False) - url = fields.Raw(required=False) - - # no functionality except preventing errors when given - fingerprint = fields.Raw(required=False) - clientUuid = fields.Raw(required=False) \ No newline at end of file diff --git a/server/instance.py b/server/instance.py index 0b426b606..c27a0c415 100644 --- a/server/instance.py +++ b/server/instance.py @@ -6,8 +6,7 @@ from pydantic import BaseModel from manga_translator import Config -from server.sent_data import fetch_data_stream, NotifyType -from fastapi import Response +from server.sent_data import fetch_data_stream, NotifyType, fetch_data class ExecutorInstance(BaseModel): @@ -18,15 +17,15 @@ class ExecutorInstance(BaseModel): def free_executor(self): self.busy = False - async def sent(self) -> Response: - pass + async def sent(self, image: Image, config: Config): + return await fetch_data("http://"+self.ip+":"+str(self.port)+"/simple_execute/translate", image, config) async def sent_stream(self, image: Image, config: Config, sender: NotifyType): await fetch_data_stream("http://"+self.ip+":"+str(self.port)+"/execute/translate", image, config, sender) class Executors: def __init__(self): - self.list: List[ExecutorInstance] = [] + self.list: List[ExecutorInstance] = [ExecutorInstance(ip="127.0.0.1", port=5003)] self.lock: Lock = 
Lock() self.event = Event() diff --git a/server/main.py b/server/main.py index 3c3507312..cda01c0a9 100644 --- a/server/main.py +++ b/server/main.py @@ -9,12 +9,13 @@ from PIL import Image from fastapi import FastAPI, Request, HTTPException from fastapi.middleware.cors import CORSMiddleware -from starlette.responses import StreamingResponse +from starlette.responses import StreamingResponse, JSONResponse -from manga_translator import Config +from manga_translator import Config, Context from server.instance import ExecutorInstance, Executors from server.myqueue import TaskQueue from server.sent_data import NotifyType +from server.to_json import to_json app = FastAPI() @@ -91,8 +92,7 @@ async def wait(task, notify: NotifyType): else: result = await instance.sent(task.image, task.config) - instance.busy = False - instance.event.set() + executor_instances.free_executor(instance) if notify: return @@ -127,22 +127,42 @@ def transform_to_image(data): @app.post("/json") async def json(req: Request): data, img = await multi_content_type(req) - img = await to_pil_image(img) - data = await wait((data, img), None) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) + + data = await wait(ctx, None) + json = to_json(data) + return JSONResponse(content=json) @app.post("/bytes") async def bytes(req: Request): data, img = await multi_content_type(req) - img = await to_pil_image(img) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) data = await wait((data, img), None) @app.post("/image") async def image(req: Request): data, img = await multi_content_type(req) - img = await to_pil_image(img) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) + data = await wait((data, img), None) + img_byte_arr = io.BytesIO() + data.result.save(img_byte_arr, format="PNG") + img_byte_arr.seek(0) + return StreamingResponse(img_byte_arr, 
media_type="image/png") @app.post("/stream_json") async def stream_json(req: Request): diff --git a/server/sent_data.py b/server/sent_data.py index 02053bf80..2945cc9f7 100644 --- a/server/sent_data.py +++ b/server/sent_data.py @@ -21,6 +21,16 @@ async def fetch_data_stream(url, image: Image, config: Config, sender: NotifyTyp else: raise HTTPException(response.status, detail=response.text()) +async def fetch_data(url, image: Image, config: Config, headers: Mapping[str, str] = {}): + attributes = {"image": image, "config": config} + data = pickle.dumps(attributes) + + async with aiohttp.ClientSession() as session: + async with session.post(url, data=data, headers=headers) as response: + if response.status == 200: + return pickle.loads(await response.read()) + else: + raise HTTPException(response.status, detail=response.text()) async def process_stream(response, sender: NotifyType): buffer = b'' diff --git a/server/to_json.py b/server/to_json.py new file mode 100644 index 000000000..356e74104 --- /dev/null +++ b/server/to_json.py @@ -0,0 +1,66 @@ +import base64 +from typing import Dict, List + +import cv2 + +from manga_translator import Context +from manga_translator.utils import TextBlock + + +#input:PIL, +#result:PIL +#img_colorized: PIL +#upscaled:PIL +#img_rgb:array +#img_alpha:None +#textlines:list[Quadrilateral] +#text_regions:list[TextBlock] +#translations: map[str, arr[str]] +#img_inpainted: array +#gimp_mask:array +#img_rendered: array +#mask_raw: array +#mask:array +def to_json(ctx: Context): + text_regions:list[TextBlock] = ctx.text_regions + inpaint = ctx.img_inpainted + translations:Dict[str, List[str]] = ctx.translations + results = [] + if 'overlay_ext' in ctx: + #todo: unreachable + overlay_ext = ctx['overlay_ext'] + else: + overlay_ext = 'jpg' + for i, blk in enumerate(text_regions): + minX, minY, maxX, maxY = blk.xyxy + if 'translations' in ctx: + trans = {key: value[i] for key, value in translations.items()} + else: + trans = {} + 
trans["originalText"] = text_regions[i].text + if inpaint is not None: + overlay = inpaint[minY:maxY, minX:maxX] + + retval, buffer = cv2.imencode('.' + overlay_ext, overlay) + jpg_as_text = base64.b64encode(buffer) + background = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") + else: + background = None + text_region = text_regions[i] + text_region.adjust_bg_color = False + color1, color2 = text_region.get_font_colors() + + results.append({ + 'text': trans, + 'minX': int(minX), + 'minY': int(minY), + 'maxX': int(maxX), + 'maxY': int(maxY), + 'textColor': { + 'fg': color1.tolist(), + 'bg': color2.tolist() + }, + 'language': text_regions[i].source_lang, + 'background': background + }) + return results \ No newline at end of file From 536859e1198b1aa2725780653f24123d0f2db991 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Tue, 19 Nov 2024 23:03:20 +0100 Subject: [PATCH 47/75] server refactor --- server/instance.py | 19 +-- server/main.py | 140 ++---------------- server/myqueue.py | 35 ++++- server/request_extraction.py | 61 ++++++++ .../{sent_data.py => sent_data_internal.py} | 0 server/streaming.py | 16 ++ 6 files changed, 126 insertions(+), 145 deletions(-) create mode 100644 server/request_extraction.py rename server/{sent_data.py => sent_data_internal.py} (100%) create mode 100644 server/streaming.py diff --git a/server/instance.py b/server/instance.py index c27a0c415..7c02e695f 100644 --- a/server/instance.py +++ b/server/instance.py @@ -6,7 +6,7 @@ from pydantic import BaseModel from manga_translator import Config -from server.sent_data import fetch_data_stream, NotifyType, fetch_data +from server.sent_data_internal import fetch_data_stream, NotifyType, fetch_data class ExecutorInstance(BaseModel): @@ -54,20 +54,5 @@ def free_executor(self, instance: ExecutorInstance): self.event.set() self.event.clear() -def example_notify(a: int, b) -> None: - if a == 0: - print(pickle.loads(b)) - 
else: - print(f"Notify called with a={a} and b={b}") -async def main(): - executor = ExecutorInstance(ip="127.0.0.1", port=5003) - - image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") - config = Config() - - await executor.sent_stream(image, config, example_notify) - -if __name__ == "__main__": - import asyncio - asyncio.run(main()) \ No newline at end of file +executor_instances: Executors = Executors() diff --git a/server/main.py b/server/main.py index cda01c0a9..257be7cf5 100644 --- a/server/main.py +++ b/server/main.py @@ -1,20 +1,14 @@ import asyncio -import builtins import io -import re -from base64 import b64decode -from typing import Union -import requests -from PIL import Image -from fastapi import FastAPI, Request, HTTPException +from fastapi import FastAPI, Request from fastapi.middleware.cors import CORSMiddleware from starlette.responses import StreamingResponse, JSONResponse -from manga_translator import Config, Context -from server.instance import ExecutorInstance, Executors -from server.myqueue import TaskQueue -from server.sent_data import NotifyType +from server.instance import ExecutorInstance, executor_instances +from server.myqueue import wait_in_queue +from server.request_extraction import multi_content_type, to_pil_image, get_ctx +from server.streaming import notify, stream from server.to_json import to_json app = FastAPI() @@ -26,9 +20,6 @@ allow_methods=["*"], allow_headers=["*"], ) -executor_instances: Executors = Executors() -task_queue = TaskQueue() - @app.post("/register") async def register_instance(instance: ExecutorInstance, request: Request): @@ -36,130 +27,25 @@ async def register_instance(instance: ExecutorInstance, request: Request): executor_instances.register(instance) return {"code": 0} - -async def to_pil_image(image: Union[str, bytes]) -> Image.Image: - try: - if isinstance(image, builtins.bytes): - image = Image.open(io.BytesIO(image)) - return image - else: - if 
re.match(r'^data:image/.+;base64,', image): - value = image.split(',', 1)[1] - image_data = b64decode(value) - image = Image.open(io.BytesIO(image_data)) - return image - else: - response = requests.get(image) - image = Image.open(io.BytesIO(response.content)) - return image - except Exception as e: - raise HTTPException(status_code=422, detail=str(e)) - - -async def multi_content_type(request: Request): - content_type = request.headers.get("content-type") - - if content_type and content_type.startswith("multipart/form-data"): - form = await request.form() - config = form.get("config", "{}") - image = form.get("image") - image_content = await image.read() - config = Config.parse_raw(config) - return config, image_content - elif content_type and content_type.startswith("application/json"): - body = await request.json() - config = Config(**body.get("config", {})) - image = body.get("image") - return config, image - - else: - raise HTTPException(status_code=400, detail="Unsupported Content-Type") - - -async def wait(task, notify: NotifyType): - """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. 
when done the item will be removed from the queue and result will be returned""" - while True: - queue_pos = task_queue.get_pos(task) - if notify: - notify(3, queue_pos) - if queue_pos < executor_instances.free_executors(): - instance = await executor_instances.find_executor() - task_queue.remove(task) - if notify: - notify(4, 0) - if notify: - await instance.sent_stream(task.image, task.config, notify) - else: - result = await instance.sent(task.image, task.config) - - executor_instances.free_executor(instance) - - if notify: - return - else: - return result - else: - if queue_pos == 0: - raise HTTPException(500, detail="No translator registered") - await task_queue.wait_for_event() - - -async def stream(messages): - while True: - message = await messages.get() - yield message - if message[0] == 0 or message[0] == 2: - break - -def notify(code, data, transform_to_bytes, messages): - if code == 0: - result_bytes = transform_to_bytes(data) - encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes - messages.put_nowait(encoded_result) - else: - result_bytes = str(data).encode("utf-8") - encoded_result =code.to_bytes(1, 'big') + len(result_bytes).to_bytes(4, 'big') + result_bytes - messages.put_nowait(encoded_result) - def transform_to_image(data): return b"" @app.post("/json") async def json(req: Request): - data, img = await multi_content_type(req) - ctx = Context() - - ctx.image = await to_pil_image(img) - ctx.config = data - task_queue.add_task(ctx) - - data = await wait(ctx, None) - json = to_json(data) + ctx = await get_ctx(req) + json = to_json(ctx) return JSONResponse(content=json) @app.post("/bytes") async def bytes(req: Request): - data, img = await multi_content_type(req) - ctx = Context() - - ctx.image = await to_pil_image(img) - ctx.config = data - task_queue.add_task(ctx) - data = await wait((data, img), None) + ctx = await get_ctx(req) @app.post("/image") async def image(req: Request): - data, img = await multi_content_type(req) - ctx 
= Context() - - ctx.image = await to_pil_image(img) - ctx.config = data - task_queue.add_task(ctx) - - data = await wait((data, img), None) + ctx = await get_ctx(req) img_byte_arr = io.BytesIO() - data.result.save(img_byte_arr, format="PNG") + ctx.result.save(img_byte_arr, format="PNG") img_byte_arr.seek(0) return StreamingResponse(img_byte_arr, media_type="image/png") @@ -175,7 +61,7 @@ def example_notify(code: int, data) -> None: notify(code, data, transform_to_image, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait((data, img), example_notify)) + asyncio.create_task(wait_in_queue((data, img), example_notify)) return streaming_response @app.post("/stream_bytes") @@ -189,7 +75,7 @@ def example_notify(code: int, data) -> None: notify(code, data, transform_to_image, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait((data, img), example_notify)) + asyncio.create_task(wait_in_queue((data, img), example_notify)) return streaming_response @app.post("/stream_image") @@ -203,7 +89,7 @@ def example_notify(code: int, data) -> None: notify(code, data, transform_to_image, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait((data, img), example_notify)) + asyncio.create_task(wait_in_queue((data, img), example_notify)) return streaming_response if __name__ == '__main__': diff --git a/server/myqueue.py b/server/myqueue.py index 61c1abac3..cb11db50d 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -1,6 +1,10 @@ import asyncio from typing import List, Dict +from fastapi import HTTPException + +from server.instance import executor_instances +from server.sent_data_internal import NotifyType class TaskQueue: def __init__(self): @@ -22,4 +26,33 @@ def remove(self, task): self.update_event() async def wait_for_event(self): - await 
self.queue_event.wait() \ No newline at end of file + await self.queue_event.wait() + +task_queue = TaskQueue() + +async def wait_in_queue(task, notify: NotifyType): + """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. when done the item will be removed from the queue and result will be returned""" + while True: + queue_pos = task_queue.get_pos(task) + if notify: + notify(3, queue_pos) + if queue_pos < executor_instances.free_executors(): + instance = await executor_instances.find_executor() + task_queue.remove(task) + if notify: + notify(4, 0) + if notify: + await instance.sent_stream(task.image, task.config, notify) + else: + result = await instance.sent(task.image, task.config) + + executor_instances.free_executor(instance) + + if notify: + return + else: + return result + else: + if queue_pos == 0: + raise HTTPException(500, detail="No translator registered") + await task_queue.wait_for_event() \ No newline at end of file diff --git a/server/request_extraction.py b/server/request_extraction.py new file mode 100644 index 000000000..559127760 --- /dev/null +++ b/server/request_extraction.py @@ -0,0 +1,61 @@ +import builtins +import io +import re +from base64 import b64decode +from typing import Union + +import requests +from PIL import Image +from fastapi import Request, HTTPException + +from manga_translator import Config, Context +from server.myqueue import task_queue, wait_in_queue + + +async def to_pil_image(image: Union[str, bytes]) -> Image.Image: + try: + if isinstance(image, builtins.bytes): + image = Image.open(io.BytesIO(image)) + return image + else: + if re.match(r'^data:image/.+;base64,', image): + value = image.split(',', 1)[1] + image_data = b64decode(value) + image = Image.open(io.BytesIO(image_data)) + return image + else: + response = requests.get(image) + image = Image.open(io.BytesIO(response.content)) + return image + except Exception as e: + raise 
HTTPException(status_code=422, detail=str(e)) + + +async def multi_content_type(request: Request): + content_type = request.headers.get("content-type") + + if content_type and content_type.startswith("multipart/form-data"): + form = await request.form() + config = form.get("config", "{}") + image = form.get("image") + image_content = await image.read() + config = Config.parse_raw(config) + return config, image_content + elif content_type and content_type.startswith("application/json"): + body = await request.json() + config = Config(**body.get("config", {})) + image = body.get("image") + return config, image + + else: + raise HTTPException(status_code=400, detail="Unsupported Content-Type") + +async def get_ctx(req: Request): + data, img = await multi_content_type(req) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) + + data = await wait_in_queue(ctx, None) \ No newline at end of file diff --git a/server/sent_data.py b/server/sent_data_internal.py similarity index 100% rename from server/sent_data.py rename to server/sent_data_internal.py diff --git a/server/streaming.py b/server/streaming.py new file mode 100644 index 000000000..809d58e05 --- /dev/null +++ b/server/streaming.py @@ -0,0 +1,16 @@ +async def stream(messages): + while True: + message = await messages.get() + yield message + if message[0] == 0 or message[0] == 2: + break + +def notify(code, data, transform_to_bytes, messages): + if code == 0: + result_bytes = transform_to_bytes(data) + encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes + messages.put_nowait(encoded_result) + else: + result_bytes = str(data).encode("utf-8") + encoded_result =code.to_bytes(1, 'big') + len(result_bytes).to_bytes(4, 'big') + result_bytes + messages.put_nowait(encoded_result) \ No newline at end of file From 4a8ec9915108c7623b428c8081ca40822cb2f694 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> 
Date: Tue, 19 Nov 2024 23:11:07 +0100 Subject: [PATCH 48/75] stream update --- server/main.py | 48 ++++++------------------------------ server/request_extraction.py | 22 ++++++++++++++++- server/streaming.py | 4 ++- 3 files changed, 32 insertions(+), 42 deletions(-) diff --git a/server/main.py b/server/main.py index 257be7cf5..665b61af6 100644 --- a/server/main.py +++ b/server/main.py @@ -1,4 +1,3 @@ -import asyncio import io from fastapi import FastAPI, Request @@ -6,9 +5,7 @@ from starlette.responses import StreamingResponse, JSONResponse from server.instance import ExecutorInstance, executor_instances -from server.myqueue import wait_in_queue -from server.request_extraction import multi_content_type, to_pil_image, get_ctx -from server.streaming import notify, stream +from server.request_extraction import get_ctx, while_streaming from server.to_json import to_json app = FastAPI() @@ -27,8 +24,10 @@ async def register_instance(instance: ExecutorInstance, request: Request): executor_instances.register(instance) return {"code": 0} -def transform_to_image(data): - return b"" +def transform_to_image(ctx): + img_byte_arr = io.BytesIO() + ctx.result.save(img_byte_arr, format="PNG") + return img_byte_arr.getvalue() @app.post("/json") async def json(req: Request): @@ -40,7 +39,6 @@ async def json(req: Request): async def bytes(req: Request): ctx = await get_ctx(req) - @app.post("/image") async def image(req: Request): ctx = await get_ctx(req) @@ -52,45 +50,15 @@ async def image(req: Request): @app.post("/stream_json") async def stream_json(req: Request): - data, img = await multi_content_type(req) - img = await to_pil_image(img) - - messages = asyncio.Queue() - - def example_notify(code: int, data) -> None: - notify(code, data, transform_to_image, messages) - - streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait_in_queue((data, img), example_notify)) - return streaming_response + return 
while_streaming(req, transform_to_image) @app.post("/stream_bytes") async def stream_bytes(req: Request): - data, img = await multi_content_type(req) - img = await to_pil_image(img) - - messages = asyncio.Queue() - - def example_notify(code: int, data) -> None: - notify(code, data, transform_to_image, messages) - - streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait_in_queue((data, img), example_notify)) - return streaming_response + return while_streaming(req, transform_to_image) @app.post("/stream_image") async def stream_image(req: Request): - data, img = await multi_content_type(req) - img = await to_pil_image(img) - - messages = asyncio.Queue() - - def example_notify(code: int, data) -> None: - notify(code, data, transform_to_image, messages) - - streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait_in_queue((data, img), example_notify)) - return streaming_response + return while_streaming(req, transform_to_image) if __name__ == '__main__': import uvicorn diff --git a/server/request_extraction.py b/server/request_extraction.py index 559127760..62b2f9fd8 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -1,3 +1,4 @@ +import asyncio import builtins import io import re @@ -7,9 +8,11 @@ import requests from PIL import Image from fastapi import Request, HTTPException +from starlette.responses import StreamingResponse from manga_translator import Config, Context from server.myqueue import task_queue, wait_in_queue +from server.streaming import notify, stream async def to_pil_image(image: Union[str, bytes]) -> Image.Image: @@ -58,4 +61,21 @@ async def get_ctx(req: Request): ctx.config = data task_queue.add_task(ctx) - data = await wait_in_queue(ctx, None) \ No newline at end of file + data = await wait_in_queue(ctx, None) + +async def while_streaming(req: Request, transform): + data, img = await 
multi_content_type(req) + ctx = Context() + + ctx.image = await to_pil_image(img) + ctx.config = data + task_queue.add_task(ctx) + + messages = asyncio.Queue() + + def notify_internal(code: int, data) -> None: + notify(code, data, transform, messages) + + streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") + asyncio.create_task(wait_in_queue((data, img), notify_internal)) + return streaming_response \ No newline at end of file diff --git a/server/streaming.py b/server/streaming.py index 809d58e05..907fed3d7 100644 --- a/server/streaming.py +++ b/server/streaming.py @@ -1,3 +1,5 @@ +import pickle + async def stream(messages): while True: message = await messages.get() @@ -7,7 +9,7 @@ async def stream(messages): def notify(code, data, transform_to_bytes, messages): if code == 0: - result_bytes = transform_to_bytes(data) + result_bytes = transform_to_bytes(pickle.loads(data)) encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes messages.put_nowait(encoded_result) else: From 85d7b1e4375e6fab749abff30ba1009d1669611f Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 17:47:52 +0100 Subject: [PATCH 49/75] stream_image,stream_json now working json&image most likely too --- api-test.html | 153 +++++++++++++++++++++++++++++++++++ server/main.py | 9 ++- server/myqueue.py | 4 +- server/request_extraction.py | 4 +- server/streaming.py | 2 +- 5 files changed, 164 insertions(+), 8 deletions(-) create mode 100644 api-test.html diff --git a/api-test.html b/api-test.html new file mode 100644 index 000000000..f07b865fb --- /dev/null +++ b/api-test.html @@ -0,0 +1,153 @@ + + + + + + Image Upload and Translation + + + +

Upload Image and Translate

+ + +

+

+ + + + diff --git a/server/main.py b/server/main.py index 665b61af6..7a8cf48f2 100644 --- a/server/main.py +++ b/server/main.py @@ -29,6 +29,9 @@ def transform_to_image(ctx): ctx.result.save(img_byte_arr, format="PNG") return img_byte_arr.getvalue() +def transform_to_json(ctx): + return str(to_json(ctx)).encode("utf-8") + @app.post("/json") async def json(req: Request): ctx = await get_ctx(req) @@ -50,15 +53,15 @@ async def image(req: Request): @app.post("/stream_json") async def stream_json(req: Request): - return while_streaming(req, transform_to_image) + return await while_streaming(req, transform_to_json) @app.post("/stream_bytes") async def stream_bytes(req: Request): - return while_streaming(req, transform_to_image) + return await while_streaming(req, transform_to_image) @app.post("/stream_image") async def stream_image(req: Request): - return while_streaming(req, transform_to_image) + return await while_streaming(req, transform_to_image) if __name__ == '__main__': import uvicorn diff --git a/server/myqueue.py b/server/myqueue.py index cb11db50d..ca21698ed 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -35,12 +35,12 @@ async def wait_in_queue(task, notify: NotifyType): while True: queue_pos = task_queue.get_pos(task) if notify: - notify(3, queue_pos) + notify(3, str(queue_pos)) if queue_pos < executor_instances.free_executors(): instance = await executor_instances.find_executor() task_queue.remove(task) if notify: - notify(4, 0) + notify(4, "0") if notify: await instance.sent_stream(task.image, task.config, notify) else: diff --git a/server/request_extraction.py b/server/request_extraction.py index 62b2f9fd8..9290ecc07 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -61,7 +61,7 @@ async def get_ctx(req: Request): ctx.config = data task_queue.add_task(ctx) - data = await wait_in_queue(ctx, None) + return await wait_in_queue(ctx, None) async def while_streaming(req: Request, transform): data, img = await 
multi_content_type(req) @@ -77,5 +77,5 @@ def notify_internal(code: int, data) -> None: notify(code, data, transform, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - asyncio.create_task(wait_in_queue((data, img), notify_internal)) + asyncio.create_task(wait_in_queue(ctx, notify_internal)) return streaming_response \ No newline at end of file diff --git a/server/streaming.py b/server/streaming.py index 907fed3d7..c65a897d2 100644 --- a/server/streaming.py +++ b/server/streaming.py @@ -10,7 +10,7 @@ async def stream(messages): def notify(code, data, transform_to_bytes, messages): if code == 0: result_bytes = transform_to_bytes(pickle.loads(data)) - encoded_result = b"" + len(result_bytes).to_bytes(4, 'big') + result_bytes + encoded_result = b'\x00' + len(result_bytes).to_bytes(4, 'big') + result_bytes messages.put_nowait(encoded_result) else: result_bytes = str(data).encode("utf-8") From d07e7dcf351b187058cdce0f6248b90c963a54f5 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 18:00:15 +0100 Subject: [PATCH 50/75] remove old tasks on interrupt connection --- server/myqueue.py | 15 +++++++++++++++ server/request_extraction.py | 2 ++ 2 files changed, 17 insertions(+) diff --git a/server/myqueue.py b/server/myqueue.py index ca21698ed..9ae865063 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -2,6 +2,7 @@ from typing import List, Dict from fastapi import HTTPException +from starlette.requests import Request from server.instance import executor_instances from server.sent_data_internal import NotifyType @@ -30,6 +31,11 @@ async def wait_for_event(self): task_queue = TaskQueue() +async def is_client_disconnected(request: Request) -> bool: + if await request.is_disconnected(): + return True + return False + async def wait_in_queue(task, notify: NotifyType): """Will get task position report it. 
If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. when done the item will be removed from the queue and result will be returned""" while True: @@ -37,6 +43,10 @@ async def wait_in_queue(task, notify: NotifyType): if notify: notify(3, str(queue_pos)) if queue_pos < executor_instances.free_executors(): + if is_client_disconnected(task.req): + task_queue.remove(task) + task_queue.update_event() + raise HTTPException(500, detail="User is no longer connected") #just for the logs instance = await executor_instances.find_executor() task_queue.remove(task) if notify: @@ -47,6 +57,7 @@ async def wait_in_queue(task, notify: NotifyType): result = await instance.sent(task.image, task.config) executor_instances.free_executor(instance) + task_queue.update_event() if notify: return @@ -55,4 +66,8 @@ async def wait_in_queue(task, notify: NotifyType): else: if queue_pos == 0: raise HTTPException(500, detail="No translator registered") + if is_client_disconnected(task.req): + task_queue.remove(task) + task_queue.update_event() + raise HTTPException(500, detail="User is no longer connected") #just for the logs await task_queue.wait_for_event() \ No newline at end of file diff --git a/server/request_extraction.py b/server/request_extraction.py index 9290ecc07..02dc85e70 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -59,6 +59,7 @@ async def get_ctx(req: Request): ctx.image = await to_pil_image(img) ctx.config = data + ctx.req = req task_queue.add_task(ctx) return await wait_in_queue(ctx, None) @@ -69,6 +70,7 @@ async def while_streaming(req: Request, transform): ctx.image = await to_pil_image(img) ctx.config = data + ctx.req = req task_queue.add_task(ctx) messages = asyncio.Queue() From e9ef90123cb95793f422e599d1dce2628c419938 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 20:32:11 +0100 Subject: [PATCH 51/75] change routes --- 
execute_test.py | 46 ----------------------------- server/main.py | 28 ++++++++++++++---- api-test.html => server/manual.html | 3 +- 3 files changed, 23 insertions(+), 54 deletions(-) delete mode 100644 execute_test.py rename api-test.html => server/manual.html (97%) diff --git a/execute_test.py b/execute_test.py deleted file mode 100644 index 8705fb2b1..000000000 --- a/execute_test.py +++ /dev/null @@ -1,46 +0,0 @@ -import asyncio -import json -import pickle -import requests -from PIL import Image - -from manga_translator import Config - - -async def execute_method(method_name, attributes): - url = f"http://127.0.0.1:5003/execute/{method_name}" - headers = {'Content-Type': 'application/octet-stream'} - - response = requests.post(url, data=pickle.dumps(attributes), headers=headers, stream=True) - - if response.status_code == 200: - buffer = b'' - for chunk in response.iter_content(chunk_size=None): - if chunk: - buffer += chunk - while True: - if len(buffer) >= 5: - status = int.from_bytes(buffer[0:1], byteorder='big') - expected_size = int.from_bytes(buffer[1:5], byteorder='big') - if len(buffer) >= 5 + expected_size: - data = buffer[5:5 + expected_size] - if status == 0: - print("data", pickle.loads(data)) - elif status == 1: - print("log", data) - elif status == 2: - print("error", data) - buffer = buffer[5 + expected_size:] - else: - break - else: - break - else: - print(json.loads(response.content)) - - - -if __name__ == '__main__': - image = Image.open("../imgs/232264684-5a7bcf8e-707b-4925-86b0-4212382f1680.png") - attributes = {"image": image, "config": Config()} - asyncio.run(execute_method("translate", attributes)) \ No newline at end of file diff --git a/server/main.py b/server/main.py index 7a8cf48f2..49ef9f5a9 100644 --- a/server/main.py +++ b/server/main.py @@ -5,6 +5,7 @@ from starlette.responses import StreamingResponse, JSONResponse from server.instance import ExecutorInstance, executor_instances +from server.myqueue import task_queue from 
server.request_extraction import get_ctx, while_streaming from server.to_json import to_json @@ -32,17 +33,17 @@ def transform_to_image(ctx): def transform_to_json(ctx): return str(to_json(ctx)).encode("utf-8") -@app.post("/json") +@app.post("/translate/json") async def json(req: Request): ctx = await get_ctx(req) json = to_json(ctx) return JSONResponse(content=json) -@app.post("/bytes") +@app.post("/translate/bytes") async def bytes(req: Request): ctx = await get_ctx(req) -@app.post("/image") +@app.post("/translate/image") async def image(req: Request): ctx = await get_ctx(req) img_byte_arr = io.BytesIO() @@ -51,18 +52,33 @@ async def image(req: Request): return StreamingResponse(img_byte_arr, media_type="image/png") -@app.post("/stream_json") +@app.post("/translate/json/stream") async def stream_json(req: Request): return await while_streaming(req, transform_to_json) -@app.post("/stream_bytes") +@app.post("/translate/bytes/stream") async def stream_bytes(req: Request): return await while_streaming(req, transform_to_image) -@app.post("/stream_image") +@app.post("/translate/image/stream") async def stream_image(req: Request): return await while_streaming(req, transform_to_image) +@app.post("/queue-size") +async def queue_size() -> int: + return len(task_queue.queue) + +@app.post("/") +async def index(): + # ui.html + pass + +@app.post("/manual") +async def manual(): + # manual.html + pass + + if __name__ == '__main__': import uvicorn from args import parse_arguments diff --git a/api-test.html b/server/manual.html similarity index 97% rename from api-test.html rename to server/manual.html index f07b865fb..1335b0c51 100644 --- a/api-test.html +++ b/server/manual.html @@ -72,7 +72,7 @@

formData.append('image', file); try { - const response = await fetch( generateImage.checked ? 'http://127.0.0.1:8000/stream_image' : 'http://127.0.0.1:8000/stream_json', { + const response = await fetch( generateImage.checked ? '/translate/image/stream' : '/translate/json/stream', { method: 'POST', body: formData, /*headers: { @@ -142,7 +142,6 @@

buffer = buffer.slice(totalSize); } } - console.log(buffer.length) } else { errorField.innerHTML = response.statusText; From c42c9f60eb0462b99b53d9c0e7c9bd3f210e2e04 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 21:10:35 +0100 Subject: [PATCH 52/75] api goals --- server/args.py | 7 ++++++- server/instance.py | 2 +- server/main.py | 51 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/server/args.py b/server/args.py index a00e24a22..a27ba5ffd 100644 --- a/server/args.py +++ b/server/args.py @@ -5,5 +5,10 @@ def parse_arguments(): parser = argparse.ArgumentParser(description="Specify host and port for the server.") parser.add_argument('--host', type=str, default='127.0.0.1', help='The host address (default: 127.0.0.1)') parser.add_argument('--port', type=int, default=8000, help='The port number (default: 8080)') - + #use_gpu + #use_gpu_limited + #ignore_errors + #verbose + #nonce + #start_instance return parser.parse_args() \ No newline at end of file diff --git a/server/instance.py b/server/instance.py index 7c02e695f..e844289dd 100644 --- a/server/instance.py +++ b/server/instance.py @@ -25,7 +25,7 @@ async def sent_stream(self, image: Image, config: Config, sender: NotifyType): class Executors: def __init__(self): - self.list: List[ExecutorInstance] = [ExecutorInstance(ip="127.0.0.1", port=5003)] + self.list: List[ExecutorInstance] = [] self.lock: Lock = Lock() self.event = Event() diff --git a/server/main.py b/server/main.py index 49ef9f5a9..dd6f6ebd6 100644 --- a/server/main.py +++ b/server/main.py @@ -1,6 +1,10 @@ import io +import os +import secrets +import subprocess +import sys -from fastapi import FastAPI, Request +from fastapi import FastAPI, Request, HTTPException from fastapi.middleware.cors import CORSMiddleware from starlette.responses import StreamingResponse, JSONResponse @@ -10,6 +14,7 @@ from server.to_json import 
to_json app = FastAPI() +nonce = None app.add_middleware( CORSMiddleware, @@ -21,6 +26,9 @@ @app.post("/register") async def register_instance(instance: ExecutorInstance, request: Request): + req_nonce = request.headers.get('X-Nonce') + if req_nonce != nonce: + raise HTTPException(401, detail="Invalid nonce") instance.ip = request.client.host executor_instances.register(instance) return {"code": 0} @@ -78,10 +86,51 @@ async def manual(): # manual.html pass +def generate_nonce(): + return secrets.token_hex(16) + +def start_translator_client_proc(host: str, port: int, nonce: str, params: dict): + cmds = [ + sys.executable, + '-m', 'manga_translator', + '--mode', 'shared', + '--host', host, + '--port', str(port), + '--nonce', nonce, + '--no-report' + ] + if params.get('use_gpu', False): + cmds.append('--use-gpu') + if params.get('use_gpu_limited', False): + cmds.append('--use-gpu-limited') + if params.get('ignore_errors', False): + cmds.append('--ignore-errors') + if params.get('verbose', False): + cmds.append('--verbose') + + proc = subprocess.Popen(cmds, cwd=BASE_PATH) + executor_instances.register(ExecutorInstance(ip=host, port=port)) + return proc + +def prepare(args): + global nonce + if args.get("nonce", None) is None: + nonce = os.getenv('MT_WEB_NONCE', generate_nonce()) + else: + nonce = args.get("nonce", None) + if args.get("start_instance", None): + start_translator_client_proc(args.get("host", "0.0.0.0"), args.get("port",8000) + 1, nonce, args) + +#todo: restart if crash +#todo: cache results +#todo: cleanup cache +#todo: store images while in queue if __name__ == '__main__': import uvicorn from args import parse_arguments args = parse_arguments() + prepare(args) + print("Nonce: "+nonce) uvicorn.run(app, host=args.host, port=args.port) From b53b79e904e62af638027cc13dc8b228aa8b7d72 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Wed, 20 Nov 2024 21:14:31 +0100 Subject: [PATCH 53/75] more todos & args --- 
server/args.py | 16 ++++++++++------ server/main.py | 7 ++++--- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/server/args.py b/server/args.py index a27ba5ffd..5c7294ca9 100644 --- a/server/args.py +++ b/server/args.py @@ -1,14 +1,18 @@ import argparse +import os def parse_arguments(): parser = argparse.ArgumentParser(description="Specify host and port for the server.") parser.add_argument('--host', type=str, default='127.0.0.1', help='The host address (default: 127.0.0.1)') parser.add_argument('--port', type=int, default=8000, help='The port number (default: 8080)') - #use_gpu - #use_gpu_limited - #ignore_errors - #verbose - #nonce - #start_instance + parser.add_argument('-v', '--verbose', action='store_true', + help='Print debug info and save intermediate images in result folder') + parser.add_argument('--start-instance', action='store_true', + help='If a translator should be launched automatically') + parser.add_argument('--ignore-errors', action='store_true', help='Skip image on encountered error.') + parser.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal web server communication') + g = parser.add_mutually_exclusive_group() + g.add_argument('--use-gpu', action='store_true', help='Turn on/off gpu (auto switch between mps and cuda)') + g.add_argument('--use-gpu-limited', action='store_true', help='Turn on/off gpu (excluding offline translator)') return parser.parse_args() \ No newline at end of file diff --git a/server/main.py b/server/main.py index dd6f6ebd6..f5b5d105a 100644 --- a/server/main.py +++ b/server/main.py @@ -78,12 +78,12 @@ async def queue_size() -> int: @app.post("/") async def index(): - # ui.html + # todo:ui.html pass @app.post("/manual") async def manual(): - # manual.html + # todo:manual.html pass def generate_nonce(): @@ -107,7 +107,7 @@ def start_translator_client_proc(host: str, port: int, nonce: str, params: dict) cmds.append('--ignore-errors') if 
params.get('verbose', False): cmds.append('--verbose') - + #todo: cwd proc = subprocess.Popen(cmds, cwd=BASE_PATH) executor_instances.register(ExecutorInstance(ip=host, port=port)) return proc @@ -125,6 +125,7 @@ def prepare(args): #todo: cache results #todo: cleanup cache #todo: store images while in queue +#todo: add docs if __name__ == '__main__': import uvicorn From 06e04ca8241d1822be2d4dfc58927bbfd8a27b94 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 15:03:29 +0100 Subject: [PATCH 54/75] some docs & queue fix --- manga_translator/utils/generic.py | 2 +- server/main.py | 83 ++++++++++++++++++++----------- server/myqueue.py | 20 +++++--- server/request_extraction.py | 8 ++- server/to_json.py | 17 ++++++- 5 files changed, 91 insertions(+), 39 deletions(-) diff --git a/manga_translator/utils/generic.py b/manga_translator/utils/generic.py index 92c55c6ac..785eb135d 100644 --- a/manga_translator/utils/generic.py +++ b/manga_translator/utils/generic.py @@ -37,7 +37,7 @@ def __init__(self, **kwargs): def __eq__(self, other): if not isinstance(other, Context): return NotImplemented - return vars(self) == vars(other) + return dict(self) == dict(other) def __contains__(self, key): return key in self.keys() diff --git a/server/main.py b/server/main.py index f5b5d105a..185e59f6a 100644 --- a/server/main.py +++ b/server/main.py @@ -3,15 +3,21 @@ import secrets import subprocess import sys +from builtins import bytes +from typing import Union -from fastapi import FastAPI, Request, HTTPException +from fastapi import FastAPI, Request, HTTPException, Header, Form, UploadFile from fastapi.middleware.cors import CORSMiddleware -from starlette.responses import StreamingResponse, JSONResponse +from fastapi.responses import StreamingResponse, JSONResponse, HTMLResponse +from pathlib import Path +from pydantic import BaseModel + +from manga_translator import Config from server.instance import ExecutorInstance, 
executor_instances from server.myqueue import task_queue -from server.request_extraction import get_ctx, while_streaming -from server.to_json import to_json +from server.request_extraction import get_ctx, while_streaming, TranslateRequest +from server.to_json import to_json, Translation app = FastAPI() nonce = None @@ -24,14 +30,19 @@ allow_headers=["*"], ) -@app.post("/register") -async def register_instance(instance: ExecutorInstance, request: Request): - req_nonce = request.headers.get('X-Nonce') +class TranslateRequestForm(BaseModel): + """This request can be a multipart or a json request""" + image: UploadFile + """can be a url, base64 encoded image or a multipart image""" + config: str + """in case it is a multipart this needs to be a string(json.stringify)""" + +@app.post("/register", response_description="no response") +async def register_instance(instance: ExecutorInstance, req: Request, req_nonce: str = Header(alias="X-Nonce")): if req_nonce != nonce: raise HTTPException(401, detail="Invalid nonce") - instance.ip = request.client.host + instance.ip = req.client.host executor_instances.register(instance) - return {"code": 0} def transform_to_image(ctx): img_byte_arr = io.BytesIO() @@ -41,18 +52,29 @@ def transform_to_image(ctx): def transform_to_json(ctx): return str(to_json(ctx)).encode("utf-8") -@app.post("/translate/json") +async def parse_request( + req: Request, + image: Union[str, bytes] = Form(...), + config: str = Form(...), +): + if req.headers.get('content-type').startswith('multipart'): + config = json.loads(config) + return TranslateRequest(image=image, config=Config(**config)) + else: + return None + +@app.post("/translate/json", response_model=list[Translation], response_description="json strucure inspired by the ichigo translator extension") async def json(req: Request): ctx = await get_ctx(req) json = to_json(ctx) return JSONResponse(content=json) -@app.post("/translate/bytes") +@app.post("/translate/bytes", 
response_class=StreamingResponse, response_description="custom byte structure following the stream encoding, but with json first and then the image bytes as chunks") async def bytes(req: Request): ctx = await get_ctx(req) -@app.post("/translate/image") -async def image(req: Request): +@app.post("/translate/image", response_description="the result image", response_class=StreamingResponse) +async def image(req: Request) -> StreamingResponse: ctx = await get_ctx(req) img_byte_arr = io.BytesIO() ctx.result.save(img_byte_arr, format="PNG") @@ -60,31 +82,32 @@ async def image(req: Request): return StreamingResponse(img_byte_arr, media_type="image/png") -@app.post("/translate/json/stream") -async def stream_json(req: Request): +@app.post("/translate/json/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_json(req: Request) -> StreamingResponse: return await while_streaming(req, transform_to_json) -@app.post("/translate/bytes/stream") -async def stream_bytes(req: Request): +@app.post("/translate/bytes/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") +async def stream_bytes(req: Request)-> StreamingResponse: return await while_streaming(req, transform_to_image) -@app.post("/translate/image/stream") -async def stream_image(req: Request): +@app.post("/translate/image/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is 
waiting queue position, 4 is waiting for translator instance") +async def stream_image(req: Request) -> StreamingResponse: return await while_streaming(req, transform_to_image) -@app.post("/queue-size") +@app.post("/queue-size", response_model=int) async def queue_size() -> int: return len(task_queue.queue) -@app.post("/") -async def index(): +@app.get("/", response_class=HTMLResponse) +async def index() -> HTMLResponse: # todo:ui.html pass -@app.post("/manual") +@app.get("/manual", response_class=HTMLResponse) async def manual(): - # todo:manual.html - pass + html_file = Path("manual.html") + html_content = html_file.read_text() + return HTMLResponse(content=html_content) def generate_nonce(): return secrets.token_hex(16) @@ -114,18 +137,19 @@ def start_translator_client_proc(host: str, port: int, nonce: str, params: dict) def prepare(args): global nonce - if args.get("nonce", None) is None: + if args.nonce is None: nonce = os.getenv('MT_WEB_NONCE', generate_nonce()) else: - nonce = args.get("nonce", None) - if args.get("start_instance", None): - start_translator_client_proc(args.get("host", "0.0.0.0"), args.get("port",8000) + 1, nonce, args) + nonce = args.nonce + if args.start_instance: + start_translator_client_proc(args.host, args.port + 1, nonce, args) #todo: restart if crash #todo: cache results #todo: cleanup cache #todo: store images while in queue #todo: add docs +#todo: index doesnt work properly in the list(is_client_disconnected is not executed immediatly/does not update the index) if __name__ == '__main__': import uvicorn @@ -134,4 +158,5 @@ def prepare(args): args = parse_arguments() prepare(args) print("Nonce: "+nonce) + executor_instances.register(ExecutorInstance(ip="127.0.0.1", port=5003)) uvicorn.run(app, host=args.host, port=args.port) diff --git a/server/myqueue.py b/server/myqueue.py index 9ae865063..446541fc2 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -3,13 +3,15 @@ from fastapi import HTTPException from starlette.requests 
import Request +from starlette.responses import StreamingResponse +from manga_translator import Context from server.instance import executor_instances from server.sent_data_internal import NotifyType class TaskQueue: def __init__(self): - self.queue: List[Dict] = [] + self.queue: List[Context] = [] self.queue_event: asyncio.Event = asyncio.Event() def add_task(self, task): @@ -43,10 +45,13 @@ async def wait_in_queue(task, notify: NotifyType): if notify: notify(3, str(queue_pos)) if queue_pos < executor_instances.free_executors(): - if is_client_disconnected(task.req): + if await is_client_disconnected(task.req): task_queue.remove(task) task_queue.update_event() - raise HTTPException(500, detail="User is no longer connected") #just for the logs + if notify: + return + else: + raise HTTPException(500, detail="User is no longer connected") #just for the logs instance = await executor_instances.find_executor() task_queue.remove(task) if notify: @@ -64,10 +69,11 @@ async def wait_in_queue(task, notify: NotifyType): else: return result else: - if queue_pos == 0: - raise HTTPException(500, detail="No translator registered") - if is_client_disconnected(task.req): + if await is_client_disconnected(task.req): task_queue.remove(task) task_queue.update_event() - raise HTTPException(500, detail="User is no longer connected") #just for the logs + if notify: + return + else: + raise HTTPException(500, detail="User is no longer connected") #just for the logs await task_queue.wait_for_event() \ No newline at end of file diff --git a/server/request_extraction.py b/server/request_extraction.py index 02dc85e70..442bc0e68 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -8,12 +8,19 @@ import requests from PIL import Image from fastapi import Request, HTTPException +from pydantic import BaseModel from starlette.responses import StreamingResponse from manga_translator import Config, Context from server.myqueue import task_queue, wait_in_queue from 
server.streaming import notify, stream +class TranslateRequest(BaseModel): + """This request can be a multipart or a json request""" + image: bytes|str + """can be a url, base64 encoded image or a multipart image""" + config: Config + """in case it is a multipart this needs to be a string(json.stringify)""" async def to_pil_image(image: Union[str, bytes]) -> Image.Image: try: @@ -36,7 +43,6 @@ async def to_pil_image(image: Union[str, bytes]) -> Image.Image: async def multi_content_type(request: Request): content_type = request.headers.get("content-type") - if content_type and content_type.startswith("multipart/form-data"): form = await request.form() config = form.get("config", "{}") diff --git a/server/to_json.py b/server/to_json.py index 356e74104..911718d8d 100644 --- a/server/to_json.py +++ b/server/to_json.py @@ -2,6 +2,7 @@ from typing import Dict, List import cv2 +from pydantic import BaseModel from manga_translator import Context from manga_translator.utils import TextBlock @@ -63,4 +64,18 @@ def to_json(ctx: Context): 'language': text_regions[i].source_lang, 'background': background }) - return results \ No newline at end of file + return results + +class TextColor(BaseModel): + fg: tuple[int, int, int] + bg: tuple[int, int, int] + +class Translation(BaseModel): + text: dict[str, str] + minX: int + minY: int + maxX: int + maxY: int + textColor:TextColor + language: str + background: str From cbd01adb924e7750f7019d579609f5774c7f465a Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:35:25 +0100 Subject: [PATCH 55/75] stream type fix --- server/myqueue.py | 4 ++-- server/request_extraction.py | 2 +- server/sent_data_internal.py | 2 +- server/streaming.py | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/server/myqueue.py b/server/myqueue.py index 446541fc2..b9174d936 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -43,7 +43,7 @@ async def wait_in_queue(task, 
notify: NotifyType): while True: queue_pos = task_queue.get_pos(task) if notify: - notify(3, str(queue_pos)) + notify(3, str(queue_pos).encode('utf-8')) if queue_pos < executor_instances.free_executors(): if await is_client_disconnected(task.req): task_queue.remove(task) @@ -55,7 +55,7 @@ async def wait_in_queue(task, notify: NotifyType): instance = await executor_instances.find_executor() task_queue.remove(task) if notify: - notify(4, "0") + notify(4, b"") if notify: await instance.sent_stream(task.image, task.config, notify) else: diff --git a/server/request_extraction.py b/server/request_extraction.py index 442bc0e68..080d84cc4 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -81,7 +81,7 @@ async def while_streaming(req: Request, transform): messages = asyncio.Queue() - def notify_internal(code: int, data) -> None: + def notify_internal(code: int, data: bytes) -> None: notify(code, data, transform, messages) streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") diff --git a/server/sent_data_internal.py b/server/sent_data_internal.py index 2945cc9f7..5720cb6c5 100644 --- a/server/sent_data_internal.py +++ b/server/sent_data_internal.py @@ -8,7 +8,7 @@ from manga_translator import Config -NotifyType = Optional[Callable[[int, Optional[int]], None]] +NotifyType = Optional[Callable[[int, Optional[bytes]], None]] async def fetch_data_stream(url, image: Image, config: Config, sender: NotifyType, headers: Mapping[str, str] = {}): attributes = {"image": image, "config": config} diff --git a/server/streaming.py b/server/streaming.py index c65a897d2..611771853 100644 --- a/server/streaming.py +++ b/server/streaming.py @@ -1,3 +1,4 @@ +import asyncio import pickle async def stream(messages): @@ -7,12 +8,11 @@ async def stream(messages): if message[0] == 0 or message[0] == 2: break -def notify(code, data, transform_to_bytes, messages): +def notify(code: int, data: bytes, transform_to_bytes, messages: 
asyncio.Queue): if code == 0: result_bytes = transform_to_bytes(pickle.loads(data)) encoded_result = b'\x00' + len(result_bytes).to_bytes(4, 'big') + result_bytes messages.put_nowait(encoded_result) else: - result_bytes = str(data).encode("utf-8") - encoded_result =code.to_bytes(1, 'big') + len(result_bytes).to_bytes(4, 'big') + result_bytes + encoded_result =code.to_bytes(1, 'big') + len(data).to_bytes(4, 'big') + data messages.put_nowait(encoded_result) \ No newline at end of file From 858675a7a4bdacd86ee5727d8c28538384526a29 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:35:49 +0100 Subject: [PATCH 56/75] add server ui --- manga_translator/args.py | 2 +- server/index.html | 393 +++++++++++++++++++++++++++++++++++++++ server/main.py | 46 +++-- 3 files changed, 425 insertions(+), 16 deletions(-) create mode 100644 server/index.html diff --git a/manga_translator/args.py b/manga_translator/args.py index ef84073d6..562059ad6 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -131,4 +131,4 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser_api.add_argument('--host', default='127.0.0.1', type=str, help='Host for API service') parser_api.add_argument('--port', default=5003, type=int, help='Port for API service') parser_api.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal API server communication') - +parser_api.add_argument("--report", default=None,type=str, help='reports to server to register instance') diff --git a/server/index.html b/server/index.html new file mode 100644 index 000000000..edf617609 --- /dev/null +++ b/server/index.html @@ -0,0 +1,393 @@ + + + + + Image/Manga Translator + + + + + + + +
+
+

Image/Manga Translator

+
+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+ +
+ + +
+
+
+
+
+ + +
+
+
+
{{ statusText }}
+ +
+
+ +
{{ statusText }}
+
+
+ +
+
+ Please consider supporting us by + Ko-fi + or + Patreon! +
+ Source Code +
+
+
+ + + diff --git a/server/main.py b/server/main.py index 185e59f6a..773908f41 100644 --- a/server/main.py +++ b/server/main.py @@ -1,8 +1,10 @@ import io import os import secrets +import signal import subprocess import sys +from argparse import Namespace from builtins import bytes from typing import Union @@ -100,8 +102,9 @@ async def queue_size() -> int: @app.get("/", response_class=HTMLResponse) async def index() -> HTMLResponse: - # todo:ui.html - pass + html_file = Path("index.html") + html_content = html_file.read_text() + return HTMLResponse(content=html_content) @app.get("/manual", response_class=HTMLResponse) async def manual(): @@ -112,27 +115,35 @@ async def manual(): def generate_nonce(): return secrets.token_hex(16) -def start_translator_client_proc(host: str, port: int, nonce: str, params: dict): +def start_translator_client_proc(host: str, port: int, nonce: str, params: Namespace): cmds = [ sys.executable, '-m', 'manga_translator', - '--mode', 'shared', + 'shared', '--host', host, '--port', str(port), '--nonce', nonce, - '--no-report' ] - if params.get('use_gpu', False): + if params.use_gpu: cmds.append('--use-gpu') - if params.get('use_gpu_limited', False): + if params.use_gpu_limited: cmds.append('--use-gpu-limited') - if params.get('ignore_errors', False): + if params.ignore_errors: cmds.append('--ignore-errors') - if params.get('verbose', False): + if params.verbose: cmds.append('--verbose') - #todo: cwd - proc = subprocess.Popen(cmds, cwd=BASE_PATH) + base_path = os.path.dirname(os.path.abspath(__file__)) + parent = os.path.dirname(base_path) + proc = subprocess.Popen(cmds, cwd=parent) executor_instances.register(ExecutorInstance(ip=host, port=port)) + + def handle_exit_signals(signal, frame): + proc.terminate() + sys.exit(0) + + signal.signal(signal.SIGINT, handle_exit_signals) + signal.signal(signal.SIGTERM, handle_exit_signals) + return proc def prepare(args): @@ -142,7 +153,7 @@ def prepare(args): else: nonce = args.nonce if 
args.start_instance: - start_translator_client_proc(args.host, args.port + 1, nonce, args) + return start_translator_client_proc(args.host, args.port + 1, nonce, args) #todo: restart if crash #todo: cache results @@ -150,13 +161,18 @@ def prepare(args): #todo: store images while in queue #todo: add docs #todo: index doesnt work properly in the list(is_client_disconnected is not executed immediatly/does not update the index) +#todo: enable config in html pages if __name__ == '__main__': import uvicorn from args import parse_arguments args = parse_arguments() - prepare(args) + args.start_instance = True + proc = prepare(args) print("Nonce: "+nonce) - executor_instances.register(ExecutorInstance(ip="127.0.0.1", port=5003)) - uvicorn.run(app, host=args.host, port=args.port) + try: + uvicorn.run(app, host=args.host, port=args.port) + except Exception: + if proc: + proc.terminate() From 8aeff8e21fd1c80540da00661f5166fe95624ad8 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:36:36 +0100 Subject: [PATCH 57/75] delete old code --- manga_translator/mode/web.py | 153 ------- manga_translator/server/manual.html | 352 ---------------- manga_translator/server/ui.html | 377 ----------------- manga_translator/server/web_main.py | 629 ---------------------------- manga_translator/server/ws.proto | 36 -- manga_translator/server/ws_pb2.py | 32 -- 6 files changed, 1579 deletions(-) delete mode 100644 manga_translator/mode/web.py delete mode 100644 manga_translator/server/manual.html delete mode 100644 manga_translator/server/ui.html delete mode 100644 manga_translator/server/web_main.py delete mode 100644 manga_translator/server/ws.proto delete mode 100644 manga_translator/server/ws_pb2.py diff --git a/manga_translator/mode/web.py b/manga_translator/mode/web.py deleted file mode 100644 index 3f0ccb889..000000000 --- a/manga_translator/mode/web.py +++ /dev/null @@ -1,153 +0,0 @@ -import asyncio -import time - 
-import requests - -from manga_translator import logger, Context, Config -from manga_translator.mode.local import MangaTranslatorLocal -from manga_translator.translators import TRANSLATORS -from manga_translator.utils import add_file_logger, remove_file_logger - - -class MangaTranslatorWeb(MangaTranslatorLocal): - """ - Translator client that executes tasks on behalf of the webserver in web_main.py. - """ - - def __init__(self, params: dict = None): - super().__init__(params) - self.host = params.get('host', '127.0.0.1') - if self.host == '0.0.0.0': - self.host = '127.0.0.1' - self.port = params.get('port', 5003) - self.nonce = params.get('nonce', '') - self.ignore_errors = params.get('ignore_errors', True) - self._task_id = None - self._params = None - - async def _init_connection(self): - available_translators = [] - from ..translators import MissingAPIKeyException, get_translator - for key in TRANSLATORS: - try: - get_translator(key) - available_translators.append(key) - except MissingAPIKeyException: - pass - - data = { - 'nonce': self.nonce, - 'capabilities': { - 'translators': available_translators, - }, - } - requests.post(f'http://{self.host}:{self.port}/connect-internal', json=data) - - async def _send_state(self, state: str, finished: bool): - # wait for translation to be saved first (bad solution?) 
- finished = finished and not state == 'finished' - while True: - try: - data = { - 'task_id': self._task_id, - 'nonce': self.nonce, - 'state': state, - 'finished': finished, - } - requests.post(f'http://{self.host}:{self.port}/task-update-internal', json=data, timeout=20) - break - except Exception: - # if translation is finished server has to know - if finished: - continue - else: - break - - def _get_task(self): - try: - rjson = requests.get(f'http://{self.host}:{self.port}/task-internal?nonce={self.nonce}', - timeout=3600).json() - return rjson.get('task_id'), rjson.get('data') - except Exception: - return None, None - - async def listen(self, translation_params: dict = None): - """ - Listens for translation tasks from web server. - """ - logger.info('Waiting for translation tasks') - - await self._init_connection() - self.add_progress_hook(self._send_state) - - while True: - self._task_id, self._params = self._get_task() - if self._params and 'exit' in self._params: - break - if not (self._task_id and self._params): - await asyncio.sleep(0.1) - continue - - self.result_sub_folder = self._task_id - logger.info(f'Processing task {self._task_id}') - if translation_params is not None: - # Combine default params with params chosen by webserver - for p, default_value in translation_params.items(): - current_value = self._params.get(p) - self._params[p] = current_value if current_value is not None else default_value - if self.verbose: - # Write log file - log_file = self._result_path('log.txt') - add_file_logger(log_file) - - # final.png will be renamed if format param is set - await self.translate_path(self._result_path('input.png'), self._result_path('final.png'), - params=self._params) - print() - - if self.verbose: - remove_file_logger(log_file) - self._task_id = None - self._params = None - self.result_sub_folder = '' - - async def _run_text_translation(self, config: Config, ctx: Context): - # Run machine translation as reference for manual translation (if 
`--translator=none` is not set) - text_regions = await super()._run_text_translation(config, ctx) - - #todo: manual never set - if ctx.get('manual', False): - logger.info('Waiting for user input from manual translation') - requests.post(f'http://{self.host}:{self.port}/request-manual-internal', json={ - 'task_id': self._task_id, - 'nonce': self.nonce, - 'texts': [r.text for r in text_regions], - 'translations': [r.translation for r in text_regions], - }, timeout=20) - - # wait for at most 1 hour for manual translation - wait_until = time.time() + 3600 - while time.time() < wait_until: - ret = requests.post(f'http://{self.host}:{self.port}/get-manual-result-internal', json={ - 'task_id': self._task_id, - 'nonce': self.nonce - }, timeout=20).json() - if 'result' in ret: - manual_translations = ret['result'] - if isinstance(manual_translations, str): - if manual_translations == 'error': - return [] - i = 0 - for translation in manual_translations: - if not translation.strip(): - text_regions.pop(i) - i = i - 1 - else: - text_regions[i].translation = translation - text_regions[i].target_lang = config.translator.translator_gen.langs[-1] - i = i + 1 - break - elif 'cancel' in ret: - return 'cancel' - await asyncio.sleep(0.1) - return text_regions diff --git a/manga_translator/server/manual.html b/manga_translator/server/manual.html deleted file mode 100644 index 9e4a2befb..000000000 --- a/manga_translator/server/manual.html +++ /dev/null @@ -1,352 +0,0 @@ - - - - - - Manga Image Translator - - - - -
- - -
-
-
-

Raw Text

-

Translation

-
-
- - -
-
- -
-
-
-

Upload image by dragging and dropping images onto the dashed region

- - -
- - -
-
- -
-
- - - - - - \ No newline at end of file diff --git a/manga_translator/server/ui.html b/manga_translator/server/ui.html deleted file mode 100644 index 88dcb9fc3..000000000 --- a/manga_translator/server/ui.html +++ /dev/null @@ -1,377 +0,0 @@ - - - - - Image/Manga Translator - - - - - - - -
-
-

Image/Manga Translator

-
-
-
- -
- - -
-
-
- -
- - -
-
-
- -
- - -
-
-
- -
- - -
-
-
- -
- - -
-
-
-
-
- - -
-
-
-
{{ statusText }}
- -
-
- -
{{ statusText }}
-
-
- -
-
- Please consider supporting us by - Ko-fi - or - Patreon! -
- Source Code -
-
-
- - - diff --git a/manga_translator/server/web_main.py b/manga_translator/server/web_main.py deleted file mode 100644 index 9cc61c0f4..000000000 --- a/manga_translator/server/web_main.py +++ /dev/null @@ -1,629 +0,0 @@ -import io -import os -import sys -import re -import shutil -import mimetypes -import time -import asyncio -import subprocess -import secrets -from io import BytesIO -from PIL import Image -from aiohttp import web -from collections import deque -from imagehash import phash - -SERVER_DIR_PATH = os.path.dirname(os.path.realpath(__file__)) -BASE_PATH = os.path.dirname(os.path.dirname(SERVER_DIR_PATH)) - -# TODO: Get capabilities through api -VALID_LANGUAGES = { - 'CHS': 'Chinese (Simplified)', - 'CHT': 'Chinese (Traditional)', - 'CSY': 'Czech', - 'NLD': 'Dutch', - 'ENG': 'English', - 'FRA': 'French', - 'DEU': 'German', - 'HUN': 'Hungarian', - 'ITA': 'Italian', - 'JPN': 'Japanese', - 'KOR': 'Korean', - 'PLK': 'Polish', - 'PTB': 'Portuguese (Brazil)', - 'ROM': 'Romanian', - 'RUS': 'Russian', - 'ESP': 'Spanish', - 'TRK': 'Turkish', - 'UKR': 'Ukrainian', - 'VIN': 'Vietnamese', - 'ARA': 'Arabic', -} -# Whitelists -VALID_DETECTORS = set(['default', 'ctd']) -VALID_DIRECTIONS = set(['auto', 'h', 'v']) -VALID_TRANSLATORS = [ - 'youdao', - 'baidu', - 'google', - 'deepl', - 'deepseek', - 'papago', - 'caiyun', - 'gpt3.5', - 'gpt4', - 'nllb', - 'nllb_big', - 'sugoi', - 'jparacrawl', - 'jparacrawl_big', - 'm2m100', - 'm2m100_big', - 'qwen2', - 'qwen2_big', - 'sakura', - 'none', - 'original', -] - -MAX_ONGOING_TASKS = 1 -MAX_IMAGE_SIZE_PX = 8000**2 - -# Time to wait for web client to send a request to /task-state request -# before that web clients task gets removed from the queue -WEB_CLIENT_TIMEOUT = -1 - -# Time before finished tasks get removed from memory -FINISHED_TASK_REMOVE_TIMEOUT = 1800 - -# Auto deletes old task folders upon reaching this disk space limit -DISK_SPACE_LIMIT = 5e7 # 50mb - -# TODO: Turn into dict with translator client id as key for support of 
multiple translator clients -ONGOING_TASKS = [] -FINISHED_TASKS = [] -NONCE = '' -QUEUE = deque() -TASK_DATA = {} -TASK_STATES = {} -DEFAULT_TRANSLATION_PARAMS = {} -AVAILABLE_TRANSLATORS = [] -FORMAT = '' - -app = web.Application(client_max_size = 1024 * 1024 * 50) -routes = web.RouteTableDef() - - -def constant_compare(a, b): - if isinstance(a, str): - a = a.encode('utf-8') - if isinstance(b, str): - b = b.encode('utf-8') - if not isinstance(a, bytes) or not isinstance(b, bytes): - return False - if len(a) != len(b): - return False - - result = 0 - for x, y in zip(a, b): - result |= x ^ y - return result == 0 - -@routes.get("/") -async def index_async(request): - global AVAILABLE_TRANSLATORS - with open(os.path.join(SERVER_DIR_PATH, 'ui.html'), 'r', encoding='utf8') as fp: - content = fp.read() - if AVAILABLE_TRANSLATORS: - content = re.sub(r'(?<=translator: )(.*)(?=,)', repr(AVAILABLE_TRANSLATORS[0]), content) - content = re.sub(r'(?<=validTranslators: )(\[.*\])(?=,)', repr(AVAILABLE_TRANSLATORS), content) - return web.Response(text=content, content_type='text/html') - -@routes.get("/manual") -async def index_async(request): - with open(os.path.join(SERVER_DIR_PATH, 'manual.html'), 'r', encoding='utf8') as fp: - return web.Response(text=fp.read(), content_type='text/html') - -@routes.get("/result/{taskid}") -async def result_async(request): - global FORMAT - filepath = os.path.join('result', request.match_info.get('taskid'), f'final.{FORMAT}') - if not os.path.exists(filepath): - return web.Response(status=404, text='Not Found') - stream = BytesIO() - with open(filepath, 'rb') as f: - stream.write(f.read()) - mime = mimetypes.guess_type(filepath)[0] or 'application/octet-stream' - return web.Response(body=stream.getvalue(), content_type=mime) - -@routes.get("/result-type") -async def file_type_async(request): - global FORMAT - return web.Response(text=f'{FORMAT}') - -@routes.get("/queue-size") -async def queue_size_async(request): - return 
web.json_response({'size' : len(QUEUE)}) - -async def handle_post(request): - data = await request.post() - detection_size = None - selected_translator = 'youdao' - target_language = 'CHS' - detector = 'default' - direction = 'auto' - if 'target_lang' in data: - target_language = data['target_lang'].upper() - # TODO: move dicts to their own files to reduce load time - if target_language not in VALID_LANGUAGES: - target_language = 'CHS' - if 'detector' in data: - detector = data['detector'].lower() - if detector not in VALID_DETECTORS: - detector = 'default' - if 'direction' in data: - direction = data['direction'].lower() - if direction not in VALID_DIRECTIONS: - direction = 'auto' - if 'translator' in data: - selected_translator = data['translator'].lower() - if selected_translator not in AVAILABLE_TRANSLATORS: - selected_translator = AVAILABLE_TRANSLATORS[0] - if 'size' in data: - size_text = data['size'].upper() - if size_text == 'S': - detection_size = 1024 - elif size_text == 'M': - detection_size = 1536 - elif size_text == 'L': - detection_size = 2048 - elif size_text == 'X': - detection_size = 2560 - if 'file' in data: - file_field = data['file'] - content = file_field.file.read() - elif 'url' in data: - from aiohttp import ClientSession - async with ClientSession() as session: - async with session.get(data['url']) as resp: - if resp.status == 200: - content = await resp.read() - else: - return web.json_response({'status': 'error'}) - else: - return web.json_response({'status': 'error'}) - try: - img = Image.open(io.BytesIO(content)) - img.verify() - img = Image.open(io.BytesIO(content)) - if img.width * img.height > MAX_IMAGE_SIZE_PX: - return web.json_response({'status': 'error-too-large'}) - except Exception: - return web.json_response({'status': 'error-img-corrupt'}) - return img, detection_size, selected_translator, target_language, detector, direction - -@routes.post("/run") -async def run_async(request): - global FORMAT - x = await 
handle_post(request) - if isinstance(x, tuple): - img, size, selected_translator, target_language, detector, direction = x - else: - return x - task_id = f'{phash(img, hash_size = 16)}-{size}-{selected_translator}-{target_language}-{detector}-{direction}' - print(f'New `run` task {task_id}') - if os.path.exists(f'result/{task_id}/final.{FORMAT}'): - # Add a console output prompt to avoid the console from appearing to be stuck without execution when the translated image is hit consecutively. - print(f'Using cached result for {task_id}') - return web.json_response({'task_id' : task_id, 'status': 'successful'}) - # elif os.path.exists(f'result/{task_id}'): - # # either image is being processed or error occurred - # if task_id not in TASK_STATES: - # # error occurred - # return web.json_response({'state': 'error'}) - else: - os.makedirs(f'result/{task_id}/', exist_ok=True) - img.save(f'result/{task_id}/input.png') - QUEUE.append(task_id) - now = time.time() - TASK_DATA[task_id] = { - 'detection_size': size, - 'translator': selected_translator, - 'target_lang': target_language, - 'detector': detector, - 'direction': direction, - 'created_at': now, - 'requested_at': now, - } - TASK_STATES[task_id] = { - 'info': 'pending', - 'finished': False, - } - while True: - await asyncio.sleep(0.1) - if task_id not in TASK_STATES: - break - state = TASK_STATES[task_id] - if state['finished']: - break - return web.json_response({'task_id': task_id, 'status': 'successful' if state['finished'] else state['info']}) - - -@routes.post("/connect-internal") -async def index_async(request): - global NONCE, VALID_TRANSLATORS, AVAILABLE_TRANSLATORS - # Can be extended to allow support for multiple translators - rqjson = await request.json() - if constant_compare(rqjson.get('nonce'), NONCE): - capabilities = rqjson.get('capabilities') - if capabilities: - translators = capabilities.get('translators') - AVAILABLE_TRANSLATORS.clear() - for key in VALID_TRANSLATORS: - if key in translators: - 
AVAILABLE_TRANSLATORS.append(key) - return web.json_response({}) - -@routes.get("/task-internal") -async def get_task_async(request): - """ - Called by the translator to get a translation task. - """ - global NONCE, ONGOING_TASKS, DEFAULT_TRANSLATION_PARAMS - if constant_compare(request.rel_url.query.get('nonce'), NONCE): - if len(QUEUE) > 0 and len(ONGOING_TASKS) < MAX_ONGOING_TASKS: - task_id = QUEUE.popleft() - if task_id in TASK_DATA: - data = TASK_DATA[task_id] - for p, default_value in DEFAULT_TRANSLATION_PARAMS.items(): - current_value = data.get(p) - data[p] = current_value if current_value is not None else default_value - if not TASK_DATA[task_id].get('manual', False): - ONGOING_TASKS.append(task_id) - return web.json_response({'task_id': task_id, 'data': data}) - else: - return web.json_response({}) - else: - return web.json_response({}) - return web.json_response({}) - -async def manual_trans_task(task_id, texts, translations): - if task_id not in TASK_DATA: - TASK_DATA[task_id] = {} - if texts and translations: - TASK_DATA[task_id]['trans_request'] = [{'s': txt, 't': trans} for txt, trans in zip(texts, translations)] - else: - TASK_DATA[task_id]['trans_result'] = [] - print('Manual translation complete') - -@routes.post("/cancel-manual-request") -async def cancel_manual_translation(request): - rqjson = (await request.json()) - if 'task_id' in rqjson: - task_id = rqjson['task_id'] - if task_id in TASK_DATA: - TASK_DATA[task_id]['cancel'] = ' ' - while True: - await asyncio.sleep(0.1) - if TASK_STATES[task_id]['info'].startswith('error'): - ret = web.json_response({'task_id': task_id, 'status': 'error'}) - break - if TASK_STATES[task_id]['finished']: - ret = web.json_response({'task_id': task_id, 'status': 'cancelled'}) - break - del TASK_STATES[task_id] - del TASK_DATA[task_id] - return ret - return web.json_response({}) - -@routes.post("/post-manual-result") -async def post_translation_result(request): - rqjson = (await request.json()) - if 
'trans_result' in rqjson and 'task_id' in rqjson: - task_id = rqjson['task_id'] - if task_id in TASK_DATA: - trans_result = [r['t'] for r in rqjson['trans_result']] - TASK_DATA[task_id]['trans_result'] = trans_result - while True: - await asyncio.sleep(0.1) - if TASK_STATES[task_id]['info'].startswith('error'): - ret = web.json_response({'task_id': task_id, 'status': 'error'}) - break - if TASK_STATES[task_id]['finished']: - ret = web.json_response({'task_id': task_id, 'status': 'successful'}) - break - # remove old tasks - del TASK_STATES[task_id] - del TASK_DATA[task_id] - return ret - return web.json_response({}) - -@routes.post("/request-manual-internal") -async def request_translation_internal(request): - global NONCE - rqjson = await request.json() - if constant_compare(rqjson.get('nonce'), NONCE): - task_id = rqjson['task_id'] - if task_id in TASK_DATA: - if TASK_DATA[task_id].get('manual', False): - # manual translation - asyncio.gather(manual_trans_task(task_id, rqjson['texts'], rqjson['translations'])) - return web.json_response({}) - -@routes.post("/get-manual-result-internal") -async def get_translation_internal(request): - global NONCE - rqjson = (await request.json()) - if constant_compare(rqjson.get('nonce'), NONCE): - task_id = rqjson['task_id'] - if task_id in TASK_DATA: - if 'trans_result' in TASK_DATA[task_id]: - return web.json_response({'result': TASK_DATA[task_id]['trans_result']}) - elif 'cancel' in TASK_DATA[task_id]: - return web.json_response({'cancel':''}) - return web.json_response({}) - -@routes.get("/task-state") -async def get_task_state_async(request): - """ - Web API for getting the state of an on-going translation task from the website. - - Is periodically called from ui.html. 
Once it returns a finished state, - the web client will try to fetch the corresponding image through /result/ - """ - task_id = request.query.get('taskid') - if task_id and task_id in TASK_STATES and task_id in TASK_DATA: - state = TASK_STATES[task_id] - data = TASK_DATA[task_id] - res_dict = { - 'state': state['info'], - 'finished': state['finished'], - } - data['requested_at'] = time.time() - try: - res_dict['waiting'] = QUEUE.index(task_id) + 1 - except Exception: - res_dict['waiting'] = 0 - res = web.json_response(res_dict) - - return res - return web.json_response({'state': 'error'}) - -@routes.post("/task-update-internal") -async def post_task_update_async(request): - """ - Lets the translator update the task state it is working on. - """ - global NONCE, ONGOING_TASKS, FINISHED_TASKS - rqjson = (await request.json()) - if constant_compare(rqjson.get('nonce'), NONCE): - task_id = rqjson['task_id'] - if task_id in TASK_STATES and task_id in TASK_DATA: - TASK_STATES[task_id] = { - 'info': rqjson['state'], - 'finished': rqjson['finished'], - } - if rqjson['finished'] and not TASK_DATA[task_id].get('manual', False): - try: - i = ONGOING_TASKS.index(task_id) - FINISHED_TASKS.append(ONGOING_TASKS.pop(i)) - except ValueError: - pass - print(f'Task state {task_id} to {TASK_STATES[task_id]}') - return web.json_response({}) - -@routes.post("/submit") -async def submit_async(request): - """Adds new task to the queue. 
Called by web client in ui.html when submitting an image.""" - global FORMAT - x = await handle_post(request) - if isinstance(x, tuple): - img, size, selected_translator, target_language, detector, direction = x - else: - return x - task_id = f'{phash(img, hash_size = 16)}-{size}-{selected_translator}-{target_language}-{detector}-{direction}' - now = time.time() - print(f'New `submit` task {task_id}') - if os.path.exists(f'result/{task_id}/final.{FORMAT}'): - TASK_STATES[task_id] = { - 'info': 'saved', - 'finished': True, - } - TASK_DATA[task_id] = { - 'detection_size': size, - 'translator': selected_translator, - 'target_lang': target_language, - 'detector': detector, - 'direction': direction, - 'created_at': now, - 'requested_at': now, - } - elif task_id not in TASK_DATA or task_id not in TASK_STATES: - os.makedirs(f'result/{task_id}/', exist_ok=True) - img.save(f'result/{task_id}/input.png') - QUEUE.append(task_id) - TASK_STATES[task_id] = { - 'info': 'pending', - 'finished': False, - } - TASK_DATA[task_id] = { - 'detection_size': size, - 'translator': selected_translator, - 'target_lang': target_language, - 'detector': detector, - 'direction': direction, - 'created_at': now, - 'requested_at': now, - } - return web.json_response({'task_id': task_id, 'status': 'successful'}) - -@routes.post("/manual-translate") -async def manual_translate_async(request): - x = await handle_post(request) - if isinstance(x, tuple): - img, size, selected_translator, target_language, detector, direction = x - else: - return x - task_id = secrets.token_hex(16) - print(f'New `manual-translate` task {task_id}') - os.makedirs(f'result/{task_id}/', exist_ok=True) - img.save(f'result/{task_id}/input.png') - now = time.time() - QUEUE.append(task_id) - # TODO: Add form fields to manual translate website - TASK_DATA[task_id] = { - # 'detection_size': size, - 'manual': True, - # 'detector': detector, - # 'direction': direction, - 'created_at': now, - 'requested_at': now, - } - 
print(TASK_DATA[task_id]) - TASK_STATES[task_id] = { - 'info': 'pending', - 'finished': False, - } - while True: - await asyncio.sleep(1) - if 'trans_request' in TASK_DATA[task_id]: - return web.json_response({'task_id' : task_id, 'status': 'pending', 'trans_result': TASK_DATA[task_id]['trans_request']}) - if TASK_STATES[task_id]['info'].startswith('error'): - break - if TASK_STATES[task_id]['finished']: - # no texts detected - return web.json_response({'task_id' : task_id, 'status': 'successful'}) - return web.json_response({'task_id' : task_id, 'status': 'error'}) - -app.add_routes(routes) - - -def generate_nonce(): - return secrets.token_hex(16) - -def start_translator_client_proc(host: str, port: int, nonce: str, params: dict): - os.environ['MT_WEB_NONCE'] = nonce - cmds = [ - sys.executable, - '-m', 'manga_translator', - '--mode', 'web_client', - '--host', host, - '--port', str(port), - ] - if params.get('use_gpu', False): - cmds.append('--use-gpu') - if params.get('use_gpu_limited', False): - cmds.append('--use-gpu-limited') - if params.get('ignore_errors', False): - cmds.append('--ignore-errors') - if params.get('verbose', False): - cmds.append('--verbose') - - proc = subprocess.Popen(cmds, cwd=BASE_PATH) - return proc - -async def start_async_app(host: str, port: int, nonce: str, translation_params: dict = None): - global NONCE, DEFAULT_TRANSLATION_PARAMS, FORMAT - # Secret to secure communication between webserver and translator clients - NONCE = nonce - DEFAULT_TRANSLATION_PARAMS = translation_params or {} - FORMAT = DEFAULT_TRANSLATION_PARAMS.get('format') or 'jpg' - DEFAULT_TRANSLATION_PARAMS['format'] = FORMAT - - # Schedule web server to run - runner = web.AppRunner(app) - await runner.setup() - site = web.TCPSite(runner, host, port) - await site.start() - print(f'Serving up app on http://{host}:{port}') - - return runner, site - -async def dispatch(host: str, port: int, nonce: str = None, translation_params: dict = None): - global ONGOING_TASKS, 
FINISHED_TASKS - - if nonce is None: - nonce = os.getenv('MT_WEB_NONCE', generate_nonce()) - - # Start web service - runner, site = await start_async_app(host, port, nonce, translation_params) - - # Create client process that will execute translation tasks - print() - client_process = start_translator_client_proc(host, port, nonce, translation_params) - - # Get all prior finished tasks - os.makedirs('result/', exist_ok=True) - for f in os.listdir('result/'): - if os.path.isdir(f'result/{f}') and re.search(r'^\w+-\d+-\w+-\w+-\w+-\w+$', f): - FINISHED_TASKS.append(f) - FINISHED_TASKS = list(sorted(FINISHED_TASKS, key=lambda task_id: os.path.getmtime(f'result/{task_id}'))) - - try: - while True: - await asyncio.sleep(1) - - # Restart client if OOM or similar errors occurred - if client_process.poll() is not None: - # if client_process.poll() == 0: - # break - print('Restarting translator process') - if len(ONGOING_TASKS) > 0: - tid = ONGOING_TASKS.pop(0) - state = TASK_STATES[tid] - state['info'] = 'error' - state['finished'] = True - client_process = start_translator_client_proc(host, port, nonce, translation_params) - - # Filter queued and finished tasks - now = time.time() - to_del_task_ids = set() - for tid, s in TASK_STATES.items(): - d = TASK_DATA[tid] - # Remove finished tasks after 30 minutes - if s['finished'] and now - d['created_at'] > FINISHED_TASK_REMOVE_TIMEOUT: - to_del_task_ids.add(tid) - - # Remove queued tasks without web client - elif WEB_CLIENT_TIMEOUT >= 0: - if tid not in ONGOING_TASKS and not s['finished'] and now - d['requested_at'] > WEB_CLIENT_TIMEOUT: - print('REMOVING TASK', tid) - to_del_task_ids.add(tid) - try: - QUEUE.remove(tid) - except Exception: - pass - - for tid in to_del_task_ids: - del TASK_STATES[tid] - del TASK_DATA[tid] - - # Delete oldest folder if disk space is becoming sparse - if DISK_SPACE_LIMIT >= 0 and len(FINISHED_TASKS) > 0 and shutil.disk_usage('result/')[2] < DISK_SPACE_LIMIT: - tid = FINISHED_TASKS.pop(0) - try: - 
p = f'result/{tid}' - print(f'REMOVING OLD TASK RESULT: {p}') - shutil.rmtree(p) - except FileNotFoundError: - pass - except: - if client_process.poll() is None: - # client_process.terminate() - client_process.kill() - await runner.cleanup() - raise - -if __name__ == '__main__': - from ..args import parser - - args = parser.parse_args() - - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - try: - runner, site = loop.run_until_complete(dispatch(args.host, args.port, translation_params=vars(args))) - except KeyboardInterrupt: - pass diff --git a/manga_translator/server/ws.proto b/manga_translator/server/ws.proto deleted file mode 100644 index 61367b988..000000000 --- a/manga_translator/server/ws.proto +++ /dev/null @@ -1,36 +0,0 @@ -syntax = "proto3"; - -package ws; - -message NewTask { - string id = 1; - - string source_image = 2; - - string target_language = 3; - string detector = 4; - string direction = 5; - string translator = 6; - string size = 7; - - string translation_mask = 8; -} - -message Status { - string id = 1; - string status = 2; -} - -message FinishTask { - string id = 1; - bool success = 2; - bool has_translation_mask = 3; -} - -message WebSocketMessage { - oneof message { - NewTask new_task = 1; - Status status = 2; - FinishTask finish_task = 3; - } -} diff --git a/manga_translator/server/ws_pb2.py b/manga_translator/server/ws_pb2.py deleted file mode 100644 index 7fcfddfd8..000000000 --- a/manga_translator/server/ws_pb2.py +++ /dev/null @@ -1,32 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: ws.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import symbol_database as _symbol_database -from google.protobuf.internal import builder as _builder -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x08ws.proto\x12\x02ws\"\xe3\x01\n\x07NewTask\x12\n\n\x02id\x18\x01 \x01(\t\x12\x14\n\x0csource_image\x18\x02 \x01(\t\x12\x1b\n\x13source_image_bearer\x18\t \x01(\t\x12\x17\n\x0ftarget_language\x18\x03 \x01(\t\x12\x10\n\x08\x64\x65tector\x18\x04 \x01(\t\x12\x11\n\tdirection\x18\x05 \x01(\t\x12\x12\n\ntranslator\x18\x06 \x01(\t\x12\x0c\n\x04size\x18\x07 \x01(\t\x12\x18\n\x10translation_mask\x18\x08 \x01(\t\x12\x1f\n\x17translation_mask_bearer\x18\n \x01(\t\"$\n\x06Status\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06status\x18\x02 \x01(\t\"G\n\nFinishTask\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0f\n\x07success\x18\x02 \x01(\x08\x12\x1c\n\x14has_translation_mask\x18\x03 \x01(\x08\"\x83\x01\n\x10WebSocketMessage\x12\x1f\n\x08new_task\x18\x01 \x01(\x0b\x32\x0b.ws.NewTaskH\x00\x12\x1c\n\x06status\x18\x02 \x01(\x0b\x32\n.ws.StatusH\x00\x12%\n\x0b\x66inish_task\x18\x03 \x01(\x0b\x32\x0e.ws.FinishTaskH\x00\x42\t\n\x07messageb\x06proto3') - -_globals = globals() -_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'ws_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - - DESCRIPTOR._options = None - _globals['_NEWTASK']._serialized_start=17 - _globals['_NEWTASK']._serialized_end=244 - _globals['_STATUS']._serialized_start=246 - _globals['_STATUS']._serialized_end=282 - _globals['_FINISHTASK']._serialized_start=284 - _globals['_FINISHTASK']._serialized_end=355 - _globals['_WEBSOCKETMESSAGE']._serialized_start=358 - _globals['_WEBSOCKETMESSAGE']._serialized_end=489 -# 
@@protoc_insertion_point(module_scope) From aef0a275d83177732f0e2f4e29db9278a18c435b Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:55:54 +0100 Subject: [PATCH 58/75] changed queue a bit --- server/instance.py | 8 ++++---- server/myqueue.py | 39 +++++++++++++++++++-------------------- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/server/instance.py b/server/instance.py index e844289dd..90d6c4a1a 100644 --- a/server/instance.py +++ b/server/instance.py @@ -1,6 +1,5 @@ -import pickle from asyncio import Event, Lock -from typing import List, Optional +from typing import List from PIL import Image from pydantic import BaseModel @@ -49,10 +48,11 @@ async def find_executor(self) -> ExecutorInstance: instance.busy = True return instance - def free_executor(self, instance: ExecutorInstance): + async def free_executor(self, instance: ExecutorInstance): + from server.myqueue import task_queue instance.free_executor() self.event.set() self.event.clear() - + await task_queue.update_event() executor_instances: Executors = Executors() diff --git a/server/myqueue.py b/server/myqueue.py index b9174d936..267a97302 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -1,9 +1,8 @@ import asyncio -from typing import List, Dict +from typing import List, Dict, Optional from fastapi import HTTPException from starlette.requests import Request -from starlette.responses import StreamingResponse from manga_translator import Context from server.instance import executor_instances @@ -17,16 +16,19 @@ def __init__(self): def add_task(self, task): self.queue.append(task) - def get_pos(self, task): - return self.queue.index(task) - - def update_event(self): + def get_pos(self, task) -> Optional[int]: + try: + return self.queue.index(task) + except ValueError: + return None + async def update_event(self): + self.queue = [ctx for ctx in self.queue if not await is_client_disconnected(ctx.req)] 
self.queue_event.set() self.queue_event.clear() - def remove(self, task): + async def remove(self, task): self.queue.remove(task) - self.update_event() + await self.update_event() async def wait_for_event(self): await self.queue_event.wait() @@ -42,18 +44,23 @@ async def wait_in_queue(task, notify: NotifyType): """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. when done the item will be removed from the queue and result will be returned""" while True: queue_pos = task_queue.get_pos(task) + if not queue_pos: + if notify: + return + else: + raise HTTPException(500, detail="User is no longer connected") # just for the logs if notify: notify(3, str(queue_pos).encode('utf-8')) if queue_pos < executor_instances.free_executors(): if await is_client_disconnected(task.req): - task_queue.remove(task) - task_queue.update_event() + await task_queue.update_event() if notify: return else: raise HTTPException(500, detail="User is no longer connected") #just for the logs + instance = await executor_instances.find_executor() - task_queue.remove(task) + await task_queue.remove(task) if notify: notify(4, b"") if notify: @@ -61,19 +68,11 @@ async def wait_in_queue(task, notify: NotifyType): else: result = await instance.sent(task.image, task.config) - executor_instances.free_executor(instance) - task_queue.update_event() + await executor_instances.free_executor(instance) if notify: return else: return result else: - if await is_client_disconnected(task.req): - task_queue.remove(task) - task_queue.update_event() - if notify: - return - else: - raise HTTPException(500, detail="User is no longer connected") #just for the logs await task_queue.wait_for_event() \ No newline at end of file From cc37ca2371586e70c837d4e7ee0123f9dabd8a1a Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Thu, 21 Nov 2024 18:17:49 +0100 Subject: [PATCH 59/75] custom queue 
element with deconstructor --- server/main.py | 28 ++++++----------- server/myqueue.py | 59 ++++++++++++++++++++++++++---------- server/request_extraction.py | 24 ++++++--------- 3 files changed, 62 insertions(+), 49 deletions(-) diff --git a/server/main.py b/server/main.py index 773908f41..5797a610e 100644 --- a/server/main.py +++ b/server/main.py @@ -1,24 +1,22 @@ import io import os import secrets +import shutil import signal import subprocess import sys from argparse import Namespace -from builtins import bytes -from typing import Union -from fastapi import FastAPI, Request, HTTPException, Header, Form, UploadFile +from fastapi import FastAPI, Request, HTTPException, Header, UploadFile from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import StreamingResponse, JSONResponse, HTMLResponse from pathlib import Path from pydantic import BaseModel -from manga_translator import Config from server.instance import ExecutorInstance, executor_instances from server.myqueue import task_queue -from server.request_extraction import get_ctx, while_streaming, TranslateRequest +from server.request_extraction import get_ctx, while_streaming from server.to_json import to_json, Translation app = FastAPI() @@ -54,22 +52,11 @@ def transform_to_image(ctx): def transform_to_json(ctx): return str(to_json(ctx)).encode("utf-8") -async def parse_request( - req: Request, - image: Union[str, bytes] = Form(...), - config: str = Form(...), -): - if req.headers.get('content-type').startswith('multipart'): - config = json.loads(config) - return TranslateRequest(image=image, config=Config(**config)) - else: - return None @app.post("/translate/json", response_model=list[Translation], response_description="json strucure inspired by the ichigo translator extension") async def json(req: Request): ctx = await get_ctx(req) - json = to_json(ctx) - return JSONResponse(content=json) + return JSONResponse(content=to_json(ctx)) @app.post("/translate/bytes", 
response_class=StreamingResponse, response_description="custom byte structure following the stream encoding, but with json first and then the image bytes as chunks") async def bytes(req: Request): @@ -154,13 +141,16 @@ def prepare(args): nonce = args.nonce if args.start_instance: return start_translator_client_proc(args.host, args.port + 1, nonce, args) + folder_name= "upload-cache" + if os.path.exists(folder_name): + shutil.rmtree(folder_name) + os.makedirs(folder_name) #todo: restart if crash #todo: cache results #todo: cleanup cache -#todo: store images while in queue + #todo: add docs -#todo: index doesnt work properly in the list(is_client_disconnected is not executed immediatly/does not update the index) #todo: enable config in html pages if __name__ == '__main__': diff --git a/server/myqueue.py b/server/myqueue.py index 267a97302..77f681d60 100644 --- a/server/myqueue.py +++ b/server/myqueue.py @@ -1,32 +1,64 @@ import asyncio -from typing import List, Dict, Optional +import os +from typing import List, Optional +from PIL import Image from fastapi import HTTPException -from starlette.requests import Request +from fastapi.requests import Request -from manga_translator import Context +from manga_translator import Config from server.instance import executor_instances from server.sent_data_internal import NotifyType +class QueueElement: + req: Request + image: Image.Image | str + config: Config + + def __init__(self, req: Request, image: Image.Image, config: Config, length): + self.req = req + if length > 10: + #todo: store image in "upload-cache" folder + self.image = image + else: + self.image = image + self.config = config + + def get_image(self)-> Image: + if isinstance(self.image, str): + return Image.open(self.image) + else: + return self.image + + def __del__(self): + if isinstance(self.image, str): + os.remove(self.image) + + async def is_client_disconnected(self) -> bool: + if await self.req.is_disconnected(): + return True + return False + + class 
TaskQueue: def __init__(self): - self.queue: List[Context] = [] + self.queue: List[QueueElement] = [] self.queue_event: asyncio.Event = asyncio.Event() - def add_task(self, task): + def add_task(self, task: QueueElement): self.queue.append(task) - def get_pos(self, task) -> Optional[int]: + def get_pos(self, task: QueueElement) -> Optional[int]: try: return self.queue.index(task) except ValueError: return None async def update_event(self): - self.queue = [ctx for ctx in self.queue if not await is_client_disconnected(ctx.req)] + self.queue = [task for task in self.queue if not await task.is_client_disconnected()] self.queue_event.set() self.queue_event.clear() - async def remove(self, task): + async def remove(self, task: QueueElement): self.queue.remove(task) await self.update_event() @@ -35,16 +67,11 @@ async def wait_for_event(self): task_queue = TaskQueue() -async def is_client_disconnected(request: Request) -> bool: - if await request.is_disconnected(): - return True - return False - -async def wait_in_queue(task, notify: NotifyType): +async def wait_in_queue(task: QueueElement, notify: NotifyType): """Will get task position report it. If its in the range of translators then it will try to aquire an instance(blockig) and sent a task to it. 
when done the item will be removed from the queue and result will be returned""" while True: queue_pos = task_queue.get_pos(task) - if not queue_pos: + if queue_pos is None: if notify: return else: @@ -52,7 +79,7 @@ async def wait_in_queue(task, notify: NotifyType): if notify: notify(3, str(queue_pos).encode('utf-8')) if queue_pos < executor_instances.free_executors(): - if await is_client_disconnected(task.req): + if await task.is_client_disconnected(): await task_queue.update_event() if notify: return diff --git a/server/request_extraction.py b/server/request_extraction.py index 080d84cc4..decb64bff 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -12,7 +12,7 @@ from starlette.responses import StreamingResponse from manga_translator import Config, Context -from server.myqueue import task_queue, wait_in_queue +from server.myqueue import task_queue, wait_in_queue, QueueElement from server.streaming import notify, stream class TranslateRequest(BaseModel): @@ -61,29 +61,25 @@ async def multi_content_type(request: Request): async def get_ctx(req: Request): data, img = await multi_content_type(req) - ctx = Context() - ctx.image = await to_pil_image(img) - ctx.config = data - ctx.req = req - task_queue.add_task(ctx) + image = await to_pil_image(img) + task = QueueElement(req, image, data, 0) - return await wait_in_queue(ctx, None) + task_queue.add_task(task) + + return await wait_in_queue(task, None) async def while_streaming(req: Request, transform): data, img = await multi_content_type(req) - ctx = Context() - ctx.image = await to_pil_image(img) - ctx.config = data - ctx.req = req - task_queue.add_task(ctx) + image = await to_pil_image(img) + task = QueueElement(req, image, data, 0) + task_queue.add_task(task) messages = asyncio.Queue() def notify_internal(code: int, data: bytes) -> None: notify(code, data, transform, messages) - streaming_response = StreamingResponse(stream(messages), media_type="application/octet-stream") - 
asyncio.create_task(wait_in_queue(ctx, notify_internal)) + asyncio.create_task(wait_in_queue(task, notify_internal)) return streaming_response \ No newline at end of file From 6cd1819582dec208ac8f96b3b53f22ee5eb37f05 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Fri, 22 Nov 2024 12:14:05 +0100 Subject: [PATCH 60/75] byte response with cpp&rs examples --- manga_translator/utils/textblock.py | 2 +- server/main.py | 13 ++-- server/response.cpp | 116 +++++++++++++++++++++++++++ server/response.rs | 101 ++++++++++++++++++++++++ server/to_json.py | 117 +++++++++++++++++----------- 5 files changed, 297 insertions(+), 52 deletions(-) create mode 100644 server/response.cpp create mode 100644 server/response.rs diff --git a/manga_translator/utils/textblock.py b/manga_translator/utils/textblock.py index 8c2f390f5..5549c30a1 100644 --- a/manga_translator/utils/textblock.py +++ b/manga_translator/utils/textblock.py @@ -46,7 +46,7 @@ def __init__(self, lines: List[Tuple[int, int, int, int]], texts: List[str] = None, language: str = 'unknown', font_size: float = -1, - angle: int = 0, + angle: float = 0, translation: str = "", fg_color: Tuple[float] = (0, 0, 0), bg_color: Tuple[float] = (0, 0, 0), diff --git a/server/main.py b/server/main.py index 5797a610e..44f7b77a3 100644 --- a/server/main.py +++ b/server/main.py @@ -17,7 +17,7 @@ from server.instance import ExecutorInstance, executor_instances from server.myqueue import task_queue from server.request_extraction import get_ctx, while_streaming -from server.to_json import to_json, Translation +from server.to_json import to_translation, TranslationResponse app = FastAPI() nonce = None @@ -50,17 +50,20 @@ def transform_to_image(ctx): return img_byte_arr.getvalue() def transform_to_json(ctx): - return str(to_json(ctx)).encode("utf-8") + return to_translation(ctx).model_dump_json().encode("utf-8") +def transform_to_bytes(ctx): + return to_translation(ctx).to_bytes() 
-@app.post("/translate/json", response_model=list[Translation], response_description="json strucure inspired by the ichigo translator extension") +@app.post("/translate/json", response_model=TranslationResponse, response_description="json strucure inspired by the ichigo translator extension") async def json(req: Request): ctx = await get_ctx(req) - return JSONResponse(content=to_json(ctx)) + return to_translation(ctx) @app.post("/translate/bytes", response_class=StreamingResponse, response_description="custom byte structure following the stream encoding, but with json first and then the image bytes as chunks") async def bytes(req: Request): ctx = await get_ctx(req) + return StreamingResponse(content=to_translation(ctx).to_bytes()) @app.post("/translate/image", response_description="the result image", response_class=StreamingResponse) async def image(req: Request) -> StreamingResponse: @@ -77,7 +80,7 @@ async def stream_json(req: Request) -> StreamingResponse: @app.post("/translate/bytes/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") async def stream_bytes(req: Request)-> StreamingResponse: - return await while_streaming(req, transform_to_image) + return await while_streaming(req, transform_to_bytes) @app.post("/translate/image/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance") async def stream_image(req: Request) -> StreamingResponse: diff --git a/server/response.cpp b/server/response.cpp new file mode 100644 index 000000000..3595856a3 --- /dev/null +++ b/server/response.cpp @@ -0,0 +1,116 @@ 
+#include +#include +#include +#include +#include +#include +#include + +struct Color { + uint8_t fg[3]; + uint8_t bg[3]; +}; + +struct Translation { + uint32_t min_x; + uint32_t min_y; + uint32_t max_x; + uint32_t max_y; + bool is_bulleted_list; + uint32_t angle; + float prob; + Color text_color; + std::unordered_map text; + std::vector background; +}; + +struct TranslationResponse { + std::vector translations; +}; + +uint32_t read_u32(const std::vector& bytes, size_t& offset) { + uint32_t value; + std::memcpy(&value, &bytes[offset], sizeof(value)); + value = value; + offset += 4; + return value; +} + +float read_f32(const std::vector& bytes, size_t& offset) { + float value; + std::memcpy(&value, &bytes[offset], sizeof(value)); + value = *reinterpret_cast(&value); + offset += 4; + return value; +} + +uint8_t read_u8(const std::vector& bytes, size_t& offset) { + uint8_t value = bytes[offset]; + offset += 1; + return value; +} + +bool read_bool(const std::vector& bytes, size_t& offset) { + bool value = bytes[offset] != 0; + offset += 1; + return value; +} + +std::vector read_chunk(const std::vector& bytes, size_t& offset) { + uint32_t size = read_u32(bytes, offset); + std::vector value(bytes.begin() + offset, bytes.begin() + offset + size); + offset += size; + return value; +} + +std::string read_str(const std::vector& bytes, size_t& offset) { + std::vector chunk = read_chunk(bytes, offset); + std::string result(chunk.begin(), chunk.end()); + return result; +} + +std::unordered_map read_map(const std::vector& bytes, size_t& offset) { + uint32_t count = read_u32(bytes, offset); + std::unordered_map map; + for (uint32_t i = 0; i < count; ++i) { + std::string key = read_str(bytes, offset); + std::string value = read_str(bytes, offset); + map[key] = value; + } + return map; +} + +Translation from_bytes(const std::vector& bytes, size_t& offset) { + Translation translation; + translation.min_x = read_u32(bytes, offset); + translation.min_y = read_u32(bytes, offset); + 
translation.max_x = read_u32(bytes, offset); + translation.max_y = read_u32(bytes, offset); + translation.is_bulleted_list = read_bool(bytes, offset); + translation.angle = read_u32(bytes, offset); + translation.prob = read_f32(bytes, offset); + for (int i = 0; i < 3; ++i) { + translation.text_color.fg[i] = read_u8(bytes, offset); + translation.text_color.bg[i] = read_u8(bytes, offset); + } + translation.text = read_map(bytes, offset); + translation.background = read_chunk(bytes, offset); + return translation; +} + +TranslationResponse from_bytes_response(const std::vector& bytes) { + size_t offset = 0; + uint32_t count = read_u32(bytes, offset); + TranslationResponse response; + + for (uint32_t i = 0; i < count; ++i) { + response.translations.push_back(from_bytes(bytes, offset)); + } + return response; +} + +int main() { + std::vector bytes = {/* byte data here */}; + TranslationResponse data = from_bytes_response(bytes); + return 0; +} diff --git a/server/response.rs b/server/response.rs new file mode 100644 index 000000000..81fa07810 --- /dev/null +++ b/server/response.rs @@ -0,0 +1,101 @@ +#[derive(Debug)] +struct Color { + fg: [u8; 3], + bg: [u8; 3], +} + +#[derive(Debug)] +struct Translation { + min_x: u32, + min_y: u32, + max_x: u32, + max_y: u32, + is_bulleted_list: bool, + angle: u32, + prob: f32, + text_color: Color, + text: HashMap, + background: Vec, +} + +#[derive(Debug)] +struct TranslationResponse { + translations: Vec, +} + +impl TranslationResponse { + fn from_bytes(bytes: &[u8]) -> Self { + let mut offset = 0; + let v = (0..read_u32(bytes, &mut offset)) + .map(|_| Translation::from_bytes(bytes, &mut offset)) + .collect::>(); + Self { translations: v } + } +} + +fn read_u32(bytes: &[u8], offset: &mut usize) -> u32 { + let value = u32::from_le_bytes(bytes[*offset..*offset + 4].try_into().unwrap()); + *offset += 4; + value +} + +impl Translation { + fn from_bytes(bytes: &[u8], offset: &mut usize) -> Self { + let read_f32 = |bytes: &[u8], offset: &mut 
usize| -> f32 { + let value = f32::from_le_bytes(bytes[*offset..*offset + 4].try_into().unwrap()); + *offset += 4; + value + }; + + let read_u8 = |bytes: &[u8], offset: &mut usize| -> u8 { + let value = bytes[*offset]; + *offset += 1; + value + }; + + let read_bool = |bytes: &[u8], offset: &mut usize| -> bool { + let value = bytes[*offset] != 0; + *offset += 1; + value + }; + + let read_chunk = |bytes: &[u8], offset: &mut usize| -> Vec { + let size = read_u32(bytes, offset); + let value = &bytes[*offset..*offset + size as usize]; + *offset += size as usize; + value.to_vec() + }; + let read_str = |bytes: &[u8], offset: &mut usize| -> String { + String::from_utf8(read_chunk(bytes, offset)).expect("Invalid UTF-8") + }; + let read_map = |bytes: &[u8], offset: &mut usize| -> HashMap { + (0..read_u32(bytes, offset)) + .into_iter() + .map(|_| (read_str(bytes, offset), read_str(bytes, offset))) + .collect::>() + }; + Self { + min_x: read_u32(bytes, offset), + min_y: read_u32(bytes, offset), + max_x: read_u32(bytes, offset), + max_y: read_u32(bytes, offset), + is_bulleted_list: read_bool(bytes, offset), + angle: read_u32(bytes, offset), + prob: read_f32(bytes, offset), + text_color: Color { + fg: [ + read_u8(bytes, offset), + read_u8(bytes, offset), + read_u8(bytes, offset), + ], + bg: [ + read_u8(bytes, offset), + read_u8(bytes, offset), + read_u8(bytes, offset), + ], + }, + text: read_map(bytes, offset), + background: read_chunk(bytes, offset), + } + } +} diff --git a/server/to_json.py b/server/to_json.py index 911718d8d..f6bcdc6e2 100644 --- a/server/to_json.py +++ b/server/to_json.py @@ -1,8 +1,10 @@ import base64 +import struct from typing import Dict, List import cv2 -from pydantic import BaseModel +import numpy as np +from pydantic import BaseModel, Field from manga_translator import Context from manga_translator.utils import TextBlock @@ -22,60 +24,83 @@ #img_rendered: array #mask_raw: array #mask:array -def to_json(ctx: Context): + +class TextColor(BaseModel): + 
fg: tuple[int, int, int] + bg: tuple[int, int, int] + +class Translation(BaseModel): + minX: int + minY: int + maxX: int + maxY: int + is_bulleted_list: bool + angle: float | int + prob: float + text_color: TextColor + text: dict[str, str] + background: np.ndarray = Field( + ..., + description="Background image encoded as a base64 string", + examples=["data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA..."] + ) + + class Config: + arbitrary_types_allowed = True + json_encoders = { + np.ndarray: lambda array: Translation.encode_background(array) + } + + @staticmethod + def encode_background(array: np.ndarray) -> str: + retval, buffer = cv2.imencode('.png', array) + jpg_as_text = base64.b64encode(buffer).decode("utf-8") + background = f"data:image/png;base64,{jpg_as_text}" + return background + + def to_bytes(self): + coords_bytes = struct.pack('4i', self.minX, self.minY, self.maxX, self.maxY) + is_bulleted_list_byte = struct.pack('?', self.is_bulleted_list) + angle_bytes = struct.pack('f', float(self.angle) if isinstance(self.angle, int) else self.angle) + prob_bytes = struct.pack('f', self.prob) + fg = struct.pack('3B', self.text_color.fg[0], self.text_color.fg[1], self.text_color.fg[2]) + bg = struct.pack('3B', self.text_color.bg[0], self.text_color.bg[1], self.text_color.bg[2]) + text_bytes = struct.pack('i', len(self.text.items())) + for key, value in self.text.items(): + text_bytes += struct.pack('I', len(key.encode('utf-8'))) + key.encode('utf-8') + text_bytes += struct.pack('I', len(value.encode('utf-8'))) + value.encode('utf-8') + background_bytes = struct.pack('I', len(self.background.tobytes())) + self.background.tobytes() + return coords_bytes +is_bulleted_list_byte+ angle_bytes+prob_bytes+fg + bg + text_bytes + background_bytes + +class TranslationResponse(BaseModel): + translations: List[Translation] + + def to_bytes(self): + items= [v.to_bytes() for v in self.translations] + return struct.pack('i', len(items)) + b''.join(items) + +def to_translation(ctx: 
Context) -> TranslationResponse: text_regions:list[TextBlock] = ctx.text_regions inpaint = ctx.img_inpainted translations:Dict[str, List[str]] = ctx.translations results = [] - if 'overlay_ext' in ctx: - #todo: unreachable - overlay_ext = ctx['overlay_ext'] - else: - overlay_ext = 'jpg' for i, blk in enumerate(text_regions): minX, minY, maxX, maxY = blk.xyxy + text_region = text_regions[i] if 'translations' in ctx: trans = {key: value[i] for key, value in translations.items()} else: trans = {} - trans["originalText"] = text_regions[i].text - if inpaint is not None: - overlay = inpaint[minY:maxY, minX:maxX] - - retval, buffer = cv2.imencode('.' + overlay_ext, overlay) - jpg_as_text = base64.b64encode(buffer) - background = "data:image/" + overlay_ext + ";base64," + jpg_as_text.decode("utf-8") - else: - background = None - text_region = text_regions[i] + trans[text_region.source_lang] = text_regions[i].text text_region.adjust_bg_color = False color1, color2 = text_region.get_font_colors() - - results.append({ - 'text': trans, - 'minX': int(minX), - 'minY': int(minY), - 'maxX': int(maxX), - 'maxY': int(maxY), - 'textColor': { - 'fg': color1.tolist(), - 'bg': color2.tolist() - }, - 'language': text_regions[i].source_lang, - 'background': background - }) - return results - -class TextColor(BaseModel): - fg: tuple[int, int, int] - bg: tuple[int, int, int] - -class Translation(BaseModel): - text: dict[str, str] - minX: int - minY: int - maxX: int - maxY: int - textColor:TextColor - language: str - background: str + results.append(Translation(text=trans, + minX=int(minX),minY=int(minY),maxX=int(maxX),maxY=int(maxY), + background=inpaint[minY:maxY, minX:maxX], + is_bulleted_list=text_region.is_bulleted_list, + text_color=TextColor(fg=color1.tolist(), bg=color2.tolist()), + prob=text_region.prob, + angle=text_region.angle + )) + #todo: background angle + return TranslationResponse(translations=results) From cf7412e4f676927b83e2fb0eb4ef7940c1dc6262 Mon Sep 17 00:00:00 2001 
From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Fri, 22 Nov 2024 12:15:38 +0100 Subject: [PATCH 61/75] move files into examples --- {server => examples}/response.cpp | 0 {server => examples}/response.rs | 0 server/main.py | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) rename {server => examples}/response.cpp (100%) rename {server => examples}/response.rs (100%) diff --git a/server/response.cpp b/examples/response.cpp similarity index 100% rename from server/response.cpp rename to examples/response.cpp diff --git a/server/response.rs b/examples/response.rs similarity index 100% rename from server/response.rs rename to examples/response.rs diff --git a/server/main.py b/server/main.py index 44f7b77a3..3beef79c5 100644 --- a/server/main.py +++ b/server/main.py @@ -60,7 +60,7 @@ async def json(req: Request): ctx = await get_ctx(req) return to_translation(ctx) -@app.post("/translate/bytes", response_class=StreamingResponse, response_description="custom byte structure following the stream encoding, but with json first and then the image bytes as chunks") +@app.post("/translate/bytes", response_class=StreamingResponse, response_description="custom byte structure for decoding look at examples in 'examples/response.*'") async def bytes(req: Request): ctx = await get_ctx(req) return StreamingResponse(content=to_translation(ctx).to_bytes()) From aac997e00f3c69f4a3349d056666bf6c1d42a0f8 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Fri, 22 Nov 2024 15:44:01 +0100 Subject: [PATCH 62/75] fix docs --- server/index.html | 2 +- server/main.py | 98 +++++++++++++++++++++++------------- server/manual.html | 2 +- server/request_extraction.py | 40 ++++----------- server/sent_data_internal.py | 1 - server/to_json.py | 10 ++-- 6 files changed, 81 insertions(+), 72 deletions(-) diff --git a/server/index.html b/server/index.html index edf617609..fd1a49a11 100644 --- a/server/index.html +++ 
b/server/index.html @@ -357,7 +357,7 @@

Image/Manga Translator

} const uploadWithProgress = async (formData) => { try { - const response = await fetch(`${BASE_URI}translate/image/stream`, { + const response = await fetch(`${BASE_URI}translate/with-form/image/stream`, { method: 'POST', body: formData, }); diff --git a/server/main.py b/server/main.py index 3beef79c5..5563f0ce5 100644 --- a/server/main.py +++ b/server/main.py @@ -7,16 +7,15 @@ import sys from argparse import Namespace -from fastapi import FastAPI, Request, HTTPException, Header, UploadFile +from fastapi import FastAPI, Request, HTTPException, Header, UploadFile, File, Form from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import StreamingResponse, JSONResponse, HTMLResponse +from fastapi.responses import StreamingResponse, HTMLResponse from pathlib import Path -from pydantic import BaseModel - +from manga_translator import Config from server.instance import ExecutorInstance, executor_instances from server.myqueue import task_queue -from server.request_extraction import get_ctx, while_streaming +from server.request_extraction import get_ctx, while_streaming, TranslateRequest from server.to_json import to_translation, TranslationResponse app = FastAPI() @@ -30,14 +29,7 @@ allow_headers=["*"], ) -class TranslateRequestForm(BaseModel): - """This request can be a multipart or a json request""" - image: UploadFile - """can be a url, base64 encoded image or a multipart image""" - config: str - """in case it is a multipart this needs to be a string(json.stringify)""" - -@app.post("/register", response_description="no response") +@app.post("/register", response_description="no response", tags=["internal-api"]) async def register_instance(instance: ExecutorInstance, req: Request, req_nonce: str = Header(alias="X-Nonce")): if req_nonce != nonce: raise HTTPException(401, detail="Invalid nonce") @@ -55,48 +47,85 @@ def transform_to_json(ctx): def transform_to_bytes(ctx): return to_translation(ctx).to_bytes() -@app.post("/translate/json", 
response_model=TranslationResponse, response_description="json strucure inspired by the ichigo translator extension")
-async def json(req: Request):
-    ctx = await get_ctx(req)
+@app.post("/translate/json", response_model=TranslationResponse, tags=["api", "json"],response_description="json structure inspired by the ichigo translator extension")
+async def json(req: Request, data: TranslateRequest):
+    ctx = await get_ctx(req, data.config, data.image)
+    return to_translation(ctx)
+
+@app.post("/translate/bytes", response_class=StreamingResponse, tags=["api", "json"],response_description="custom byte structure for decoding look at examples in 'examples/response.*'")
+async def bytes(req: Request, data: TranslateRequest):
+    ctx = await get_ctx(req, data.config, data.image)
+    return StreamingResponse(content=to_translation(ctx).to_bytes())
+
+@app.post("/translate/image", response_description="the result image", tags=["api", "json"],response_class=StreamingResponse)
+async def image(req: Request, data: TranslateRequest) -> StreamingResponse:
+    ctx = await get_ctx(req, data.config, data.image)
+    img_byte_arr = io.BytesIO()
+    ctx.result.save(img_byte_arr, format="PNG")
+    img_byte_arr.seek(0)
+
+    return StreamingResponse(img_byte_arr, media_type="image/png")
+
+@app.post("/translate/json/stream", response_class=StreamingResponse,tags=["api", "json"], response_description="A stream over elements with structure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance")
+async def stream_json(req: Request, data: TranslateRequest) -> StreamingResponse:
+    return await while_streaming(req, transform_to_json, data.config, data.image)
+
+@app.post("/translate/bytes/stream", response_class=StreamingResponse, tags=["api", "json"],response_description="A stream over elements with structure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance")
+async def stream_bytes(req: Request, data: TranslateRequest)-> StreamingResponse:
+    return await while_streaming(req, transform_to_bytes,data.config, data.image)
+
+@app.post("/translate/image/stream", response_class=StreamingResponse, tags=["api", "json"], response_description="A stream over elements with structure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance")
+async def stream_image(req: Request, data: TranslateRequest) -> StreamingResponse:
+    return await while_streaming(req, transform_to_image, data.config, data.image)
+
+@app.post("/translate/with-form/json", response_model=TranslationResponse, tags=["api", "form"],response_description="json structure inspired by the ichigo translator extension")
+async def json_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")):
+    img = await image.read()
+    ctx = await get_ctx(req, Config.parse_raw(config), img)
     return to_translation(ctx)
 
-@app.post("/translate/bytes", response_class=StreamingResponse, response_description="custom byte structure for decoding look at examples in 'examples/response.*'")
-async def bytes(req: Request):
-    ctx = await get_ctx(req)
+@app.post("/translate/with-form/bytes", response_class=StreamingResponse, tags=["api", "form"],response_description="custom byte structure for decoding look at examples in 'examples/response.*'")
+async def bytes_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")):
+    img = await image.read()
+    ctx = await get_ctx(req, Config.parse_raw(config), img)
     return StreamingResponse(content=to_translation(ctx).to_bytes())
 
-@app.post("/translate/image", response_description="the result image", response_class=StreamingResponse)
-async def image(req: Request) -> StreamingResponse:
-    ctx = await get_ctx(req)
+@app.post("/translate/with-form/image", response_description="the result image", tags=["api", "form"],response_class=StreamingResponse)
+async def image_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")) -> StreamingResponse:
+    img = await image.read()
+    ctx = await get_ctx(req, Config.parse_raw(config), img)
     img_byte_arr = io.BytesIO()
     ctx.result.save(img_byte_arr, format="PNG")
     img_byte_arr.seek(0)
 
     return StreamingResponse(img_byte_arr, media_type="image/png")
 
-@app.post("/translate/json/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance")
-async def stream_json(req: Request) -> StreamingResponse:
-    return await while_streaming(req, transform_to_json)
+@app.post("/translate/with-form/json/stream", response_class=StreamingResponse, tags=["api", "form"],response_description="A stream over elements with structure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance")
+async def stream_json_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")) -> StreamingResponse:
+    img = await image.read()
+    return await while_streaming(req, transform_to_json, Config.parse_raw(config), img)
 
-@app.post("/translate/bytes/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance")
-async def stream_bytes(req: Request)-> StreamingResponse:
-    return await while_streaming(req, transform_to_bytes)
+@app.post("/translate/with-form/bytes/stream", response_class=StreamingResponse,tags=["api", "form"], response_description="A stream over elements with structure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance")
+async def stream_bytes_form(req: Request, image: UploadFile = File(...), config: str = Form("{}"))-> StreamingResponse:
+    img = await image.read()
+    return await while_streaming(req, transform_to_bytes, Config.parse_raw(config), img)
 
-@app.post("/translate/image/stream", response_class=StreamingResponse, response_description="A stream over elements with strucure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance")
-async def stream_image(req: Request) -> StreamingResponse:
-    return await while_streaming(req, transform_to_image)
+@app.post("/translate/with-form/image/stream", response_class=StreamingResponse, tags=["api", "form"], response_description="A stream over elements with structure(1byte status, 4 byte size, n byte data) status code are 0,1,2,3,4 0 is result data, 1 is progress report, 2 is error, 3 is waiting queue position, 4 is waiting for translator instance")
+async def stream_image_form(req: Request, image: UploadFile = File(...), config: str = Form("{}")) -> StreamingResponse:
+    img = await image.read()
+    return await while_streaming(req, transform_to_image, Config.parse_raw(config), img)
 
-@app.post("/queue-size", response_model=int)
+@app.post("/queue-size", response_model=int, tags=["api", "json"])
 async def queue_size() -> int:
     return len(task_queue.queue)
 
-@app.get("/", response_class=HTMLResponse)
+@app.get("/", response_class=HTMLResponse,tags=["ui"])
 async def index() -> HTMLResponse:
     html_file = Path("index.html")
     html_content = html_file.read_text()
     return 
HTMLResponse(content=html_content) -@app.get("/manual", response_class=HTMLResponse) +@app.get("/manual", response_class=HTMLResponse, tags=["ui"]) async def manual(): html_file = Path("manual.html") html_content = html_file.read_text() @@ -153,7 +182,6 @@ def prepare(args): #todo: cache results #todo: cleanup cache -#todo: add docs #todo: enable config in html pages if __name__ == '__main__': diff --git a/server/manual.html b/server/manual.html index 1335b0c51..2983feb64 100644 --- a/server/manual.html +++ b/server/manual.html @@ -72,7 +72,7 @@

formData.append('image', file); try { - const response = await fetch( generateImage.checked ? '/translate/image/stream' : '/translate/json/stream', { + const response = await fetch( generateImage.checked ? '/translate/with-form/image/stream' : '/translate/with-form/json/stream', { method: 'POST', body: formData, /*headers: { diff --git a/server/request_extraction.py b/server/request_extraction.py index decb64bff..54516fa39 100644 --- a/server/request_extraction.py +++ b/server/request_extraction.py @@ -9,9 +9,9 @@ from PIL import Image from fastapi import Request, HTTPException from pydantic import BaseModel -from starlette.responses import StreamingResponse +from fastapi.responses import StreamingResponse -from manga_translator import Config, Context +from manga_translator import Config from server.myqueue import task_queue, wait_in_queue, QueueElement from server.streaming import notify, stream @@ -19,7 +19,7 @@ class TranslateRequest(BaseModel): """This request can be a multipart or a json request""" image: bytes|str """can be a url, base64 encoded image or a multipart image""" - config: Config + config: Config = Config() """in case it is a multipart this needs to be a string(json.stringify)""" async def to_pil_image(image: Union[str, bytes]) -> Image.Image: @@ -41,39 +41,17 @@ async def to_pil_image(image: Union[str, bytes]) -> Image.Image: raise HTTPException(status_code=422, detail=str(e)) -async def multi_content_type(request: Request): - content_type = request.headers.get("content-type") - if content_type and content_type.startswith("multipart/form-data"): - form = await request.form() - config = form.get("config", "{}") - image = form.get("image") - image_content = await image.read() - config = Config.parse_raw(config) - return config, image_content - elif content_type and content_type.startswith("application/json"): - body = await request.json() - config = Config(**body.get("config", {})) - image = body.get("image") - return config, image - - else: - 
raise HTTPException(status_code=400, detail="Unsupported Content-Type") - -async def get_ctx(req: Request): - data, img = await multi_content_type(req) - - image = await to_pil_image(img) - task = QueueElement(req, image, data, 0) +async def get_ctx(req: Request, config: Config, image: str|bytes): + image = await to_pil_image(image) + task = QueueElement(req, image, config, 0) task_queue.add_task(task) return await wait_in_queue(task, None) -async def while_streaming(req: Request, transform): - data, img = await multi_content_type(req) - - image = await to_pil_image(img) - task = QueueElement(req, image, data, 0) +async def while_streaming(req: Request, transform, config: Config, image: bytes | str): + image = await to_pil_image(image) + task = QueueElement(req, image, config, 0) task_queue.add_task(task) messages = asyncio.Queue() diff --git a/server/sent_data_internal.py b/server/sent_data_internal.py index 5720cb6c5..99f8c3730 100644 --- a/server/sent_data_internal.py +++ b/server/sent_data_internal.py @@ -1,4 +1,3 @@ -import asyncio import pickle from typing import Mapping, Optional, Callable diff --git a/server/to_json.py b/server/to_json.py index f6bcdc6e2..bdf1d4cab 100644 --- a/server/to_json.py +++ b/server/to_json.py @@ -1,10 +1,10 @@ import base64 import struct -from typing import Dict, List +from typing import Dict, List, Annotated import cv2 import numpy as np -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, WithJsonSchema from manga_translator import Context from manga_translator.utils import TextBlock @@ -24,6 +24,10 @@ #img_rendered: array #mask_raw: array #mask:array +NumpyNdarray = Annotated[ + np.ndarray, + WithJsonSchema({'type': 'string', "format": "base64","examples": ["data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA..."]}), +] class TextColor(BaseModel): fg: tuple[int, int, int] @@ -39,7 +43,7 @@ class Translation(BaseModel): prob: float text_color: TextColor text: dict[str, str] - background: np.ndarray = Field( 
+ background: NumpyNdarray = Field( ..., description="Background image encoded as a base64 string", examples=["data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAA..."] From 589e0370bf360e82777b46b69a8bb00dad5245c7 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Fri, 22 Nov 2024 16:08:02 +0100 Subject: [PATCH 63/75] configs --- server/index.html | 43 +++++++++++++++++++++++++++---------------- server/main.py | 2 -- server/manual.html | 9 +++++++++ 3 files changed, 36 insertions(+), 18 deletions(-) diff --git a/server/index.html b/server/index.html index fd1a49a11..177885d79 100644 --- a/server/index.html +++ b/server/index.html @@ -33,10 +33,10 @@

Image/Manga Translator

@@ -47,7 +47,7 @@

Image/Manga Translator

Image/Manga Translator @@ -199,13 +199,13 @@

Image/Manga Translator

get fileUri() { return this.file ? URL.createObjectURL(this.file) : null }, - detectionResolution: 'M', - textDetector: 'auto', + detectionResolution: '1536', + textDetector: 'default', renderTextDirection: 'auto', translator: 'youdao', - validTranslators: ['youdao', 'baidu', 'google', 'deepl', 'papago', 'caiyun', 'offline', 'gpt3.5', 'none'], + validTranslators: ['youdao', 'baidu', 'deepl', 'papago', 'caiyun', 'offline', 'gpt3.5', 'none'], getTranslatorName(key) { - if (key == 'none') + if (key === 'none') return "No Text" return key ? key[0].toUpperCase() + key.slice(1) : ""; }, @@ -307,11 +307,22 @@

Image/Manga Translator

const formData = new FormData() formData.append('image', this.file) - /*formData.append('size', this.detectionResolution) - formData.append('detector', this.textDetector) - formData.append('direction', this.renderTextDirection) - formData.append('translator', this.translator) - formData.append('target_lang', this.targetLanguage)*/ + const config = `{ + "detector": { + "detector": "${this.textDetector}", + "detection_size": ${this.detectionResolution} + }, + "render": { + "direction": "${this.renderTextDirection}" + }, + "translator": { + "translator": "${this.translator}", + "target_lang": "${this.targetLanguage}" + } + }`; + + formData.append('config', config) + const processChunk = (value) => { if (this.error) return; diff --git a/server/main.py b/server/main.py index 5563f0ce5..20d3d5833 100644 --- a/server/main.py +++ b/server/main.py @@ -182,8 +182,6 @@ def prepare(args): #todo: cache results #todo: cleanup cache -#todo: enable config in html pages - if __name__ == '__main__': import uvicorn from args import parse_arguments diff --git a/server/manual.html b/server/manual.html index 2983feb64..45c801070 100644 --- a/server/manual.html +++ b/server/manual.html @@ -15,6 +15,8 @@

Upload Image and Translate

+
+

@@ -55,10 +57,12 @@

let submitButton = document.getElementById("submit-button") const fileInput = document.getElementById('fileInput'); const generateImage = document.getElementById('generate-image'); + const configField = document.getElementById('config-json'); submitButton.classList.add("hidden"); fileInput.classList.add("hidden"); generateImage.classList.add("hidden"); + configField.classList.add("hidden"); statusField.innerHTML = ''; errorField.innerHTML = ''; const file = fileInput.files[0]; @@ -70,6 +74,10 @@

const formData = new FormData(); formData.append('image', file); + const text = configField.innerText; + if (text.length > 2) { + formData.append('config', text) + } try { const response = await fetch( generateImage.checked ? '/translate/with-form/image/stream' : '/translate/with-form/json/stream', { @@ -94,6 +102,7 @@

submitButton.classList.remove("hidden"); fileInput.classList.remove("hidden"); generateImage.classList.remove("hidden"); + configField.classList.remove("hidden"); } } From 6c6a4b558ae583437a34a1cb8ffc96b39dd9102a Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 23 Nov 2024 13:10:34 +0100 Subject: [PATCH 64/75] delete old code & move file --- manga_translator/__main__.py | 12 +----------- manga_translator/args.py | 12 ------------ manga_translator/mode/__init__.py | 0 manga_translator/{ => mode}/share.py | 0 4 files changed, 1 insertion(+), 23 deletions(-) create mode 100644 manga_translator/mode/__init__.py rename manga_translator/{ => mode}/share.py (100%) diff --git a/manga_translator/__main__.py b/manga_translator/__main__.py index eb348798f..dca4740a7 100644 --- a/manga_translator/__main__.py +++ b/manga_translator/__main__.py @@ -3,7 +3,6 @@ import logging from argparse import Namespace -from manga_translator.share import MangaShare from .manga_translator import ( set_main_logger, load_dictionary, apply_dictionary, ) @@ -65,26 +64,17 @@ async def dispatch(args: Namespace): except Exception : pass - elif args.mode == 'web': - from .server.web_main import dispatch - await dispatch(args.host, args.port, translation_params=args_dict) - - elif args.mode == 'web_client': - from manga_translator.mode.web import MangaTranslatorWeb - translator = MangaTranslatorWeb(args_dict) - await translator.listen(args_dict) - elif args.mode == 'ws': from manga_translator.mode.ws import MangaTranslatorWS translator = MangaTranslatorWS(args_dict) await translator.listen(args_dict) elif args.mode == 'shared': + from manga_translator.mode.share import MangaShare translator = MangaShare(args_dict) await translator.listen(args_dict) - if __name__ == '__main__': args = None init_logging() diff --git a/manga_translator/args.py b/manga_translator/args.py index 562059ad6..472311c6a 100644 --- a/manga_translator/args.py +++ 
b/manga_translator/args.py @@ -107,18 +107,6 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser_batch.add_argument('--save-quality', default=100, type=int, help='Quality of saved JPEG image, range from 0 to 100 with 100 being best') parser.add_argument('--config-file', default=None, type=str, help='path to the config file') -# Web mode -parser_web = subparsers.add_parser('web', help='Run in web service mode') -parser_web.add_argument('--host', default='127.0.0.1', type=str, help='Host for web service') -parser_web.add_argument('--port', default=5003, type=int, help='Port for web service') -parser_web.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal web server communication') - -# Web client mode -parser_web_client = subparsers.add_parser('web_client', help='Run in web client mode') -parser_web_client.add_argument('--host', default='127.0.0.1', type=str, help='Host for web client') -parser_web_client.add_argument('--port', default=5003, type=int, help='Port for web client') -parser_web_client.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal web client communication') - # WebSocket mode parser_ws = subparsers.add_parser('ws', help='Run in WebSocket mode') parser_ws.add_argument('--host', default='127.0.0.1', type=str, help='Host for WebSocket service') diff --git a/manga_translator/mode/__init__.py b/manga_translator/mode/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/manga_translator/share.py b/manga_translator/mode/share.py similarity index 100% rename from manga_translator/share.py rename to manga_translator/mode/share.py From cb98b6dafa5d05d40f0c6bf2f675cc4a34705d63 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 23 Nov 2024 13:17:41 +0100 Subject: [PATCH 65/75] gpt 3.5 fix --- manga_translator/config.py | 5 ++++- 1 file changed, 4 
insertions(+), 1 deletion(-) diff --git a/manga_translator/config.py b/manga_translator/config.py index adfe367bd..17e51111e 100644 --- a/manga_translator/config.py +++ b/manga_translator/config.py @@ -22,7 +22,10 @@ def __init__(self, string: str): self.target_lang = None for g in string.split(';'): trans, lang = g.split(':') - translator = Translator[trans] + if trans == "gpt3.5": + translator = Translator["gpt3_5"] + else: + translator = Translator[trans] if translator not in TRANSLATORS: raise ValueError(f'Invalid choice: %s (choose from %s)' % (trans, ', '.join(map(repr, TRANSLATORS)))) if lang not in VALID_LANGUAGES: From e4062281696f2c570becb9e6ecc486f591de49d3 Mon Sep 17 00:00:00 2001 From: zyddnys Date: Fri, 29 Nov 2024 21:55:50 -0500 Subject: [PATCH 66/75] fix #703 --- manga_translator/rendering/text_render.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/manga_translator/rendering/text_render.py b/manga_translator/rendering/text_render.py index a8be534e7..da6301a9c 100644 --- a/manga_translator/rendering/text_render.py +++ b/manga_translator/rendering/text_render.py @@ -346,10 +346,7 @@ def put_text_vertical(font_size: int, text: str, h: int, alignment: str, fg: Tup canvas_border = np.clip(canvas_border, 0, 255) line_box = add_color(canvas_text, fg, canvas_border, bg) # rect - if bg is None : - x, y, w, h = cv2.boundingRect(canvas_text) - else : - x, y, w, h = cv2.boundingRect(canvas_border) + x, y, w, h = cv2.boundingRect(canvas_border) return line_box[y:y+h, x:x+w] def select_hyphenator(lang: str): From 1fc572cd21d541a87511d43ad99bd5223a8bb25e Mon Sep 17 00:00:00 2001 From: zyddnys Date: Fri, 29 Nov 2024 21:57:38 -0500 Subject: [PATCH 67/75] fix --- manga_translator/rendering/text_render.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/manga_translator/rendering/text_render.py b/manga_translator/rendering/text_render.py index da6301a9c..64eebe632 100644 --- a/manga_translator/rendering/text_render.py 
+++ b/manga_translator/rendering/text_render.py @@ -733,12 +733,8 @@ def put_text_horizontal(font_size: int, text: str, width: int, height: int, alig canvas_border = np.clip(canvas_border, 0, 255) line_box = add_color(canvas_text, fg, canvas_border, bg) - # rect - if bg is None : - x, y, w, h = cv2.boundingRect(canvas_text) - else : - x, y, w, h = cv2.boundingRect(canvas_border) - return line_box[y:y+height, x:x+width] + x, y, w, h = cv2.boundingRect(canvas_border) + return line_box[y:y+h, x:x+w] # def put_text(img: np.ndarray, text: str, line_count: int, x: int, y: int, w: int, h: int, fg: Tuple[int, int, int], bg: Optional[Tuple[int, int, int]]): # pass From 260dd0afbce95fa36b9cc0461dfb7e7f3e04e3c0 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 30 Nov 2024 14:16:12 +0100 Subject: [PATCH 68/75] some docs --- README.md | 819 +++++++++++++++++++++++------------ README_CN.md | 139 +----- examples/config-example.json | 61 +++ manga_translator/__main__.py | 6 + manga_translator/args.py | 2 + 5 files changed, 630 insertions(+), 397 deletions(-) create mode 100644 examples/config-example.json diff --git a/README.md b/README.md index 50f7bf7aa..989316cba 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ Some manga/images will never be translated, therefore this project is born. - [Options](#options) - [Language Code Reference](#language-code-reference) - [Translators Reference](#translators-reference) + - [Config Documentation](#config-file) - [GPT Config Reference](#gpt-config-reference) - [Using Gimp for rendering](#using-gimp-for-rendering) - [Api Documentation](#api-documentation) @@ -270,43 +271,22 @@ make run-web-server ## Usage -### Batch mode (default) +### Local mode ```bash -# use `--use-gpu` for speedup if you have a compatible NVIDIA GPU. -# use `--target-lang ` to specify a target language. -# use `--inpainter=none` to disable inpainting. 
-# use `--translator=none` if you only want to use inpainting (blank bubbles) # replace with the path to the image folder or file. -$ python -m manga_translator -v --translator=google -l ENG -i +$ python -m manga_translator local -v -i # results can be found under `-translated`. ``` -### Demo mode - -```bash -# saves singular image into /result folder for demonstration purposes -# use `--mode demo` to enable demo translation. -# replace with the path to the image file. -$ python -m manga_translator --mode demo -v --translator=google -l ENG -i -# result can be found in `result/`. -``` - ### Web Mode ```bash # use `--mode web` to start a web server. -$ python -m manga_translator -v --mode web --use-gpu +$ cd server && python main.py --use-gpu # the demo will be serving on http://127.0.0.1:5003 ``` -### Api Mode - -```bash -# use `--mode web` to start a web server. -$ python -m manga_translator -v --mode api --use-gpu -# the demo will be serving on http://127.0.0.1:5003 -``` ## Related Projects GUI implementation: [BallonsTranslator](https://github.com/dmMaze/BallonsTranslator) @@ -318,7 +298,7 @@ Detector: - JPN: ?? - CHS: ?? - KOR: ?? -- Using `--detector ctd` can increase the amount of text lines detected +- Using `{"detector":{"detector": "ctd"}}` can increase the amount of text lines detected OCR: @@ -352,139 +332,26 @@ Colorizer: **mc2** ### Options ```text --h, --help show this help message and exit --m, --mode {demo,batch,web,web_client,ws,api} - Run demo in single image demo mode (demo), batch - translation mode (batch), web service mode (web) --i, --input INPUT [INPUT ...] 
Path to an image file if using demo mode, or path to an - image folder if using batch mode --o, --dest DEST Path to the destination folder for translated images in - batch mode --l, --target-lang {CHS,CHT,CSY,NLD,ENG,FRA,DEU,HUN,ITA,JPN,KOR,PLK,PTB,ROM,RUS,ESP,TRK,UKR,VIN,ARA,CNR,SRP,HRV,THA,IND,FIL} - Destination language --v, --verbose Print debug info and save intermediate images in result - folder --f, --format {png,webp,jpg,xcf,psd,pdf} Output format of the translation. ---attempts ATTEMPTS Retry attempts on encountered error. -1 means infinite - times. ---ignore-errors Skip image on encountered error. ---overwrite Overwrite already translated images in batch mode. ---skip-no-text Skip image without text (Will not be saved). ---model-dir MODEL_DIR Model directory (by default ./models in project root) ---use-gpu Turn on/off gpu ---use-gpu-limited Turn on/off gpu (excluding offline translator) ---detector {default,ctd,craft,none} Text detector used for creating a text mask from an - image, DO NOT use craft for manga, it's not designed - for it ---ocr {32px,48px,48px_ctc,mocr} Optical character recognition (OCR) model to use ---use-mocr-merge Use bbox merge when Manga OCR inference. ---inpainter {default,lama_large,lama_mpe,sd,none,original} - Inpainting model to use ---upscaler {waifu2x,esrgan,4xultrasharp} Upscaler to use. --upscale-ratio has to be set for it - to take effect ---upscale-ratio UPSCALE_RATIO Image upscale ratio applied before detection. Can - improve text detection. ---colorizer {mc2} Colorization model to use. ---translator {google,youdao,baidu,deepl,papago,caiyun,gpt3,gpt3.5,gpt4,none,original,offline,nllb,nllb_big,sugoi,jparacrawl,jparacrawl_big,m2m100,m2m100_big,sakura} - Language translator to use ---translator-chain TRANSLATOR_CHAIN Output of one translator goes in another. Example: - --translator-chain "google:JPN;sugoi:ENG". ---selective-translation SELECTIVE_TRANSLATION - Select a translator based on detected language in - image. 
Note the first translation service acts as - default if the language isn't defined. Example: - --translator-chain "google:JPN;sugoi:ENG". ---revert-upscaling Downscales the previously upscaled image after - translation back to original size (Use with --upscale- - ratio). ---detection-size DETECTION_SIZE Size of image used for detection ---det-rotate Rotate the image for detection. Might improve - detection. ---det-auto-rotate Rotate the image for detection to prefer vertical - textlines. Might improve detection. ---det-invert Invert the image colors for detection. Might improve - detection. ---det-gamma-correct Applies gamma correction for detection. Might improve - detection. ---unclip-ratio UNCLIP_RATIO How much to extend text skeleton to form bounding box ---box-threshold BOX_THRESHOLD Threshold for bbox generation ---text-threshold TEXT_THRESHOLD Threshold for text detection ---min-text-length MIN_TEXT_LENGTH Minimum text length of a text region ---no-text-lang-skip Dont skip text that is seemingly already in the target - language. ---inpainting-size INPAINTING_SIZE Size of image used for inpainting (too large will - result in OOM) ---inpainting-precision {fp32,fp16,bf16} Inpainting precision for lama, use bf16 while you can. ---colorization-size COLORIZATION_SIZE Size of image used for colorization. Set to -1 to use - full image size ---denoise-sigma DENOISE_SIGMA Used by colorizer and affects color strength, range - from 0 to 255 (default 30). -1 turns it off. ---mask-dilation-offset MASK_DILATION_OFFSET By how much to extend the text mask to remove left-over - text pixels of the original image. ---font-size FONT_SIZE Use fixed font size for rendering ---font-size-offset FONT_SIZE_OFFSET Offset font size by a given amount, positive number - increase font size and vice versa ---font-size-minimum FONT_SIZE_MINIMUM Minimum output font size. Default is - image_sides_sum/200 ---font-color FONT_COLOR Overwrite the text fg/bg color detected by the OCR - model. 
Use hex string without the "#" such as FFFFFF - for a white foreground or FFFFFF:000000 to also have a - black background around the text. ---line-spacing LINE_SPACING Line spacing is font_size * this value. Default is 0.01 - for horizontal text and 0.2 for vertical. ---force-horizontal Force text to be rendered horizontally ---force-vertical Force text to be rendered vertically ---align-left Align rendered text left ---align-center Align rendered text centered ---align-right Align rendered text right ---uppercase Change text to uppercase ---lowercase Change text to lowercase ---no-hyphenation If renderer should be splitting up words using a hyphen - character (-) ---manga2eng Render english text translated from manga with some - additional typesetting. Ignores some other argument - options ---gpt-config GPT_CONFIG Path to GPT config file, more info in README ---use-mtpe Turn on/off machine translation post editing (MTPE) on - the command line (works only on linux right now) ---save-text Save extracted text and translations into a text file. ---save-text-file SAVE_TEXT_FILE Like --save-text but with a specified file path. ---filter-text FILTER_TEXT Filter regions by their text with a regex. Example - usage: --text-filter ".*badtext.*" ---pre-dict FILE_PATH Path to the pre-translation dictionary file. One entry per line, - Comments can be added with `#` and `//`. - usage: //Example - dog cat #Example - abc def - abc ---post-dict FILE_PATH Path to the post-translation dictionary file. Same as above. ---skip-lang Skip translation if source image is one of the provide languages, - use comma to separate multiple languages. Example: JPN,ENG ---prep-manual Prepare for manual typesetting by outputting blank, - inpainted images, plus copies of the original for - reference ---font-path FONT_PATH Path to font file ---gimp-font GIMP_FONT Font family to use for gimp rendering. 
---host HOST Used by web module to decide which host to attach to ---port PORT Used by web module to decide which port to attach to ---nonce NONCE Used by web module as secret for securing internal web - server communication ---ws-url WS_URL Server URL for WebSocket mode ---save-quality SAVE_QUALITY Quality of saved JPEG image, range from 0 to 100 with - 100 being best ---ignore-bubble IGNORE_BUBBLE The threshold for ignoring text in non bubble areas, - with valid values ranging from 1 to 50, does not ignore - others. Recommendation 5 to 10. If it is too low, - normal bubble areas may be ignored, and if it is too - large, non bubble areas may be considered normal - bubbles +-h, --help show this help message and exit +-v, --verbose Print debug info and save intermediate images in result folder +--attempts ATTEMPTS Retry attempts on encountered error. -1 means infinite times. +--ignore-errors Skip image on encountered error. +--model-dir MODEL_DIR Model directory (by default ./models in project root) +--use-gpu Turn on/off gpu (auto switch between mps and cuda) +--use-gpu-limited Turn on/off gpu (excluding offline translator) +--font-path FONT_PATH Path to font file +--pre-dict PRE_DICT Path to the pre-translation dictionary file +--post-dict POST_DICT Path to the post-translation dictionary file +--kernel-size KERNEL_SIZE Set the convolution kernel size of the text erasure area to + completely clean up text residues +--config-file CONFIG_FILE path to the config file ``` ### Language Code Reference -Used by the `--target-lang` or `-l` argument. +Used by the `translator/language` in the config ```yaml CHS: Chinese (Simplified) @@ -546,7 +413,535 @@ DEEPL_AUTH_KEY=xxxxxxxx... - Offline: Whether the translator can be used offline. 
- Sugoi is created by mingshiba, please support him in https://www.patreon.com/mingshiba +### Config file +run `python -m manga_translator config-help >> config-info.json` + +an example can be found in example/config-example.json +```json +{ + "$defs": { + "Alignment": { + "enum": [ + "auto", + "left", + "center", + "right" + ], + "title": "Alignment", + "type": "string" + }, + "Colorizer": { + "enum": [ + "none", + "mc2" + ], + "title": "Colorizer", + "type": "string" + }, + "ColorizerConfig": { + "properties": { + "colorization_size": { + "default": 576, + "title": "Colorization Size", + "type": "integer" + }, + "denoise_sigma": { + "default": 30, + "title": "Denoise Sigma", + "type": "integer" + }, + "colorizer": { + "$ref": "#/$defs/Colorizer", + "default": "none" + } + }, + "title": "ColorizerConfig", + "type": "object" + }, + "Detector": { + "enum": [ + "default", + "dbconvnext", + "ctd", + "craft", + "none" + ], + "title": "Detector", + "type": "string" + }, + "DetectorConfig": { + "properties": { + "detector": { + "$ref": "#/$defs/Detector", + "default": "default" + }, + "detection_size": { + "default": 1536, + "title": "Detection Size", + "type": "integer" + }, + "text_threshold": { + "default": 0.5, + "title": "Text Threshold", + "type": "number" + }, + "det_rotate": { + "default": false, + "title": "Det Rotate", + "type": "boolean" + }, + "det_auto_rotate": { + "default": false, + "title": "Det Auto Rotate", + "type": "boolean" + }, + "det_invert": { + "default": false, + "title": "Det Invert", + "type": "boolean" + }, + "det_gamma_correct": { + "default": false, + "title": "Det Gamma Correct", + "type": "boolean" + }, + "box_threshold": { + "default": 0.7, + "title": "Box Threshold", + "type": "number" + }, + "unclip_ratio": { + "default": 2.3, + "title": "Unclip Ratio", + "type": "number" + } + }, + "title": "DetectorConfig", + "type": "object" + }, + "Direction": { + "enum": [ + "auto", + "horizontal", + "vertical" + ], + "title": "Direction", + 
"type": "string" + }, + "InpaintPrecision": { + "enum": [ + "fp32", + "fp16", + "bf16" + ], + "title": "InpaintPrecision", + "type": "string" + }, + "Inpainter": { + "enum": [ + "default", + "lama_large", + "lama_mpe", + "sd", + "none", + "original" + ], + "title": "Inpainter", + "type": "string" + }, + "InpainterConfig": { + "properties": { + "inpainter": { + "$ref": "#/$defs/Inpainter", + "default": "none" + }, + "inpainting_size": { + "default": 2048, + "title": "Inpainting Size", + "type": "integer" + }, + "inpainting_precision": { + "$ref": "#/$defs/InpaintPrecision", + "default": "fp32" + } + }, + "title": "InpainterConfig", + "type": "object" + }, + "Ocr": { + "enum": [ + "32px", + "48px", + "48px_ctc", + "mocr" + ], + "title": "Ocr", + "type": "string" + }, + "OcrConfig": { + "properties": { + "use_mocr_merge": { + "default": false, + "title": "Use Mocr Merge", + "type": "boolean" + }, + "ocr": { + "$ref": "#/$defs/Ocr", + "default": "48px" + }, + "min_text_length": { + "default": 0, + "title": "Min Text Length", + "type": "integer" + }, + "ignore_bubble": { + "default": 0, + "title": "Ignore Bubble", + "type": "integer" + } + }, + "title": "OcrConfig", + "type": "object" + }, + "RenderConfig": { + "properties": { + "renderer": { + "$ref": "#/$defs/Renderer", + "default": "default" + }, + "alignment": { + "$ref": "#/$defs/Alignment", + "default": "auto" + }, + "disable_font_border": { + "default": false, + "title": "Disable Font Border", + "type": "boolean" + }, + "font_size_offset": { + "default": 0, + "title": "Font Size Offset", + "type": "integer" + }, + "font_size_minimum": { + "default": -1, + "title": "Font Size Minimum", + "type": "integer" + }, + "direction": { + "$ref": "#/$defs/Direction", + "default": "auto" + }, + "uppercase": { + "default": false, + "title": "Uppercase", + "type": "boolean" + }, + "lowercase": { + "default": false, + "title": "Lowercase", + "type": "boolean" + }, + "gimp_font": { + "default": "Sans-serif", + "title": "Gimp 
Font", + "type": "string" + }, + "no_hyphenation": { + "default": false, + "title": "No Hyphenation", + "type": "boolean" + }, + "font_color": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Font Color" + }, + "line_spacing": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Line Spacing" + }, + "font_size": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Font Size" + } + }, + "title": "RenderConfig", + "type": "object" + }, + "Renderer": { + "enum": [ + "default", + "manga2eng", + "none" + ], + "title": "Renderer", + "type": "string" + }, + "Translator": { + "enum": [ + "youdao", + "baidu", + "deepl", + "papago", + "caiyun", + "gpt3", + "gpt3.5", + "gpt4", + "none", + "original", + "sakura", + "deepseek", + "groq", + "offline", + "nllb", + "nllb_big", + "sugoi", + "jparacrawl", + "jparacrawl_big", + "m2m100", + "m2m100_big", + "mbart50", + "qwen2", + "qwen2_big" + ], + "title": "Translator", + "type": "string" + }, + "TranslatorConfig": { + "properties": { + "translator": { + "$ref": "#/$defs/Translator", + "default": "sugoi" + }, + "target_lang": { + "default": "ENG", + "title": "Target Lang", + "type": "string" + }, + "no_text_lang_skip": { + "default": false, + "title": "No Text Lang Skip", + "type": "boolean" + }, + "skip_lang": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Skip Lang" + }, + "gpt_config": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Gpt Config" + }, + "translator_chain": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Translator Chain" + }, + "selective_translation": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Selective Translation" + } + }, + 
"title": "TranslatorConfig", + "type": "object" + }, + "UpscaleConfig": { + "properties": { + "upscaler": { + "$ref": "#/$defs/Upscaler", + "default": "esrgan" + }, + "revert_upscaling": { + "default": false, + "title": "Revert Upscaling", + "type": "boolean" + }, + "upscale_ratio": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Upscale Ratio" + } + }, + "title": "UpscaleConfig", + "type": "object" + }, + "Upscaler": { + "enum": [ + "waifu2x", + "esrgan", + "4xultrasharp" + ], + "title": "Upscaler", + "type": "string" + } + }, + "properties": { + "filter_text": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Filter Text" + }, + "render": { + "$ref": "#/$defs/RenderConfig", + "default": { + "renderer": "default", + "alignment": "auto", + "disable_font_border": false, + "font_size_offset": 0, + "font_size_minimum": -1, + "direction": "auto", + "uppercase": false, + "lowercase": false, + "gimp_font": "Sans-serif", + "no_hyphenation": false, + "font_color": null, + "line_spacing": null, + "font_size": null + } + }, + "upscale": { + "$ref": "#/$defs/UpscaleConfig", + "default": { + "upscaler": "esrgan", + "revert_upscaling": false, + "upscale_ratio": null + } + }, + "translator": { + "$ref": "#/$defs/TranslatorConfig", + "default": { + "translator": "sugoi", + "target_lang": "ENG", + "no_text_lang_skip": false, + "skip_lang": null, + "gpt_config": null, + "translator_chain": null, + "selective_translation": null + } + }, + "detector": { + "$ref": "#/$defs/DetectorConfig", + "default": { + "detector": "default", + "detection_size": 1536, + "text_threshold": 0.5, + "det_rotate": false, + "det_auto_rotate": false, + "det_invert": false, + "det_gamma_correct": false, + "box_threshold": 0.7, + "unclip_ratio": 2.3 + } + }, + "colorizer": { + "$ref": "#/$defs/ColorizerConfig", + "default": { + "colorization_size": 576, + "denoise_sigma": 30, + "colorizer": "none" + 
} + }, + "inpainter": { + "$ref": "#/$defs/InpainterConfig", + "default": { + "inpainter": "none", + "inpainting_size": 2048, + "inpainting_precision": "fp32" + } + }, + "ocr": { + "$ref": "#/$defs/OcrConfig", + "default": { + "use_mocr_merge": false, + "ocr": "48px", + "min_text_length": 0, + "ignore_bubble": 0 + } + }, + "kernel_size": { + "default": 3, + "title": "Kernel Size", + "type": "integer" + }, + "mask_dilation_offset": { + "default": 0, + "title": "Mask Dilation Offset", + "type": "integer" + } + }, + "title": "Config", + "type": "object" +} +``` ### GPT Config Reference Used by the `--gpt-config` argument. @@ -622,125 +1017,7 @@ Limitations: ### Api Documentation -
-API V2 -
- -```bash -# use `--mode api` to start a web server. -$ python -m manga_translator -v --mode api --use-gpu -# the api will be serving on http://127.0.0.1:5003 -``` - -Api is accepting json(post) and multipart. -
-Api endpoints are `/colorize_translate`, `/inpaint_translate`, `/translate`, `/get_text`. -
-Valid arguments for the api are: - -``` -// These are taken from args.py. For more info see README.md -detector: String -ocr: String -inpainter: String -upscaler: String -translator: String -target_language: String -upscale_ratio: Integer -translator_chain: String -selective_translation: String -attempts: Integer -detection_size: Integer // 1024 => 'S', 1536 => 'M', 2048 => 'L', 2560 => 'X' -text_threshold: Float -box_threshold: Float -unclip_ratio: Float -inpainting_size: Integer -det_rotate: Bool -det_auto_rotate: Bool -det_invert: Bool -det_gamma_correct: Bool -min_text_length: Integer -colorization_size: Integer -denoise_sigma: Integer -mask_dilation_offset: Integer -ignore_bubble: Integer -gpt_config: String -filter_text: String -overlay_type: String - -// These are api specific args -direction: String // {'auto', 'h', 'v'} -base64Images: String //Image in base64 format -image: Multipart // image upload from multipart -url: String // an url string -``` - -
- -Manual translation replaces machine translation with human translators. -Basic manual translation demo can be found at when using web mode. -
-API -
- -Two modes of translation service are provided by the demo: synchronous mode and asynchronous mode.\ -In synchronous mode your HTTP POST request will finish once the translation task is finished.\ -In asynchronous mode your HTTP POST request will respond with a `task_id` immediately, you can use this `task_id` to -poll for translation task state. - -#### Synchronous mode - -1. POST a form request with form data `file:` to -2. Wait for response -3. Use the resultant `task_id` to find translation result in `result/` directory, e.g. using Nginx to expose `result/` - -#### Asynchronous mode - -1. POST a form request with form data `file:` to -2. Acquire translation `task_id` -3. Poll for translation task state by posting JSON `{"taskid": }` to -4. Translation is finished when the resultant state is either `finished`, `error` or `error-lang` -5. Find translation result in `result/` directory, e.g. using Nginx to expose `result/` - -#### Manual translation - -POST a form request with form data `file:` to -and wait for response. - -You will obtain a JSON response like this: - -```json -{ - "task_id": "12c779c9431f954971cae720eb104499", - "status": "pending", - "trans_result": [ - { - "s": "☆上司来ちゃった……", - "t": "" - } - ] -} -``` - -Fill in translated texts: - -```json -{ - "task_id": "12c779c9431f954971cae720eb104499", - "status": "pending", - "trans_result": [ - { - "s": "☆上司来ちゃった……", - "t": "☆Boss is here..." - } - ] -} -``` - -Post translated JSON to and wait for response.\ -Then you can find the translation result in `result/` directory, e.g. using Nginx to expose `result/`. - -
+Read openapi docs: `127.0.0.1:5003/docs` ## Next steps diff --git a/README_CN.md b/README_CN.md index 46f674e50..e4a9a70be 100644 --- a/README_CN.md +++ b/README_CN.md @@ -106,132 +106,19 @@ FIL: Filipino (Tagalog) ## 选项 ```text --h, --help show this help message and exit --m, --mode {demo,batch,web,web_client,ws,api} - Run demo in single image demo mode (demo), batch - translation mode (batch), web service mode (web) --i, --input INPUT [INPUT ...] Path to an image file if using demo mode, or path to an - image folder if using batch mode --o, --dest DEST Path to the destination folder for translated images in - batch mode --l, --target-lang {CHS,CHT,CSY,NLD,ENG,FRA,DEU,HUN,ITA,JPN,KOR,PLK,PTB,ROM,RUS,ESP,TRK,UKR,VIN,ARA,CNR,SRP,HRV,THA,IND,FIL} - Destination language --v, --verbose Print debug info and save intermediate images in result - folder --f, --format {png,webp,jpg,xcf,psd,pdf} Output format of the translation. ---attempts ATTEMPTS Retry attempts on encountered error. -1 means infinite - times. ---ignore-errors Skip image on encountered error. ---overwrite Overwrite already translated images in batch mode. ---skip-no-text Skip image without text (Will not be saved). ---model-dir MODEL_DIR Model directory (by default ./models in project root) ---use-gpu Turn on/off gpu (automatic selection between mps or cuda) ---use-gpu-limited Turn on/off gpu (excluding offline translator) ---detector {default,ctd,craft,none} Text detector used for creating a text mask from an - image, DO NOT use craft for manga, it's not designed - for it ---ocr {32px,48px,48px_ctc,mocr} Optical character recognition (OCR) model to use ---use-mocr-merge Use bbox merge when Manga OCR inference. ---inpainter {default,lama_large,lama_mpe,sd,none,original} - Inpainting model to use ---upscaler {waifu2x,esrgan,4xultrasharp} Upscaler to use. --upscale-ratio has to be set for it - to take effect ---upscale-ratio UPSCALE_RATIO Image upscale ratio applied before detection. 
Can - improve text detection. ---colorizer {mc2} Colorization model to use. ---translator {google,youdao,baidu,deepl,papago,caiyun,gpt3,gpt3.5,gpt4,deepseek,none,original,offline,nllb,nllb_big,sugoi,jparacrawl,jparacrawl_big,m2m100,sakura} - Language translator to use ---translator-chain TRANSLATOR_CHAIN Output of one translator goes in another. Example: - --translator-chain "google:JPN;sugoi:ENG". ---selective-translation SELECTIVE_TRANSLATION - Select a translator based on detected language in - image. Note the first translation service acts as - default if the language isn't defined. Example: - --translator-chain "google:JPN;sugoi:ENG". ---revert-upscaling Downscales the previously upscaled image after - translation back to original size (Use with --upscale- - ratio). ---detection-size DETECTION_SIZE Size of image used for detection ---det-rotate Rotate the image for detection. Might improve - detection. ---det-auto-rotate Rotate the image for detection to prefer vertical - textlines. Might improve detection. ---det-invert Invert the image colors for detection. Might improve - detection. ---det-gamma-correct Applies gamma correction for detection. Might improve - detection. ---unclip-ratio UNCLIP_RATIO How much to extend text skeleton to form bounding box ---box-threshold BOX_THRESHOLD Threshold for bbox generation ---text-threshold TEXT_THRESHOLD Threshold for text detection ---min-text-length MIN_TEXT_LENGTH Minimum text length of a text region ---no-text-lang-skip Dont skip text that is seemingly already in the target - language. ---inpainting-size INPAINTING_SIZE Size of image used for inpainting (too large will - result in OOM) ---inpainting-precision {fp32,fp16,bf16} Inpainting precision for lama, use bf16 while you can. ---colorization-size COLORIZATION_SIZE Size of image used for colorization. Set to -1 to use - full image size ---denoise-sigma DENOISE_SIGMA Used by colorizer and affects color strength, range - from 0 to 255 (default 30). 
-1 turns it off. ---mask-dilation-offset MASK_DILATION_OFFSET By how much to extend the text mask to remove left-over - text pixels of the original image. ---font-size FONT_SIZE Use fixed font size for rendering ---font-size-offset FONT_SIZE_OFFSET Offset font size by a given amount, positive number - increase font size and vice versa ---font-size-minimum FONT_SIZE_MINIMUM Minimum output font size. Default is - image_sides_sum/200 ---font-color FONT_COLOR Overwrite the text fg/bg color detected by the OCR - model. Use hex string without the "#" such as FFFFFF - for a white foreground or FFFFFF:000000 to also have a - black background around the text. ---line-spacing LINE_SPACING Line spacing is font_size * this value. Default is 0.01 - for horizontal text and 0.2 for vertical. ---force-horizontal Force text to be rendered horizontally ---force-vertical Force text to be rendered vertically ---align-left Align rendered text left ---align-center Align rendered text centered ---align-right Align rendered text right ---uppercase Change text to uppercase ---lowercase Change text to lowercase ---no-hyphenation If renderer should be splitting up words using a hyphen - character (-) ---manga2eng Render english text translated from manga with some - additional typesetting. Ignores some other argument - options ---gpt-config GPT_CONFIG Path to GPT config file, more info in README ---use-mtpe Turn on/off machine translation post editing (MTPE) on - the command line (works only on linux right now) ---save-text Save extracted text and translations into a text file. ---save-text-file SAVE_TEXT_FILE Like --save-text but with a specified file path. ---filter-text FILTER_TEXT Filter regions by their text with a regex. Example - usage: --text-filter ".*badtext.*" ---pre-dict FILE_PATH Path to the pre-translation dictionary file. One entry per line, - Comments can be added with `#` and `//`. 
- usage: //Example - dog cat #Example - abc def - abc ---post-dict FILE_PATH Path to the post-translation dictionary file. Same as above. ---skip-lang Skip translation if source image is one of the provide languages, - use comma to separate multiple languages. Example: JPN,ENG ---prep-manual Prepare for manual typesetting by outputting blank, - inpainted images, plus copies of the original for - reference ---font-path FONT_PATH Path to font file ---gimp-font GIMP_FONT Font family to use for gimp rendering. ---host HOST Used by web module to decide which host to attach to ---port PORT Used by web module to decide which port to attach to ---nonce NONCE Used by web module as secret for securing internal web - server communication ---ws-url WS_URL Server URL for WebSocket mode ---save-quality SAVE_QUALITY Quality of saved JPEG image, range from 0 to 100 with - 100 being best ---ignore-bubble IGNORE_BUBBLE The threshold for ignoring text in non bubble areas, - with valid values ranging from 1 to 50, does not ignore - others. Recommendation 5 to 10. If it is too low, - normal bubble areas may be ignored, and if it is too - large, non bubble areas may be considered normal - bubbles +-h, --help show this help message and exit +-v, --verbose Print debug info and save intermediate images in result folder +--attempts ATTEMPTS Retry attempts on encountered error. -1 means infinite times. +--ignore-errors Skip image on encountered error. 
+--model-dir MODEL_DIR Model directory (by default ./models in project root) +--use-gpu Turn on/off gpu (auto switch between mps and cuda) +--use-gpu-limited Turn on/off gpu (excluding offline translator) +--font-path FONT_PATH Path to font file +--pre-dict PRE_DICT Path to the pre-translation dictionary file +--post-dict POST_DICT Path to the post-translation dictionary file +--kernel-size KERNEL_SIZE Set the convolution kernel size of the text erasure area to + completely clean up text residues +--config-file CONFIG_FILE path to the config file ``` diff --git a/examples/config-example.json b/examples/config-example.json new file mode 100644 index 000000000..08c7b5008 --- /dev/null +++ b/examples/config-example.json @@ -0,0 +1,61 @@ +{ + "filter_text": null, + "render": { + "renderer": "default", + "alignment": "auto", + "disable_font_border": false, + "font_size_offset": 0, + "font_size_minimum": -1, + "direction": "auto", + "uppercase": false, + "lowercase": false, + "gimp_font": "Sans-serif", + "no_hyphenation": false, + "font_color": null, + "line_spacing": null, + "font_size": null + }, + "upscale": { + "upscaler": "esrgan", + "revert_upscaling": false, + "upscale_ratio": null + }, + "translator": { + "translator": "sugoi", + "target_lang": "ENG", + "no_text_lang_skip": false, + "skip_lang": null, + "gpt_config": null, + "translator_chain": null, + "selective_translation": null + }, + "detector": { + "detector": "default", + "detection_size": 1536, + "text_threshold": 0.5, + "det_rotate": false, + "det_auto_rotate": false, + "det_invert": false, + "det_gamma_correct": false, + "box_threshold": 0.7, + "unclip_ratio": 2.3 + }, + "colorizer": { + "colorization_size": 576, + "denoise_sigma": 30, + "colorizer": "none" + }, + "inpainter": { + "inpainter": "none", + "inpainting_size": 2048, + "inpainting_precision": "fp32" + }, + "ocr": { + "use_mocr_merge": false, + "ocr": "48px", + "min_text_length": 0, + "ignore_bubble": 0 + }, + "kernel_size": 3, + 
"mask_dilation_offset": 0 +} diff --git a/manga_translator/__main__.py b/manga_translator/__main__.py index dca4740a7..4edeec4ff 100644 --- a/manga_translator/__main__.py +++ b/manga_translator/__main__.py @@ -3,6 +3,7 @@ import logging from argparse import Namespace +from manga_translator import Config from .manga_translator import ( set_main_logger, load_dictionary, apply_dictionary, ) @@ -73,6 +74,11 @@ async def dispatch(args: Namespace): from manga_translator.mode.share import MangaShare translator = MangaShare(args_dict) await translator.listen(args_dict) + elif args.mode == 'config-help': + import json + config = Config.schema() + print(json.dumps(config, indent=2)) + if __name__ == '__main__': diff --git a/manga_translator/args.py b/manga_translator/args.py index 472311c6a..172fa1914 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -120,3 +120,5 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser_api.add_argument('--port', default=5003, type=int, help='Port for API service') parser_api.add_argument('--nonce', default=os.getenv('MT_WEB_NONCE', ''), type=str, help='Nonce for securing internal API server communication') parser_api.add_argument("--report", default=None,type=str, help='reports to server to register instance') + +subparsers.add_parser('config-help', help='Print help information for config file') \ No newline at end of file From a458b7fa96340d3944fb9c2b990204ac5a9d0b4d Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 30 Nov 2024 15:33:20 +0100 Subject: [PATCH 69/75] add toml support --- examples/config-example.toml | 50 ++++++++++++++++++++++++++++++++++ manga_translator/mode/local.py | 19 +++++++++++-- 2 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 examples/config-example.toml diff --git a/examples/config-example.toml b/examples/config-example.toml new file mode 100644 index 000000000..a17b10650 --- /dev/null +++ 
b/examples/config-example.toml @@ -0,0 +1,50 @@ +mask_dilation_offset = 0 + +[render] +renderer = "default" +alignment = "auto" +disable_font_border = false +font_size_offset = 0 +font_size_minimum = -1 +direction = "auto" +uppercase = false +lowercase = false +gimp_font = "Sans-serif" +no_hyphenation = false + +[upscale] +upscaler = "esrgan" +revert_upscaling = false + +[translator] +translator = "sugoi" +target_lang = "ENG" +no_text_lang_skip = false + +[detector] +detector = "default" +detection_size = 1536 +text_threshold = 0.5 +det_rotate = false +det_auto_rotate = false +det_invert = false +det_gamma_correct = false +box_threshold = 0.7 +unclip_ratio = 2.3 + +[colorizer] +colorization_size = 576 +denoise_sigma = 30 +colorizer = "none" + +[inpainter] +inpainter = "none" +inpainting_size = 2048 +inpainting_precision = "fp32" + +[ocr] +use_mocr_merge = false +ocr = "48px" +min_text_length = 0 +ignore_bubble = 0 + diff --git a/manga_translator/mode/local.py b/manga_translator/mode/local.py index 564efbc0f..384e7f1fe 100644 --- a/manga_translator/mode/local.py +++ b/manga_translator/mode/local.py @@ -35,7 +35,6 @@ async def translate_path(self, path: str, dest: str = None, params: dict[str, Un dest = os.path.abspath(os.path.expanduser(dest)) if dest else '' params = params or {} config_file_path = params.get("config_file", None) - config_content = "{}" if config_file_path: try: @@ -43,9 +42,23 @@ async def translate_path(self, path: str, dest: str = None, params: dict[str, Un config_content = file.read() except Exception as e: print("Couldnt read file") - pass - config = Config(**json.loads(config_content)) + raise e + config_extension = os.path.splitext(config_file_path)[1].lower() + try: + if config_extension == ".toml": + import tomllib + config_dict = tomllib.loads(config_content) + elif config_extension == ".json": + config_dict = json.loads(config_content) + else: + raise ValueError("Unsupported configuration file format") + except Exception as e: + 
print("Failed to load configuration file") + raise e + config = Config(**config_dict) + else: + config = Config() # Handle format file_ext = params.get('format') if params.get('save_quality', 100) < 100: From cb1a5a305f0797207e9fa58b1ea7b8f5d392c52b Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 30 Nov 2024 17:00:21 +0100 Subject: [PATCH 70/75] issues with main.py when executing with command line --- server/main.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/server/main.py b/server/main.py index 20d3d5833..88081bfdf 100644 --- a/server/main.py +++ b/server/main.py @@ -7,6 +7,9 @@ import sys from argparse import Namespace +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + + from fastapi import FastAPI, Request, HTTPException, Header, UploadFile, File, Form from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import StreamingResponse, HTMLResponse @@ -121,13 +124,15 @@ async def queue_size() -> int: @app.get("/", response_class=HTMLResponse,tags=["ui"]) async def index() -> HTMLResponse: - html_file = Path("index.html") + script_directory = Path(__file__).parent + html_file = script_directory / "index.html" html_content = html_file.read_text() return HTMLResponse(content=html_content) @app.get("/manual", response_class=HTMLResponse, tags=["ui"]) async def manual(): - html_file = Path("manual.html") + script_directory = Path(__file__).parent + html_file = script_directory / "manual.html" html_content = html_file.read_text() return HTMLResponse(content=html_content) From de441a42e42c82c28f1498e0cb8d27f981a0fb83 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 30 Nov 2024 22:21:52 +0100 Subject: [PATCH 71/75] typo --- manga_translator/args.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manga_translator/args.py b/manga_translator/args.py index 
172fa1914..06846b944 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -105,7 +105,7 @@ def _format_action_invocation(self, action: argparse.Action) -> str: g_batch.add_argument('--save-text-file', default='', type=str, help='Like --save-text but with a specified file path.') parser_batch.add_argument('--prep-manual', action='store_true', help='Prepare for manual typesetting by outputting blank, inpainted images, plus copies of the original for reference') parser_batch.add_argument('--save-quality', default=100, type=int, help='Quality of saved JPEG image, range from 0 to 100 with 100 being best') -parser.add_argument('--config-file', default=None, type=str, help='path to the config file') +parser_batch.add_argument('--config-file', default=None, type=str, help='path to the config file') # WebSocket mode parser_ws = subparsers.add_parser('ws', help='Run in WebSocket mode') From 1b441b86756d8565dfc1e8c0c850e8e0c1e5da80 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 30 Nov 2024 22:38:12 +0100 Subject: [PATCH 72/75] why does the order matter? --- manga_translator/args.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/manga_translator/args.py b/manga_translator/args.py index 06846b944..72a717a1c 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -78,6 +78,8 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser = argparse.ArgumentParser(prog='manga_translator', description='Seamlessly translate mangas into a chosen language', formatter_class=HelpFormatter) +subparsers = parser.add_subparsers(dest='mode', required=True, help='Mode of operation') + parser.add_argument('-v', '--verbose', action='store_true', help='Print debug info and save intermediate images in result folder') parser.add_argument('--attempts', default=0, type=int, help='Retry attempts on encountered error. 
-1 means infinite times.') parser.add_argument('--ignore-errors', action='store_true', help='Skip image on encountered error.') @@ -90,8 +92,6 @@ def _format_action_invocation(self, action: argparse.Action) -> str: parser.add_argument('--post-dict', default=None, type=file_path, help='Path to the post-translation dictionary file') parser.add_argument('--kernel-size', default=3, type=int, help='Set the convolution kernel size of the text erasure area to completely clean up text residues') -subparsers = parser.add_subparsers(dest='mode', required=True, help='Mode of operation') - # Batch mode parser_batch = subparsers.add_parser('local', help='Run in batch translation mode') parser_batch.add_argument('-i', '--input', required=True, type=path, nargs='+', help='Path to an image folder') From cb2ed2442e8c33a68a2a850c9fca25674f85672b Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 30 Nov 2024 22:56:10 +0100 Subject: [PATCH 73/75] why does the order matter? 
--- manga_translator/__main__.py | 5 ++++- manga_translator/args.py | 39 +++++++++++++++++++++++++----------- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/manga_translator/__main__.py b/manga_translator/__main__.py index 4edeec4ff..9e4970ba1 100644 --- a/manga_translator/__main__.py +++ b/manga_translator/__main__.py @@ -4,6 +4,7 @@ from argparse import Namespace from manga_translator import Config +from manga_translator.args import parser, reparse from .manga_translator import ( set_main_logger, load_dictionary, apply_dictionary, ) @@ -85,7 +86,9 @@ async def dispatch(args: Namespace): args = None init_logging() try: - args = parser.parse_args() + args, unknown = parser.parse_known_args() + parser.add_argument_group() + args = Namespace(**{**vars(args), **vars(reparse(unknown))}) set_log_level(level=logging.DEBUG if args.verbose else logging.INFO) logger = get_logger(args.mode) set_main_logger(logger) diff --git a/manga_translator/args.py b/manga_translator/args.py index 72a717a1c..5b5a3ec83 100644 --- a/manga_translator/args.py +++ b/manga_translator/args.py @@ -76,22 +76,37 @@ def _format_action_invocation(self, action: argparse.Action) -> str: else: return super()._format_action_invocation(action) +def general_parser(g_parser): + g_parser.add_argument('-v', '--verbose', action='store_true', + help='Print debug info and save intermediate images in result folder') + g_parser.add_argument('--attempts', default=0, type=int, + help='Retry attempts on encountered error. 
-1 means infinite times.') + g_parser.add_argument('--ignore-errors', action='store_true', help='Skip image on encountered error.') + g_parser.add_argument('--model-dir', default=None, type=dir_path, + help='Model directory (by default ./models in project root)') + g = g_parser.add_mutually_exclusive_group() + g.add_argument('--use-gpu', action='store_true', help='Turn on/off gpu (auto switch between mps and cuda)') + g.add_argument('--use-gpu-limited', action='store_true', help='Turn on/off gpu (excluding offline translator)') + g_parser.add_argument('--font-path', default='', type=file_path, help='Path to font file') + g_parser.add_argument('--pre-dict', default=None, type=file_path, help='Path to the pre-translation dictionary file') + g_parser.add_argument('--post-dict', default=None, type=file_path, + help='Path to the post-translation dictionary file') + g_parser.add_argument('--kernel-size', default=3, type=int, + help='Set the convolution kernel size of the text erasure area to completely clean up text residues') + + + +def reparse(arr: list): + p = argparse.ArgumentParser(prog='manga_translator', + description='Seamlessly translate mangas into a chosen language', + formatter_class=HelpFormatter) + general_parser(p) + return p.parse_args(arr) parser = argparse.ArgumentParser(prog='manga_translator', description='Seamlessly translate mangas into a chosen language', formatter_class=HelpFormatter) +general_parser(parser) subparsers = parser.add_subparsers(dest='mode', required=True, help='Mode of operation') -parser.add_argument('-v', '--verbose', action='store_true', help='Print debug info and save intermediate images in result folder') -parser.add_argument('--attempts', default=0, type=int, help='Retry attempts on encountered error. 
-1 means infinite times.') -parser.add_argument('--ignore-errors', action='store_true', help='Skip image on encountered error.') -parser.add_argument('--model-dir', default=None, type=dir_path, help='Model directory (by default ./models in project root)') -g = parser.add_mutually_exclusive_group() -g.add_argument('--use-gpu', action='store_true', help='Turn on/off gpu (auto switch between mps and cuda)') -g.add_argument('--use-gpu-limited', action='store_true', help='Turn on/off gpu (excluding offline translator)') -parser.add_argument('--font-path', default='', type=file_path, help='Path to font file') -parser.add_argument('--pre-dict', default=None, type=file_path, help='Path to the pre-translation dictionary file') -parser.add_argument('--post-dict', default=None, type=file_path, help='Path to the post-translation dictionary file') -parser.add_argument('--kernel-size', default=3, type=int, help='Set the convolution kernel size of the text erasure area to completely clean up text residues') - # Batch mode parser_batch = subparsers.add_parser('local', help='Run in batch translation mode') parser_batch.add_argument('-i', '--input', required=True, type=path, nargs='+', help='Path to an image folder') From 9a19389e73c5abec2d1535ceadbba14f79ec61d1 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sat, 30 Nov 2024 22:58:05 +0100 Subject: [PATCH 74/75] this shouldnt be here --- manga_translator/__main__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/manga_translator/__main__.py b/manga_translator/__main__.py index 9e4970ba1..43a5ca863 100644 --- a/manga_translator/__main__.py +++ b/manga_translator/__main__.py @@ -87,7 +87,6 @@ async def dispatch(args: Namespace): init_logging() try: args, unknown = parser.parse_known_args() - parser.add_argument_group() args = Namespace(**{**vars(args), **vars(reparse(unknown))}) set_log_level(level=logging.DEBUG if args.verbose else logging.INFO) logger = get_logger(args.mode) 
From e979c4386f0dcccf1be175ee86f2704a5ffe34c2 Mon Sep 17 00:00:00 2001 From: frederik-uni <147479464+frederik-uni@users.noreply.github.com> Date: Sun, 1 Dec 2024 01:53:24 +0100 Subject: [PATCH 75/75] fixes #760 --- manga_translator/manga_translator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/manga_translator/manga_translator.py b/manga_translator/manga_translator.py index 7fa85c7b2..324a87fc7 100644 --- a/manga_translator/manga_translator.py +++ b/manga_translator/manga_translator.py @@ -279,7 +279,7 @@ async def _translate(self, config: Config, ctx: Context) -> Context: if self.verbose: inpaint_input_img = await dispatch_inpainting(Inpainter.none, ctx.img_rgb, ctx.mask, config.inpainter,config.inpainter.inpainting_size, - self.using_gpu, self.verbose) + self.device, self.verbose) cv2.imwrite(self._result_path('inpaint_input.png'), cv2.cvtColor(inpaint_input_img, cv2.COLOR_RGB2BGR)) cv2.imwrite(self._result_path('mask_final.png'), ctx.mask) @@ -534,7 +534,7 @@ async def _run_mask_refinement(self, config: Config, ctx: Context): config.mask_dilation_offset, config.ocr.ignore_bubble, self.verbose,self.kernel_size) async def _run_inpainting(self, config: Config, ctx: Context): - return await dispatch_inpainting(config.inpainter.inpainter, ctx.img_rgb, ctx.mask, self.device, + return await dispatch_inpainting(config.inpainter.inpainter, ctx.img_rgb, ctx.mask, config.inpainter, config.inpainter.inpainting_size, self.device, self.verbose) async def _run_text_rendering(self, config: Config, ctx: Context):